1 /*
   2  * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/symbolTable.hpp"
  27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
  28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
  32 #include "gc_implementation/g1/g1Log.hpp"
  33 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
  34 #include "gc_implementation/g1/g1RemSet.hpp"
  35 #include "gc_implementation/g1/heapRegion.inline.hpp"
  36 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
  38 #include "gc_implementation/shared/vmGCOperations.hpp"
  39 #include "gc_implementation/shared/gcTimer.hpp"
  40 #include "gc_implementation/shared/gcTrace.hpp"
  41 #include "gc_implementation/shared/gcTraceTime.hpp"
  42 #include "memory/genOopClosures.inline.hpp"
  43 #include "memory/referencePolicy.hpp"
  44 #include "memory/resourceArea.hpp"
  45 #include "oops/oop.inline.hpp"
  46 #include "runtime/handles.inline.hpp"
  47 #include "runtime/java.hpp"
  48 #include "services/memTracker.hpp"
  49 
  50 // Concurrent marking bit map wrapper
  51 
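     // Note: one bit in the map corresponds to 2^_shifter heap words; covers()
     // below checks that _bm.size() * ((size_t)1 << _shifter) == _bmWordSize.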
  52 CMBitMapRO::CMBitMapRO(int shifter) :
  53   _bm(),
  54   _shifter(shifter) {
  55   _bmStartWord = 0;
  56   _bmWordSize = 0;
  57 }
  58 
  59 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  60                                                HeapWord* limit) const {
  61   // First we must round addr *up* to a possible object boundary.
  62   addr = (HeapWord*)align_size_up((intptr_t)addr,
  63                                   HeapWordSize << _shifter);
  64   size_t addrOffset = heapWordToOffset(addr);
  65   if (limit == NULL) {
  66     limit = _bmStartWord + _bmWordSize;
  67   }
  68   size_t limitOffset = heapWordToOffset(limit);
  69   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  70   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  71   assert(nextAddr >= addr, "get_next_one postcondition");
  72   assert(nextAddr == limit || isMarked(nextAddr),
  73          "get_next_one postcondition");
  74   return nextAddr;
  75 }
  76 
  77 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  78                                                  HeapWord* limit) const {
  79   size_t addrOffset = heapWordToOffset(addr);
  80   if (limit == NULL) {
  81     limit = _bmStartWord + _bmWordSize;
  82   }
  83   size_t limitOffset = heapWordToOffset(limit);
  84   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  85   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  86   assert(nextAddr >= addr, "get_next_one postcondition");
  87   assert(nextAddr == limit || !isMarked(nextAddr),
  88          "get_next_one postcondition");
  89   return nextAddr;
  90 }
  91 
  92 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  93   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  94   return (int) (diff >> _shifter);
  95 }
  96 
  97 #ifndef PRODUCT
  98 bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
  99   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 100   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
 101          "size inconsistency");
 102   return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
 103          _bmWordSize  == heap_rs.size()>>LogHeapWordSize;
 104 }
 105 #endif
 106 
 107 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
 108   _bm.print_on_error(st, prefix);
 109 }
 110 
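     // The backing store needs one bit per 2^_shifter heap words, i.e.
     // (_bmWordSize >> (_shifter + LogBitsPerByte)) bytes; one extra byte is
     // added and the result is passed through allocation_align_size_up().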
 111 bool CMBitMap::allocate(ReservedSpace heap_rs) {
 112   _bmStartWord = (HeapWord*)(heap_rs.base());
 113   _bmWordSize  = heap_rs.size()/HeapWordSize;    // heap_rs.size() is in bytes
 114   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
 115                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
 116   if (!brs.is_reserved()) {
 117     warning("ConcurrentMark marking bit map allocation failure");
 118     return false;
 119   }
 120   MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
 121   // For now we'll just commit all of the bit map up front.
 122   // Later on we'll try to be more parsimonious with swap.
 123   if (!_virtual_space.initialize(brs, brs.size())) {
 124     warning("ConcurrentMark marking bit map backing store failure");
 125     return false;
 126   }
 127   assert(_virtual_space.committed_size() == brs.size(),
 128          "didn't reserve backing store for all of concurrent marking bit map?");
 129   _bm.set_map((uintptr_t*)_virtual_space.low());
 130   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
 131          _bmWordSize, "inconsistency in bit map sizing");
 132   _bm.set_size(_bmWordSize >> _shifter);
 133   return true;
 134 }
 135 
 136 void CMBitMap::clearAll() {
 137   _bm.clear();
 138   return;
 139 }
 140 
 141 void CMBitMap::markRange(MemRegion mr) {
 142   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 143   assert(!mr.is_empty(), "unexpected empty region");
 144   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 145           ((HeapWord *) mr.end())),
 146          "markRange memory region end is not card aligned");
 147   // convert address range into offset range
 148   _bm.at_put_range(heapWordToOffset(mr.start()),
 149                    heapWordToOffset(mr.end()), true);
 150 }
 151 
 152 void CMBitMap::clearRange(MemRegion mr) {
 153   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 154   assert(!mr.is_empty(), "unexpected empty region");
 155   // convert address range into offset range
 156   _bm.at_put_range(heapWordToOffset(mr.start()),
 157                    heapWordToOffset(mr.end()), false);
 158 }
 159 
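     // Returns the first run of consecutive marked words at or after addr,
     // truncated at end_addr (possibly empty), and clears the corresponding
     // bits in the map.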
 160 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 161                                             HeapWord* end_addr) {
 162   HeapWord* start = getNextMarkedWordAddress(addr);
 163   start = MIN2(start, end_addr);
 164   HeapWord* end   = getNextUnmarkedWordAddress(start);
 165   end = MIN2(end, end_addr);
 166   assert(start <= end, "Consistency check");
 167   MemRegion mr(start, end);
 168   if (!mr.is_empty()) {
 169     clearRange(mr);
 170   }
 171   return mr;
 172 }
 173 
 174 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 175   _base(NULL), _cm(cm)
 176 #ifdef ASSERT
 177   , _drain_in_progress(false)
 178   , _drain_in_progress_yields(false)
 179 #endif
 180 {}
 181 
 182 bool CMMarkStack::allocate(size_t capacity) {
 183   // allocate a stack of the requisite depth
 184   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 185   if (!rs.is_reserved()) {
 186     warning("ConcurrentMark MarkStack allocation failure");
 187     return false;
 188   }
 189   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 190   if (!_virtual_space.initialize(rs, rs.size())) {
 191     warning("ConcurrentMark MarkStack backing store failure");
 192     // Release the virtual memory reserved for the marking stack
 193     rs.release();
 194     return false;
 195   }
 196   assert(_virtual_space.committed_size() == rs.size(),
 197          "Didn't reserve backing store for all of ConcurrentMark stack?");
 198   _base = (oop*) _virtual_space.low();
 199   setEmpty();
 200   _capacity = (jint) capacity;
 201   _saved_index = -1;
 202   _should_expand = false;
 203   NOT_PRODUCT(_max_depth = 0);
 204   return true;
 205 }
 206 
 207 void CMMarkStack::expand() {
 208   // Called during remark if we've overflowed the marking stack during marking.
 209   assert(isEmpty(), "stack should have been emptied while handling overflow");
 210   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 211   // Clear expansion flag
 212   _should_expand = false;
 213   if (_capacity == (jint) MarkStackSizeMax) {
 214     if (PrintGCDetails && Verbose) {
 215       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
 216     }
 217     return;
 218   }
 219   // Double capacity if possible
 220   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
 221   // Do not give up the existing stack until we have managed to
 222   // reserve the doubled capacity that we want.
 223   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 224                                                            sizeof(oop)));
 225   if (rs.is_reserved()) {
 226     // Release the backing store associated with old stack
 227     _virtual_space.release();
 228     // Reinitialize virtual space for new stack
 229     if (!_virtual_space.initialize(rs, rs.size())) {
 230       fatal("Not enough swap for expanded marking stack capacity");
 231     }
 232     _base = (oop*)(_virtual_space.low());
 233     _index = 0;
 234     _capacity = new_capacity;
 235   } else {
 236     if (PrintGCDetails && Verbose) {
 237       // Failed to double the capacity; continue with the current stack.
 238       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
 239                           SIZE_FORMAT"K to " SIZE_FORMAT"K",
 240                           _capacity / K, new_capacity / K);
 241     }
 242   }
 243 }
 244 
 245 void CMMarkStack::set_should_expand() {
 246   // If we're resetting the marking state because of a
 247   // marking stack overflow, record that we should, if
 248   // possible, expand the stack.
 249   _should_expand = _cm->has_overflown();
 250 }
 251 
 252 CMMarkStack::~CMMarkStack() {
 253   if (_base != NULL) {
 254     _base = NULL;
 255     _virtual_space.release();
 256   }
 257 }
 258 
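     // Lock-free push: claim the next slot by advancing _index with a CAS and,
     // on success, store the oop into the claimed slot; otherwise retry. If the
     // stack is full, the push is dropped and the overflow flag is set instead.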
 259 void CMMarkStack::par_push(oop ptr) {
 260   while (true) {
 261     if (isFull()) {
 262       _overflow = true;
 263       return;
 264     }
 265     // Otherwise...
 266     jint index = _index;
 267     jint next_index = index+1;
 268     jint res = Atomic::cmpxchg(next_index, &_index, index);
 269     if (res == index) {
 270       _base[index] = ptr;
 271       // Note that we don't maintain this atomically.  We could, but it
 272       // doesn't seem necessary.
 273       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 274       return;
 275     }
 276     // Otherwise, we need to try again.
 277   }
 278 }
 279 
 280 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 281   while (true) {
 282     if (isFull()) {
 283       _overflow = true;
 284       return;
 285     }
 286     // Otherwise...
 287     jint index = _index;
 288     jint next_index = index + n;
 289     if (next_index > _capacity) {
 290       _overflow = true;
 291       return;
 292     }
 293     jint res = Atomic::cmpxchg(next_index, &_index, index);
 294     if (res == index) {
 295       for (int i = 0; i < n; i++) {
 296         int  ind = index + i;
 297         assert(ind < _capacity, "By overflow test above.");
 298         _base[ind] = ptr_arr[i];
 299       }
 300       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 301       return;
 302     }
 303     // Otherwise, we need to try again.
 304   }
 305 }
 306 
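     // Unlike par_adjoin_arr() above, this multi-element push serializes on
     // ParGCRareEvent_lock rather than using a CAS loop; par_pop_arr() below
     // takes the same lock.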
 307 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 308   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 309   jint start = _index;
 310   jint next_index = start + n;
 311   if (next_index > _capacity) {
 312     _overflow = true;
 313     return;
 314   }
 315   // Otherwise.
 316   _index = next_index;
 317   for (int i = 0; i < n; i++) {
 318     int ind = start + i;
 319     assert(ind < _capacity, "By overflow test above.");
 320     _base[ind] = ptr_arr[i];
 321   }
 322   NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 323 }
 324 
 325 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 326   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 327   jint index = _index;
 328   if (index == 0) {
 329     *n = 0;
 330     return false;
 331   } else {
 332     int k = MIN2(max, index);
 333     jint  new_ind = index - k;
 334     for (int j = 0; j < k; j++) {
 335       ptr_arr[j] = _base[new_ind + j];
 336     }
 337     _index = new_ind;
 338     *n = k;
 339     return true;
 340   }
 341 }
 342 
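     // Drains the stack by popping entries and applying the closure to each
     // object's fields. If yield_after is set, the drain may stop early after a
     // successful yield check, in which case it returns false.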
 343 template<class OopClosureClass>
 344 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 345   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 346          || SafepointSynchronize::is_at_safepoint(),
 347          "Drain recursion must be yield-safe.");
 348   bool res = true;
 349   debug_only(_drain_in_progress = true);
 350   debug_only(_drain_in_progress_yields = yield_after);
 351   while (!isEmpty()) {
 352     oop newOop = pop();
 353     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 354     assert(newOop->is_oop(), "Expected an oop");
 355     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 356            "only grey objects on this stack");
 357     newOop->oop_iterate(cl);
 358     if (yield_after && _cm->do_yield_check()) {
 359       res = false;
 360       break;
 361     }
 362   }
 363   debug_only(_drain_in_progress = false);
 364   return res;
 365 }
 366 
 367 void CMMarkStack::note_start_of_gc() {
 368   assert(_saved_index == -1,
 369          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 370   _saved_index = _index;
 371 }
 372 
 373 void CMMarkStack::note_end_of_gc() {
 374   // This is intentionally a guarantee, instead of an assert. If we
 375   // accidentally add something to the mark stack during GC, it
 376   // will be a correctness issue so it's better if we crash. We'll
 377   // only check this once per GC anyway, so it won't be a performance
 378   // issue in any way.
 379   guarantee(_saved_index == _index,
 380             err_msg("saved index: %d index: %d", _saved_index, _index));
 381   _saved_index = -1;
 382 }
 383 
 384 void CMMarkStack::oops_do(OopClosure* f) {
 385   assert(_saved_index == _index,
 386          err_msg("saved index: %d index: %d", _saved_index, _index));
 387   for (int i = 0; i < _index; i += 1) {
 388     f->do_oop(&_base[i]);
 389   }
 390 }
 391 
 392 bool ConcurrentMark::not_yet_marked(oop obj) const {
 393   return _g1h->is_obj_ill(obj);
 394 }
 395 
 396 CMRootRegions::CMRootRegions() :
 397   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 398   _should_abort(false),  _next_survivor(NULL) { }
 399 
 400 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 401   _young_list = g1h->young_list();
 402   _cm = cm;
 403 }
 404 
 405 void CMRootRegions::prepare_for_scan() {
 406   assert(!scan_in_progress(), "pre-condition");
 407 
 408   // Currently, only survivors can be root regions.
 409   assert(_next_survivor == NULL, "pre-condition");
 410   _next_survivor = _young_list->first_survivor_region();
 411   _scan_in_progress = (_next_survivor != NULL);
 412   _should_abort = false;
 413 }
 414 
 415 HeapRegion* CMRootRegions::claim_next() {
 416   if (_should_abort) {
 417     // If someone has set the should_abort flag, we return NULL to
 418     // force the caller to bail out of their loop.
 419     return NULL;
 420   }
 421 
 422   // Currently, only survivors can be root regions.
 423   HeapRegion* res = _next_survivor;
 424   if (res != NULL) {
 425     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 426     // Read it again in case it changed while we were waiting for the lock.
 427     res = _next_survivor;
 428     if (res != NULL) {
 429       if (res == _young_list->last_survivor_region()) {
 430         // We just claimed the last survivor so store NULL to indicate
 431         // that we're done.
 432         _next_survivor = NULL;
 433       } else {
 434         _next_survivor = res->get_next_young_region();
 435       }
 436     } else {
 437       // Someone else claimed the last survivor while we were trying
 438       // to take the lock, so there is nothing else to do.
 439     }
 440   }
 441   assert(res == NULL || res->is_survivor(), "post-condition");
 442 
 443   return res;
 444 }
 445 
 446 void CMRootRegions::scan_finished() {
 447   assert(scan_in_progress(), "pre-condition");
 448 
 449   // Currently, only survivors can be root regions.
 450   if (!_should_abort) {
 451     assert(_next_survivor == NULL, "we should have claimed all survivors");
 452   }
 453   _next_survivor = NULL;
 454 
 455   {
 456     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 457     _scan_in_progress = false;
 458     RootRegionScan_lock->notify_all();
 459   }
 460 }
 461 
 462 bool CMRootRegions::wait_until_scan_finished() {
 463   if (!scan_in_progress()) return false;
 464 
 465   {
 466     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 467     while (scan_in_progress()) {
 468       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 469     }
 470   }
 471   return true;
 472 }
 473 
 474 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 475 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 476 #endif // _MSC_VER
 477 
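     // Use roughly a quarter of the parallel GC threads, rounded, but at least
     // one (e.g., n_par_threads = 8 gives (8 + 2) / 4 = 2).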
 478 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 479   return MAX2((n_par_threads + 2) / 4, 1U);
 480 }
 481 
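     // Note that two marking bitmaps are kept: _prevMarkBitMap reflects the last
     // completed marking, while _nextMarkBitMap is the one being built by the
     // current cycle (they are initialized to _markBitMap1 / _markBitMap2 below).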
 482 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
 483   _g1h(g1h),
 484   _markBitMap1(log2_intptr(MinObjAlignment)),
 485   _markBitMap2(log2_intptr(MinObjAlignment)),
 486   _parallel_marking_threads(0),
 487   _max_parallel_marking_threads(0),
 488   _sleep_factor(0.0),
 489   _marking_task_overhead(1.0),
 490   _cleanup_sleep_factor(0.0),
 491   _cleanup_task_overhead(1.0),
 492   _cleanup_list("Cleanup List"),
 493   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 494   _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
 495             CardTableModRefBS::card_shift,
 496             false /* in_resource_area*/),
 497 
 498   _prevMarkBitMap(&_markBitMap1),
 499   _nextMarkBitMap(&_markBitMap2),
 500 
 501   _markStack(this),
 502   // _finger set in set_non_marking_state
 503 
 504   _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
 505   // _active_tasks set in set_non_marking_state
 506   // _tasks set inside the constructor
 507   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 508   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 509 
 510   _has_overflown(false),
 511   _concurrent(false),
 512   _has_aborted(false),
 513   _restart_for_overflow(false),
 514   _concurrent_marking_in_progress(false),
 515 
 516   // _verbose_level set below
 517 
 518   _init_times(),
 519   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 520   _cleanup_times(),
 521   _total_counting_time(0.0),
 522   _total_rs_scrub_time(0.0),
 523 
 524   _parallel_workers(NULL),
 525 
 526   _count_card_bitmaps(NULL),
 527   _count_marked_bytes(NULL),
 528   _completed_initialization(false) {
 529   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 530   if (verbose_level < no_verbose) {
 531     verbose_level = no_verbose;
 532   }
 533   if (verbose_level > high_verbose) {
 534     verbose_level = high_verbose;
 535   }
 536   _verbose_level = verbose_level;
 537 
 538   if (verbose_low()) {
 539     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 540                            "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
 541   }
 542 
 543   if (!_markBitMap1.allocate(heap_rs)) {
 544     warning("Failed to allocate first CM bit map");
 545     return;
 546   }
 547   if (!_markBitMap2.allocate(heap_rs)) {
 548     warning("Failed to allocate second CM bit map");
 549     return;
 550   }
 551 
 552   // Create & start a ConcurrentMark thread.
 553   _cmThread = new ConcurrentMarkThread(this);
 554   assert(cmThread() != NULL, "CM Thread should have been created");
 555   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 556   if (_cmThread->osthread() == NULL) {
 557       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 558   }
 559 
 560   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 561   assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
 562   assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");
 563 
 564   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 565   satb_qs.set_buffer_size(G1SATBBufferSize);
 566 
 567   _root_regions.init(_g1h, this);
 568 
 569   if (ConcGCThreads > ParallelGCThreads) {
 570     warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
 571             "than ParallelGCThreads (" UINTX_FORMAT ").",
 572             ConcGCThreads, ParallelGCThreads);
 573     return;
 574   }
 575   if (ParallelGCThreads == 0) {
 576     // if we are not running with any parallel GC threads we will not
 577     // spawn any marking threads either
 578     _parallel_marking_threads =       0;
 579     _max_parallel_marking_threads =   0;
 580     _sleep_factor             =     0.0;
 581     _marking_task_overhead    =     1.0;
 582   } else {
 583     if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 584       // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 585       // if both are set
 586       _sleep_factor             = 0.0;
 587       _marking_task_overhead    = 1.0;
 588     } else if (G1MarkingOverheadPercent > 0) {
 589       // We will calculate the number of parallel marking threads based
 590       // on a target overhead with respect to the soft real-time goal
 591       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 592       double overall_cm_overhead =
 593         (double) MaxGCPauseMillis * marking_overhead /
 594         (double) GCPauseIntervalMillis;
 595       double cpu_ratio = 1.0 / (double) os::processor_count();
 596       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 597       double marking_task_overhead =
 598         overall_cm_overhead / marking_thread_num *
 599                                                 (double) os::processor_count();
 600       double sleep_factor =
 601                          (1.0 - marking_task_overhead) / marking_task_overhead;
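           // For example (with purely illustrative flag values): with
           // G1MarkingOverheadPercent = 10, MaxGCPauseMillis = 200 and
           // GCPauseIntervalMillis = 1000, overall_cm_overhead = 200 * 0.1 / 1000
           // = 0.02; on 8 processors marking_thread_num = ceil(0.02 * 8) = 1,
           // marking_task_overhead = 0.02 * 8 / 1 = 0.16, and sleep_factor =
           // (1 - 0.16) / 0.16 = 5.25, i.e. a marking thread sleeps for 5.25
           // times the vtime it just spent marking (see CMConcurrentMarkingTask).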
 602 
 603       FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
 604       _sleep_factor             = sleep_factor;
 605       _marking_task_overhead    = marking_task_overhead;
 606     } else {
 607       // Calculate the number of parallel marking threads by scaling
 608       // the number of parallel GC threads.
 609       uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
 610       FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
 611       _sleep_factor             = 0.0;
 612       _marking_task_overhead    = 1.0;
 613     }
 614 
 615     assert(ConcGCThreads > 0, "Should have been set");
 616     _parallel_marking_threads = (uint) ConcGCThreads;
 617     _max_parallel_marking_threads = _parallel_marking_threads;
 618 
 619     if (parallel_marking_threads() > 1) {
 620       _cleanup_task_overhead = 1.0;
 621     } else {
 622       _cleanup_task_overhead = marking_task_overhead();
 623     }
 624     _cleanup_sleep_factor =
 625                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 626 
 627 #if 0
 628     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 629     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 630     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 631     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 632     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 633 #endif
 634 
 635     guarantee(parallel_marking_threads() > 0, "peace of mind");
 636     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
 637          _max_parallel_marking_threads, false, true);
 638     if (_parallel_workers == NULL) {
 639       vm_exit_during_initialization("Failed necessary allocation.");
 640     } else {
 641       _parallel_workers->initialize_workers();
 642     }
 643   }
 644 
 645   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 646     uintx mark_stack_size =
 647       MIN2(MarkStackSizeMax,
 648           MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 649     // Verify that the calculated value for MarkStackSize is in range.
 650     // It would be nice to use the private utility routine from Arguments.
 651     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 652       warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
 653               "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
 654               mark_stack_size, (uintx) 1, MarkStackSizeMax);
 655       return;
 656     }
 657     FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
 658   } else {
 659     // Verify MarkStackSize is in range.
 660     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 661       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 662         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 663           warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
 664                   "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
 665                   MarkStackSize, (uintx) 1, MarkStackSizeMax);
 666           return;
 667         }
 668       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 669         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 670           warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
 671                   " or for MarkStackSizeMax (" UINTX_FORMAT ")",
 672                   MarkStackSize, MarkStackSizeMax);
 673           return;
 674         }
 675       }
 676     }
 677   }
 678 
 679   if (!_markStack.allocate(MarkStackSize)) {
 680     warning("Failed to allocate CM marking stack");
 681     return;
 682   }
 683 
 684   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 685   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 686 
 687   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 688   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 689 
 690   BitMap::idx_t card_bm_size = _card_bm.size();
 691 
 692   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 693   _active_tasks = _max_worker_id;
 694 
 695   size_t max_regions = (size_t) _g1h->max_regions();
 696   for (uint i = 0; i < _max_worker_id; ++i) {
 697     CMTaskQueue* task_queue = new CMTaskQueue();
 698     task_queue->initialize();
 699     _task_queues->register_queue(i, task_queue);
 700 
 701     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 702     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 703 
 704     _tasks[i] = new CMTask(i, this,
 705                            _count_marked_bytes[i],
 706                            &_count_card_bitmaps[i],
 707                            task_queue, _task_queues);
 708 
 709     _accum_task_vtime[i] = 0.0;
 710   }
 711 
 712   // Calculate the card number for the bottom of the heap. Used
 713   // in biasing indexes into the accounting card bitmaps.
 714   _heap_bottom_card_num =
 715     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 716                                 CardTableModRefBS::card_shift);
 717 
 718   // Clear all the liveness counting data
 719   clear_all_count_data();
 720 
 721   // so that the call below can read a sensible value
 722   _heap_start = (HeapWord*) heap_rs.base();
 723   set_non_marking_state();
 724   _completed_initialization = true;
 725 }
 726 
 727 void ConcurrentMark::update_g1_committed(bool force) {
 728   // If concurrent marking is not in progress, then we do not need to
 729   // update _heap_end.
 730   if (!concurrent_marking_in_progress() && !force) return;
 731 
 732   MemRegion committed = _g1h->g1_committed();
 733   assert(committed.start() == _heap_start, "start shouldn't change");
 734   HeapWord* new_end = committed.end();
 735   if (new_end > _heap_end) {
 736     // The heap has been expanded.
 737 
 738     _heap_end = new_end;
 739   }
 740   // Notice that the heap can also shrink. However, this only happens
 741   // during a Full GC (at least currently) and the entire marking
 742   // phase will bail out and the task will not be restarted. So, let's
 743   // do nothing.
 744 }
 745 
 746 void ConcurrentMark::reset() {
 747   // Starting values for these two. This should be called in a STW
 748   // phase. CM will be notified of any future g1_committed expansions;
 749   // these will happen at the end of evacuation pauses, when tasks are
 750   // inactive.
 751   MemRegion committed = _g1h->g1_committed();
 752   _heap_start = committed.start();
 753   _heap_end   = committed.end();
 754 
 755   // Separated the asserts so that we know which one fires.
 756   assert(_heap_start != NULL, "heap bounds should look ok");
 757   assert(_heap_end != NULL, "heap bounds should look ok");
 758   assert(_heap_start < _heap_end, "heap bounds should look ok");
 759 
 760   // Reset all the marking data structures and any necessary flags
 761   reset_marking_state();
 762 
 763   if (verbose_low()) {
 764     gclog_or_tty->print_cr("[global] resetting");
 765   }
 766 
 767   // We do reset all of them, since different phases will use
 768   // different numbers of active threads. So, it's easiest to have all
 769   // of them ready.
 770   for (uint i = 0; i < _max_worker_id; ++i) {
 771     _tasks[i]->reset(_nextMarkBitMap);
 772   }
 773 
 774   // We need this to make sure that the flag is on during the evacuation
 775   // pause that the initial mark is piggy-backed on.
 776   set_concurrent_marking_in_progress();
 777 }
 778 
 779 
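     // Resets the global mark stack, the per-worker task queues and the global
     // finger. The overflow flag is cleared only when clear_overflow is true;
     // otherwise it is expected to still be set (as when we are restarting
     // concurrent marking after a mark stack overflow).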
 780 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 781   _markStack.set_should_expand();
 782   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 783   if (clear_overflow) {
 784     clear_has_overflown();
 785   } else {
 786     assert(has_overflown(), "pre-condition");
 787   }
 788   _finger = _heap_start;
 789 
 790   for (uint i = 0; i < _max_worker_id; ++i) {
 791     CMTaskQueue* queue = _task_queues->queue(i);
 792     queue->set_empty();
 793   }
 794 }
 795 
 796 void ConcurrentMark::set_concurrency(uint active_tasks) {
 797   assert(active_tasks <= _max_worker_id, "we should not have more");
 798 
 799   _active_tasks = active_tasks;
 800   // Need to update the three data structures below according to the
 801   // number of active threads for this phase.
 802   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 803   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 804   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 805 }
 806 
 807 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 808   set_concurrency(active_tasks);
 809 
 810   _concurrent = concurrent;
 811   // We propagate this to all tasks, not just the active ones.
 812   for (uint i = 0; i < _max_worker_id; ++i)
 813     _tasks[i]->set_concurrent(concurrent);
 814 
 815   if (concurrent) {
 816     set_concurrent_marking_in_progress();
 817   } else {
 818     // We currently assume that the concurrent flag has been set to
 819     // false before we start remark. At this point we should also be
 820     // in a STW phase.
 821     assert(!concurrent_marking_in_progress(), "invariant");
 822     assert(out_of_regions(),
 823            err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
 824                    p2i(_finger), p2i(_heap_end)));
 825     update_g1_committed(true);
 826   }
 827 }
 828 
 829 void ConcurrentMark::set_non_marking_state() {
 830   // We set the global marking state to some default values when we're
 831   // not doing marking.
 832   reset_marking_state();
 833   _active_tasks = 0;
 834   clear_concurrent_marking_in_progress();
 835 }
 836 
 837 ConcurrentMark::~ConcurrentMark() {
 838   // The ConcurrentMark instance is never freed.
 839   ShouldNotReachHere();
 840 }
 841 
 842 void ConcurrentMark::clearNextBitmap() {
 843   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 844   G1CollectorPolicy* g1p = g1h->g1_policy();
 845 
 846   // Make sure that the concurrent mark thread still appears to be in
 847   // the current cycle.
 848   guarantee(cmThread()->during_cycle(), "invariant");
 849 
 850   // We are finishing up the current cycle by clearing the next
 851   // marking bitmap and getting it ready for the next cycle. During
 852   // this time no other cycle can start. So, let's make sure that this
 853   // is the case.
 854   guarantee(!g1h->mark_in_progress(), "invariant");
 855 
 856   // clear the mark bitmap (no grey objects to start with).
 857   // We need to do this in chunks and offer to yield in between
 858   // each chunk.
 859   HeapWord* start  = _nextMarkBitMap->startWord();
 860   HeapWord* end    = _nextMarkBitMap->endWord();
 861   HeapWord* cur    = start;
 862   size_t chunkSize = M;
 863   while (cur < end) {
 864     HeapWord* next = cur + chunkSize;
 865     if (next > end) {
 866       next = end;
 867     }
 868     MemRegion mr(cur,next);
 869     _nextMarkBitMap->clearRange(mr);
 870     cur = next;
 871     do_yield_check();
 872 
 873     // Repeat the asserts from above. We'll do them as asserts here to
 874     // minimize their overhead on the product. However, we'll have
 875     // them as guarantees at the beginning / end of the bitmap
 876     // clearing to get some checking in the product.
 877     assert(cmThread()->during_cycle(), "invariant");
 878     assert(!g1h->mark_in_progress(), "invariant");
 879   }
 880 
 881   // Clear the liveness counting data
 882   clear_all_count_data();
 883 
 884   // Repeat the asserts from above.
 885   guarantee(cmThread()->during_cycle(), "invariant");
 886   guarantee(!g1h->mark_in_progress(), "invariant");
 887 }
 888 
 889 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 890 public:
 891   bool doHeapRegion(HeapRegion* r) {
 892     if (!r->continuesHumongous()) {
 893       r->note_start_of_marking();
 894     }
 895     return false;
 896   }
 897 };
 898 
 899 void ConcurrentMark::checkpointRootsInitialPre() {
 900   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 901   G1CollectorPolicy* g1p = g1h->g1_policy();
 902 
 903   _has_aborted = false;
 904 
 905 #ifndef PRODUCT
 906   if (G1PrintReachableAtInitialMark) {
 907     print_reachable("at-cycle-start",
 908                     VerifyOption_G1UsePrevMarking, true /* all */);
 909   }
 910 #endif
 911 
 912   // Initialize marking structures. This has to be done in a STW phase.
 913   reset();
 914 
 915   // For each region note start of marking.
 916   NoteStartOfMarkHRClosure startcl;
 917   g1h->heap_region_iterate(&startcl);
 918 }
 919 
 920 
 921 void ConcurrentMark::checkpointRootsInitialPost() {
 922   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 923 
 924   // If we force an overflow during remark, the remark operation will
 925   // actually abort and we'll restart concurrent marking. If we always
 926   // force an overflow during remark we'll never actually complete the
 927   // marking phase. So, we initialize this here, at the start of the
 928   // cycle, so that the remaining overflow count will decrease at
 929   // every remark and we'll eventually not need to force one.
 930   force_overflow_stw()->init();
 931 
 932   // Start Concurrent Marking weak-reference discovery.
 933   ReferenceProcessor* rp = g1h->ref_processor_cm();
 934   // enable ("weak") refs discovery
 935   rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
 936   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 937 
 938   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 939   // This is the start of the marking cycle; we expect all
 940   // threads to have SATB queues with active set to false.
 941   satb_mq_set.set_active_all_threads(true, /* new active value */
 942                                      false /* expected_active */);
 943 
 944   _root_regions.prepare_for_scan();
 945 
 946   // update_g1_committed() will be called at the end of an evac pause
 947   // when marking is on. So, it's also called at the end of the
 948   // initial-mark pause to update the heap end, if the heap expands
 949   // during it. No need to call it here.
 950 }
 951 
 952 /*
 953  * Notice that in the next two methods, we actually leave the STS
 954  * during the barrier sync and join it immediately afterwards. If we
 955  * do not do this, the following deadlock can occur: one thread could
 956  * be in the barrier sync code, waiting for the other thread to also
 957  * sync up, whereas another one could be trying to yield, while also
 958  * waiting for the other threads to sync up too.
 959  *
 960  * Note, however, that this code is also used during remark and in
 961  * this case we should not attempt to leave / enter the STS, otherwise
 962  * we'll either hit an assert (debug / fastdebug) or deadlock
 963  * (product). So we should only leave / enter the STS if we are
 964  * operating concurrently.
 965  *
 966  * Because the thread that does the sync barrier has left the STS, it
 967  * is possible that a Full GC or an evacuation pause could occur while it
 968  * is blocked at the barrier. This is actually safe, since entering the sync
 969  * barrier is one of the last things do_marking_step() does, and it
 970  * doesn't manipulate any data structures afterwards.
 971  */
 972 
 973 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 974   if (verbose_low()) {
 975     gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
 976   }
 977 
 978   if (concurrent()) {
 979     SuspendibleThreadSet::leave();
 980   }
 981 
 982   bool barrier_aborted = !_first_overflow_barrier_sync.enter();
 983 
 984   if (concurrent()) {
 985     SuspendibleThreadSet::join();
 986   }
 987   // at this point everyone should have synced up and not be doing any
 988   // more work
 989 
 990   if (verbose_low()) {
 991     if (barrier_aborted) {
 992       gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
 993     } else {
 994       gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
 995     }
 996   }
 997 
 998   if (barrier_aborted) {
 999     // If the barrier aborted we ignore the overflow condition and
1000     // just abort the whole marking phase as quickly as possible.
1001     return;
1002   }
1003 
1004   // If we're executing the concurrent phase of marking, reset the marking
1005   // state; otherwise the marking state is reset after reference processing,
1006   // during the remark pause.
1007   // If we reset here as a result of an overflow during the remark we will
1008   // see assertion failures from any subsequent set_concurrency_and_phase()
1009   // calls.
1010   if (concurrent()) {
1011     // let the task associated with worker 0 do this
1012     if (worker_id == 0) {
1013       // task 0 is responsible for clearing the global data structures
1014       // We should be here because of an overflow. During STW we should
1015       // not clear the overflow flag since we rely on it being true when
1016       // we exit this method to abort the pause and restart concurrent
1017       // marking.
1018       reset_marking_state(true /* clear_overflow */);
1019       force_overflow()->update();
1020 
1021       if (G1Log::fine()) {
1022         gclog_or_tty->date_stamp(PrintGCDateStamps);
1023         gclog_or_tty->stamp(PrintGCTimeStamps);
1024         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
1025       }
1026     }
1027   }
1028 
1029   // after this, each task should reset its own data structures and
1030   // then go into the second barrier
1031 }
1032 
1033 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1034   if (verbose_low()) {
1035     gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
1036   }
1037 
1038   if (concurrent()) {
1039     SuspendibleThreadSet::leave();
1040   }
1041 
1042   bool barrier_aborted = !_second_overflow_barrier_sync.enter();
1043 
1044   if (concurrent()) {
1045     SuspendibleThreadSet::join();
1046   }
1047   // at this point everything should be re-initialized and ready to go
1048 
1049   if (verbose_low()) {
1050     if (barrier_aborted) {
1051       gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
1052     } else {
1053       gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
1054     }
1055   }
1056 }
1057 
1058 #ifndef PRODUCT
1059 void ForceOverflowSettings::init() {
1060   _num_remaining = G1ConcMarkForceOverflow;
1061   _force = false;
1062   update();
1063 }
1064 
1065 void ForceOverflowSettings::update() {
1066   if (_num_remaining > 0) {
1067     _num_remaining -= 1;
1068     _force = true;
1069   } else {
1070     _force = false;
1071   }
1072 }
1073 
1074 bool ForceOverflowSettings::should_force() {
1075   if (_force) {
1076     _force = false;
1077     return true;
1078   } else {
1079     return false;
1080   }
1081 }
1082 #endif // !PRODUCT
1083 
1084 class CMConcurrentMarkingTask: public AbstractGangTask {
1085 private:
1086   ConcurrentMark*       _cm;
1087   ConcurrentMarkThread* _cmt;
1088 
1089 public:
1090   void work(uint worker_id) {
1091     assert(Thread::current()->is_ConcurrentGC_thread(),
1092            "this should only be done by a conc GC thread");
1093     ResourceMark rm;
1094 
1095     double start_vtime = os::elapsedVTime();
1096 
1097     SuspendibleThreadSet::join();
1098 
1099     assert(worker_id < _cm->active_tasks(), "invariant");
1100     CMTask* the_task = _cm->task(worker_id);
1101     the_task->record_start_time();
1102     if (!_cm->has_aborted()) {
1103       do {
1104         double start_vtime_sec = os::elapsedVTime();
1105         double start_time_sec = os::elapsedTime();
1106         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1107 
1108         the_task->do_marking_step(mark_step_duration_ms,
1109                                   true  /* do_termination */,
1110                                   false /* is_serial*/);
1111 
1112         double end_time_sec = os::elapsedTime();
1113         double end_vtime_sec = os::elapsedVTime();
1114         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1115         double elapsed_time_sec = end_time_sec - start_time_sec;
1116         _cm->clear_has_overflown();
1117 
1118         bool ret = _cm->do_yield_check(worker_id);
1119 
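             // If the marking step aborted (e.g., because its time budget
             // elapsed) but marking as a whole has not, throttle this worker:
             // sleep for sleep_factor times the vtime the step consumed,
             // leaving the suspendible thread set so that safepoints are not
             // held up while we sleep.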
1120         jlong sleep_time_ms;
1121         if (!_cm->has_aborted() && the_task->has_aborted()) {
1122           sleep_time_ms =
1123             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1124           SuspendibleThreadSet::leave();
1125           os::sleep(Thread::current(), sleep_time_ms, false);
1126           SuspendibleThreadSet::join();
1127         }
1128         double end_time2_sec = os::elapsedTime();
1129         double elapsed_time2_sec = end_time2_sec - start_time_sec;
1130 
1131 #if 0
1132           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1133                                  "overhead %1.4lf",
1134                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1135                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
1136           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1137                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1138 #endif
1139       } while (!_cm->has_aborted() && the_task->has_aborted());
1140     }
1141     the_task->record_end_time();
1142     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1143 
1144     SuspendibleThreadSet::leave();
1145 
1146     double end_vtime = os::elapsedVTime();
1147     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1148   }
1149 
1150   CMConcurrentMarkingTask(ConcurrentMark* cm,
1151                           ConcurrentMarkThread* cmt) :
1152       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1153 
1154   ~CMConcurrentMarkingTask() { }
1155 };
1156 
1157 // Calculates the number of active workers for a concurrent
1158 // phase.
1159 uint ConcurrentMark::calc_parallel_marking_threads() {
1160   if (G1CollectedHeap::use_parallel_gc_threads()) {
1161     uint n_conc_workers = 0;
1162     if (!UseDynamicNumberOfGCThreads ||
1163         (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1164          !ForceDynamicNumberOfGCThreads)) {
1165       n_conc_workers = max_parallel_marking_threads();
1166     } else {
1167       n_conc_workers =
1168         AdaptiveSizePolicy::calc_default_active_workers(
1169                                      max_parallel_marking_threads(),
1170                                      1, /* Minimum workers */
1171                                      parallel_marking_threads(),
1172                                      Threads::number_of_non_daemon_threads());
1173       // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1174       // that scaling has already gone into "_max_parallel_marking_threads".
1175     }
1176     assert(n_conc_workers > 0, "Always need at least 1");
1177     return n_conc_workers;
1178   }
1179   // If we are not running with any parallel GC threads we will not
1180   // have spawned any marking threads either. Hence the number of
1181   // concurrent workers should be 0.
1182   return 0;
1183 }
1184 
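     // Scans a single root region (currently always a survivor region) by
     // walking its objects from bottom() to top() and applying
     // G1RootRegionScanClosure to each of their reference fields, prefetching
     // ahead by PrefetchScanIntervalInBytes.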
1185 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1186   // Currently, only survivors can be root regions.
1187   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1188   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1189 
1190   const uintx interval = PrefetchScanIntervalInBytes;
1191   HeapWord* curr = hr->bottom();
1192   const HeapWord* end = hr->top();
1193   while (curr < end) {
1194     Prefetch::read(curr, interval);
1195     oop obj = oop(curr);
1196     int size = obj->oop_iterate(&cl);
1197     assert(size == obj->size(), "sanity");
1198     curr += size;
1199   }
1200 }
1201 
1202 class CMRootRegionScanTask : public AbstractGangTask {
1203 private:
1204   ConcurrentMark* _cm;
1205 
1206 public:
1207   CMRootRegionScanTask(ConcurrentMark* cm) :
1208     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1209 
1210   void work(uint worker_id) {
1211     assert(Thread::current()->is_ConcurrentGC_thread(),
1212            "this should only be done by a conc GC thread");
1213 
1214     CMRootRegions* root_regions = _cm->root_regions();
1215     HeapRegion* hr = root_regions->claim_next();
1216     while (hr != NULL) {
1217       _cm->scanRootRegion(hr, worker_id);
1218       hr = root_regions->claim_next();
1219     }
1220   }
1221 };
1222 
1223 void ConcurrentMark::scanRootRegions() {
1224   // scan_in_progress() will have been set to true only if there was
1225   // at least one root region to scan. So, if it's false, we
1226   // should not attempt to do any further work.
1227   if (root_regions()->scan_in_progress()) {
1228     _parallel_marking_threads = calc_parallel_marking_threads();
1229     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1230            "Maximum number of marking threads exceeded");
1231     uint active_workers = MAX2(1U, parallel_marking_threads());
1232 
1233     CMRootRegionScanTask task(this);
1234     if (use_parallel_marking_threads()) {
1235       _parallel_workers->set_active_workers((int) active_workers);
1236       _parallel_workers->run_task(&task);
1237     } else {
1238       task.work(0);
1239     }
1240 
1241     // It's possible that has_aborted() is true here without actually
1242     // aborting the survivor scan earlier. This is OK as it's
1243     // mainly used for sanity checking.
1244     root_regions()->scan_finished();
1245   }
1246 }
1247 
1248 void ConcurrentMark::markFromRoots() {
1249   // we might be tempted to assert that:
1250   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1251   //        "inconsistent argument?");
1252   // However that wouldn't be right, because it's possible that
1253   // a safepoint is indeed in progress as a younger generation
1254   // stop-the-world GC happens even as we mark in this generation.
1255 
1256   _restart_for_overflow = false;
1257   force_overflow_conc()->init();
1258 
1259   // _g1h has _n_par_threads
1260   _parallel_marking_threads = calc_parallel_marking_threads();
1261   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1262     "Maximum number of marking threads exceeded");
1263 
1264   uint active_workers = MAX2(1U, parallel_marking_threads());
1265 
1266   // Parallel task terminator is set in "set_concurrency_and_phase()"
1267   set_concurrency_and_phase(active_workers, true /* concurrent */);
1268 
1269   CMConcurrentMarkingTask markingTask(this, cmThread());
1270   if (use_parallel_marking_threads()) {
1271     _parallel_workers->set_active_workers((int)active_workers);
1272     // Don't set _n_par_threads because it affects MT in process_strong_roots()
1273     // and the decision on that MT processing is made elsewhere.
1274     assert(_parallel_workers->active_workers() > 0, "Should have been set");
1275     _parallel_workers->run_task(&markingTask);
1276   } else {
1277     markingTask.work(0);
1278   }
1279   print_stats();
1280 }
1281 
1282 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1283   // world is stopped at this checkpoint
1284   assert(SafepointSynchronize::is_at_safepoint(),
1285          "world should be stopped");
1286 
1287   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1288 
1289   // If a full collection has happened, we shouldn't do this.
1290   if (has_aborted()) {
1291     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1292     return;
1293   }
1294 
1295   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1296 
1297   if (VerifyDuringGC) {
1298     HandleMark hm;  // handle scope
1299     Universe::heap()->prepare_for_verify();
1300     Universe::verify(VerifyOption_G1UsePrevMarking,
1301                      " VerifyDuringGC:(before)");
1302   }
1303   g1h->check_bitmaps("Remark Start");
1304 
1305   G1CollectorPolicy* g1p = g1h->g1_policy();
1306   g1p->record_concurrent_mark_remark_start();
1307 
1308   double start = os::elapsedTime();
1309 
1310   checkpointRootsFinalWork();
1311 
1312   double mark_work_end = os::elapsedTime();
1313 
1314   weakRefsWork(clear_all_soft_refs);
1315 
1316   if (has_overflown()) {
1317     // Oops.  We overflowed.  Restart concurrent marking.
1318     _restart_for_overflow = true;
1319     if (G1TraceMarkStackOverflow) {
1320       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1321     }
1322 
1323     // Verify the heap w.r.t. the previous marking bitmap.
1324     if (VerifyDuringGC) {
1325       HandleMark hm;  // handle scope
1326       Universe::heap()->prepare_for_verify();
1327       Universe::verify(VerifyOption_G1UsePrevMarking,
1328                        " VerifyDuringGC:(overflow)");
1329     }
1330 
1331     // Clear the marking state because we will be restarting
1332     // marking due to overflowing the global mark stack.
1333     reset_marking_state();
1334   } else {
1335     // Aggregate the per-task counting data that we have accumulated
1336     // while marking.
1337     aggregate_count_data();
1338 
1339     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1340     // We're done with marking.
1341     // This is the end of the marking cycle; we expect all
1342     // threads to have SATB queues with active set to true.
1343     satb_mq_set.set_active_all_threads(false, /* new active value */
1344                                        true /* expected_active */);
1345 
1346     if (VerifyDuringGC) {
1347       HandleMark hm;  // handle scope
1348       Universe::heap()->prepare_for_verify();
1349       Universe::verify(VerifyOption_G1UseNextMarking,
1350                        " VerifyDuringGC:(after)");
1351     }
1352     g1h->check_bitmaps("Remark End");
1353     assert(!restart_for_overflow(), "sanity");
1354     // Completely reset the marking state since marking completed
1355     set_non_marking_state();
1356   }
1357 
1358   // Expand the marking stack, if we have to and if we can.
1359   if (_markStack.should_expand()) {
1360     _markStack.expand();
1361   }
1362 
1363   // Statistics
1364   double now = os::elapsedTime();
1365   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1366   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1367   _remark_times.add((now - start) * 1000.0);
1368 
1369   g1p->record_concurrent_mark_remark_end();
1370 
1371   G1CMIsAliveClosure is_alive(g1h);
1372   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1373 }
1374 
1375 // Base class of the closures that finalize and verify the
1376 // liveness counting data.
1377 class CMCountDataClosureBase: public HeapRegionClosure {
1378 protected:
1379   G1CollectedHeap* _g1h;
1380   ConcurrentMark* _cm;
1381   CardTableModRefBS* _ct_bs;
1382 
1383   BitMap* _region_bm;
1384   BitMap* _card_bm;
1385 
1386   // Takes a region that's not empty (i.e., it has at least one
1387   // live object in it) and sets its corresponding bit on the region
1388   // bitmap to 1. If the region is "starts humongous" it will also set
1389   // to 1 the bits on the region bitmap that correspond to its
1390   // associated "continues humongous" regions.
1391   void set_bit_for_region(HeapRegion* hr) {
1392     assert(!hr->continuesHumongous(), "should have filtered those out");
1393 
1394     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1395     if (!hr->startsHumongous()) {
1396       // Normal (non-humongous) case: just set the bit.
1397       _region_bm->par_at_put(index, true);
1398     } else {
1399       // Starts humongous case: calculate how many regions are part of
1400       // this humongous region and then set the bit range.
1401       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1402       _region_bm->par_at_put_range(index, end_index, true);
1403     }
1404   }
1405 
1406 public:
1407   CMCountDataClosureBase(G1CollectedHeap* g1h,
1408                          BitMap* region_bm, BitMap* card_bm):
1409     _g1h(g1h), _cm(g1h->concurrent_mark()),
1410     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
1411     _region_bm(region_bm), _card_bm(card_bm) { }
1412 };
1413 
1414 // Closure that calculates the # live objects per region. Used
1415 // for verification purposes during the cleanup pause.
1416 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1417   CMBitMapRO* _bm;
1418   size_t _region_marked_bytes;
1419 
1420 public:
1421   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1422                          BitMap* region_bm, BitMap* card_bm) :
1423     CMCountDataClosureBase(g1h, region_bm, card_bm),
1424     _bm(bm), _region_marked_bytes(0) { }
1425 
1426   bool doHeapRegion(HeapRegion* hr) {
1427 
1428     if (hr->continuesHumongous()) {
1429       // We will ignore these here and process them when their
1430       // associated "starts humongous" region is processed (see
1431       // set_bit_for_region()). Note that we cannot rely on their
1432       // associated "starts humongous" region to have its bit set to
1433       // 1 since, due to the region chunking in the parallel region
1434       // iteration, a "continues humongous" region might be visited
1435       // before its associated "starts humongous".
1436       return false;
1437     }
1438 
1439     HeapWord* ntams = hr->next_top_at_mark_start();
1440     HeapWord* start = hr->bottom();
1441 
1442     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1443            err_msg("Preconditions not met - "
1444                    "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1445                    p2i(start), p2i(ntams), p2i(hr->end())));
1446 
1447     // Find the first marked object at or after "start".
1448     start = _bm->getNextMarkedWordAddress(start, ntams);
1449 
1450     size_t marked_bytes = 0;
1451 
1452     while (start < ntams) {
1453       oop obj = oop(start);
1454       int obj_sz = obj->size();
1455       HeapWord* obj_end = start + obj_sz;
1456 
1457       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1458       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1459 
1460       // Note: if we're looking at the last region in the heap, obj_end
1461       // could actually be just beyond the end of the heap; end_idx
1462       // will then correspond to a (non-existent) card that is also
1463       // just beyond the heap.
1464       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1465         // end of object is not card aligned - increment to cover
1466         // all the cards spanned by the object
1467         end_idx += 1;
1468       }
1469 
1470       // Set the bits in the card BM for the cards spanned by this object.
1471       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1472 
1473       // Add the size of this object to the number of marked bytes.
1474       marked_bytes += (size_t)obj_sz * HeapWordSize;
1475 
1476       // Find the next marked object after this one.
1477       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1478     }
1479 
1480     // Mark the allocated-since-marking portion...
1481     HeapWord* top = hr->top();
1482     if (ntams < top) {
1483       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1484       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1485 
1486       // Note: if we're looking at the last region in the heap, top
1487       // could actually be just beyond the end of the heap; end_idx
1488       // will then correspond to a (non-existent) card that is also
1489       // just beyond the heap.
1490       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1491         // top is not card aligned - increment to cover
1492         // all the cards spanned by the range [ntams, top)
1493         end_idx += 1;
1494       }
1495       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1496 
1497       // This definitely means the region has live objects.
1498       set_bit_for_region(hr);
1499     }
1500 
1501     // Update the live region bitmap.
1502     if (marked_bytes > 0) {
1503       set_bit_for_region(hr);
1504     }
1505 
1506     // Set the marked bytes for the current region so that
1507     // it can be queried by a calling verification routine
1508     _region_marked_bytes = marked_bytes;
1509 
1510     return false;
1511   }
1512 
1513   size_t region_marked_bytes() const { return _region_marked_bytes; }
1514 };
1515 
1516 // Heap region closure used for verifying the counting data
1517 // that was accumulated concurrently and aggregated during
1518 // the remark pause. This closure is applied to the heap
1519 // regions during the STW cleanup pause.
1520 
1521 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1522   G1CollectedHeap* _g1h;
1523   ConcurrentMark* _cm;
1524   CalcLiveObjectsClosure _calc_cl;
1525   BitMap* _region_bm;   // Region BM to be verified
1526   BitMap* _card_bm;     // Card BM to be verified
1527   bool _verbose;        // verbose output?
1528 
1529   BitMap* _exp_region_bm; // Expected Region BM values
1530   BitMap* _exp_card_bm;   // Expected card BM values
1531 
1532   int _failures;
1533 
1534 public:
1535   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1536                                 BitMap* region_bm,
1537                                 BitMap* card_bm,
1538                                 BitMap* exp_region_bm,
1539                                 BitMap* exp_card_bm,
1540                                 bool verbose) :
1541     _g1h(g1h), _cm(g1h->concurrent_mark()),
1542     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1543     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1544     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1545     _failures(0) { }
1546 
1547   int failures() const { return _failures; }
1548 
1549   bool doHeapRegion(HeapRegion* hr) {
1550     if (hr->continuesHumongous()) {
1551       // We will ignore these here and process them when their
1552       // associated "starts humongous" region is processed (see
1553       // set_bit_for_region()). Note that we cannot rely on their
1554       // associated "starts humongous" region to have its bit set to
1555       // 1 since, due to the region chunking in the parallel region
1556       // iteration, a "continues humongous" region might be visited
1557       // before its associated "starts humongous".
1558       return false;
1559     }
1560 
1561     int failures = 0;
1562 
1563     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1564     // this region and set the corresponding bits in the expected region
1565     // and card bitmaps.
1566     bool res = _calc_cl.doHeapRegion(hr);
1567     assert(res == false, "should be continuing");
1568 
1569     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1570                     Mutex::_no_safepoint_check_flag);
1571 
1572     // Verify the marked bytes for this region.
1573     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1574     size_t act_marked_bytes = hr->next_marked_bytes();
1575 
1576     // We're not OK if expected marked bytes > actual marked bytes. It means
1577     // we have missed accounting some objects during the actual marking.
1578     if (exp_marked_bytes > act_marked_bytes) {
1579       if (_verbose) {
1580         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1581                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1582                                hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1583       }
1584       failures += 1;
1585     }
1586 
1587     // Verify the bit, for this region, in the actual and expected
1588     // (which was just calculated) region bit maps.
1589     // We're not OK if the bit in the calculated expected region
1590     // bitmap is set and the bit in the actual region bitmap is not.
1591     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1592 
1593     bool expected = _exp_region_bm->at(index);
1594     bool actual = _region_bm->at(index);
1595     if (expected && !actual) {
1596       if (_verbose) {
1597         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1598                                "expected: %s, actual: %s",
1599                                hr->hrs_index(),
1600                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1601       }
1602       failures += 1;
1603     }
1604 
1605     // Verify that the card bit maps for the cards spanned by the current
1606     // region match. We have an error if we have a set bit in the expected
1607     // bit map and the corresponding bit in the actual bitmap is not set.
1608 
1609     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1610     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1611 
1612     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1613       expected = _exp_card_bm->at(i);
1614       actual = _card_bm->at(i);
1615 
1616       if (expected && !actual) {
1617         if (_verbose) {
1618           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1619                                  "expected: %s, actual: %s",
1620                                  hr->hrs_index(), i,
1621                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1622         }
1623         failures += 1;
1624       }
1625     }
1626 
1627     if (failures > 0 && _verbose)  {
1628       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1629                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1630                              HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
1631                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1632     }
1633 
1634     _failures += failures;
1635 
1636     // We could stop iteration over the heap when we
1637     // find the first violating region by returning true.
1638     return false;
1639   }
1640 };
1641 
1642 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1643 protected:
1644   G1CollectedHeap* _g1h;
1645   ConcurrentMark* _cm;
1646   BitMap* _actual_region_bm;
1647   BitMap* _actual_card_bm;
1648 
1649   uint    _n_workers;
1650 
1651   BitMap* _expected_region_bm;
1652   BitMap* _expected_card_bm;
1653 
1654   int  _failures;
1655   bool _verbose;
1656 
1657 public:
1658   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1659                             BitMap* region_bm, BitMap* card_bm,
1660                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1661     : AbstractGangTask("G1 verify final counting"),
1662       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1663       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1664       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1665       _failures(0), _verbose(false),
1666       _n_workers(0) {
1667     assert(VerifyDuringGC, "don't call this otherwise");
1668 
1669     // Use the value already set as the number of active threads
1670     // in the call to run_task().
1671     if (G1CollectedHeap::use_parallel_gc_threads()) {
1672       assert( _g1h->workers()->active_workers() > 0,
1673         "Should have been previously set");
1674       _n_workers = _g1h->workers()->active_workers();
1675     } else {
1676       _n_workers = 1;
1677     }
1678 
1679     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1680     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1681 
1682     _verbose = _cm->verbose_medium();
1683   }
1684 
1685   void work(uint worker_id) {
1686     assert(worker_id < _n_workers, "invariant");
1687 
1688     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1689                                             _actual_region_bm, _actual_card_bm,
1690                                             _expected_region_bm,
1691                                             _expected_card_bm,
1692                                             _verbose);
1693 
1694     if (G1CollectedHeap::use_parallel_gc_threads()) {
1695       _g1h->heap_region_par_iterate_chunked(&verify_cl,
1696                                             worker_id,
1697                                             _n_workers,
1698                                             HeapRegion::VerifyCountClaimValue);
1699     } else {
1700       _g1h->heap_region_iterate(&verify_cl);
1701     }
1702 
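         // Accumulate this worker's failure count into the shared total; the
         // update is atomic since several workers may finish at the same time.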
1703     Atomic::add(verify_cl.failures(), &_failures);
1704   }
1705 
1706   int failures() const { return _failures; }
1707 };
1708 
1709 // Closure that finalizes the liveness counting data.
1710 // Used during the cleanup pause.
1711 // Sets the bits corresponding to the interval [NTAMS, top]
1712 // (which contains the implicitly live objects) in the
1713 // card liveness bitmap. Also sets the bit for each region,
1714 // containing live data, in the region liveness bitmap.
1715 
1716 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1717  public:
1718   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1719                               BitMap* region_bm,
1720                               BitMap* card_bm) :
1721     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1722 
1723   bool doHeapRegion(HeapRegion* hr) {
1724 
1725     if (hr->continuesHumongous()) {
1726       // We will ignore these here and process them when their
1727       // associated "starts humongous" region is processed (see
1728       // set_bit_for_region()). Note that we cannot rely on their
1729       // associated "starts humongous" region to have its bit set to
1730       // 1 since, due to the region chunking in the parallel region
1731       // iteration, a "continues humongous" region might be visited
1732       // before its associated "starts humongous".
1733       return false;
1734     }
1735 
1736     HeapWord* ntams = hr->next_top_at_mark_start();
1737     HeapWord* top   = hr->top();
1738 
1739     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1740 
1741     // Mark the allocated-since-marking portion...
1742     if (ntams < top) {
1743       // This definitely means the region has live objects.
1744       set_bit_for_region(hr);
1745 
1746       // Now set the bits in the card bitmap for [ntams, top)
1747       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1748       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1749 
1750       // Note: if we're looking at the last region in the heap, top
1751       // could actually be just beyond the end of the heap; end_idx
1752       // will then correspond to a (non-existent) card that is also
1753       // just beyond the heap.
1754       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1755         // top is not card aligned - increment to cover
1756         // all the cards spanned by the range [ntams, top)
1757         end_idx += 1;
1758       }
1759 
1760       assert(end_idx <= _card_bm->size(),
1761              err_msg("oob: end_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1762                      end_idx, _card_bm->size()));
1763       assert(start_idx < _card_bm->size(),
1764              err_msg("oob: start_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1765                      start_idx, _card_bm->size()));
1766 
1767       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1768     }
1769 
1770     // Set the bit for the region if it contains live data
1771     if (hr->next_marked_bytes() > 0) {
1772       set_bit_for_region(hr);
1773     }
1774 
1775     return false;
1776   }
1777 };
1778 
1779 class G1ParFinalCountTask: public AbstractGangTask {
1780 protected:
1781   G1CollectedHeap* _g1h;
1782   ConcurrentMark* _cm;
1783   BitMap* _actual_region_bm;
1784   BitMap* _actual_card_bm;
1785 
1786   uint    _n_workers;
1787 
1788 public:
1789   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1790     : AbstractGangTask("G1 final counting"),
1791       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1792       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1793       _n_workers(0) {
1794     // Use the value already set as the number of active threads
1795     // in the call to run_task().
1796     if (G1CollectedHeap::use_parallel_gc_threads()) {
1797       assert( _g1h->workers()->active_workers() > 0,
1798         "Should have been previously set");
1799       _n_workers = _g1h->workers()->active_workers();
1800     } else {
1801       _n_workers = 1;
1802     }
1803   }
1804 
1805   void work(uint worker_id) {
1806     assert(worker_id < _n_workers, "invariant");
1807 
1808     FinalCountDataUpdateClosure final_update_cl(_g1h,
1809                                                 _actual_region_bm,
1810                                                 _actual_card_bm);
1811 
1812     if (G1CollectedHeap::use_parallel_gc_threads()) {
1813       _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1814                                             worker_id,
1815                                             _n_workers,
1816                                             HeapRegion::FinalCountClaimValue);
1817     } else {
1818       _g1h->heap_region_iterate(&final_update_cl);
1819     }
1820   }
1821 };
1822 
1823 class G1ParNoteEndTask;
1824 
1825 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1826   G1CollectedHeap* _g1;
1827   size_t _max_live_bytes;
1828   uint _regions_claimed;
1829   size_t _freed_bytes;
1830   FreeRegionList* _local_cleanup_list;
1831   HeapRegionSetCount _old_regions_removed;
1832   HeapRegionSetCount _humongous_regions_removed;
1833   HRRSCleanupTask* _hrrs_cleanup_task;
1834   double _claimed_region_time;
1835   double _max_region_time;
1836 
1837 public:
1838   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1839                              FreeRegionList* local_cleanup_list,
1840                              HRRSCleanupTask* hrrs_cleanup_task) :
1841     _g1(g1),
1842     _max_live_bytes(0), _regions_claimed(0),
1843     _freed_bytes(0),
1844     _claimed_region_time(0.0), _max_region_time(0.0),
1845     _local_cleanup_list(local_cleanup_list),
1846     _old_regions_removed(),
1847     _humongous_regions_removed(),
1848     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1849 
1850   size_t freed_bytes() { return _freed_bytes; }
1851   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1852   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1853 
1854   bool doHeapRegion(HeapRegion *hr) {
1855     if (hr->continuesHumongous()) {
1856       return false;
1857     }
1858     // We use a claim value of zero here because all regions
1859     // were claimed with value 1 in the FinalCount task.
1860     _g1->reset_gc_time_stamps(hr);
1861     double start = os::elapsedTime();
1862     _regions_claimed++;
1863     hr->note_end_of_marking();
1864     _max_live_bytes += hr->max_live_bytes();
1865 
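         // A non-young region that is in use but has no live data after marking
         // is completely garbage and can be reclaimed right away; otherwise we
         // only clean up its remembered set.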
1866     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1867       _freed_bytes += hr->used();
1868       hr->set_containing_set(NULL);
1869       if (hr->isHumongous()) {
1870         assert(hr->startsHumongous(), "we should only see starts humongous");
1871         _humongous_regions_removed.increment(1u, hr->capacity());
1872         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1873       } else {
1874         _old_regions_removed.increment(1u, hr->capacity());
1875         _g1->free_region(hr, _local_cleanup_list, true);
1876       }
1877     } else {
1878       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1879     }
1880 
1881     double region_time = (os::elapsedTime() - start);
1882     _claimed_region_time += region_time;
1883     if (region_time > _max_region_time) {
1884       _max_region_time = region_time;
1885     }
1886     return false;
1887   }
1888 
1889   size_t max_live_bytes() { return _max_live_bytes; }
1890   uint regions_claimed() { return _regions_claimed; }
1891   double claimed_region_time_sec() { return _claimed_region_time; }
1892   double max_region_time_sec() { return _max_region_time; }
1893 };
1894 
1895 class G1ParNoteEndTask: public AbstractGangTask {
1896   friend class G1NoteEndOfConcMarkClosure;
1897 
1898 protected:
1899   G1CollectedHeap* _g1h;
1900   size_t _max_live_bytes;
1901   size_t _freed_bytes;
1902   FreeRegionList* _cleanup_list;
1903 
1904 public:
1905   G1ParNoteEndTask(G1CollectedHeap* g1h,
1906                    FreeRegionList* cleanup_list) :
1907     AbstractGangTask("G1 note end"), _g1h(g1h),
1908     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1909 
1910   void work(uint worker_id) {
1911     double start = os::elapsedTime();
1912     FreeRegionList local_cleanup_list("Local Cleanup List");
1913     HRRSCleanupTask hrrs_cleanup_task;
1914     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1915                                            &hrrs_cleanup_task);
1916     if (G1CollectedHeap::use_parallel_gc_threads()) {
1917       _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1918                                             _g1h->workers()->active_workers(),
1919                                             HeapRegion::NoteEndClaimValue);
1920     } else {
1921       _g1h->heap_region_iterate(&g1_note_end);
1922     }
1923     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1924 
1925     // Now update the lists
1926     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1927     {
1928       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1929       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1930       _max_live_bytes += g1_note_end.max_live_bytes();
1931       _freed_bytes += g1_note_end.freed_bytes();
1932 
1933       // If we iterate over the global cleanup list at the end of
1934       // cleanup to do this printing we will not guarantee to only
1935       // generate output for the newly-reclaimed regions (the list
1936       // might not be empty at the beginning of cleanup; we might
1937       // still be working on its previous contents). So we do the
1938       // printing here, before we append the new regions to the global
1939       // cleanup list.
1940 
1941       G1HRPrinter* hr_printer = _g1h->hr_printer();
1942       if (hr_printer->is_active()) {
1943         FreeRegionListIterator iter(&local_cleanup_list);
1944         while (iter.more_available()) {
1945           HeapRegion* hr = iter.get_next();
1946           hr_printer->cleanup(hr);
1947         }
1948       }
1949 
1950       _cleanup_list->add_ordered(&local_cleanup_list);
1951       assert(local_cleanup_list.is_empty(), "post-condition");
1952 
1953       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1954     }
1955   }
1956   size_t max_live_bytes() { return _max_live_bytes; }
1957   size_t freed_bytes() { return _freed_bytes; }
1958 };
1959 
1960 class G1ParScrubRemSetTask: public AbstractGangTask {
1961 protected:
1962   G1RemSet* _g1rs;
1963   BitMap* _region_bm;
1964   BitMap* _card_bm;
1965 public:
1966   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1967                        BitMap* region_bm, BitMap* card_bm) :
1968     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1969     _region_bm(region_bm), _card_bm(card_bm) { }
1970 
1971   void work(uint worker_id) {
1972     if (G1CollectedHeap::use_parallel_gc_threads()) {
1973       _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1974                        HeapRegion::ScrubRemSetClaimValue);
1975     } else {
1976       _g1rs->scrub(_region_bm, _card_bm);
1977     }
1978   }
1979 
1980 };
1981 
1982 void ConcurrentMark::cleanup() {
1983   // world is stopped at this checkpoint
1984   assert(SafepointSynchronize::is_at_safepoint(),
1985          "world should be stopped");
1986   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1987 
1988   // If a full collection has happened, we shouldn't do this.
1989   if (has_aborted()) {
1990     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1991     return;
1992   }
1993 
1994   g1h->verify_region_sets_optional();
1995 
1996   if (VerifyDuringGC) {
1997     HandleMark hm;  // handle scope
1998     Universe::heap()->prepare_for_verify();
1999     Universe::verify(VerifyOption_G1UsePrevMarking,
2000                      " VerifyDuringGC:(before)");
2001   }
2002   g1h->check_bitmaps("Cleanup Start");
2003 
2004   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
2005   g1p->record_concurrent_mark_cleanup_start();
2006 
2007   double start = os::elapsedTime();
2008 
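       // Prepare the remembered set data structures for the per-region
       // cleanup work done by the note-end closures below.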
2009   HeapRegionRemSet::reset_for_cleanup_tasks();
2010 
2011   uint n_workers;
2012 
2013   // Do counting once more with the world stopped for good measure.
2014   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
2015 
2016   if (G1CollectedHeap::use_parallel_gc_threads()) {
2017     assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
2018            "sanity check");
2019 
2020     g1h->set_par_threads();
2021     n_workers = g1h->n_par_threads();
2022     assert(g1h->n_par_threads() == n_workers,
2023            "Should not have been reset");
2024     g1h->workers()->run_task(&g1_par_count_task);
2025     // Done with the parallel phase so reset to 0.
2026     g1h->set_par_threads(0);
2027 
2028     assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
2029            "sanity check");
2030   } else {
2031     n_workers = 1;
2032     g1_par_count_task.work(0);
2033   }
2034 
2035   if (VerifyDuringGC) {
2036     // Verify that the counting data accumulated during marking matches
2037     // that calculated by walking the marking bitmap.
2038 
2039     // Bitmaps to hold expected values
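         // The 'true' constructor arguments allocate the bitmaps in the
         // resource area, so they are reclaimed automatically when this
         // scope exits.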
2040     BitMap expected_region_bm(_region_bm.size(), true);
2041     BitMap expected_card_bm(_card_bm.size(), true);
2042 
2043     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2044                                                  &_region_bm,
2045                                                  &_card_bm,
2046                                                  &expected_region_bm,
2047                                                  &expected_card_bm);
2048 
2049     if (G1CollectedHeap::use_parallel_gc_threads()) {
2050       g1h->set_par_threads((int)n_workers);
2051       g1h->workers()->run_task(&g1_par_verify_task);
2052       // Done with the parallel phase so reset to 0.
2053       g1h->set_par_threads(0);
2054 
2055       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
2056              "sanity check");
2057     } else {
2058       g1_par_verify_task.work(0);
2059     }
2060 
2061     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2062   }
2063 
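       // Record the heap usage before any regions are freed so the size
       // transition can be reported at the end of cleanup.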
2064   size_t start_used_bytes = g1h->used();
2065   g1h->set_marking_complete();
2066 
2067   double count_end = os::elapsedTime();
2068   double this_final_counting_time = (count_end - start);
2069   _total_counting_time += this_final_counting_time;
2070 
2071   if (G1PrintRegionLivenessInfo) {
2072     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2073     _g1h->heap_region_iterate(&cl);
2074   }
2075 
2076   // Install newly created mark bitMap as "prev".
2077   swapMarkBitMaps();
2078 
2079   g1h->reset_gc_time_stamp();
2080 
2081   // Note end of marking in all heap regions.
2082   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
2083   if (G1CollectedHeap::use_parallel_gc_threads()) {
2084     g1h->set_par_threads((int)n_workers);
2085     g1h->workers()->run_task(&g1_par_note_end_task);
2086     g1h->set_par_threads(0);
2087 
2088     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
2089            "sanity check");
2090   } else {
2091     g1_par_note_end_task.work(0);
2092   }
2093   g1h->check_gc_time_stamps();
2094 
2095   if (!cleanup_list_is_empty()) {
2096     // The cleanup list is not empty, so we'll have to process it
2097     // concurrently. Notify anyone else that might be wanting free
2098     // regions that there will be more free regions coming soon.
2099     g1h->set_free_regions_coming();
2100   }
2101 
2102   // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
2103   // call below, since scrubbing affects the metric by which we sort the heap regions.
2104   if (G1ScrubRemSets) {
2105     double rs_scrub_start = os::elapsedTime();
2106     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
2107     if (G1CollectedHeap::use_parallel_gc_threads()) {
2108       g1h->set_par_threads((int)n_workers);
2109       g1h->workers()->run_task(&g1_par_scrub_rs_task);
2110       g1h->set_par_threads(0);
2111 
2112       assert(g1h->check_heap_region_claim_values(
2113                                             HeapRegion::ScrubRemSetClaimValue),
2114              "sanity check");
2115     } else {
2116       g1_par_scrub_rs_task.work(0);
2117     }
2118 
2119     double rs_scrub_end = os::elapsedTime();
2120     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2121     _total_rs_scrub_time += this_rs_scrub_time;
2122   }
2123 
2124   // this will also free any regions totally full of garbage objects,
2125   // and sort the regions.
2126   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2127 
2128   // Statistics.
2129   double end = os::elapsedTime();
2130   _cleanup_times.add((end - start) * 1000.0);
2131 
2132   if (G1Log::fine()) {
2133     g1h->print_size_transition(gclog_or_tty,
2134                                start_used_bytes,
2135                                g1h->used(),
2136                                g1h->capacity());
2137   }
2138 
2139   // Clean up will have freed any regions completely full of garbage.
2140   // Update the soft reference policy with the new heap occupancy.
2141   Universe::update_heap_info_at_gc();
2142 
2143   // We need to make this be a "collection" so any collection pause that
2144   // races with it goes around and waits for completeCleanup to finish.
2145   g1h->increment_total_collections();
2146 
2147   // We reclaimed old regions so we should calculate the sizes to make
2148   // sure we update the old gen/space data.
2149   g1h->g1mm()->update_sizes();
2150 
2151   if (VerifyDuringGC) {
2152     HandleMark hm;  // handle scope
2153     Universe::heap()->prepare_for_verify();
2154     Universe::verify(VerifyOption_G1UsePrevMarking,
2155                      " VerifyDuringGC:(after)");
2156   }
2157   g1h->check_bitmaps("Cleanup End");
2158 
2159   g1h->verify_region_sets_optional();
2160   g1h->trace_heap_after_concurrent_cycle();
2161 }
2162 
2163 void ConcurrentMark::completeCleanup() {
2164   if (has_aborted()) return;
2165 
2166   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2167 
2168   _cleanup_list.verify_optional();
2169   FreeRegionList tmp_free_list("Tmp Free List");
2170 
2171   if (G1ConcRegionFreeingVerbose) {
2172     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2173                            "cleanup list has %u entries",
2174                            _cleanup_list.length());
2175   }
2176 
2177   // No one else should be accessing the _cleanup_list at this point,
2178   // so it's not necessary to take any locks.
2179   while (!_cleanup_list.is_empty()) {
2180     HeapRegion* hr = _cleanup_list.remove_head();
2181     assert(hr != NULL, "Got NULL from a non-empty list");
2182     hr->par_clear();
2183     tmp_free_list.add_ordered(hr);
2184 
2185     // Instead of adding one region at a time to the secondary_free_list,
2186     // we accumulate them in the local list and move them a few at a
2187     // time. This also cuts down on the number of notify_all() calls
2188     // we do during this process. We'll also append the local list when
2189     // _cleanup_list is empty (which means we just removed the last
2190     // region from the _cleanup_list).
2191     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2192         _cleanup_list.is_empty()) {
2193       if (G1ConcRegionFreeingVerbose) {
2194         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2195                                "appending %u entries to the secondary_free_list, "
2196                                "cleanup list still has %u entries",
2197                                tmp_free_list.length(),
2198                                _cleanup_list.length());
2199       }
2200 
2201       {
2202         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2203         g1h->secondary_free_list_add(&tmp_free_list);
2204         SecondaryFreeList_lock->notify_all();
2205       }
2206 
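           // When stressing concurrent region freeing, sleep for a short while
           // after each batch (one millisecond per iteration, for
           // G1StressConcRegionFreeingDelayMillis iterations).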
2207       if (G1StressConcRegionFreeing) {
2208         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2209           os::sleep(Thread::current(), (jlong) 1, false);
2210         }
2211       }
2212     }
2213   }
2214   assert(tmp_free_list.is_empty(), "post-condition");
2215 }
2216 
2217 // Supporting Object and Oop closures for reference discovery
2218 // and processing during marking
2219 
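     // An object is treated as live if it lies outside the G1 reserved heap
     // (and is therefore not covered by this marking) or if it is not "ill"
     // with respect to the current (next) marking information.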
2220 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2221   HeapWord* addr = (HeapWord*)obj;
2222   return addr != NULL &&
2223          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2224 }
2225 
2226 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2227 // Uses the CMTask associated with a worker thread (for serial reference
2228 // processing the CMTask for worker 0 is used) to preserve (mark) and
2229 // trace referent objects.
2230 //
2231 // Using the CMTask and its embedded local queues avoids having the worker
2232 // threads operate on the global mark stack. This reduces the risk
2233 // of overflowing the stack - which we would rather avoid at this late
2234 // stage. Also, using the tasks' local queues removes the potential
2235 // for the workers to interfere with each other, which could occur if
2236 // they operated on the global stack.
2237 
2238 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2239   ConcurrentMark* _cm;
2240   CMTask*         _task;
2241   int             _ref_counter_limit;
2242   int             _ref_counter;
2243   bool            _is_serial;
2244  public:
2245   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2246     _cm(cm), _task(task), _is_serial(is_serial),
2247     _ref_counter_limit(G1RefProcDrainInterval) {
2248     assert(_ref_counter_limit > 0, "sanity");
2249     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2250     _ref_counter = _ref_counter_limit;
2251   }
2252 
2253   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2254   virtual void do_oop(      oop* p) { do_oop_work(p); }
2255 
2256   template <class T> void do_oop_work(T* p) {
2257     if (!_cm->has_overflown()) {
2258       oop obj = oopDesc::load_decode_heap_oop(p);
2259       if (_cm->verbose_high()) {
2260         gclog_or_tty->print_cr("\t[%u] we're looking at location "
2261                                "*"PTR_FORMAT" = "PTR_FORMAT,
2262                                _task->worker_id(), p2i(p), p2i((void*) obj));
2263       }
2264 
2265       _task->deal_with_reference(obj);
2266       _ref_counter--;
2267 
2268       if (_ref_counter == 0) {
2269         // We have dealt with _ref_counter_limit references, pushing them
2270         // and objects reachable from them on to the local stack (and
2271         // possibly the global stack). Call CMTask::do_marking_step() to
2272         // process these entries.
2273         //
2274         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2275         // there's nothing more to do (i.e. we're done with the entries that
2276         // were pushed as a result of the CMTask::deal_with_reference() calls
2277         // above) or we overflow.
2278         //
2279         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2280         // flag while there may still be some work to do. (See the comment at
2281         // the beginning of CMTask::do_marking_step() for those conditions -
2282         // one of which is reaching the specified time target.) It is only
2283         // when CMTask::do_marking_step() returns without setting the
2284         // has_aborted() flag that the marking step has completed.
2285         do {
2286           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2287           _task->do_marking_step(mark_step_duration_ms,
2288                                  false      /* do_termination */,
2289                                  _is_serial);
2290         } while (_task->has_aborted() && !_cm->has_overflown());
2291         _ref_counter = _ref_counter_limit;
2292       }
2293     } else {
2294       if (_cm->verbose_high()) {
2295          gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2296       }
2297     }
2298   }
2299 };
2300 
2301 // 'Drain' closure used by both serial and parallel reference processing.
2302 // Uses the CMTask associated with a given worker thread (for serial
2303 // reference processing the CMTask for worker 0 is used). Calls the
2304 // do_marking_step routine, with an unbelievably large timeout value,
2305 // to drain the marking data structures of the remaining entries
2306 // added by the 'keep alive' oop closure above.
2307 
2308 class G1CMDrainMarkingStackClosure: public VoidClosure {
2309   ConcurrentMark* _cm;
2310   CMTask*         _task;
2311   bool            _is_serial;
2312  public:
2313   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2314     _cm(cm), _task(task), _is_serial(is_serial) {
2315     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2316   }
2317 
2318   void do_void() {
2319     do {
2320       if (_cm->verbose_high()) {
2321         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2322                                _task->worker_id(), BOOL_TO_STR(_is_serial));
2323       }
2324 
2325       // We call CMTask::do_marking_step() to completely drain the local
2326       // and global marking stacks of entries pushed by the 'keep alive'
2327       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2328       //
2329       // CMTask::do_marking_step() is called in a loop, which we'll exit
2330       // if there's nothing more to do (i.e. we've completely drained the
2331       // entries that were pushed as a result of applying the 'keep alive'
2332       // closure to the entries on the discovered ref lists) or we overflow
2333       // the global marking stack.
2334       //
2335       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2336       // flag while there may still be some work to do. (See the comment at
2337       // the beginning of CMTask::do_marking_step() for those conditions -
2338       // one of which is reaching the specified time target.) It is only
2339       // when CMTask::do_marking_step() returns without setting the
2340       // has_aborted() flag that the marking step has completed.
2341 
2342       _task->do_marking_step(1000000000.0 /* something very large */,
2343                              true         /* do_termination */,
2344                              _is_serial);
2345     } while (_task->has_aborted() && !_cm->has_overflown());
2346   }
2347 };
2348 
2349 // Implementation of AbstractRefProcTaskExecutor for parallel
2350 // reference processing at the end of G1 concurrent marking
2351 
2352 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2353 private:
2354   G1CollectedHeap* _g1h;
2355   ConcurrentMark*  _cm;
2356   WorkGang*        _workers;
2357   int              _active_workers;
2358 
2359 public:
2360   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2361                         ConcurrentMark* cm,
2362                         WorkGang* workers,
2363                         int n_workers) :
2364     _g1h(g1h), _cm(cm),
2365     _workers(workers), _active_workers(n_workers) { }
2366 
2367   // Executes the given task using concurrent marking worker threads.
2368   virtual void execute(ProcessTask& task);
2369   virtual void execute(EnqueueTask& task);
2370 };
2371 
2372 class G1CMRefProcTaskProxy: public AbstractGangTask {
2373   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2374   ProcessTask&     _proc_task;
2375   G1CollectedHeap* _g1h;
2376   ConcurrentMark*  _cm;
2377 
2378 public:
2379   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2380                      G1CollectedHeap* g1h,
2381                      ConcurrentMark* cm) :
2382     AbstractGangTask("Process reference objects in parallel"),
2383     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2384     ReferenceProcessor* rp = _g1h->ref_processor_cm();
2385     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2386   }
2387 
2388   virtual void work(uint worker_id) {
2389     CMTask* task = _cm->task(worker_id);
2390     G1CMIsAliveClosure g1_is_alive(_g1h);
2391     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2392     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2393 
2394     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2395   }
2396 };
2397 
2398 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2399   assert(_workers != NULL, "Need parallel worker threads.");
2400   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2401 
2402   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2403 
2404   // We need to reset the concurrency level before each
2405   // proxy task execution, so that the termination protocol
2406   // and overflow handling in CMTask::do_marking_step() know
2407   // how many workers to wait for.
2408   _cm->set_concurrency(_active_workers);
2409   _g1h->set_par_threads(_active_workers);
2410   _workers->run_task(&proc_task_proxy);
2411   _g1h->set_par_threads(0);
2412 }
2413 
2414 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2415   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2416   EnqueueTask& _enq_task;
2417 
2418 public:
2419   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2420     AbstractGangTask("Enqueue reference objects in parallel"),
2421     _enq_task(enq_task) { }
2422 
2423   virtual void work(uint worker_id) {
2424     _enq_task.work(worker_id);
2425   }
2426 };
2427 
2428 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2429   assert(_workers != NULL, "Need parallel worker threads.");
2430   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2431 
2432   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2433 
2434   // Not strictly necessary but...
2435   //
2436   // We need to reset the concurrency level before each
2437   // proxy task execution, so that the termination protocol
2438   // and overflow handling in CMTask::do_marking_step() know
2439   // how many workers to wait for.
2440   _cm->set_concurrency(_active_workers);
2441   _g1h->set_par_threads(_active_workers);
2442   _workers->run_task(&enq_task_proxy);
2443   _g1h->set_par_threads(0);
2444 }
2445 
2446 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2447   if (has_overflown()) {
2448     // Skip processing the discovered references if we have
2449     // overflown the global marking stack. Reference objects
2450     // only get discovered once, so it is OK not to
2451     // de-populate the discovered reference lists. We could have,
2452     // but the only benefit would be that, when marking restarts,
2453     // fewer reference objects would be discovered.
2454     return;
2455   }
2456 
2457   ResourceMark rm;
2458   HandleMark   hm;
2459 
2460   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2461 
2462   // Is alive closure.
2463   G1CMIsAliveClosure g1_is_alive(g1h);
2464 
2465   // Inner scope to exclude the cleaning of the string and symbol
2466   // tables from the displayed time.
2467   {
2468     if (G1Log::finer()) {
2469       gclog_or_tty->put(' ');
2470     }
2471     GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm());
2472 
2473     ReferenceProcessor* rp = g1h->ref_processor_cm();
2474 
2475     // See the comment in G1CollectedHeap::ref_processing_init()
2476     // about how reference processing currently works in G1.
2477 
2478     // Set the soft reference policy
2479     rp->setup_policy(clear_all_soft_refs);
2480     assert(_markStack.isEmpty(), "mark stack should be empty");
2481 
2482     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2483     // in serial reference processing. Note these closures are also
2484     // used for serially processing (by the current thread) the
2485     // JNI references during parallel reference processing.
2486     //
2487     // These closures do not need to synchronize with the worker
2488     // threads involved in parallel reference processing as these
2489     // instances are executed serially by the current thread (e.g.
2490     // reference processing is not multi-threaded and is thus
2491     // performed by the current thread instead of a gang worker).
2492     //
2493     // The gang tasks involved in parallel reference processing create
2494     // their own instances of these closures, which do their own
2495     // synchronization among themselves.
2496     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2497     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2498 
2499     // We need at least one active thread. If reference processing
2500     // is not multi-threaded we use the current (VMThread) thread,
2501     // otherwise we use the work gang from the G1CollectedHeap and
2502     // we utilize all the worker threads we can.
2503     bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
2504     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
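         // Clamp the number of workers to the number of marking tasks we have
         // and make sure there is at least one.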
2505     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2506 
2507     // Parallel processing task executor.
2508     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2509                                               g1h->workers(), active_workers);
2510     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2511 
2512     // Set the concurrency level. The phase was already set prior to
2513     // executing the remark task.
2514     set_concurrency(active_workers);
2515 
2516     // Set the degree of MT processing here.  If the discovery was done MT,
2517     // the number of threads involved during discovery could differ from
2518     // the number of active workers.  This is OK as long as the discovered
2519     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2520     rp->set_active_mt_degree(active_workers);
2521 
2522     // Process the weak references.
2523     const ReferenceProcessorStats& stats =
2524         rp->process_discovered_references(&g1_is_alive,
2525                                           &g1_keep_alive,
2526                                           &g1_drain_mark_stack,
2527                                           executor,
2528                                           g1h->gc_timer_cm());
2529     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2530 
2531     // The do_oop work routines of the keep_alive and drain_marking_stack
2532     // oop closures will set the has_overflown flag if we overflow the
2533     // global marking stack.
2534 
2535     assert(_markStack.overflow() || _markStack.isEmpty(),
2536             "mark stack should be empty (unless it overflowed)");
2537 
2538     if (_markStack.overflow()) {
2539       // This should have been done already when we tried to push an
2540       // entry on to the global mark stack. But let's do it again.
2541       set_has_overflown();
2542     }
2543 
2544     assert(rp->num_q() == active_workers, "why not");
2545 
2546     rp->enqueue_discovered_references(executor);
2547 
2548     rp->verify_no_references_recorded();
2549     assert(!rp->discovery_enabled(), "Post condition");
2550   }
2551 
2552   if (has_overflown()) {
2553     // We can not trust g1_is_alive if the marking stack overflowed
2554     return;
2555   }
2556 
2557   g1h->unlink_string_and_symbol_table(&g1_is_alive,
2558                                       /* process_strings */ false, // currently strings are always roots
2559                                       /* process_symbols */ true);
2560 }
2561 
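     // Swap the roles of the two marking bitmaps: the bitmap holding the
     // results of the marking cycle that just completed becomes the "prev"
     // bitmap, and the old "prev" bitmap will be cleared and reused as the
     // "next" bitmap for the following cycle.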
2562 void ConcurrentMark::swapMarkBitMaps() {
2563   CMBitMapRO* temp = _prevMarkBitMap;
2564   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2565   _nextMarkBitMap  = (CMBitMap*)  temp;
2566 }
2567 
2568 class CMRemarkTask: public AbstractGangTask {
2569 private:
2570   ConcurrentMark* _cm;
2571   bool            _is_serial;
2572 public:
2573   void work(uint worker_id) {
2574     // Since all available tasks are actually started, we should
2575     // only proceed if we're supposed to be active.
2576     if (worker_id < _cm->active_tasks()) {
2577       CMTask* task = _cm->task(worker_id);
2578       task->record_start_time();
2579       do {
2580         task->do_marking_step(1000000000.0 /* something very large */,
2581                               true         /* do_termination       */,
2582                               _is_serial);
2583       } while (task->has_aborted() && !_cm->has_overflown());
2584       // If we overflow, then we do not want to restart. We instead
2585       // want to abort remark and do concurrent marking again.
2586       task->record_end_time();
2587     }
2588   }
2589 
2590   CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
2591     AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
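         // Reset the parallel task terminator so that it expects the number
         // of workers that will actually take part in the remark.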
2592     _cm->terminator()->reset_for_reuse(active_workers);
2593   }
2594 };
2595 
2596 void ConcurrentMark::checkpointRootsFinalWork() {
2597   ResourceMark rm;
2598   HandleMark   hm;
2599   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2600 
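       // Make sure the heap is parsable before the remark tasks scan it; the
       // 'false' argument makes TLABs parsable without retiring them.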
2601   g1h->ensure_parsability(false);
2602 
2603   if (G1CollectedHeap::use_parallel_gc_threads()) {
2604     G1CollectedHeap::StrongRootsScope srs(g1h);
2605     // this is remark, so we'll use up all active threads
2606     uint active_workers = g1h->workers()->active_workers();
2607     if (active_workers == 0) {
2608       assert(active_workers > 0, "Should have been set earlier");
2609       active_workers = (uint) ParallelGCThreads;
2610       g1h->workers()->set_active_workers(active_workers);
2611     }
2612     set_concurrency_and_phase(active_workers, false /* concurrent */);
2613     // Leave _parallel_marking_threads at its
2614     // value originally calculated in the ConcurrentMark
2615     // constructor and pass the number of active workers
2616     // to the task through the work gang.
2617 
2618     CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
2619     // We will start all available threads, even if we decide that the
2620     // active_workers will be fewer. The extra ones will just bail out
2621     // immediately.
2622     g1h->set_par_threads(active_workers);
2623     g1h->workers()->run_task(&remarkTask);
2624     g1h->set_par_threads(0);
2625   } else {
2626     G1CollectedHeap::StrongRootsScope srs(g1h);
2627     uint active_workers = 1;
2628     set_concurrency_and_phase(active_workers, false /* concurrent */);
2629 
2630     // Note - if there's no work gang then the VMThread will be
2631     // the thread to execute the remark - serially. We have
2632     // to pass true for the is_serial parameter so that
2633     // CMTask::do_marking_step() doesn't enter the sync
2634     // barriers in the event of an overflow. Doing so will
2635     // cause an assert that the current thread is not a
2636     // concurrent GC thread.
2637     CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/);
2638     remarkTask.work(0);
2639   }
2640   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2641   guarantee(has_overflown() ||
2642             satb_mq_set.completed_buffers_num() == 0,
2643             err_msg("Invariant: has_overflown = %s, num buffers = %d",
2644                     BOOL_TO_STR(has_overflown()),
2645                     satb_mq_set.completed_buffers_num()));
2646 
2647   print_stats();
2648 }
2649 
2650 #ifndef PRODUCT
2651 
2652 class PrintReachableOopClosure: public OopClosure {
2653 private:
2654   G1CollectedHeap* _g1h;
2655   outputStream*    _out;
2656   VerifyOption     _vo;
2657   bool             _all;
2658 
2659 public:
2660   PrintReachableOopClosure(outputStream* out,
2661                            VerifyOption  vo,
2662                            bool          all) :
2663     _g1h(G1CollectedHeap::heap()),
2664     _out(out), _vo(vo), _all(all) { }
2665 
2666   void do_oop(narrowOop* p) { do_oop_work(p); }
2667   void do_oop(      oop* p) { do_oop_work(p); }
2668 
2669   template <class T> void do_oop_work(T* p) {
2670     oop         obj = oopDesc::load_decode_heap_oop(p);
2671     const char* str = NULL;
2672     const char* str2 = "";
2673 
2674     if (obj == NULL) {
2675       str = "";
2676     } else if (!_g1h->is_in_g1_reserved(obj)) {
2677       str = " O";
2678     } else {
2679       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2680       bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2681       bool marked = _g1h->is_marked(obj, _vo);
2682 
2683       if (over_tams) {
2684         str = " >";
2685         if (marked) {
2686           str2 = " AND MARKED";
2687         }
2688       } else if (marked) {
2689         str = " M";
2690       } else {
2691         str = " NOT";
2692       }
2693     }
2694 
2695     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2696                    p2i(p), p2i((void*) obj), str, str2);
2697   }
2698 };
2699 
2700 class PrintReachableObjectClosure : public ObjectClosure {
2701 private:
2702   G1CollectedHeap* _g1h;
2703   outputStream*    _out;
2704   VerifyOption     _vo;
2705   bool             _all;
2706   HeapRegion*      _hr;
2707 
2708 public:
2709   PrintReachableObjectClosure(outputStream* out,
2710                               VerifyOption  vo,
2711                               bool          all,
2712                               HeapRegion*   hr) :
2713     _g1h(G1CollectedHeap::heap()),
2714     _out(out), _vo(vo), _all(all), _hr(hr) { }
2715 
2716   void do_object(oop o) {
2717     bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2718     bool marked = _g1h->is_marked(o, _vo);
2719     bool print_it = _all || over_tams || marked;
2720 
2721     if (print_it) {
2722       _out->print_cr(" "PTR_FORMAT"%s",
2723                      p2i((void *)o), (over_tams) ? " >" : (marked) ? " M" : "");
2724       PrintReachableOopClosure oopCl(_out, _vo, _all);
2725       o->oop_iterate_no_header(&oopCl);
2726     }
2727   }
2728 };
2729 
2730 class PrintReachableRegionClosure : public HeapRegionClosure {
2731 private:
2732   G1CollectedHeap* _g1h;
2733   outputStream*    _out;
2734   VerifyOption     _vo;
2735   bool             _all;
2736 
2737 public:
2738   bool doHeapRegion(HeapRegion* hr) {
2739     HeapWord* b = hr->bottom();
2740     HeapWord* e = hr->end();
2741     HeapWord* t = hr->top();
2742     HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2743     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2744                    "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p));
2745     _out->cr();
2746 
2747     HeapWord* from = b;
2748     HeapWord* to   = t;
2749 
2750     if (to > from) {
2751       _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to));
2752       _out->cr();
2753       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2754       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2755       _out->cr();
2756     }
2757 
2758     return false;
2759   }
2760 
2761   PrintReachableRegionClosure(outputStream* out,
2762                               VerifyOption  vo,
2763                               bool          all) :
2764     _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2765 };
2766 
2767 void ConcurrentMark::print_reachable(const char* str,
2768                                      VerifyOption vo,
2769                                      bool all) {
2770   gclog_or_tty->cr();
2771   gclog_or_tty->print_cr("== Doing heap dump... ");
2772 
2773   if (G1PrintReachableBaseFile == NULL) {
2774     gclog_or_tty->print_cr("  #### error: no base file defined");
2775     return;
2776   }
2777 
2778   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2779       (JVM_MAXPATHLEN - 1)) {
2780     gclog_or_tty->print_cr("  #### error: file name too long");
2781     return;
2782   }
2783 
2784   char file_name[JVM_MAXPATHLEN];
2785   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2786   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2787 
2788   fileStream fout(file_name);
2789   if (!fout.is_open()) {
2790     gclog_or_tty->print_cr("  #### error: could not open file");
2791     return;
2792   }
2793 
2794   outputStream* out = &fout;
2795   out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2796   out->cr();
2797 
2798   out->print_cr("--- ITERATING OVER REGIONS");
2799   out->cr();
2800   PrintReachableRegionClosure rcl(out, vo, all);
2801   _g1h->heap_region_iterate(&rcl);
2802   out->cr();
2803 
2804   gclog_or_tty->print_cr("  done");
2805   gclog_or_tty->flush();
2806 }
2807 
2808 #endif // PRODUCT
2809 
2810 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2811   // Note we are overriding the read-only view of the prev map here, via
2812   // the cast.
2813   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2814 }
2815 
2816 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2817   _nextMarkBitMap->clearRange(mr);
2818 }
2819 
2820 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
2821   clearRangePrevBitmap(mr);
2822   clearRangeNextBitmap(mr);
2823 }
2824 
2825 HeapRegion*
2826 ConcurrentMark::claim_region(uint worker_id) {
2827   // "checkpoint" the finger
2828   HeapWord* finger = _finger;
2829 
2830   // _heap_end will not change underneath our feet; it only changes at
2831   // yield points.
2832   while (finger < _heap_end) {
2833     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2834 
2835     // Note on how this code handles humongous regions. In the
2836     // normal case the finger will reach the start of a "starts
2837     // humongous" (SH) region. Its end will either be the end of the
2838     // last "continues humongous" (CH) region in the sequence, or the
2839     // standard end of the SH region (if the SH is the only region in
2840     // the sequence). That way claim_region() will skip over the CH
2841     // regions. However, there is a subtle race between a CM thread
2842     // executing this method and a mutator thread doing a humongous
2843     // object allocation. The two are not mutually exclusive as the CM
2844     // thread does not need to hold the Heap_lock when it gets
2845     // here. So there is a chance that claim_region() will come across
2846     // a free region that's in the process of becoming a SH or a CH
2847     // region. In the former case, it will either
2848     //   a) Miss the update to the region's end, in which case it will
2849     //      visit every subsequent CH region, will find their bitmaps
2850     //      empty, and do nothing, or
2851     //   b) Will observe the update of the region's end (in which case
2852     //      it will skip the subsequent CH regions).
2853     // If it comes across a region that suddenly becomes CH, the
2854     // scenario will be similar to b). So, the race between
2855     // claim_region() and a humongous object allocation might force us
2856     // to do a bit of unnecessary work (due to some unnecessary bitmap
2857     // iterations) but it should not introduce any correctness issues.
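    // limit is this region's NTAMS: only objects below NTAMS can be
    // marked on the next bitmap (objects allocated above it since marking
    // started are implicitly live), so a region whose NTAMS is still at
    // bottom has nothing for a marking task to scan.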
2858     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
2859     HeapWord*   bottom        = curr_region->bottom();
2860     HeapWord*   end           = curr_region->end();
2861     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2862 
2863     if (verbose_low()) {
2864       gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2865                              "["PTR_FORMAT", "PTR_FORMAT"), "
2866                              "limit = "PTR_FORMAT,
2867                              worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2868     }
2869 
2870     // Is the gap between reading the finger and doing the CAS too long?
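    // Attempt to claim [bottom, end) by atomically moving the global
    // finger from the value we read above to this region's end. If the
    // CAS fails, another worker claimed a region first; we then re-read
    // the finger and retry.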
2871     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2872     if (res == finger) {
2873       // we succeeded
2874 
2875       // notice that _finger == end cannot be guaranteed here since
2876       // someone else might have moved the finger even further
2877       assert(_finger >= end, "the finger should have moved forward");
2878 
2879       if (verbose_low()) {
2880         gclog_or_tty->print_cr("[%u] we were successful with region = "
2881                                PTR_FORMAT, worker_id, p2i(curr_region));
2882       }
2883 
2884       if (limit > bottom) {
2885         if (verbose_low()) {
2886           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
2887                                  "returning it ", worker_id, p2i(curr_region));
2888         }
2889         return curr_region;
2890       } else {
2891         assert(limit == bottom,
2892                "the region limit should be at bottom");
2893         if (verbose_low()) {
2894           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
2895                                  "returning NULL", worker_id, p2i(curr_region));
2896         }
2897         // we return NULL and the caller should try calling
2898         // claim_region() again.
2899         return NULL;
2900       }
2901     } else {
2902       assert(_finger > finger, "the finger should have moved forward");
2903       if (verbose_low()) {
2904         gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
2905                                "global finger = "PTR_FORMAT", "
2906                                "our finger = "PTR_FORMAT,
2907                                worker_id, p2i(_finger), p2i(finger));
2908       }
2909 
2910       // read it again
2911       finger = _finger;
2912     }
2913   }
2914 
2915   return NULL;
2916 }
2917 
2918 #ifndef PRODUCT
2919 enum VerifyNoCSetOopsPhase {
2920   VerifyNoCSetOopsStack,
2921   VerifyNoCSetOopsQueues,
2922   VerifyNoCSetOopsSATBCompleted,
2923   VerifyNoCSetOopsSATBThread
2924 };
2925 
2926 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
2927 private:
2928   G1CollectedHeap* _g1h;
2929   VerifyNoCSetOopsPhase _phase;
2930   int _info;
2931 
2932   const char* phase_str() {
2933     switch (_phase) {
2934     case VerifyNoCSetOopsStack:         return "Stack";
2935     case VerifyNoCSetOopsQueues:        return "Queue";
2936     case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
2937     case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
2938     default:                            ShouldNotReachHere();
2939     }
2940     return NULL;
2941   }
2942 
2943   void do_object_work(oop obj) {
2944     guarantee(!_g1h->obj_in_cs(obj),
2945               err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2946                       p2i((void*) obj), phase_str(), _info));
2947   }
2948 
2949 public:
2950   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2951 
2952   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2953     _phase = phase;
2954     _info = info;
2955   }
2956 
2957   virtual void do_oop(oop* p) {
2958     oop obj = oopDesc::load_decode_heap_oop(p);
2959     do_object_work(obj);
2960   }
2961 
2962   virtual void do_oop(narrowOop* p) {
2963     // We should not come across narrow oops while scanning marking
2964     // stacks and SATB buffers.
2965     ShouldNotReachHere();
2966   }
2967 
2968   virtual void do_object(oop obj) {
2969     do_object_work(obj);
2970   }
2971 };
2972 
2973 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
2974                                          bool verify_enqueued_buffers,
2975                                          bool verify_thread_buffers,
2976                                          bool verify_fingers) {
2977   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2978   if (!G1CollectedHeap::heap()->mark_in_progress()) {
2979     return;
2980   }
2981 
2982   VerifyNoCSetOopsClosure cl;
2983 
2984   if (verify_stacks) {
2985     // Verify entries on the global mark stack
2986     cl.set_phase(VerifyNoCSetOopsStack);
2987     _markStack.oops_do(&cl);
2988 
2989     // Verify entries on the task queues
2990     for (uint i = 0; i < _max_worker_id; i += 1) {
2991       cl.set_phase(VerifyNoCSetOopsQueues, i);
2992       CMTaskQueue* queue = _task_queues->queue(i);
2993       queue->oops_do(&cl);
2994     }
2995   }
2996 
2997   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
2998 
2999   // Verify entries on the enqueued SATB buffers
3000   if (verify_enqueued_buffers) {
3001     cl.set_phase(VerifyNoCSetOopsSATBCompleted);
3002     satb_qs.iterate_completed_buffers_read_only(&cl);
3003   }
3004 
3005   // Verify entries on the per-thread SATB buffers
3006   if (verify_thread_buffers) {
3007     cl.set_phase(VerifyNoCSetOopsSATBThread);
3008     satb_qs.iterate_thread_buffers_read_only(&cl);
3009   }
3010 
3011   if (verify_fingers) {
3012     // Verify the global finger
3013     HeapWord* global_finger = finger();
3014     if (global_finger != NULL && global_finger < _heap_end) {
3015       // The global finger always points to a heap region boundary. We
3016       // use heap_region_containing_raw() to get the containing region
3017       // given that the global finger could be pointing to a free region
3018       // which subsequently becomes continues humongous. If that
3019       // happens, heap_region_containing() will return the bottom of the
3020       // corresponding starts humongous region and the check below will
3021       // not hold any more.
3022       HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
3023       guarantee(global_finger == global_hr->bottom(),
3024                 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
3025                         p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
3026     }
3027 
3028     // Verify the task fingers
3029     assert(parallel_marking_threads() <= _max_worker_id, "sanity");
3030     for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
3031       CMTask* task = _tasks[i];
3032       HeapWord* task_finger = task->finger();
3033       if (task_finger != NULL && task_finger < _heap_end) {
3034         // See above note on the global finger verification.
3035         HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
3036         guarantee(task_finger == task_hr->bottom() ||
3037                   !task_hr->in_collection_set(),
3038                   err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
3039                           p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
3040       }
3041     }
3042   }
3043 }
3044 #endif // PRODUCT
3045 
3046 // Aggregate the counting data that was constructed concurrently
3047 // with marking.
3048 class AggregateCountDataHRClosure: public HeapRegionClosure {
3049   G1CollectedHeap* _g1h;
3050   ConcurrentMark* _cm;
3051   CardTableModRefBS* _ct_bs;
3052   BitMap* _cm_card_bm;
3053   uint _max_worker_id;
3054 
3055  public:
3056   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
3057                               BitMap* cm_card_bm,
3058                               uint max_worker_id) :
3059     _g1h(g1h), _cm(g1h->concurrent_mark()),
3060     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
3061     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
3062 
3063   bool doHeapRegion(HeapRegion* hr) {
3064     if (hr->continuesHumongous()) {
3065       // We will ignore these here and process them when their
3066       // associated "starts humongous" region is processed.
3067       // Note that we cannot rely on their associated
3068       // "starts humongous" region to have their bit set to 1
3069       // since, due to the region chunking in the parallel region
3070       // iteration, a "continues humongous" region might be visited
3071       // before its associated "starts humongous".
3072       return false;
3073     }
3074 
3075     HeapWord* start = hr->bottom();
3076     HeapWord* limit = hr->next_top_at_mark_start();
3077     HeapWord* end = hr->end();
3078 
3079     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
3080            err_msg("Preconditions not met - "
3081                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
3082                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
3083                    p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));
3084 
3085     assert(hr->next_marked_bytes() == 0, "Precondition");
3086 
3087     if (start == limit) {
3088       // NTAMS of this region has not been set so nothing to do.
3089       return false;
3090     }
3091 
3092     // 'start' should be in the heap.
3093     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
3094     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
3095     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
3096 
3097     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
3098     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
3099     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
3100 
3101     // If ntams is not card aligned then we bump the card bitmap index
3102     // for limit so that we get all the cards spanned by
3103     // the object ending at ntams.
3104     // Note: if this is the last region in the heap then ntams
3105     // could actually be just beyond the end of the heap;
3106     // limit_idx will then correspond to a (non-existent) card
3107     // that is also outside the heap.
3108     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
3109       limit_idx += 1;
3110     }
3111 
3112     assert(limit_idx <= end_idx, "or else use atomics");
3113 
3114     // Aggregate the "stripe" in the count data associated with hr.
3115     uint hrs_index = hr->hrs_index();
3116     size_t marked_bytes = 0;
3117 
3118     for (uint i = 0; i < _max_worker_id; i += 1) {
3119       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3120       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3121 
3122       // Fetch the marked_bytes in this region for task i and
3123       // add it to the running total for this region.
3124       marked_bytes += marked_bytes_array[hrs_index];
3125 
3126       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
3127       // into the global card bitmap.
3128       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3129 
3130       while (scan_idx < limit_idx) {
3131         assert(task_card_bm->at(scan_idx) == true, "should be");
3132         _cm_card_bm->set_bit(scan_idx);
3133         assert(_cm_card_bm->at(scan_idx) == true, "should be");
3134 
3135         // BitMap::get_next_one_offset() can handle the case when
3136         // its left_offset parameter is greater than its right_offset
3137         // parameter. It does, however, have an early exit if
3138         // left_offset == right_offset. So let's limit the value
3139         // passed in for left offset here.
3140         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3141         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3142       }
3143     }
3144 
3145     // Update the marked bytes for this region.
3146     hr->add_to_marked_bytes(marked_bytes);
3147 
3148     // Next heap region
3149     return false;
3150   }
3151 };
3152 
3153 class G1AggregateCountDataTask: public AbstractGangTask {
3154 protected:
3155   G1CollectedHeap* _g1h;
3156   ConcurrentMark* _cm;
3157   BitMap* _cm_card_bm;
3158   uint _max_worker_id;
3159   int _active_workers;
3160 
3161 public:
3162   G1AggregateCountDataTask(G1CollectedHeap* g1h,
3163                            ConcurrentMark* cm,
3164                            BitMap* cm_card_bm,
3165                            uint max_worker_id,
3166                            int n_workers) :
3167     AbstractGangTask("Count Aggregation"),
3168     _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3169     _max_worker_id(max_worker_id),
3170     _active_workers(n_workers) { }
3171 
3172   void work(uint worker_id) {
3173     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
3174 
3175     if (G1CollectedHeap::use_parallel_gc_threads()) {
3176       _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
3177                                             _active_workers,
3178                                             HeapRegion::AggregateCountClaimValue);
3179     } else {
3180       _g1h->heap_region_iterate(&cl);
3181     }
3182   }
3183 };
3184 
3185 
3186 void ConcurrentMark::aggregate_count_data() {
3187   int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3188                         _g1h->workers()->active_workers() :
3189                         1);
3190 
3191   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3192                                            _max_worker_id, n_workers);
3193 
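  // In the parallel case, regions are claimed through the chunked
  // iterator using HeapRegion::AggregateCountClaimValue; we check the
  // claim values before and after running the task and then reset them
  // so later parallel iterations start from InitialClaimValue again.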
3194   if (G1CollectedHeap::use_parallel_gc_threads()) {
3195     assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3196            "sanity check");
3197     _g1h->set_par_threads(n_workers);
3198     _g1h->workers()->run_task(&g1_par_agg_task);
3199     _g1h->set_par_threads(0);
3200 
3201     assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3202            "sanity check");
3203     _g1h->reset_heap_region_claim_values();
3204   } else {
3205     g1_par_agg_task.work(0);
3206   }
3207 }
3208 
3209 // Clear the per-worker arrays used to store the per-region counting data
3210 void ConcurrentMark::clear_all_count_data() {
3211   // Clear the global card bitmap - it will be filled during
3212   // liveness count aggregation (during remark) and the
3213   // final counting task.
3214   _card_bm.clear();
3215 
3216   // Clear the global region bitmap - it will be filled as part
3217   // of the final counting task.
3218   _region_bm.clear();
3219 
3220   uint max_regions = _g1h->max_regions();
3221   assert(_max_worker_id > 0, "uninitialized");
3222 
3223   for (uint i = 0; i < _max_worker_id; i += 1) {
3224     BitMap* task_card_bm = count_card_bitmap_for(i);
3225     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3226 
3227     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3228     assert(marked_bytes_array != NULL, "uninitialized");
3229 
3230     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3231     task_card_bm->clear();
3232   }
3233 }
3234 
3235 void ConcurrentMark::print_stats() {
3236   if (verbose_stats()) {
3237     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3238     for (size_t i = 0; i < _active_tasks; ++i) {
3239       _tasks[i]->print_stats();
3240       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3241     }
3242   }
3243 }
3244 
3245 // abandon current marking iteration due to a Full GC
3246 void ConcurrentMark::abort() {
3247   // Clear all marks to force marking thread to do nothing
3248   _nextMarkBitMap->clearAll();
3249 
3250   // Note we cannot clear the previous marking bitmap here
3251   // since VerifyDuringGC verifies the objects marked during
3252   // a full GC against the previous bitmap.
3253 
3254   // Clear the liveness counting data
3255   clear_all_count_data();
3256   // Empty mark stack
3257   reset_marking_state();
3258   for (uint i = 0; i < _max_worker_id; ++i) {
3259     _tasks[i]->clear_region_fields();
3260   }
3261   _first_overflow_barrier_sync.abort();
3262   _second_overflow_barrier_sync.abort();
3263   _has_aborted = true;
3264 
3265   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3266   satb_mq_set.abandon_partial_marking();
3267   // This can be called either during or outside marking; we'll read
3268   // the expected_active value from the SATB queue set.
3269   satb_mq_set.set_active_all_threads(
3270                                  false, /* new active value */
3271                                  satb_mq_set.is_active() /* expected_active */);
3272 
3273   _g1h->trace_heap_after_concurrent_cycle();
3274   _g1h->register_concurrent_cycle_end();
3275 }
3276 
3277 static void print_ms_time_info(const char* prefix, const char* name,
3278                                NumberSeq& ns) {
3279   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3280                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3281   if (ns.num() > 0) {
3282     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3283                            prefix, ns.sd(), ns.maximum());
3284   }
3285 }
3286 
3287 void ConcurrentMark::print_summary_info() {
3288   gclog_or_tty->print_cr(" Concurrent marking:");
3289   print_ms_time_info("  ", "init marks", _init_times);
3290   print_ms_time_info("  ", "remarks", _remark_times);
3291   {
3292     print_ms_time_info("     ", "final marks", _remark_mark_times);
3293     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3294 
3295   }
3296   print_ms_time_info("  ", "cleanups", _cleanup_times);
3297   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3298                          _total_counting_time,
3299                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3300                           (double)_cleanup_times.num()
3301                          : 0.0));
3302   if (G1ScrubRemSets) {
3303     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3304                            _total_rs_scrub_time,
3305                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3306                             (double)_cleanup_times.num()
3307                            : 0.0));
3308   }
3309   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3310                          (_init_times.sum() + _remark_times.sum() +
3311                           _cleanup_times.sum())/1000.0);
3312   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3313                 "(%8.2f s marking).",
3314                 cmThread()->vtime_accum(),
3315                 cmThread()->vtime_mark_accum());
3316 }
3317 
3318 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3319   if (use_parallel_marking_threads()) {
3320     _parallel_workers->print_worker_threads_on(st);
3321   }
3322 }
3323 
3324 void ConcurrentMark::print_on_error(outputStream* st) const {
3325   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
3326       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
3327   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
3328   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
3329 }
3330 
3331 // We take a break if someone is trying to stop the world.
3332 bool ConcurrentMark::do_yield_check(uint worker_id) {
3333   if (SuspendibleThreadSet::should_yield()) {
3334     if (worker_id == 0) {
3335       _g1h->g1_policy()->record_concurrent_pause();
3336     }
3337     SuspendibleThreadSet::yield();
3338     return true;
3339   } else {
3340     return false;
3341   }
3342 }
3343 
3344 bool ConcurrentMark::containing_card_is_marked(void* p) {
3345   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3346   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3347 }
3348 
3349 bool ConcurrentMark::containing_cards_are_marked(void* start,
3350                                                  void* last) {
3351   return containing_card_is_marked(start) &&
3352          containing_card_is_marked(last);
3353 }
3354 
3355 #ifndef PRODUCT
3356 // for debugging purposes
3357 void ConcurrentMark::print_finger() {
3358   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3359                          p2i(_heap_start), p2i(_heap_end), p2i(_finger));
3360   for (uint i = 0; i < _max_worker_id; ++i) {
3361     gclog_or_tty->print("   %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
3362   }
3363   gclog_or_tty->cr();
3364 }
3365 #endif
3366 
3367 void CMTask::scan_object(oop obj) {
3368   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3369 
3370   if (_cm->verbose_high()) {
3371     gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
3372                            _worker_id, p2i((void*) obj));
3373   }
3374 
3375   size_t obj_size = obj->size();
3376   _words_scanned += obj_size;
3377 
3378   obj->oop_iterate(_cm_oop_closure);
3379   statsOnly( ++_objs_scanned );
3380   check_limits();
3381 }
3382 
3383 // Closure for iteration over bitmaps
3384 class CMBitMapClosure : public BitMapClosure {
3385 private:
3386   // the bitmap that is being iterated over
3387   CMBitMap*                   _nextMarkBitMap;
3388   ConcurrentMark*             _cm;
3389   CMTask*                     _task;
3390 
3391 public:
3392   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3393     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3394 
3395   bool do_bit(size_t offset) {
3396     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3397     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3398     assert( addr < _cm->finger(), "invariant");
3399 
3400     statsOnly( _task->increase_objs_found_on_bitmap() );
3401     assert(addr >= _task->finger(), "invariant");
3402 
3403     // We move that task's local finger along.
3404     _task->move_finger_to(addr);
3405 
3406     _task->scan_object(oop(addr));
3407     // we only partially drain the local queue and global stack
3408     _task->drain_local_queue(true);
3409     _task->drain_global_stack(true);
3410 
3411     // if the has_aborted flag has been raised, we need to bail out of
3412     // the iteration
3413     return !_task->has_aborted();
3414   }
3415 };
3416 
3417 // Closure for iterating over objects, currently only used for
3418 // processing SATB buffers.
3419 class CMObjectClosure : public ObjectClosure {
3420 private:
3421   CMTask* _task;
3422 
3423 public:
3424   void do_object(oop obj) {
3425     _task->deal_with_reference(obj);
3426   }
3427 
3428   CMObjectClosure(CMTask* task) : _task(task) { }
3429 };
3430 
3431 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3432                                ConcurrentMark* cm,
3433                                CMTask* task)
3434   : _g1h(g1h), _cm(cm), _task(task) {
3435   assert(_ref_processor == NULL, "should be initialized to NULL");
3436 
3437   if (G1UseConcMarkReferenceProcessing) {
3438     _ref_processor = g1h->ref_processor_cm();
3439     assert(_ref_processor != NULL, "should not be NULL");
3440   }
3441 }
3442 
3443 void CMTask::setup_for_region(HeapRegion* hr) {
3444   assert(hr != NULL,
3445         "claim_region() should have filtered out NULL regions");
3446   assert(!hr->continuesHumongous(),
3447         "claim_region() should have filtered out continues humongous regions");
3448 
3449   if (_cm->verbose_low()) {
3450     gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3451                            _worker_id, p2i(hr));
3452   }
3453 
3454   _curr_region  = hr;
3455   _finger       = hr->bottom();
3456   update_region_limit();
3457 }
3458 
3459 void CMTask::update_region_limit() {
3460   HeapRegion* hr            = _curr_region;
3461   HeapWord* bottom          = hr->bottom();
3462   HeapWord* limit           = hr->next_top_at_mark_start();
3463 
3464   if (limit == bottom) {
3465     if (_cm->verbose_low()) {
3466       gclog_or_tty->print_cr("[%u] found an empty region "
3467                              "["PTR_FORMAT", "PTR_FORMAT")",
3468                              _worker_id, p2i(bottom), p2i(limit));
3469     }
3470     // The region was collected underneath our feet.
3471     // We set the finger to bottom to ensure that the bitmap
3472     // iteration that will follow this will not do anything.
3473     // (this is not a condition that holds when we set the region up,
3474     // as the region is not supposed to be empty in the first place)
3475     _finger = bottom;
3476   } else if (limit >= _region_limit) {
3477     assert(limit >= _finger, "peace of mind");
3478   } else {
3479     assert(limit < _region_limit, "only way to get here");
3480     // This can happen under some pretty unusual circumstances.  An
3481     // evacuation pause empties the region underneath our feet (NTAMS
3482     // at bottom). We then do some allocation in the region (NTAMS
3483     // stays at bottom), followed by the region being used as a GC
3484     // alloc region (NTAMS will move to top() and the objects
3485     // originally below it will be grayed). All objects now marked in
3486     // the region are explicitly grayed, if below the global finger,
3487     // and in fact we do not need to scan anything else. So, we simply
3488     // set _finger to be limit to ensure that the bitmap iteration
3489     // doesn't do anything.
3490     _finger = limit;
3491   }
3492 
3493   _region_limit = limit;
3494 }
3495 
3496 void CMTask::giveup_current_region() {
3497   assert(_curr_region != NULL, "invariant");
3498   if (_cm->verbose_low()) {
3499     gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3500                            _worker_id, p2i(_curr_region));
3501   }
3502   clear_region_fields();
3503 }
3504 
3505 void CMTask::clear_region_fields() {
3506   // Set these three fields to values that indicate that we're not
3507   // holding on to a region.
3508   _curr_region   = NULL;
3509   _finger        = NULL;
3510   _region_limit  = NULL;
3511 }
3512 
3513 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3514   if (cm_oop_closure == NULL) {
3515     assert(_cm_oop_closure != NULL, "invariant");
3516   } else {
3517     assert(_cm_oop_closure == NULL, "invariant");
3518   }
3519   _cm_oop_closure = cm_oop_closure;
3520 }
3521 
3522 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3523   guarantee(nextMarkBitMap != NULL, "invariant");
3524 
3525   if (_cm->verbose_low()) {
3526     gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3527   }
3528 
3529   _nextMarkBitMap                = nextMarkBitMap;
3530   clear_region_fields();
3531 
3532   _calls                         = 0;
3533   _elapsed_time_ms               = 0.0;
3534   _termination_time_ms           = 0.0;
3535   _termination_start_time_ms     = 0.0;
3536 
3537 #if _MARKING_STATS_
3538   _local_pushes                  = 0;
3539   _local_pops                    = 0;
3540   _local_max_size                = 0;
3541   _objs_scanned                  = 0;
3542   _global_pushes                 = 0;
3543   _global_pops                   = 0;
3544   _global_max_size               = 0;
3545   _global_transfers_to           = 0;
3546   _global_transfers_from         = 0;
3547   _regions_claimed               = 0;
3548   _objs_found_on_bitmap          = 0;
3549   _satb_buffers_processed        = 0;
3550   _steal_attempts                = 0;
3551   _steals                        = 0;
3552   _aborted                       = 0;
3553   _aborted_overflow              = 0;
3554   _aborted_cm_aborted            = 0;
3555   _aborted_yield                 = 0;
3556   _aborted_timed_out             = 0;
3557   _aborted_satb                  = 0;
3558   _aborted_termination           = 0;
3559 #endif // _MARKING_STATS_
3560 }
3561 
3562 bool CMTask::should_exit_termination() {
3563   regular_clock_call();
3564   // This is called when we are in the termination protocol. We should
3565   // quit if, for some reason, this task wants to abort or the global
3566   // stack is not empty (this means that we can get work from it).
3567   return !_cm->mark_stack_empty() || has_aborted();
3568 }
3569 
3570 void CMTask::reached_limit() {
3571   assert(_words_scanned >= _words_scanned_limit ||
3572          _refs_reached >= _refs_reached_limit ,
3573          "shouldn't have been called otherwise");
3574   regular_clock_call();
3575 }
3576 
3577 void CMTask::regular_clock_call() {
3578   if (has_aborted()) return;
3579 
3580   // First, we need to recalculate the words scanned and refs reached
3581   // limits for the next clock call.
3582   recalculate_limits();
3583 
3584   // During the regular clock call we do the following
3585 
3586   // (1) If an overflow has been flagged, then we abort.
3587   if (_cm->has_overflown()) {
3588     set_has_aborted();
3589     return;
3590   }
3591 
3592   // If we are not concurrent (i.e. we're doing remark) we don't need
3593   // to check anything else. The other steps are only needed during
3594   // the concurrent marking phase.
3595   if (!concurrent()) return;
3596 
3597   // (2) If marking has been aborted for Full GC, then we also abort.
3598   if (_cm->has_aborted()) {
3599     set_has_aborted();
3600     statsOnly( ++_aborted_cm_aborted );
3601     return;
3602   }
3603 
3604   double curr_time_ms = os::elapsedVTime() * 1000.0;
3605 
3606   // (3) If marking stats are enabled, then we update the clock call statistics.
3607 #if _MARKING_STATS_
3608   if (_words_scanned >= _words_scanned_limit) {
3609     ++_clock_due_to_scanning;
3610   }
3611   if (_refs_reached >= _refs_reached_limit) {
3612     ++_clock_due_to_marking;
3613   }
3614 
3615   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3616   _interval_start_time_ms = curr_time_ms;
3617   _all_clock_intervals_ms.add(last_interval_ms);
3618 
3619   if (_cm->verbose_medium()) {
3620       gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3621                         "scanned = %d%s, refs reached = %d%s",
3622                         _worker_id, last_interval_ms,
3623                         _words_scanned,
3624                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3625                         _refs_reached,
3626                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3627   }
3628 #endif // _MARKING_STATS_
3629 
3630   // (4) We check whether we should yield. If we have to, then we abort.
3631   if (SuspendibleThreadSet::should_yield()) {
3632     // We should yield. To do this we abort the task. The caller is
3633     // responsible for yielding.
3634     set_has_aborted();
3635     statsOnly( ++_aborted_yield );
3636     return;
3637   }
3638 
3639   // (5) We check whether we've reached our time quota. If we have,
3640   // then we abort.
3641   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3642   if (elapsed_time_ms > _time_target_ms) {
3643     set_has_aborted();
3644     _has_timed_out = true;
3645     statsOnly( ++_aborted_timed_out );
3646     return;
3647   }
3648 
3649   // (6) Finally, we check whether there are enough completed SATB
3650   // buffers available for processing. If there are, we abort.
3651   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3652   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3653     if (_cm->verbose_low()) {
3654       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3655                              _worker_id);
3656     }
3657     // we do need to process SATB buffers; we'll abort and restart
3658     // the marking task to do so
3659     set_has_aborted();
3660     statsOnly( ++_aborted_satb );
3661     return;
3662   }
3663 }
3664 
3665 void CMTask::recalculate_limits() {
3666   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3667   _words_scanned_limit      = _real_words_scanned_limit;
3668 
3669   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3670   _refs_reached_limit       = _real_refs_reached_limit;
3671 }
3672 
3673 void CMTask::decrease_limits() {
3674   // This is called when we believe that we're going to do an infrequent
3675   // operation which will increase the per byte scanned cost (i.e. move
3676   // entries to/from the global stack). It basically tries to decrease the
3677   // scanning limit so that the clock is called earlier.
3678 
3679   if (_cm->verbose_medium()) {
3680     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3681   }
3682 
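  // Pull both limits back by three quarters of a period so that the
  // next regular clock call is triggered after roughly a quarter of the
  // usual amount of work.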
3683   _words_scanned_limit = _real_words_scanned_limit -
3684     3 * words_scanned_period / 4;
3685   _refs_reached_limit  = _real_refs_reached_limit -
3686     3 * refs_reached_period / 4;
3687 }
3688 
3689 void CMTask::move_entries_to_global_stack() {
3690   // local array where we'll store the entries that will be popped
3691   // from the local queue
3692   oop buffer[global_stack_transfer_size];
3693 
3694   int n = 0;
3695   oop obj;
3696   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3697     buffer[n] = obj;
3698     ++n;
3699   }
3700 
3701   if (n > 0) {
3702     // we popped at least one entry from the local queue
3703 
3704     statsOnly( ++_global_transfers_to; _local_pops += n );
3705 
3706     if (!_cm->mark_stack_push(buffer, n)) {
3707       if (_cm->verbose_low()) {
3708         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3709                                _worker_id);
3710       }
3711       set_has_aborted();
3712     } else {
3713       // the transfer was successful
3714 
3715       if (_cm->verbose_medium()) {
3716         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3717                                _worker_id, n);
3718       }
3719       statsOnly( int tmp_size = _cm->mark_stack_size();
3720                  if (tmp_size > _global_max_size) {
3721                    _global_max_size = tmp_size;
3722                  }
3723                  _global_pushes += n );
3724     }
3725   }
3726 
3727   // this operation was quite expensive, so decrease the limits
3728   decrease_limits();
3729 }
3730 
3731 void CMTask::get_entries_from_global_stack() {
3732   // local array where we'll store the entries that will be popped
3733   // from the global stack.
3734   oop buffer[global_stack_transfer_size];
3735   int n;
3736   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3737   assert(n <= global_stack_transfer_size,
3738          "we should not pop more than the given limit");
3739   if (n > 0) {
3740     // yes, we did actually pop at least one entry
3741 
3742     statsOnly( ++_global_transfers_from; _global_pops += n );
3743     if (_cm->verbose_medium()) {
3744       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3745                              _worker_id, n);
3746     }
3747     for (int i = 0; i < n; ++i) {
3748       bool success = _task_queue->push(buffer[i]);
3749       // We only call this when the local queue is empty or under a
3750       // given target limit. So, we do not expect this push to fail.
3751       assert(success, "invariant");
3752     }
3753 
3754     statsOnly( int tmp_size = _task_queue->size();
3755                if (tmp_size > _local_max_size) {
3756                  _local_max_size = tmp_size;
3757                }
3758                _local_pushes += n );
3759   }
3760 
3761   // this operation was quite expensive, so decrease the limits
3762   decrease_limits();
3763 }
3764 
3765 void CMTask::drain_local_queue(bool partially) {
3766   if (has_aborted()) return;
3767 
3768   // Decide what the target size is, depending on whether we're going to
3769   // drain it partially (so that other tasks can steal if they run out
3770   // of things to do) or totally (at the very end).
3771   size_t target_size;
3772   if (partially) {
3773     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3774   } else {
3775     target_size = 0;
3776   }
3777 
3778   if (_task_queue->size() > target_size) {
3779     if (_cm->verbose_high()) {
3780       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3781                              _worker_id, target_size);
3782     }
3783 
3784     oop obj;
3785     bool ret = _task_queue->pop_local(obj);
3786     while (ret) {
3787       statsOnly( ++_local_pops );
3788 
3789       if (_cm->verbose_high()) {
3790         gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3791                                p2i((void*) obj));
3792       }
3793 
3794       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3795       assert(!_g1h->is_on_master_free_list(
3796                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3797 
3798       scan_object(obj);
3799 
3800       if (_task_queue->size() <= target_size || has_aborted()) {
3801         ret = false;
3802       } else {
3803         ret = _task_queue->pop_local(obj);
3804       }
3805     }
3806 
3807     if (_cm->verbose_high()) {
3808       gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3809                              _worker_id, _task_queue->size());
3810     }
3811   }
3812 }
3813 
3814 void CMTask::drain_global_stack(bool partially) {
3815   if (has_aborted()) return;
3816 
3817   // We have a policy to drain the local queue before we attempt to
3818   // drain the global stack.
3819   assert(partially || _task_queue->size() == 0, "invariant");
3820 
3821   // Decide what the target size is, depending on whether we're going to
3822   // drain it partially (so that other tasks can steal if they run out
3823   // of things to do) or totally (at the very end).  Notice that,
3824   // because we move entries from the global stack in chunks or
3825   // because another task might be doing the same, we might in fact
3826   // drop below the target. But this is not a problem.
3827   size_t target_size;
3828   if (partially) {
3829     target_size = _cm->partial_mark_stack_size_target();
3830   } else {
3831     target_size = 0;
3832   }
3833 
3834   if (_cm->mark_stack_size() > target_size) {
3835     if (_cm->verbose_low()) {
3836       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3837                              _worker_id, target_size);
3838     }
3839 
3840     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3841       get_entries_from_global_stack();
3842       drain_local_queue(partially);
3843     }
3844 
3845     if (_cm->verbose_low()) {
3846       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3847                              _worker_id, _cm->mark_stack_size());
3848     }
3849   }
3850 }
3851 
3852 // SATB Queue has several assumptions on whether to call the par or
3853 // non-par versions of the methods. This is why some of the code is
3854 // replicated. We should really get rid of the single-threaded version
3855 // of the code to simplify things.
3856 void CMTask::drain_satb_buffers() {
3857   if (has_aborted()) return;
3858 
3859   // We set this so that the regular clock knows that we're in the
3860   // middle of draining buffers and doesn't set the abort flag when it
3861   // notices that SATB buffers are available for draining. It'd be
3862   // very counterproductive if it did that. :-)
3863   _draining_satb_buffers = true;
3864 
3865   CMObjectClosure oc(this);
3866   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3867   if (G1CollectedHeap::use_parallel_gc_threads()) {
3868     satb_mq_set.set_par_closure(_worker_id, &oc);
3869   } else {
3870     satb_mq_set.set_closure(&oc);
3871   }
3872 
3873   // This keeps claiming and applying the closure to completed buffers
3874   // until we run out of buffers or we need to abort.
3875   if (G1CollectedHeap::use_parallel_gc_threads()) {
3876     while (!has_aborted() &&
3877            satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
3878       if (_cm->verbose_medium()) {
3879         gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3880       }
3881       statsOnly( ++_satb_buffers_processed );
3882       regular_clock_call();
3883     }
3884   } else {
3885     while (!has_aborted() &&
3886            satb_mq_set.apply_closure_to_completed_buffer()) {
3887       if (_cm->verbose_medium()) {
3888         gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3889       }
3890       statsOnly( ++_satb_buffers_processed );
3891       regular_clock_call();
3892     }
3893   }
3894 
3895   if (!concurrent() && !has_aborted()) {
3896     // We should only do this during remark.
3897     if (G1CollectedHeap::use_parallel_gc_threads()) {
3898       satb_mq_set.par_iterate_closure_all_threads(_worker_id);
3899     } else {
3900       satb_mq_set.iterate_closure_all_threads();
3901     }
3902   }
3903 
3904   _draining_satb_buffers = false;
3905 
3906   assert(has_aborted() ||
3907          concurrent() ||
3908          satb_mq_set.completed_buffers_num() == 0, "invariant");
3909 
3910   if (G1CollectedHeap::use_parallel_gc_threads()) {
3911     satb_mq_set.set_par_closure(_worker_id, NULL);
3912   } else {
3913     satb_mq_set.set_closure(NULL);
3914   }
3915 
3916   // again, this was a potentially expensive operation, decrease the
3917   // limits to get the regular clock call early
3918   decrease_limits();
3919 }
3920 
3921 void CMTask::print_stats() {
3922   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3923                          _worker_id, _calls);
3924   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3925                          _elapsed_time_ms, _termination_time_ms);
3926   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3927                          _step_times_ms.num(), _step_times_ms.avg(),
3928                          _step_times_ms.sd());
3929   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3930                          _step_times_ms.maximum(), _step_times_ms.sum());
3931 
3932 #if _MARKING_STATS_
3933   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3934                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3935                          _all_clock_intervals_ms.sd());
3936   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3937                          _all_clock_intervals_ms.maximum(),
3938                          _all_clock_intervals_ms.sum());
3939   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
3940                          _clock_due_to_scanning, _clock_due_to_marking);
3941   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
3942                          _objs_scanned, _objs_found_on_bitmap);
3943   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
3944                          _local_pushes, _local_pops, _local_max_size);
3945   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
3946                          _global_pushes, _global_pops, _global_max_size);
3947   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
3948                          _global_transfers_to,_global_transfers_from);
3949   gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);
3950   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
3951   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
3952                          _steal_attempts, _steals);
3953   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
3954   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
3955                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3956   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
3957                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3958 #endif // _MARKING_STATS_
3959 }
3960 
3961 /*****************************************************************************
3962 
3963     The do_marking_step(time_target_ms, ...) method is the building
3964     block of the parallel marking framework. It can be called in parallel
3965     with other invocations of do_marking_step() on different tasks
3966     (but only one per task, obviously) and concurrently with the
3967     mutator threads, or during remark, hence it eliminates the need
3968     for two versions of the code. When called during remark, it will
3969     pick up from where the task left off during the concurrent marking
3970     phase. Interestingly, tasks are also claimable during evacuation
3971     pauses, since do_marking_step() ensures that it aborts before
3972     it needs to yield.
3973 
3974     The data structures that it uses to do marking work are the
3975     following:
3976 
3977       (1) Marking Bitmap. If there are gray objects that appear only
3978       on the bitmap (this happens either when dealing with an overflow
3979       or when the initial marking phase has simply marked the roots
3980       and not pushed them on the stack), then tasks claim heap
3981       regions whose bitmap they then scan to find gray objects. A
3982       global finger indicates where the end of the last claimed region
3983       is. A local finger indicates how far into the region a task has
3984       scanned. The two fingers are used to determine how to gray an
3985       object (i.e. whether simply marking it is OK, as it will be
3986       visited by a task in the future, or whether it needs to be also
3987       pushed on a stack).
3988 
3989       (2) Local Queue. The local queue of the task which is accessed
3990       reasonably efficiently by the task. Other tasks can steal from
3991       it when they run out of work. Throughout the marking phase, a
3992       task attempts to keep its local queue short but not totally
3993       empty, so that entries are available for stealing by other
3994       tasks. Only when there is no more work will a task totally
3995       drain its local queue.
3996 
3997       (3) Global Mark Stack. This handles local queue overflow. During
3998       marking only sets of entries are moved between it and the local
3999       queues, as access to it requires a mutex and more fine-grained
4000       interaction with it might cause contention. If it
4001       overflows, then the marking phase should restart and iterate
4002       over the bitmap to identify gray objects. Throughout the marking
4003       phase, tasks attempt to keep the global mark stack at a small
4004       length but not totally empty, so that entries are available for
4005       popping by other tasks. Only when there is no more work will
4006       tasks totally drain the global mark stack.
4007 
4008       (4) SATB Buffer Queue. This is where completed SATB buffers are
4009       made available. Buffers are regularly removed from this queue
4010       and scanned for roots, so that the queue doesn't get too
4011       long. During remark, all completed buffers are processed, as
4012       well as the filled-in parts of any uncompleted buffers.
4013 
4014     The do_marking_step() method tries to abort when the time target
4015     has been reached. There are a few other cases when the
4016     do_marking_step() method also aborts:
4017 
4018       (1) When the marking phase has been aborted (after a Full GC).
4019 
4020       (2) When a global overflow (on the global stack) has been
4021       triggered. Before the task aborts, it will actually sync up with
4022       the other tasks to ensure that all the marking data structures
4023       (local queues, stacks, fingers etc.)  are re-initialized so that
4024       when do_marking_step() completes, the marking phase can
4025       immediately restart.
4026 
4027       (3) When enough completed SATB buffers are available. The
4028       do_marking_step() method only tries to drain SATB buffers right
4029       at the beginning. So, if enough buffers are available, the
4030       marking step aborts and the SATB buffers are processed at
4031       the beginning of the next invocation.
4032 
4033       (4) To yield. When we have to yield, we abort and yield
4034       right at the end of do_marking_step(). This saves us from a lot
4035       of hassle as, by yielding, we might allow a Full GC. If this
4036       happens then objects will be compacted underneath our feet, the
4037       heap might shrink, etc. We save checking for this by just
4038       aborting and doing the yield right at the end.
4039 
4040     From the above it follows that the do_marking_step() method should
4041     be called in a loop (or, otherwise, regularly) until it completes.
4042 
4043     If a marking step completes without its has_aborted() flag being
4044     true, it means it has completed the current marking phase (and
4045     also all other marking tasks have done so and have all synced up).
4046 
4047     A method called regular_clock_call() is invoked "regularly" (in
4048     sub ms intervals) throughout marking. It is this clock method that
4049     checks all the abort conditions which were mentioned above and
4050     decides when the task should abort. A work-based scheme is used to
4051     trigger this clock method: when the number of object words the
4052     marking phase has scanned or the number of references the marking
4053     phase has visited reach a given limit. Additional invocations of
4054     the clock method have been planted in a few other strategic places
4055     too. The initial reason for the clock method was to avoid calling
4056     vtime too regularly, as it is quite expensive. So, once it was in
4057     place, it was natural to piggy-back all the other conditions on it
4058     too and not constantly check them throughout the code.
4059 
4060     If do_termination is true then do_marking_step will enter its
4061     termination protocol.
4062 
4063     The value of is_serial must be true when do_marking_step is being
4064     called serially (i.e. by the VMThread) and do_marking_step should
4065     skip any synchronization in the termination and overflow code.
4066     Examples include the serial remark code and the serial reference
4067     processing closures.
4068 
4069     The value of is_serial must be false when do_marking_step is
4070     being called by any of the worker threads in a work gang.
4071     Examples include the concurrent marking code (CMMarkingTask),
4072     the MT remark code, and the MT reference processing closures.
4073 
4074  *****************************************************************************/
4075 
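// An illustrative sketch (not actual caller code) of the retry loop the
// comment above describes; 'task' and 'cm' are placeholders for a CMTask
// and the ConcurrentMark instance. Real callers, such as the concurrent
// marking task, additionally handle time slicing, yielding, and the
// overflow restart between calls:
//
//   do {
//     task->do_marking_step(target_ms,
//                           true  /* do_termination */,
//                           false /* is_serial */);
//     // ... yield to a pending safepoint / back off if needed ...
//   } while (task->has_aborted() && !cm->has_aborted());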
4076 void CMTask::do_marking_step(double time_target_ms,
4077                              bool do_termination,
4078                              bool is_serial) {
4079   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4080   assert(concurrent() == _cm->concurrent(), "they should be the same");
4081 
4082   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4083   assert(_task_queues != NULL, "invariant");
4084   assert(_task_queue != NULL, "invariant");
4085   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4086 
4087   assert(!_claimed,
4088          "only one thread should claim this task at any one time");
4089 
4090   // OK, this doesn't safeguard against all possible scenarios, as it is
4091   // possible for two threads to set the _claimed flag at the same
4092   // time. But it is only for debugging purposes anyway and it will
4093   // catch most problems.
4094   _claimed = true;
4095 
4096   _start_time_ms = os::elapsedVTime() * 1000.0;
4097   statsOnly( _interval_start_time_ms = _start_time_ms );
4098 
4099   // If do_stealing is true then do_marking_step will attempt to
4100   // steal work from the other CMTasks. It only makes sense to
4101   // enable stealing when the termination protocol is enabled
4102   // and do_marking_step() is not being called serially.
4103   bool do_stealing = do_termination && !is_serial;
4104 
4105   double diff_prediction_ms =
4106     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4107   _time_target_ms = time_target_ms - diff_prediction_ms;
4108 
4109   // set up the variables that are used in the work-based scheme to
4110   // call the regular clock method
4111   _words_scanned = 0;
4112   _refs_reached  = 0;
4113   recalculate_limits();
4114 
4115   // clear all flags
4116   clear_has_aborted();
4117   _has_timed_out = false;
4118   _draining_satb_buffers = false;
4119 
4120   ++_calls;
4121 
4122   if (_cm->verbose_low()) {
4123     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4124                            "target = %1.2lfms >>>>>>>>>>",
4125                            _worker_id, _calls, _time_target_ms);
4126   }
4127 
4128   // Set up the bitmap and oop closures. Anything that uses them is
4129   // eventually called from this method, so it is OK to allocate them
4130   // as locals on this method's stack.
4131   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4132   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
4133   set_cm_oop_closure(&cm_oop_closure);
4134 
4135   if (_cm->has_overflown()) {
4136     // This can happen if the mark stack overflows during a GC pause
4137     // and this task, after a yield point, restarts. We have to abort
4138     // as we need to get into the overflow protocol which happens
4139     // right at the end of this task.
4140     set_has_aborted();
4141   }
4142 
4143   // First drain any available SATB buffers. After this, we will not
4144   // look at SATB buffers before the next invocation of this method.
4145   // If enough completed SATB buffers are queued up, the regular clock
4146   // will abort this task so that it restarts.
4147   drain_satb_buffers();
4148   // ...then partially drain the local queue and the global stack
4149   drain_local_queue(true);
4150   drain_global_stack(true);
4151 
4152   do {
4153     if (!has_aborted() && _curr_region != NULL) {
4154       // This means that we're already holding on to a region.
4155       assert(_finger != NULL, "if region is not NULL, then the finger "
4156              "should not be NULL either");
4157 
4158       // We might have restarted this task after an evacuation pause
4159       // which might have evacuated the region we're holding on to
4160       // underneath our feet. Let's read its limit again to make sure
4161       // that we do not iterate over a region of the heap that
4162       // contains garbage (update_region_limit() will also move
4163       // _finger to the start of the region if it is found empty).
4164       update_region_limit();
4165       // We will start from _finger not from the start of the region,
4166       // as we might be restarting this task after aborting half-way
4167       // through scanning this region. In this case, _finger points to
4168       // the address where we last found a marked object. If this is a
4169       // fresh region, _finger points to start().
4170       MemRegion mr = MemRegion(_finger, _region_limit);
4171 
4172       if (_cm->verbose_low()) {
4173         gclog_or_tty->print_cr("[%u] we're scanning part "
4174                                "["PTR_FORMAT", "PTR_FORMAT") "
4175                                "of region "HR_FORMAT,
4176                                _worker_id, p2i(_finger), p2i(_region_limit),
4177                                HR_FORMAT_PARAMS(_curr_region));
4178       }
4179 
4180       assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(),
4181              "humongous regions should go around loop once only");
4182 
4183       // Some special cases:
4184       // If the memory region is empty, we can just give up the region.
4185       // If the current region is humongous then we only need to check
4186       // the bitmap for the bit associated with the start of the object,
4187       // scan the object if it's live, and give up the region.
4188       // Otherwise, let's iterate over the bitmap of the part of the region
4189       // that is left.
4190       // If the iteration is successful, give up the region.
4191       if (mr.is_empty()) {
4192         giveup_current_region();
4193         regular_clock_call();
4194       } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) {
4195         if (_nextMarkBitMap->isMarked(mr.start())) {
4196           // The object is marked - apply the closure
4197           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
4198           bitmap_closure.do_bit(offset);
4199         }
4200         // Even if this task aborted while scanning the humongous object
4201         // we can (and should) give up the current region.
4202         giveup_current_region();
4203         regular_clock_call();
4204       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4205         giveup_current_region();
4206         regular_clock_call();
4207       } else {
4208         assert(has_aborted(), "currently the only way to do so");
4209         // The only way to abort the bitmap iteration is to return
4210         // false from the do_bit() method. However, inside the
4211         // do_bit() method we move the _finger to point to the
4212         // object currently being looked at. So, if we bail out, we
4213         // have definitely set _finger to something non-null.
4214         assert(_finger != NULL, "invariant");
4215 
4216         // Region iteration was actually aborted. So now _finger
4217         // points to the address of the object we last scanned. If we
4218         // leave it there, when we restart this task, we will rescan
4219         // the object. It is easy to avoid this. We move the finger by
4220         // enough to point to the next possible object header (the
4221         // bitmap knows by how much we need to move it as it knows its
4222         // granularity).
4223         assert(_finger < _region_limit, "invariant");
4224         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
4225         // Check if bitmap iteration was aborted while scanning the last object
4226         if (new_finger >= _region_limit) {
4227           giveup_current_region();
4228         } else {
4229           move_finger_to(new_finger);
4230         }
4231       }
4232     }
4233     // At this point we have either completed iterating over the
4234     // region we were holding on to, or we have aborted.
4235 
4236     // We then partially drain the local queue and the global stack.
4237     // (Do we really need this?)
4238     drain_local_queue(true);
4239     drain_global_stack(true);
4240 
4241     // Read the note on the claim_region() method on why it might
4242     // return NULL with potentially more regions available for
4243     // claiming and why we have to check out_of_regions() to determine
4244     // whether we're done or not.
4245     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4246       // We are going to try to claim a new region. We should have
4247       // given up on the previous one.
4248       // Separated the asserts so that we know which one fires.
4249       assert(_curr_region  == NULL, "invariant");
4250       assert(_finger       == NULL, "invariant");
4251       assert(_region_limit == NULL, "invariant");
4252       if (_cm->verbose_low()) {
4253         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4254       }
4255       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4256       if (claimed_region != NULL) {
4257         // Yes, we managed to claim one
4258         statsOnly( ++_regions_claimed );
4259 
4260         if (_cm->verbose_low()) {
4261           gclog_or_tty->print_cr("[%u] we successfully claimed "
4262                                  "region "PTR_FORMAT,
4263                                  _worker_id, p2i(claimed_region));
4264         }
4265 
4266         setup_for_region(claimed_region);
4267         assert(_curr_region == claimed_region, "invariant");
4268       }
4269       // It is important to call the regular clock here. It might take
4270       // a while to claim a region if, for example, we hit a large
4271       // block of empty regions. So we need to call the regular clock
4272       // method once round the loop to make sure it's called
4273       // frequently enough.
4274       regular_clock_call();
4275     }
4276 
4277     if (!has_aborted() && _curr_region == NULL) {
4278       assert(_cm->out_of_regions(),
4279              "at this point we should be out of regions");
4280     }
4281   } while ( _curr_region != NULL && !has_aborted());
4282 
4283   if (!has_aborted()) {
4284     // We cannot check whether the global stack is empty, since other
4285     // tasks might be pushing objects to it concurrently.
4286     assert(_cm->out_of_regions(),
4287            "at this point we should be out of regions");
4288 
4289     if (_cm->verbose_low()) {
4290       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4291     }
4292 
4293     // Try to reduce the number of available SATB buffers so that
4294     // remark has less work to do.
4295     drain_satb_buffers();
4296   }
4297 
4298   // Since we've done everything else, we can now totally drain the
4299   // local queue and global stack.
4300   drain_local_queue(false);
4301   drain_global_stack(false);
4302 
4303   // Attempt to steal work from other tasks' queues.
4304   if (do_stealing && !has_aborted()) {
4305     // We have not aborted. This means that we have finished all that
4306     // we could. Let's try to do some stealing...
4307 
4308     // We cannot check whether the global stack is empty, since other
4309     // tasks might be pushing objects to it concurrently.
4310     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4311            "only way to reach here");
4312 
4313     if (_cm->verbose_low()) {
4314       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4315     }
4316 
4317     while (!has_aborted()) {
4318       oop obj;
4319       statsOnly( ++_steal_attempts );
4320 
4321       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4322         if (_cm->verbose_medium()) {
4323           gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4324                                  _worker_id, p2i((void*) obj));
4325         }
4326 
4327         statsOnly( ++_steals );
4328 
4329         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4330                "any stolen object should be marked");
4331         scan_object(obj);
4332 
4333         // And since we're towards the end, let's totally drain the
4334         // local queue and global stack.
4335         drain_local_queue(false);
4336         drain_global_stack(false);
4337       } else {
4338         break;
4339       }
4340     }
4341   }
4342 
4343   // If we are about to wrap up and go into termination, check if we
4344   // should raise the overflow flag.
4345   if (do_termination && !has_aborted()) {
4346     if (_cm->force_overflow()->should_force()) {
4347       _cm->set_has_overflown();
4348       regular_clock_call();
4349     }
4350   }
4351 
4352   // We still haven't aborted. Now, let's try to get into the
4353   // termination protocol.
4354   if (do_termination && !has_aborted()) {
4355     // We cannot check whether the global stack is empty, since other
4356     // tasks might be concurrently pushing objects on it.
4357     // Separated the asserts so that we know which one fires.
4358     assert(_cm->out_of_regions(), "only way to reach here");
4359     assert(_task_queue->size() == 0, "only way to reach here");
4360 
4361     if (_cm->verbose_low()) {
4362       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4363     }
4364 
4365     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4366 
4367     // The CMTask class also extends the TerminatorTerminator class,
4368     // hence its should_exit_termination() method will also decide
4369     // whether to exit the termination protocol or not.
4370     bool finished = (is_serial ||
4371                      _cm->terminator()->offer_termination(this));
4372     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4373     _termination_time_ms +=
4374       termination_end_time_ms - _termination_start_time_ms;
4375 
4376     if (finished) {
4377       // We're all done.
4378 
4379       if (_worker_id == 0) {
4380         // let's allow task 0 to do this
4381         if (concurrent()) {
4382           assert(_cm->concurrent_marking_in_progress(), "invariant");
4383           // we need to set this to false before the next
4384           // safepoint. This way we ensure that the marking phase
4385           // doesn't observe any more heap expansions.
4386           _cm->clear_concurrent_marking_in_progress();
4387         }
4388       }
4389 
4390       // We can now guarantee that the global stack is empty, since
4391       // all other tasks have finished. We separated the guarantees so
4392       // that, if a condition is false, we can immediately find out
4393       // which one.
4394       guarantee(_cm->out_of_regions(), "only way to reach here");
4395       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4396       guarantee(_task_queue->size() == 0, "only way to reach here");
4397       guarantee(!_cm->has_overflown(), "only way to reach here");
4398       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4399 
4400       if (_cm->verbose_low()) {
4401         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4402       }
4403     } else {
4404       // Apparently there's more work to do. Let's abort this task; it
4405       // will be restarted and hopefully we can then find more things to do.
4406 
4407       if (_cm->verbose_low()) {
4408         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4409                                _worker_id);
4410       }
4411 
4412       set_has_aborted();
4413       statsOnly( ++_aborted_termination );
4414     }
4415   }
4416 
4417   // Mainly for debugging purposes: make sure that a pointer to the
4418   // closure which was stack-allocated in this frame doesn't
4419   // escape it by accident.
4420   set_cm_oop_closure(NULL);
4421   double end_time_ms = os::elapsedVTime() * 1000.0;
4422   double elapsed_time_ms = end_time_ms - _start_time_ms;
4423   // Update the step history.
4424   _step_times_ms.add(elapsed_time_ms);
4425 
4426   if (has_aborted()) {
4427     // The task was aborted for some reason.
4428 
4429     statsOnly( ++_aborted );
4430 
4431     if (_has_timed_out) {
4432       double diff_ms = elapsed_time_ms - _time_target_ms;
4433       // Keep statistics of how well we did with respect to hitting
4434       // our target only if we actually timed out (if we aborted for
4435       // other reasons, then the results might get skewed).
4436       _marking_step_diffs_ms.add(diff_ms);
4437     }
4438 
4439     if (_cm->has_overflown()) {
4440       // This is the interesting one. We aborted because a global
4441       // overflow was raised. This means we have to restart the
4442       // marking phase and start iterating over regions. However, in
4443       // order to do this we have to make sure that all tasks stop
4444       // what they are doing and re-initialize in a safe manner. We
4445       // will achieve this with the use of two barrier sync points.
4446 
4447       if (_cm->verbose_low()) {
4448         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4449       }
4450 
4451       if (!is_serial) {
4452         // We only need to enter the sync barrier if being called
4453         // from a parallel context
4454         _cm->enter_first_sync_barrier(_worker_id);
4455 
4456         // When we exit this sync barrier we know that all tasks have
4457         // stopped doing marking work. So, it's now safe to
4458         // re-initialize our data structures. At the end of this method,
4459         // task 0 will clear the global data structures.
4460       }
4461 
4462       statsOnly( ++_aborted_overflow );
4463 
4464       // We clear the local state of this task...
4465       clear_region_fields();
4466 
4467       if (!is_serial) {
4468         // ...and enter the second barrier.
4469         _cm->enter_second_sync_barrier(_worker_id);
4470       }
4471       // At this point, if we're in the concurrent phase of
4472       // marking, everything has been re-initialized and we're
4473       // ready to restart.
4474     }
4475 
4476     if (_cm->verbose_low()) {
4477       gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4478                              "elapsed = %1.2lfms <<<<<<<<<<",
4479                              _worker_id, _time_target_ms, elapsed_time_ms);
4480       if (_cm->has_aborted()) {
4481         gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4482                                _worker_id);
4483       }
4484     }
4485   } else {
4486     if (_cm->verbose_low()) {
4487       gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4488                              "elapsed = %1.2lfms <<<<<<<<<<",
4489                              _worker_id, _time_target_ms, elapsed_time_ms);
4490     }
4491   }
4492 
4493   _claimed = false;
4494 }
4495 
4496 CMTask::CMTask(uint worker_id,
4497                ConcurrentMark* cm,
4498                size_t* marked_bytes,
4499                BitMap* card_bm,
4500                CMTaskQueue* task_queue,
4501                CMTaskQueueSet* task_queues)
4502   : _g1h(G1CollectedHeap::heap()),
4503     _worker_id(worker_id), _cm(cm),
4504     _claimed(false),
4505     _nextMarkBitMap(NULL), _hash_seed(17),
4506     _task_queue(task_queue),
4507     _task_queues(task_queues),
4508     _cm_oop_closure(NULL),
4509     _marked_bytes_array(marked_bytes),
4510     _card_bm(card_bm) {
4511   guarantee(task_queue != NULL, "invariant");
4512   guarantee(task_queues != NULL, "invariant");
4513 
4514   statsOnly( _clock_due_to_scanning = 0;
4515              _clock_due_to_marking  = 0 );
4516 
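       // Seed the step-time diff sequence with a small initial sample,
       // presumably so that get_new_prediction(&_marking_step_diffs_ms) in
       // do_marking_step() has something to work with on the very first step.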
4517   _marking_step_diffs_ms.add(0.5);
4518 }
4519 
4520 // These are formatting macros that are used below to ensure
4521 // consistent formatting. The *_H_* versions are used to format the
4522 // header for a particular value and they should be kept consistent
4523 // with the corresponding macro. Also note that most of the macros add
4524 // the necessary white space (as a prefix) which makes them a bit
4525 // easier to compose.
4526 
4527 // All the output lines are prefixed with this string to be able to
4528 // identify them easily in a large log file.
4529 #define G1PPRL_LINE_PREFIX            "###"
4530 
4531 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
4532 #ifdef _LP64
4533 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4534 #else // _LP64
4535 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4536 #endif // _LP64
4537 
4538 // For per-region info
4539 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4540 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4541 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
4542 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4543 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4544 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4545 
4546 // For summary info
4547 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
4548 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
4549 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
4550 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
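
     // The macros above rely on adjacent string literal concatenation, so a
     // complete format string is built simply by listing them. A minimal
     // sketch (with fewer columns than the real calls below) would be:
     //
     //   _out->print_cr(G1PPRL_LINE_PREFIX
     //                  G1PPRL_TYPE_FORMAT
     //                  G1PPRL_BYTE_FORMAT,
     //                  type, used_bytes);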
4551 
4552 G1PrintRegionLivenessInfoClosure::
4553 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4554   : _out(out),
4555     _total_used_bytes(0), _total_capacity_bytes(0),
4556     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4557     _hum_used_bytes(0), _hum_capacity_bytes(0),
4558     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
4559     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
4560   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4561   MemRegion g1_committed = g1h->g1_committed();
4562   MemRegion g1_reserved = g1h->g1_reserved();
4563   double now = os::elapsedTime();
4564 
4565   // Print the header of the output.
4566   _out->cr();
4567   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4568   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4569                  G1PPRL_SUM_ADDR_FORMAT("committed")
4570                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4571                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4572                  p2i(g1_committed.start()), p2i(g1_committed.end()),
4573                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4574                  HeapRegion::GrainBytes);
4575   _out->print_cr(G1PPRL_LINE_PREFIX);
4576   _out->print_cr(G1PPRL_LINE_PREFIX
4577                 G1PPRL_TYPE_H_FORMAT
4578                 G1PPRL_ADDR_BASE_H_FORMAT
4579                 G1PPRL_BYTE_H_FORMAT
4580                 G1PPRL_BYTE_H_FORMAT
4581                 G1PPRL_BYTE_H_FORMAT
4582                 G1PPRL_DOUBLE_H_FORMAT
4583                 G1PPRL_BYTE_H_FORMAT
4584                 G1PPRL_BYTE_H_FORMAT,
4585                 "type", "address-range",
4586                 "used", "prev-live", "next-live", "gc-eff",
4587                 "remset", "code-roots");
4588   _out->print_cr(G1PPRL_LINE_PREFIX
4589                 G1PPRL_TYPE_H_FORMAT
4590                 G1PPRL_ADDR_BASE_H_FORMAT
4591                 G1PPRL_BYTE_H_FORMAT
4592                 G1PPRL_BYTE_H_FORMAT
4593                 G1PPRL_BYTE_H_FORMAT
4594                 G1PPRL_DOUBLE_H_FORMAT
4595                 G1PPRL_BYTE_H_FORMAT
4596                 G1PPRL_BYTE_H_FORMAT,
4597                 "", "",
4598                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4599                 "(bytes)", "(bytes)");
4600 }
4601 
4602 // It takes a pointer to one of the _hum_* fields, deduces the
4603 // corresponding value for a region in a humongous region series
4604 // (either the region size, or what's left if the _hum_* field is
4605 // < the region size), and updates the _hum_* field accordingly.
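     // For example (illustrative numbers): with HeapRegion::GrainBytes == 1M
     // and *hum_bytes == 2560K on entry, three successive calls return 1024K,
     // 1024K and 512K, leaving *hum_bytes == 0.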
4606 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4607   size_t bytes = 0;
4608   // The > 0 check is to deal with the prev and next live bytes which
4609   // could be 0.
4610   if (*hum_bytes > 0) {
4611     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4612     *hum_bytes -= bytes;
4613   }
4614   return bytes;
4615 }
4616 
4617 // It deduces the values for a region in a humongous region series
4618 // from the _hum_* fields and updates those accordingly. It assumes
4619 // that the _hum_* fields have already been set up from the "starts
4620 // humongous" region and we visit the regions in address order.
4621 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4622                                                      size_t* capacity_bytes,
4623                                                      size_t* prev_live_bytes,
4624                                                      size_t* next_live_bytes) {
4625   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4626   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4627   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4628   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4629   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4630 }
4631 
4632 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4633   const char* type = "";
4634   HeapWord* bottom       = r->bottom();
4635   HeapWord* end          = r->end();
4636   size_t capacity_bytes  = r->capacity();
4637   size_t used_bytes      = r->used();
4638   size_t prev_live_bytes = r->live_bytes();
4639   size_t next_live_bytes = r->next_live_bytes();
4640   double gc_eff          = r->gc_efficiency();
4641   size_t remset_bytes    = r->rem_set()->mem_size();
4642   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4643 
4644   if (r->used() == 0) {
4645     type = "FREE";
4646   } else if (r->is_survivor()) {
4647     type = "SURV";
4648   } else if (r->is_young()) {
4649     type = "EDEN";
4650   } else if (r->startsHumongous()) {
4651     type = "HUMS";
4652 
4653     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4654            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4655            "they should have been zeroed after the last time we used them");
4656     // Set up the _hum_* fields.
4657     _hum_capacity_bytes  = capacity_bytes;
4658     _hum_used_bytes      = used_bytes;
4659     _hum_prev_live_bytes = prev_live_bytes;
4660     _hum_next_live_bytes = next_live_bytes;
4661     get_hum_bytes(&used_bytes, &capacity_bytes,
4662                   &prev_live_bytes, &next_live_bytes);
4663     end = bottom + HeapRegion::GrainWords;
4664   } else if (r->continuesHumongous()) {
4665     type = "HUMC";
4666     get_hum_bytes(&used_bytes, &capacity_bytes,
4667                   &prev_live_bytes, &next_live_bytes);
4668     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4669   } else {
4670     type = "OLD";
4671   }
4672 
4673   _total_used_bytes      += used_bytes;
4674   _total_capacity_bytes  += capacity_bytes;
4675   _total_prev_live_bytes += prev_live_bytes;
4676   _total_next_live_bytes += next_live_bytes;
4677   _total_remset_bytes    += remset_bytes;
4678   _total_strong_code_roots_bytes += strong_code_roots_bytes;
4679 
4680   // Print a line for this particular region.
4681   _out->print_cr(G1PPRL_LINE_PREFIX
4682                  G1PPRL_TYPE_FORMAT
4683                  G1PPRL_ADDR_BASE_FORMAT
4684                  G1PPRL_BYTE_FORMAT
4685                  G1PPRL_BYTE_FORMAT
4686                  G1PPRL_BYTE_FORMAT
4687                  G1PPRL_DOUBLE_FORMAT
4688                  G1PPRL_BYTE_FORMAT
4689                  G1PPRL_BYTE_FORMAT,
4690                  type, p2i(bottom), p2i(end),
4691                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4692                  remset_bytes, strong_code_roots_bytes);
4693 
4694   return false;
4695 }
4696 
4697 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4698   // Add static memory usage to the remembered set sizes.
4699   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
4700   // Print the footer of the output.
4701   _out->print_cr(G1PPRL_LINE_PREFIX);
4702   _out->print_cr(G1PPRL_LINE_PREFIX
4703                  " SUMMARY"
4704                  G1PPRL_SUM_MB_FORMAT("capacity")
4705                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4706                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4707                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
4708                  G1PPRL_SUM_MB_FORMAT("remset")
4709                  G1PPRL_SUM_MB_FORMAT("code-roots"),
4710                  bytes_to_mb(_total_capacity_bytes),
4711                  bytes_to_mb(_total_used_bytes),
4712                  perc(_total_used_bytes, _total_capacity_bytes),
4713                  bytes_to_mb(_total_prev_live_bytes),
4714                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4715                  bytes_to_mb(_total_next_live_bytes),
4716                  perc(_total_next_live_bytes, _total_capacity_bytes),
4717                  bytes_to_mb(_total_remset_bytes),
4718                  bytes_to_mb(_total_strong_code_roots_bytes));
4719   _out->cr();
4720 }