1 /*
   2  * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/symbolTable.hpp"
  27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
  28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
  32 #include "gc_implementation/g1/g1Log.hpp"
  33 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
  34 #include "gc_implementation/g1/g1RemSet.hpp"
  35 #include "gc_implementation/g1/heapRegion.inline.hpp"
  36 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
  38 #include "gc_implementation/shared/vmGCOperations.hpp"
  39 #include "gc_implementation/shared/gcTimer.hpp"
  40 #include "gc_implementation/shared/gcTrace.hpp"
  41 #include "gc_implementation/shared/gcTraceTime.hpp"
  42 #include "memory/genOopClosures.inline.hpp"
  43 #include "memory/referencePolicy.hpp"
  44 #include "memory/resourceArea.hpp"
  45 #include "oops/oop.inline.hpp"
  46 #include "runtime/handles.inline.hpp"
  47 #include "runtime/java.hpp"
  48 #include "runtime/prefetch.inline.hpp"
  49 #include "services/memTracker.hpp"
  50 
  51 // Concurrent marking bit map wrapper
  52 
  53 CMBitMapRO::CMBitMapRO(int shifter) :
  54   _bm(),
  55   _shifter(shifter) {
  56   _bmStartWord = 0;
  57   _bmWordSize = 0;
  58 }
  59 
  60 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  61                                                HeapWord* limit) const {
  62   // First we must round addr *up* to a possible object boundary.
  63   addr = (HeapWord*)align_size_up((intptr_t)addr,
  64                                   HeapWordSize << _shifter);
  65   size_t addrOffset = heapWordToOffset(addr);
  66   if (limit == NULL) {
  67     limit = _bmStartWord + _bmWordSize;
  68   }
  69   size_t limitOffset = heapWordToOffset(limit);
  70   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  71   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  72   assert(nextAddr >= addr, "get_next_one postcondition");
  73   assert(nextAddr == limit || isMarked(nextAddr),
  74          "get_next_one postcondition");
  75   return nextAddr;
  76 }
  77 
  78 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  79                                                  HeapWord* limit) const {
  80   size_t addrOffset = heapWordToOffset(addr);
  81   if (limit == NULL) {
  82     limit = _bmStartWord + _bmWordSize;
  83   }
  84   size_t limitOffset = heapWordToOffset(limit);
  85   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  86   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
   87   assert(nextAddr >= addr, "get_next_zero postcondition");
   88   assert(nextAddr == limit || !isMarked(nextAddr),
   89          "get_next_zero postcondition");
  90   return nextAddr;
  91 }
  92 
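      // Convert a difference expressed in heap words into the corresponding
      // difference in bitmap bit offsets (each bit covers 2^_shifter words).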
  93 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  94   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  95   return (int) (diff >> _shifter);
  96 }
  97 
  98 #ifndef PRODUCT
  99 bool CMBitMapRO::covers(ReservedSpace heap_rs) const {
 100   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 101   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
 102          "size inconsistency");
 103   return _bmStartWord == (HeapWord*)(heap_rs.base()) &&
 104          _bmWordSize  == heap_rs.size()>>LogHeapWordSize;
 105 }
 106 #endif
 107 
 108 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
 109   _bm.print_on_error(st, prefix);
 110 }
 111 
 112 bool CMBitMap::allocate(ReservedSpace heap_rs) {
 113   _bmStartWord = (HeapWord*)(heap_rs.base());
 114   _bmWordSize  = heap_rs.size()/HeapWordSize;    // heap_rs.size() is in bytes
 115   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
 116                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
 117   if (!brs.is_reserved()) {
 118     warning("ConcurrentMark marking bit map allocation failure");
 119     return false;
 120   }
 121   MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
 122   // For now we'll just commit all of the bit map up front.
 123   // Later on we'll try to be more parsimonious with swap.
 124   if (!_virtual_space.initialize(brs, brs.size())) {
 125     warning("ConcurrentMark marking bit map backing store failure");
 126     return false;
 127   }
 128   assert(_virtual_space.committed_size() == brs.size(),
 129          "didn't reserve backing store for all of concurrent marking bit map?");
 130   _bm.set_map((uintptr_t*)_virtual_space.low());
 131   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
 132          _bmWordSize, "inconsistency in bit map sizing");
 133   _bm.set_size(_bmWordSize >> _shifter);
 134   return true;
 135 }
 136 
 137 void CMBitMap::clearAll() {
 138   _bm.clear();
 139   return;
 140 }
 141 
 142 void CMBitMap::markRange(MemRegion mr) {
  143   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 144   assert(!mr.is_empty(), "unexpected empty region");
 145   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 146           ((HeapWord *) mr.end())),
 147          "markRange memory region end is not card aligned");
 148   // convert address range into offset range
 149   _bm.at_put_range(heapWordToOffset(mr.start()),
 150                    heapWordToOffset(mr.end()), true);
 151 }
 152 
 153 void CMBitMap::clearRange(MemRegion mr) {
  154   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 155   assert(!mr.is_empty(), "unexpected empty region");
 156   // convert address range into offset range
 157   _bm.at_put_range(heapWordToOffset(mr.start()),
 158                    heapWordToOffset(mr.end()), false);
 159 }
 160 
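      // Return the first marked run starting at or after addr (clipped to
      // end_addr) and clear its bits in the bitmap; the result may be empty.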
 161 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 162                                             HeapWord* end_addr) {
 163   HeapWord* start = getNextMarkedWordAddress(addr);
 164   start = MIN2(start, end_addr);
 165   HeapWord* end   = getNextUnmarkedWordAddress(start);
 166   end = MIN2(end, end_addr);
 167   assert(start <= end, "Consistency check");
 168   MemRegion mr(start, end);
 169   if (!mr.is_empty()) {
 170     clearRange(mr);
 171   }
 172   return mr;
 173 }
 174 
 175 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 176   _base(NULL), _cm(cm)
 177 #ifdef ASSERT
 178   , _drain_in_progress(false)
 179   , _drain_in_progress_yields(false)
 180 #endif
 181 {}
 182 
 183 bool CMMarkStack::allocate(size_t capacity) {
 184   // allocate a stack of the requisite depth
 185   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 186   if (!rs.is_reserved()) {
 187     warning("ConcurrentMark MarkStack allocation failure");
 188     return false;
 189   }
 190   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 191   if (!_virtual_space.initialize(rs, rs.size())) {
 192     warning("ConcurrentMark MarkStack backing store failure");
 193     // Release the virtual memory reserved for the marking stack
 194     rs.release();
 195     return false;
 196   }
 197   assert(_virtual_space.committed_size() == rs.size(),
 198          "Didn't reserve backing store for all of ConcurrentMark stack?");
 199   _base = (oop*) _virtual_space.low();
 200   setEmpty();
 201   _capacity = (jint) capacity;
 202   _saved_index = -1;
 203   _should_expand = false;
 204   NOT_PRODUCT(_max_depth = 0);
 205   return true;
 206 }
 207 
 208 void CMMarkStack::expand() {
  209   // Called during remark if we've overflowed the marking stack during marking.
  210   assert(isEmpty(), "stack should have been emptied while handling overflow");
 211   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 212   // Clear expansion flag
 213   _should_expand = false;
 214   if (_capacity == (jint) MarkStackSizeMax) {
 215     if (PrintGCDetails && Verbose) {
 216       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
 217     }
 218     return;
 219   }
 220   // Double capacity if possible
 221   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  222   // Do not give up the existing stack until we have managed to
  223   // reserve the doubled capacity that we want.
 224   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 225                                                            sizeof(oop)));
 226   if (rs.is_reserved()) {
 227     // Release the backing store associated with old stack
 228     _virtual_space.release();
 229     // Reinitialize virtual space for new stack
 230     if (!_virtual_space.initialize(rs, rs.size())) {
 231       fatal("Not enough swap for expanded marking stack capacity");
 232     }
 233     _base = (oop*)(_virtual_space.low());
 234     _index = 0;
 235     _capacity = new_capacity;
 236   } else {
 237     if (PrintGCDetails && Verbose) {
  238       // Failed to double the capacity; continue with the current stack.
 239       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
 240                           SIZE_FORMAT"K to " SIZE_FORMAT"K",
 241                           _capacity / K, new_capacity / K);
 242     }
 243   }
 244 }
 245 
 246 void CMMarkStack::set_should_expand() {
  247   // If we're resetting the marking state because of a
  248   // marking stack overflow, record that we should, if
 249   // possible, expand the stack.
 250   _should_expand = _cm->has_overflown();
 251 }
 252 
 253 CMMarkStack::~CMMarkStack() {
 254   if (_base != NULL) {
 255     _base = NULL;
 256     _virtual_space.release();
 257   }
 258 }
 259 
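      // Lock-free push of a single entry: claim a slot by CAS-ing _index
      // forward, then store the oop into it. Sets _overflow instead of
      // blocking when the stack is full.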
 260 void CMMarkStack::par_push(oop ptr) {
 261   while (true) {
 262     if (isFull()) {
 263       _overflow = true;
 264       return;
 265     }
 266     // Otherwise...
 267     jint index = _index;
 268     jint next_index = index+1;
 269     jint res = Atomic::cmpxchg(next_index, &_index, index);
 270     if (res == index) {
 271       _base[index] = ptr;
 272       // Note that we don't maintain this atomically.  We could, but it
 273       // doesn't seem necessary.
 274       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 275       return;
 276     }
 277     // Otherwise, we need to try again.
 278   }
 279 }
 280 
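      // Lock-free push of n entries: claim a block of slots by CAS-ing _index
      // forward by n, then copy the oops in. Sets _overflow if they do not fit.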
 281 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 282   while (true) {
 283     if (isFull()) {
 284       _overflow = true;
 285       return;
 286     }
 287     // Otherwise...
 288     jint index = _index;
 289     jint next_index = index + n;
 290     if (next_index > _capacity) {
 291       _overflow = true;
 292       return;
 293     }
 294     jint res = Atomic::cmpxchg(next_index, &_index, index);
 295     if (res == index) {
 296       for (int i = 0; i < n; i++) {
 297         int  ind = index + i;
 298         assert(ind < _capacity, "By overflow test above.");
 299         _base[ind] = ptr_arr[i];
 300       }
 301       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 302       return;
 303     }
 304     // Otherwise, we need to try again.
 305   }
 306 }
 307 
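      // Push n entries while holding ParGCRareEvent_lock instead of using CAS.
      // Sets _overflow if the entries do not fit.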
 308 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 309   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 310   jint start = _index;
 311   jint next_index = start + n;
 312   if (next_index > _capacity) {
 313     _overflow = true;
 314     return;
 315   }
 316   // Otherwise.
 317   _index = next_index;
 318   for (int i = 0; i < n; i++) {
 319     int ind = start + i;
 320     assert(ind < _capacity, "By overflow test above.");
 321     _base[ind] = ptr_arr[i];
 322   }
 323   NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 324 }
 325 
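      // Pop up to max entries into ptr_arr while holding ParGCRareEvent_lock.
      // Returns false (and sets *n to 0) if the stack is empty.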
 326 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 327   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 328   jint index = _index;
 329   if (index == 0) {
 330     *n = 0;
 331     return false;
 332   } else {
 333     int k = MIN2(max, index);
 334     jint  new_ind = index - k;
 335     for (int j = 0; j < k; j++) {
 336       ptr_arr[j] = _base[new_ind + j];
 337     }
 338     _index = new_ind;
 339     *n = k;
 340     return true;
 341   }
 342 }
 343 
 344 template<class OopClosureClass>
 345 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 346   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 347          || SafepointSynchronize::is_at_safepoint(),
 348          "Drain recursion must be yield-safe.");
 349   bool res = true;
 350   debug_only(_drain_in_progress = true);
 351   debug_only(_drain_in_progress_yields = yield_after);
 352   while (!isEmpty()) {
 353     oop newOop = pop();
 354     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 355     assert(newOop->is_oop(), "Expected an oop");
 356     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 357            "only grey objects on this stack");
 358     newOop->oop_iterate(cl);
 359     if (yield_after && _cm->do_yield_check()) {
 360       res = false;
 361       break;
 362     }
 363   }
 364   debug_only(_drain_in_progress = false);
 365   return res;
 366 }
 367 
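      // Record the current stack index at the start of a GC pause so that
      // note_end_of_gc() can verify nothing was pushed during the pause.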
 368 void CMMarkStack::note_start_of_gc() {
 369   assert(_saved_index == -1,
 370          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 371   _saved_index = _index;
 372 }
 373 
 374 void CMMarkStack::note_end_of_gc() {
 375   // This is intentionally a guarantee, instead of an assert. If we
 376   // accidentally add something to the mark stack during GC, it
  377   // will be a correctness issue, so it's better if we crash. We'll
 378   // only check this once per GC anyway, so it won't be a performance
 379   // issue in any way.
 380   guarantee(_saved_index == _index,
 381             err_msg("saved index: %d index: %d", _saved_index, _index));
 382   _saved_index = -1;
 383 }
 384 
 385 void CMMarkStack::oops_do(OopClosure* f) {
 386   assert(_saved_index == _index,
 387          err_msg("saved index: %d index: %d", _saved_index, _index));
 388   for (int i = 0; i < _index; i += 1) {
 389     f->do_oop(&_base[i]);
 390   }
 391 }
 392 
 393 bool ConcurrentMark::not_yet_marked(oop obj) const {
 394   return _g1h->is_obj_ill(obj);
 395 }
 396 
 397 CMRootRegions::CMRootRegions() :
 398   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 399   _should_abort(false),  _next_survivor(NULL) { }
 400 
 401 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 402   _young_list = g1h->young_list();
 403   _cm = cm;
 404 }
 405 
 406 void CMRootRegions::prepare_for_scan() {
 407   assert(!scan_in_progress(), "pre-condition");
 408 
 409   // Currently, only survivors can be root regions.
 410   assert(_next_survivor == NULL, "pre-condition");
 411   _next_survivor = _young_list->first_survivor_region();
 412   _scan_in_progress = (_next_survivor != NULL);
 413   _should_abort = false;
 414 }
 415 
 416 HeapRegion* CMRootRegions::claim_next() {
 417   if (_should_abort) {
 418     // If someone has set the should_abort flag, we return NULL to
 419     // force the caller to bail out of their loop.
 420     return NULL;
 421   }
 422 
 423   // Currently, only survivors can be root regions.
 424   HeapRegion* res = _next_survivor;
 425   if (res != NULL) {
 426     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 427     // Read it again in case it changed while we were waiting for the lock.
 428     res = _next_survivor;
 429     if (res != NULL) {
 430       if (res == _young_list->last_survivor_region()) {
 431         // We just claimed the last survivor so store NULL to indicate
 432         // that we're done.
 433         _next_survivor = NULL;
 434       } else {
 435         _next_survivor = res->get_next_young_region();
 436       }
 437     } else {
 438       // Someone else claimed the last survivor while we were trying
  439       // to take the lock, so there is nothing else to do.
 440     }
 441   }
 442   assert(res == NULL || res->is_survivor(), "post-condition");
 443 
 444   return res;
 445 }
 446 
 447 void CMRootRegions::scan_finished() {
 448   assert(scan_in_progress(), "pre-condition");
 449 
 450   // Currently, only survivors can be root regions.
 451   if (!_should_abort) {
 452     assert(_next_survivor == NULL, "we should have claimed all survivors");
 453   }
 454   _next_survivor = NULL;
 455 
 456   {
 457     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 458     _scan_in_progress = false;
 459     RootRegionScan_lock->notify_all();
 460   }
 461 }
 462 
 463 bool CMRootRegions::wait_until_scan_finished() {
 464   if (!scan_in_progress()) return false;
 465 
 466   {
 467     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 468     while (scan_in_progress()) {
 469       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 470     }
 471   }
 472   return true;
 473 }
 474 
 475 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 476 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 477 #endif // _MSC_VER
 478 
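      // Use roughly one quarter of the parallel GC threads (rounded, and at
      // least one) for concurrent marking.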
 479 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 480   return MAX2((n_par_threads + 2) / 4, 1U);
 481 }
 482 
 483 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, ReservedSpace heap_rs) :
 484   _g1h(g1h),
 485   _markBitMap1(log2_intptr(MinObjAlignment)),
 486   _markBitMap2(log2_intptr(MinObjAlignment)),
 487   _parallel_marking_threads(0),
 488   _max_parallel_marking_threads(0),
 489   _sleep_factor(0.0),
 490   _marking_task_overhead(1.0),
 491   _cleanup_sleep_factor(0.0),
 492   _cleanup_task_overhead(1.0),
 493   _cleanup_list("Cleanup List"),
 494   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 495   _card_bm((heap_rs.size() + CardTableModRefBS::card_size - 1) >>
 496             CardTableModRefBS::card_shift,
 497             false /* in_resource_area*/),
 498 
 499   _prevMarkBitMap(&_markBitMap1),
 500   _nextMarkBitMap(&_markBitMap2),
 501 
 502   _markStack(this),
 503   // _finger set in set_non_marking_state
 504 
 505   _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
 506   // _active_tasks set in set_non_marking_state
 507   // _tasks set inside the constructor
 508   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 509   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 510 
 511   _has_overflown(false),
 512   _concurrent(false),
 513   _has_aborted(false),
 514   _restart_for_overflow(false),
 515   _concurrent_marking_in_progress(false),
 516 
 517   // _verbose_level set below
 518 
 519   _init_times(),
 520   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 521   _cleanup_times(),
 522   _total_counting_time(0.0),
 523   _total_rs_scrub_time(0.0),
 524 
 525   _parallel_workers(NULL),
 526 
 527   _count_card_bitmaps(NULL),
 528   _count_marked_bytes(NULL),
 529   _completed_initialization(false) {
 530   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 531   if (verbose_level < no_verbose) {
 532     verbose_level = no_verbose;
 533   }
 534   if (verbose_level > high_verbose) {
 535     verbose_level = high_verbose;
 536   }
 537   _verbose_level = verbose_level;
 538 
 539   if (verbose_low()) {
 540     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 541                            "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
 542   }
 543 
 544   if (!_markBitMap1.allocate(heap_rs)) {
 545     warning("Failed to allocate first CM bit map");
 546     return;
 547   }
 548   if (!_markBitMap2.allocate(heap_rs)) {
 549     warning("Failed to allocate second CM bit map");
 550     return;
 551   }
 552 
 553   // Create & start a ConcurrentMark thread.
 554   _cmThread = new ConcurrentMarkThread(this);
 555   assert(cmThread() != NULL, "CM Thread should have been created");
 556   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 557   if (_cmThread->osthread() == NULL) {
 558       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 559   }
 560 
 561   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 562   assert(_markBitMap1.covers(heap_rs), "_markBitMap1 inconsistency");
 563   assert(_markBitMap2.covers(heap_rs), "_markBitMap2 inconsistency");
 564 
 565   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 566   satb_qs.set_buffer_size(G1SATBBufferSize);
 567 
 568   _root_regions.init(_g1h, this);
 569 
 570   if (ConcGCThreads > ParallelGCThreads) {
 571     warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
 572             "than ParallelGCThreads (" UINTX_FORMAT ").",
 573             ConcGCThreads, ParallelGCThreads);
 574     return;
 575   }
 576   if (ParallelGCThreads == 0) {
 577     // if we are not running with any parallel GC threads we will not
 578     // spawn any marking threads either
 579     _parallel_marking_threads =       0;
 580     _max_parallel_marking_threads =   0;
 581     _sleep_factor             =     0.0;
 582     _marking_task_overhead    =     1.0;
 583   } else {
 584     if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 585       // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 586       // if both are set
 587       _sleep_factor             = 0.0;
 588       _marking_task_overhead    = 1.0;
 589     } else if (G1MarkingOverheadPercent > 0) {
 590       // We will calculate the number of parallel marking threads based
 591       // on a target overhead with respect to the soft real-time goal
 592       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 593       double overall_cm_overhead =
 594         (double) MaxGCPauseMillis * marking_overhead /
 595         (double) GCPauseIntervalMillis;
 596       double cpu_ratio = 1.0 / (double) os::processor_count();
 597       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 598       double marking_task_overhead =
 599         overall_cm_overhead / marking_thread_num *
 600                                                 (double) os::processor_count();
 601       double sleep_factor =
 602                          (1.0 - marking_task_overhead) / marking_task_overhead;
 603 
 604       FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
 605       _sleep_factor             = sleep_factor;
 606       _marking_task_overhead    = marking_task_overhead;
 607     } else {
 608       // Calculate the number of parallel marking threads by scaling
 609       // the number of parallel GC threads.
 610       uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
 611       FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
 612       _sleep_factor             = 0.0;
 613       _marking_task_overhead    = 1.0;
 614     }
 615 
 616     assert(ConcGCThreads > 0, "Should have been set");
 617     _parallel_marking_threads = (uint) ConcGCThreads;
 618     _max_parallel_marking_threads = _parallel_marking_threads;
 619 
 620     if (parallel_marking_threads() > 1) {
 621       _cleanup_task_overhead = 1.0;
 622     } else {
 623       _cleanup_task_overhead = marking_task_overhead();
 624     }
 625     _cleanup_sleep_factor =
 626                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 627 
 628 #if 0
 629     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 630     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 631     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 632     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 633     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 634 #endif
 635 
 636     guarantee(parallel_marking_threads() > 0, "peace of mind");
 637     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
 638          _max_parallel_marking_threads, false, true);
 639     if (_parallel_workers == NULL) {
 640       vm_exit_during_initialization("Failed necessary allocation.");
 641     } else {
 642       _parallel_workers->initialize_workers();
 643     }
 644   }
 645 
 646   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 647     uintx mark_stack_size =
 648       MIN2(MarkStackSizeMax,
 649           MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 650     // Verify that the calculated value for MarkStackSize is in range.
 651     // It would be nice to use the private utility routine from Arguments.
 652     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 653       warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
 654               "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
 655               mark_stack_size, (uintx) 1, MarkStackSizeMax);
 656       return;
 657     }
 658     FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
 659   } else {
 660     // Verify MarkStackSize is in range.
 661     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 662       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 663         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 664           warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
 665                   "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
 666                   MarkStackSize, (uintx) 1, MarkStackSizeMax);
 667           return;
 668         }
 669       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 670         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 671           warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
 672                   " or for MarkStackSizeMax (" UINTX_FORMAT ")",
 673                   MarkStackSize, MarkStackSizeMax);
 674           return;
 675         }
 676       }
 677     }
 678   }
 679 
 680   if (!_markStack.allocate(MarkStackSize)) {
 681     warning("Failed to allocate CM marking stack");
 682     return;
 683   }
 684 
 685   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 686   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 687 
 688   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 689   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 690 
 691   BitMap::idx_t card_bm_size = _card_bm.size();
 692 
 693   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 694   _active_tasks = _max_worker_id;
 695 
 696   size_t max_regions = (size_t) _g1h->max_regions();
 697   for (uint i = 0; i < _max_worker_id; ++i) {
 698     CMTaskQueue* task_queue = new CMTaskQueue();
 699     task_queue->initialize();
 700     _task_queues->register_queue(i, task_queue);
 701 
 702     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 703     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 704 
 705     _tasks[i] = new CMTask(i, this,
 706                            _count_marked_bytes[i],
 707                            &_count_card_bitmaps[i],
 708                            task_queue, _task_queues);
 709 
 710     _accum_task_vtime[i] = 0.0;
 711   }
 712 
 713   // Calculate the card number for the bottom of the heap. Used
 714   // in biasing indexes into the accounting card bitmaps.
 715   _heap_bottom_card_num =
 716     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 717                                 CardTableModRefBS::card_shift);
 718 
 719   // Clear all the liveness counting data
 720   clear_all_count_data();
 721 
 722   // so that the call below can read a sensible value
 723   _heap_start = (HeapWord*) heap_rs.base();
 724   set_non_marking_state();
 725   _completed_initialization = true;
 726 }
 727 
 728 void ConcurrentMark::update_g1_committed(bool force) {
 729   // If concurrent marking is not in progress, then we do not need to
 730   // update _heap_end.
 731   if (!concurrent_marking_in_progress() && !force) return;
 732 
 733   MemRegion committed = _g1h->g1_committed();
 734   assert(committed.start() == _heap_start, "start shouldn't change");
 735   HeapWord* new_end = committed.end();
 736   if (new_end > _heap_end) {
 737     // The heap has been expanded.
 738 
 739     _heap_end = new_end;
 740   }
 741   // Notice that the heap can also shrink. However, this only happens
 742   // during a Full GC (at least currently) and the entire marking
 743   // phase will bail out and the task will not be restarted. So, let's
 744   // do nothing.
 745 }
 746 
 747 void ConcurrentMark::reset() {
 748   // Starting values for these two. This should be called in a STW
  749   // phase. CM will be notified of any future g1_committed expansions;
  750   // these will happen at the end of evacuation pauses, when tasks are
 751   // inactive.
 752   MemRegion committed = _g1h->g1_committed();
 753   _heap_start = committed.start();
 754   _heap_end   = committed.end();
 755 
 756   // Separated the asserts so that we know which one fires.
 757   assert(_heap_start != NULL, "heap bounds should look ok");
 758   assert(_heap_end != NULL, "heap bounds should look ok");
 759   assert(_heap_start < _heap_end, "heap bounds should look ok");
 760 
 761   // Reset all the marking data structures and any necessary flags
 762   reset_marking_state();
 763 
 764   if (verbose_low()) {
 765     gclog_or_tty->print_cr("[global] resetting");
 766   }
 767 
  768   // We do reset all of them, since different phases will use a
  769   // different number of active threads. So, it's easiest to have all
 770   // of them ready.
 771   for (uint i = 0; i < _max_worker_id; ++i) {
 772     _tasks[i]->reset(_nextMarkBitMap);
 773   }
 774 
  775   // We need this to make sure that the flag is on during the evacuation
  776   // pause with the initial mark piggy-backed on it.
 777   set_concurrent_marking_in_progress();
 778 }
 779 
 780 
 781 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 782   _markStack.set_should_expand();
 783   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 784   if (clear_overflow) {
 785     clear_has_overflown();
 786   } else {
 787     assert(has_overflown(), "pre-condition");
 788   }
 789   _finger = _heap_start;
 790 
 791   for (uint i = 0; i < _max_worker_id; ++i) {
 792     CMTaskQueue* queue = _task_queues->queue(i);
 793     queue->set_empty();
 794   }
 795 }
 796 
 797 void ConcurrentMark::set_concurrency(uint active_tasks) {
 798   assert(active_tasks <= _max_worker_id, "we should not have more");
 799 
 800   _active_tasks = active_tasks;
 801   // Need to update the three data structures below according to the
 802   // number of active threads for this phase.
 803   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 804   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 805   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 806 }
 807 
 808 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 809   set_concurrency(active_tasks);
 810 
 811   _concurrent = concurrent;
 812   // We propagate this to all tasks, not just the active ones.
 813   for (uint i = 0; i < _max_worker_id; ++i)
 814     _tasks[i]->set_concurrent(concurrent);
 815 
 816   if (concurrent) {
 817     set_concurrent_marking_in_progress();
 818   } else {
 819     // We currently assume that the concurrent flag has been set to
 820     // false before we start remark. At this point we should also be
 821     // in a STW phase.
 822     assert(!concurrent_marking_in_progress(), "invariant");
 823     assert(out_of_regions(),
 824            err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
 825                    p2i(_finger), p2i(_heap_end)));
 826     update_g1_committed(true);
 827   }
 828 }
 829 
 830 void ConcurrentMark::set_non_marking_state() {
 831   // We set the global marking state to some default values when we're
 832   // not doing marking.
 833   reset_marking_state();
 834   _active_tasks = 0;
 835   clear_concurrent_marking_in_progress();
 836 }
 837 
 838 ConcurrentMark::~ConcurrentMark() {
 839   // The ConcurrentMark instance is never freed.
 840   ShouldNotReachHere();
 841 }
 842 
 843 void ConcurrentMark::clearNextBitmap() {
 844   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 845   G1CollectorPolicy* g1p = g1h->g1_policy();
 846 
  847   // Make sure that the concurrent mark thread still appears to be
  848   // in the current cycle.
 849   guarantee(cmThread()->during_cycle(), "invariant");
 850 
 851   // We are finishing up the current cycle by clearing the next
 852   // marking bitmap and getting it ready for the next cycle. During
 853   // this time no other cycle can start. So, let's make sure that this
 854   // is the case.
 855   guarantee(!g1h->mark_in_progress(), "invariant");
 856 
 857   // clear the mark bitmap (no grey objects to start with).
 858   // We need to do this in chunks and offer to yield in between
 859   // each chunk.
 860   HeapWord* start  = _nextMarkBitMap->startWord();
 861   HeapWord* end    = _nextMarkBitMap->endWord();
 862   HeapWord* cur    = start;
 863   size_t chunkSize = M;
 864   while (cur < end) {
 865     HeapWord* next = cur + chunkSize;
 866     if (next > end) {
 867       next = end;
 868     }
 869     MemRegion mr(cur,next);
 870     _nextMarkBitMap->clearRange(mr);
 871     cur = next;
 872     do_yield_check();
 873 
 874     // Repeat the asserts from above. We'll do them as asserts here to
 875     // minimize their overhead on the product. However, we'll have
 876     // them as guarantees at the beginning / end of the bitmap
 877     // clearing to get some checking in the product.
 878     assert(cmThread()->during_cycle(), "invariant");
 879     assert(!g1h->mark_in_progress(), "invariant");
 880   }
 881 
 882   // Clear the liveness counting data
 883   clear_all_count_data();
 884 
 885   // Repeat the asserts from above.
 886   guarantee(cmThread()->during_cycle(), "invariant");
 887   guarantee(!g1h->mark_in_progress(), "invariant");
 888 }
 889 
 890 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 891 public:
 892   bool doHeapRegion(HeapRegion* r) {
 893     if (!r->continuesHumongous()) {
 894       r->note_start_of_marking();
 895     }
 896     return false;
 897   }
 898 };
 899 
 900 void ConcurrentMark::checkpointRootsInitialPre() {
 901   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 902   G1CollectorPolicy* g1p = g1h->g1_policy();
 903 
 904   _has_aborted = false;
 905 
 906 #ifndef PRODUCT
 907   if (G1PrintReachableAtInitialMark) {
 908     print_reachable("at-cycle-start",
 909                     VerifyOption_G1UsePrevMarking, true /* all */);
 910   }
 911 #endif
 912 
 913   // Initialize marking structures. This has to be done in a STW phase.
 914   reset();
 915 
 916   // For each region note start of marking.
 917   NoteStartOfMarkHRClosure startcl;
 918   g1h->heap_region_iterate(&startcl);
 919 }
 920 
 921 
 922 void ConcurrentMark::checkpointRootsInitialPost() {
 923   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 924 
 925   // If we force an overflow during remark, the remark operation will
 926   // actually abort and we'll restart concurrent marking. If we always
 927   // force an overflow during remark we'll never actually complete the
 928   // marking phase. So, we initialize this here, at the start of the
  929   // cycle, so that the remaining overflow number will decrease at
 930   // every remark and we'll eventually not need to cause one.
 931   force_overflow_stw()->init();
 932 
 933   // Start Concurrent Marking weak-reference discovery.
 934   ReferenceProcessor* rp = g1h->ref_processor_cm();
 935   // enable ("weak") refs discovery
 936   rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
 937   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 938 
 939   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  940   // This is the start of the marking cycle. We expect all
 941   // threads to have SATB queues with active set to false.
 942   satb_mq_set.set_active_all_threads(true, /* new active value */
 943                                      false /* expected_active */);
 944 
 945   _root_regions.prepare_for_scan();
 946 
 947   // update_g1_committed() will be called at the end of an evac pause
 948   // when marking is on. So, it's also called at the end of the
 949   // initial-mark pause to update the heap end, if the heap expands
 950   // during it. No need to call it here.
 951 }
 952 
 953 /*
 954  * Notice that in the next two methods, we actually leave the STS
 955  * during the barrier sync and join it immediately afterwards. If we
 956  * do not do this, the following deadlock can occur: one thread could
 957  * be in the barrier sync code, waiting for the other thread to also
 958  * sync up, whereas another one could be trying to yield, while also
 959  * waiting for the other threads to sync up too.
 960  *
 961  * Note, however, that this code is also used during remark and in
 962  * this case we should not attempt to leave / enter the STS, otherwise
 963  * we'll either hit an assert (debug / fastdebug) or deadlock
 964  * (product). So we should only leave / enter the STS if we are
 965  * operating concurrently.
 966  *
 967  * Because the thread that does the sync barrier has left the STS, it
  968  * is possible for it to be suspended while a Full GC or an evacuation
  969  * pause occurs. This is actually safe, since entering the sync
 970  * barrier is one of the last things do_marking_step() does, and it
 971  * doesn't manipulate any data structures afterwards.
 972  */
 973 
 974 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 975   if (verbose_low()) {
 976     gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
 977   }
 978 
 979   if (concurrent()) {
 980     SuspendibleThreadSet::leave();
 981   }
 982 
 983   bool barrier_aborted = !_first_overflow_barrier_sync.enter();
 984 
 985   if (concurrent()) {
 986     SuspendibleThreadSet::join();
 987   }
 988   // at this point everyone should have synced up and not be doing any
 989   // more work
 990 
 991   if (verbose_low()) {
 992     if (barrier_aborted) {
 993       gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
 994     } else {
 995       gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
 996     }
 997   }
 998 
 999   if (barrier_aborted) {
1000     // If the barrier aborted we ignore the overflow condition and
1001     // just abort the whole marking phase as quickly as possible.
1002     return;
1003   }
1004 
1005   // If we're executing the concurrent phase of marking, reset the marking
1006   // state; otherwise the marking state is reset after reference processing,
1007   // during the remark pause.
1008   // If we reset here as a result of an overflow during the remark we will
1009   // see assertion failures from any subsequent set_concurrency_and_phase()
1010   // calls.
1011   if (concurrent()) {
 1012     // let the task associated with worker 0 do this
1013     if (worker_id == 0) {
1014       // task 0 is responsible for clearing the global data structures
1015       // We should be here because of an overflow. During STW we should
1016       // not clear the overflow flag since we rely on it being true when
1017       // we exit this method to abort the pause and restart concurrent
1018       // marking.
1019       reset_marking_state(true /* clear_overflow */);
1020       force_overflow()->update();
1021 
1022       if (G1Log::fine()) {
1023         gclog_or_tty->date_stamp(PrintGCDateStamps);
1024         gclog_or_tty->stamp(PrintGCTimeStamps);
1025         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
1026       }
1027     }
1028   }
1029 
 1030   // after this, each task should reset its own data structures and
1031   // then go into the second barrier
1032 }
1033 
1034 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1035   if (verbose_low()) {
1036     gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
1037   }
1038 
1039   if (concurrent()) {
1040     SuspendibleThreadSet::leave();
1041   }
1042 
1043   bool barrier_aborted = !_second_overflow_barrier_sync.enter();
1044 
1045   if (concurrent()) {
1046     SuspendibleThreadSet::join();
1047   }
1048   // at this point everything should be re-initialized and ready to go
1049 
1050   if (verbose_low()) {
1051     if (barrier_aborted) {
1052       gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
1053     } else {
1054       gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
1055     }
1056   }
1057 }
1058 
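      // Non-product support for artificially forcing marking stack overflows
      // (via G1ConcMarkForceOverflow) to exercise the overflow/restart paths.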
1059 #ifndef PRODUCT
1060 void ForceOverflowSettings::init() {
1061   _num_remaining = G1ConcMarkForceOverflow;
1062   _force = false;
1063   update();
1064 }
1065 
1066 void ForceOverflowSettings::update() {
1067   if (_num_remaining > 0) {
1068     _num_remaining -= 1;
1069     _force = true;
1070   } else {
1071     _force = false;
1072   }
1073 }
1074 
1075 bool ForceOverflowSettings::should_force() {
1076   if (_force) {
1077     _force = false;
1078     return true;
1079   } else {
1080     return false;
1081   }
1082 }
1083 #endif // !PRODUCT
1084 
1085 class CMConcurrentMarkingTask: public AbstractGangTask {
1086 private:
1087   ConcurrentMark*       _cm;
1088   ConcurrentMarkThread* _cmt;
1089 
1090 public:
1091   void work(uint worker_id) {
1092     assert(Thread::current()->is_ConcurrentGC_thread(),
1093            "this should only be done by a conc GC thread");
1094     ResourceMark rm;
1095 
1096     double start_vtime = os::elapsedVTime();
1097 
1098     SuspendibleThreadSet::join();
1099 
1100     assert(worker_id < _cm->active_tasks(), "invariant");
1101     CMTask* the_task = _cm->task(worker_id);
1102     the_task->record_start_time();
1103     if (!_cm->has_aborted()) {
1104       do {
1105         double start_vtime_sec = os::elapsedVTime();
1106         double start_time_sec = os::elapsedTime();
1107         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1108 
1109         the_task->do_marking_step(mark_step_duration_ms,
1110                                   true  /* do_termination */,
1111                                   false /* is_serial*/);
1112 
1113         double end_time_sec = os::elapsedTime();
1114         double end_vtime_sec = os::elapsedVTime();
1115         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1116         double elapsed_time_sec = end_time_sec - start_time_sec;
1117         _cm->clear_has_overflown();
1118 
1119         bool ret = _cm->do_yield_check(worker_id);
1120 
1121         jlong sleep_time_ms;
1122         if (!_cm->has_aborted() && the_task->has_aborted()) {
1123           sleep_time_ms =
1124             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1125           SuspendibleThreadSet::leave();
1126           os::sleep(Thread::current(), sleep_time_ms, false);
1127           SuspendibleThreadSet::join();
1128         }
1129         double end_time2_sec = os::elapsedTime();
1130         double elapsed_time2_sec = end_time2_sec - start_time_sec;
1131 
1132 #if 0
1133           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1134                                  "overhead %1.4lf",
1135                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1136                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
1137           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1138                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1139 #endif
1140       } while (!_cm->has_aborted() && the_task->has_aborted());
1141     }
1142     the_task->record_end_time();
1143     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1144 
1145     SuspendibleThreadSet::leave();
1146 
1147     double end_vtime = os::elapsedVTime();
1148     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1149   }
1150 
1151   CMConcurrentMarkingTask(ConcurrentMark* cm,
1152                           ConcurrentMarkThread* cmt) :
1153       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1154 
1155   ~CMConcurrentMarkingTask() { }
1156 };
1157 
1158 // Calculates the number of active workers for a concurrent
1159 // phase.
1160 uint ConcurrentMark::calc_parallel_marking_threads() {
1161   if (G1CollectedHeap::use_parallel_gc_threads()) {
1162     uint n_conc_workers = 0;
1163     if (!UseDynamicNumberOfGCThreads ||
1164         (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1165          !ForceDynamicNumberOfGCThreads)) {
1166       n_conc_workers = max_parallel_marking_threads();
1167     } else {
1168       n_conc_workers =
1169         AdaptiveSizePolicy::calc_default_active_workers(
1170                                      max_parallel_marking_threads(),
1171                                      1, /* Minimum workers */
1172                                      parallel_marking_threads(),
1173                                      Threads::number_of_non_daemon_threads());
1174       // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1175       // that scaling has already gone into "_max_parallel_marking_threads".
1176     }
1177     assert(n_conc_workers > 0, "Always need at least 1");
1178     return n_conc_workers;
1179   }
1180   // If we are not running with any parallel GC threads we will not
1181   // have spawned any marking threads either. Hence the number of
1182   // concurrent workers should be 0.
1183   return 0;
1184 }
1185 
1186 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1187   // Currently, only survivors can be root regions.
1188   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1189   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1190 
1191   const uintx interval = PrefetchScanIntervalInBytes;
1192   HeapWord* curr = hr->bottom();
1193   const HeapWord* end = hr->top();
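        // Walk the objects in [bottom, top), prefetching ahead and applying
        // the scan closure to each object.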
1194   while (curr < end) {
1195     Prefetch::read(curr, interval);
1196     oop obj = oop(curr);
1197     int size = obj->oop_iterate(&cl);
1198     assert(size == obj->size(), "sanity");
1199     curr += size;
1200   }
1201 }
1202 
1203 class CMRootRegionScanTask : public AbstractGangTask {
1204 private:
1205   ConcurrentMark* _cm;
1206 
1207 public:
1208   CMRootRegionScanTask(ConcurrentMark* cm) :
1209     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1210 
1211   void work(uint worker_id) {
1212     assert(Thread::current()->is_ConcurrentGC_thread(),
1213            "this should only be done by a conc GC thread");
1214 
1215     CMRootRegions* root_regions = _cm->root_regions();
1216     HeapRegion* hr = root_regions->claim_next();
1217     while (hr != NULL) {
1218       _cm->scanRootRegion(hr, worker_id);
1219       hr = root_regions->claim_next();
1220     }
1221   }
1222 };
1223 
1224 void ConcurrentMark::scanRootRegions() {
1225   // scan_in_progress() will have been set to true only if there was
1226   // at least one root region to scan. So, if it's false, we
1227   // should not attempt to do any further work.
1228   if (root_regions()->scan_in_progress()) {
1229     _parallel_marking_threads = calc_parallel_marking_threads();
1230     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1231            "Maximum number of marking threads exceeded");
1232     uint active_workers = MAX2(1U, parallel_marking_threads());
1233 
1234     CMRootRegionScanTask task(this);
1235     if (use_parallel_marking_threads()) {
1236       _parallel_workers->set_active_workers((int) active_workers);
1237       _parallel_workers->run_task(&task);
1238     } else {
1239       task.work(0);
1240     }
1241 
1242     // It's possible that has_aborted() is true here without actually
1243     // aborting the survivor scan earlier. This is OK as it's
1244     // mainly used for sanity checking.
1245     root_regions()->scan_finished();
1246   }
1247 }
1248 
1249 void ConcurrentMark::markFromRoots() {
1250   // we might be tempted to assert that:
1251   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1252   //        "inconsistent argument?");
1253   // However that wouldn't be right, because it's possible that
1254   // a safepoint is indeed in progress as a younger generation
1255   // stop-the-world GC happens even as we mark in this generation.
1256 
1257   _restart_for_overflow = false;
1258   force_overflow_conc()->init();
1259 
1260   // _g1h has _n_par_threads
1261   _parallel_marking_threads = calc_parallel_marking_threads();
1262   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1263     "Maximum number of marking threads exceeded");
1264 
1265   uint active_workers = MAX2(1U, parallel_marking_threads());
1266 
1267   // Parallel task terminator is set in "set_concurrency_and_phase()"
1268   set_concurrency_and_phase(active_workers, true /* concurrent */);
1269 
1270   CMConcurrentMarkingTask markingTask(this, cmThread());
1271   if (use_parallel_marking_threads()) {
1272     _parallel_workers->set_active_workers((int)active_workers);
1273     // Don't set _n_par_threads because it affects MT in process_strong_roots()
 1274     // and the decisions on that MT processing are made elsewhere.
1275     assert(_parallel_workers->active_workers() > 0, "Should have been set");
1276     _parallel_workers->run_task(&markingTask);
1277   } else {
1278     markingTask.work(0);
1279   }
1280   print_stats();
1281 }
1282 
1283 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1284   // world is stopped at this checkpoint
1285   assert(SafepointSynchronize::is_at_safepoint(),
1286          "world should be stopped");
1287 
1288   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1289 
1290   // If a full collection has happened, we shouldn't do this.
1291   if (has_aborted()) {
1292     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1293     return;
1294   }
1295 
1296   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1297 
1298   if (VerifyDuringGC) {
1299     HandleMark hm;  // handle scope
1300     Universe::heap()->prepare_for_verify();
1301     Universe::verify(VerifyOption_G1UsePrevMarking,
1302                      " VerifyDuringGC:(before)");
1303   }
1304   g1h->check_bitmaps("Remark Start");
1305 
1306   G1CollectorPolicy* g1p = g1h->g1_policy();
1307   g1p->record_concurrent_mark_remark_start();
1308 
1309   double start = os::elapsedTime();
1310 
1311   checkpointRootsFinalWork();
1312 
1313   double mark_work_end = os::elapsedTime();
1314 
1315   weakRefsWork(clear_all_soft_refs);
1316 
1317   if (has_overflown()) {
1318     // Oops.  We overflowed.  Restart concurrent marking.
1319     _restart_for_overflow = true;
1320     if (G1TraceMarkStackOverflow) {
1321       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1322     }
1323 
1324     // Verify the heap w.r.t. the previous marking bitmap.
1325     if (VerifyDuringGC) {
1326       HandleMark hm;  // handle scope
1327       Universe::heap()->prepare_for_verify();
1328       Universe::verify(VerifyOption_G1UsePrevMarking,
1329                        " VerifyDuringGC:(overflow)");
1330     }
1331 
1332     // Clear the marking state because we will be restarting
1333     // marking due to overflowing the global mark stack.
1334     reset_marking_state();
1335   } else {
1336     // Aggregate the per-task counting data that we have accumulated
1337     // while marking.
1338     aggregate_count_data();
1339 
1340     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1341     // We're done with marking.
 1342     // This is the end of the marking cycle. We expect all
1343     // threads to have SATB queues with active set to true.
1344     satb_mq_set.set_active_all_threads(false, /* new active value */
1345                                        true /* expected_active */);
1346 
1347     if (VerifyDuringGC) {
1348       HandleMark hm;  // handle scope
1349       Universe::heap()->prepare_for_verify();
1350       Universe::verify(VerifyOption_G1UseNextMarking,
1351                        " VerifyDuringGC:(after)");
1352     }
1353     g1h->check_bitmaps("Remark End");
1354     assert(!restart_for_overflow(), "sanity");
1355     // Completely reset the marking state since marking completed
1356     set_non_marking_state();
1357   }
1358 
1359   // Expand the marking stack, if we have to and if we can.
1360   if (_markStack.should_expand()) {
1361     _markStack.expand();
1362   }
1363 
1364   // Statistics
1365   double now = os::elapsedTime();
1366   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1367   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1368   _remark_times.add((now - start) * 1000.0);
1369 
1370   g1p->record_concurrent_mark_remark_end();
1371 
1372   G1CMIsAliveClosure is_alive(g1h);
1373   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1374 }
1375 
1376 // Base class of the closures that finalize and verify the
1377 // liveness counting data.
1378 class CMCountDataClosureBase: public HeapRegionClosure {
1379 protected:
1380   G1CollectedHeap* _g1h;
1381   ConcurrentMark* _cm;
1382   CardTableModRefBS* _ct_bs;
1383 
1384   BitMap* _region_bm;
1385   BitMap* _card_bm;
1386 
1387   // Takes a region that's not empty (i.e., it has at least one
 1388   // live object in it) and sets its corresponding bit on the region
1389   // bitmap to 1. If the region is "starts humongous" it will also set
1390   // to 1 the bits on the region bitmap that correspond to its
1391   // associated "continues humongous" regions.
1392   void set_bit_for_region(HeapRegion* hr) {
1393     assert(!hr->continuesHumongous(), "should have filtered those out");
1394 
1395     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1396     if (!hr->startsHumongous()) {
1397       // Normal (non-humongous) case: just set the bit.
1398       _region_bm->par_at_put(index, true);
1399     } else {
1400       // Starts humongous case: calculate how many regions are part of
1401       // this humongous region and then set the bit range.
1402       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1403       _region_bm->par_at_put_range(index, end_index, true);
1404     }
1405   }
1406 
1407 public:
1408   CMCountDataClosureBase(G1CollectedHeap* g1h,
1409                          BitMap* region_bm, BitMap* card_bm):
1410     _g1h(g1h), _cm(g1h->concurrent_mark()),
1411     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
1412     _region_bm(region_bm), _card_bm(card_bm) { }
1413 };
1414 
1415 // Closure that calculates the # live objects per region. Used
1416 // for verification purposes during the cleanup pause.
1417 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1418   CMBitMapRO* _bm;
1419   size_t _region_marked_bytes;
1420 
1421 public:
1422   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1423                          BitMap* region_bm, BitMap* card_bm) :
1424     CMCountDataClosureBase(g1h, region_bm, card_bm),
1425     _bm(bm), _region_marked_bytes(0) { }
1426 
1427   bool doHeapRegion(HeapRegion* hr) {
1428 
1429     if (hr->continuesHumongous()) {
1430       // We will ignore these here and process them when their
1431       // associated "starts humongous" region is processed (see
1432       // set_bit_for_region()). Note that we cannot rely on their
1433       // associated "starts humongous" region to have its bit set to
1434       // 1 since, due to the region chunking in the parallel region
1435       // iteration, a "continues humongous" region might be visited
1436       // before its associated "starts humongous" region.
1437       return false;
1438     }
1439 
1440     HeapWord* ntams = hr->next_top_at_mark_start();
1441     HeapWord* start = hr->bottom();
1442 
1443     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1444            err_msg("Preconditions not met - "
1445                    "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1446                    p2i(start), p2i(ntams), p2i(hr->end())));
1447 
1448     // Find the first marked object at or after "start".
1449     start = _bm->getNextMarkedWordAddress(start, ntams);
1450 
1451     size_t marked_bytes = 0;
1452 
1453     while (start < ntams) {
1454       oop obj = oop(start);
1455       int obj_sz = obj->size();
1456       HeapWord* obj_end = start + obj_sz;
1457 
1458       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1459       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1460 
1461       // Note: if we're looking at the last region in the heap, obj_end
1462       // could actually be just beyond the end of the heap; end_idx
1463       // will then correspond to a (non-existent) card that is also
1464       // just beyond the heap.
1465       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1466         // end of object is not card aligned - increment to cover
1467         // all the cards spanned by the object
1468         end_idx += 1;
1469       }
1470 
1471       // Set the bits in the card BM for the cards spanned by this object.
1472       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1473 
1474       // Add the size of this object to the number of marked bytes.
1475       marked_bytes += (size_t)obj_sz * HeapWordSize;
1476 
1477       // Find the next marked object after this one.
1478       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1479     }
1480 
1481     // Mark the allocated-since-marking portion...
1482     HeapWord* top = hr->top();
1483     if (ntams < top) {
1484       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1485       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1486 
1487       // Note: if we're looking at the last region in the heap, top
1488       // could actually be just beyond the end of the heap; end_idx
1489       // will then correspond to a (non-existent) card that is also
1490       // just beyond the heap.
1491       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1492         // top is not card aligned - increment to cover all the
1493         // cards spanned by the allocated-since-marking portion
1494         end_idx += 1;
1495       }
1496       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1497 
1498       // This definitely means the region has live objects.
1499       set_bit_for_region(hr);
1500     }
1501 
1502     // Update the live region bitmap.
1503     if (marked_bytes > 0) {
1504       set_bit_for_region(hr);
1505     }
1506 
1507     // Set the marked bytes for the current region so that
1508     // it can be queried by a calling verification routine
1509     _region_marked_bytes = marked_bytes;
1510 
1511     return false;
1512   }
1513 
1514   size_t region_marked_bytes() const { return _region_marked_bytes; }
1515 };
1516 
1517 // Heap region closure used for verifying the counting data
1518 // that was accumulated concurrently and aggregated during
1519 // the remark pause. This closure is applied to the heap
1520 // regions during the STW cleanup pause.
1521 
1522 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1523   G1CollectedHeap* _g1h;
1524   ConcurrentMark* _cm;
1525   CalcLiveObjectsClosure _calc_cl;
1526   BitMap* _region_bm;   // Region BM to be verified
1527   BitMap* _card_bm;     // Card BM to be verified
1528   bool _verbose;        // verbose output?
1529 
1530   BitMap* _exp_region_bm; // Expected Region BM values
1531   BitMap* _exp_card_bm;   // Expected card BM values
1532 
1533   int _failures;
1534 
1535 public:
1536   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1537                                 BitMap* region_bm,
1538                                 BitMap* card_bm,
1539                                 BitMap* exp_region_bm,
1540                                 BitMap* exp_card_bm,
1541                                 bool verbose) :
1542     _g1h(g1h), _cm(g1h->concurrent_mark()),
1543     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1544     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1545     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1546     _failures(0) { }
1547 
1548   int failures() const { return _failures; }
1549 
1550   bool doHeapRegion(HeapRegion* hr) {
1551     if (hr->continuesHumongous()) {
1552       // We will ignore these here and process them when their
1553       // associated "starts humongous" region is processed (see
1554       // set_bit_for_region()). Note that we cannot rely on their
1555       // associated "starts humongous" region to have its bit set to
1556       // 1 since, due to the region chunking in the parallel region
1557       // iteration, a "continues humongous" region might be visited
1558       // before its associated "starts humongous" region.
1559       return false;
1560     }
1561 
1562     int failures = 0;
1563 
1564     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1565     // this region and set the corresponding bits in the expected region
1566     // and card bitmaps.
1567     bool res = _calc_cl.doHeapRegion(hr);
1568     assert(res == false, "should be continuing");
1569 
1570     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1571                     Mutex::_no_safepoint_check_flag);
1572 
1573     // Verify the marked bytes for this region.
1574     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1575     size_t act_marked_bytes = hr->next_marked_bytes();
1576 
1577     // We're not OK if expected marked bytes > actual marked bytes. It means
1578     // we have missed accounting for some objects during the actual marking.
1579     if (exp_marked_bytes > act_marked_bytes) {
1580       if (_verbose) {
1581         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1582                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1583                                hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1584       }
1585       failures += 1;
1586     }
1587 
1588     // Verify the bit for this region in the actual and expected
1589     // (just calculated) region bitmaps.
1590     // We're not OK if the bit in the calculated expected region
1591     // bitmap is set and the bit in the actual region bitmap is not.
1592     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1593 
1594     bool expected = _exp_region_bm->at(index);
1595     bool actual = _region_bm->at(index);
1596     if (expected && !actual) {
1597       if (_verbose) {
1598         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1599                                "expected: %s, actual: %s",
1600                                hr->hrs_index(),
1601                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1602       }
1603       failures += 1;
1604     }
1605 
1606     // Verify that the card bit maps for the cards spanned by the current
1607     // region match. We have an error if we have a set bit in the expected
1608     // bit map and the corresponding bit in the actual bitmap is not set.
1609 
1610     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1611     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1612 
1613     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1614       expected = _exp_card_bm->at(i);
1615       actual = _card_bm->at(i);
1616 
1617       if (expected && !actual) {
1618         if (_verbose) {
1619           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1620                                  "expected: %s, actual: %s",
1621                                  hr->hrs_index(), i,
1622                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1623         }
1624         failures += 1;
1625       }
1626     }
1627 
1628     if (failures > 0 && _verbose)  {
1629       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1630                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1631                              HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
1632                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1633     }
1634 
1635     _failures += failures;
1636 
1637     // We could stop iteration over the heap when we
1638     // find the first violating region by returning true.
1639     return false;
1640   }
1641 };
1642 
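// Gang task that applies VerifyLiveObjectDataHRClosure to the heap
// regions (in parallel, when available) and accumulates the number of
// verification failures reported by the workers.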
1643 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1644 protected:
1645   G1CollectedHeap* _g1h;
1646   ConcurrentMark* _cm;
1647   BitMap* _actual_region_bm;
1648   BitMap* _actual_card_bm;
1649 
1650   uint    _n_workers;
1651 
1652   BitMap* _expected_region_bm;
1653   BitMap* _expected_card_bm;
1654 
1655   int  _failures;
1656   bool _verbose;
1657 
1658 public:
1659   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1660                             BitMap* region_bm, BitMap* card_bm,
1661                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1662     : AbstractGangTask("G1 verify final counting"),
1663       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1664       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1665       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1666       _failures(0), _verbose(false),
1667       _n_workers(0) {
1668     assert(VerifyDuringGC, "don't call this otherwise");
1669 
1670     // Use the value already set as the number of active threads
1671     // in the call to run_task().
1672     if (G1CollectedHeap::use_parallel_gc_threads()) {
1673       assert( _g1h->workers()->active_workers() > 0,
1674         "Should have been previously set");
1675       _n_workers = _g1h->workers()->active_workers();
1676     } else {
1677       _n_workers = 1;
1678     }
1679 
1680     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1681     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1682 
1683     _verbose = _cm->verbose_medium();
1684   }
1685 
1686   void work(uint worker_id) {
1687     assert(worker_id < _n_workers, "invariant");
1688 
1689     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1690                                             _actual_region_bm, _actual_card_bm,
1691                                             _expected_region_bm,
1692                                             _expected_card_bm,
1693                                             _verbose);
1694 
1695     if (G1CollectedHeap::use_parallel_gc_threads()) {
1696       _g1h->heap_region_par_iterate_chunked(&verify_cl,
1697                                             worker_id,
1698                                             _n_workers,
1699                                             HeapRegion::VerifyCountClaimValue);
1700     } else {
1701       _g1h->heap_region_iterate(&verify_cl);
1702     }
1703 
1704     Atomic::add(verify_cl.failures(), &_failures);
1705   }
1706 
1707   int failures() const { return _failures; }
1708 };
1709 
1710 // Closure that finalizes the liveness counting data.
1711 // Used during the cleanup pause.
1712 // Sets the bits corresponding to the interval [NTAMS, top)
1713 // (which contains the implicitly live objects) in the
1714 // card liveness bitmap. Also sets the bit, in the region
1715 // liveness bitmap, for each region containing live data.
1716 
1717 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1718  public:
1719   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1720                               BitMap* region_bm,
1721                               BitMap* card_bm) :
1722     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1723 
1724   bool doHeapRegion(HeapRegion* hr) {
1725 
1726     if (hr->continuesHumongous()) {
1727       // We will ignore these here and process them when their
1728       // associated "starts humongous" region is processed (see
1729       // set_bit_for_region()). Note that we cannot rely on their
1730       // associated "starts humongous" region to have its bit set to
1731       // 1 since, due to the region chunking in the parallel region
1732       // iteration, a "continues humongous" region might be visited
1733       // before its associated "starts humongous" region.
1734       return false;
1735     }
1736 
1737     HeapWord* ntams = hr->next_top_at_mark_start();
1738     HeapWord* top   = hr->top();
1739 
1740     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1741 
1742     // Mark the allocated-since-marking portion...
1743     if (ntams < top) {
1744       // This definitely means the region has live objects.
1745       set_bit_for_region(hr);
1746 
1747       // Now set the bits in the card bitmap for [ntams, top)
1748       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1749       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1750 
1751       // Note: if we're looking at the last region in the heap, top
1752       // could actually be just beyond the end of the heap; end_idx
1753       // will then correspond to a (non-existent) card that is also
1754       // just beyond the heap.
1755       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1756         // top is not card aligned - increment to cover all the
1757         // cards spanned by the allocated-since-marking portion
1758         end_idx += 1;
1759       }
1760 
1761       assert(end_idx <= _card_bm->size(),
1762              err_msg("oob: end_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1763                      end_idx, _card_bm->size()));
1764       assert(start_idx < _card_bm->size(),
1765              err_msg("oob: start_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1766                      start_idx, _card_bm->size()));
1767 
1768       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1769     }
1770 
1771     // Set the bit for the region if it contains live data
1772     if (hr->next_marked_bytes() > 0) {
1773       set_bit_for_region(hr);
1774     }
1775 
1776     return false;
1777   }
1778 };
1779 
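// Gang task that applies FinalCountDataUpdateClosure to the heap
// regions (in parallel, when available) during the cleanup pause.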
1780 class G1ParFinalCountTask: public AbstractGangTask {
1781 protected:
1782   G1CollectedHeap* _g1h;
1783   ConcurrentMark* _cm;
1784   BitMap* _actual_region_bm;
1785   BitMap* _actual_card_bm;
1786 
1787   uint    _n_workers;
1788 
1789 public:
1790   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1791     : AbstractGangTask("G1 final counting"),
1792       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1793       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1794       _n_workers(0) {
1795     // Use the value already set as the number of active threads
1796     // in the call to run_task().
1797     if (G1CollectedHeap::use_parallel_gc_threads()) {
1798       assert( _g1h->workers()->active_workers() > 0,
1799         "Should have been previously set");
1800       _n_workers = _g1h->workers()->active_workers();
1801     } else {
1802       _n_workers = 1;
1803     }
1804   }
1805 
1806   void work(uint worker_id) {
1807     assert(worker_id < _n_workers, "invariant");
1808 
1809     FinalCountDataUpdateClosure final_update_cl(_g1h,
1810                                                 _actual_region_bm,
1811                                                 _actual_card_bm);
1812 
1813     if (G1CollectedHeap::use_parallel_gc_threads()) {
1814       _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1815                                             worker_id,
1816                                             _n_workers,
1817                                             HeapRegion::FinalCountClaimValue);
1818     } else {
1819       _g1h->heap_region_iterate(&final_update_cl);
1820     }
1821   }
1822 };
1823 
1824 class G1ParNoteEndTask;
1825 
1826 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1827   G1CollectedHeap* _g1;
1828   size_t _max_live_bytes;
1829   uint _regions_claimed;
1830   size_t _freed_bytes;
1831   FreeRegionList* _local_cleanup_list;
1832   HeapRegionSetCount _old_regions_removed;
1833   HeapRegionSetCount _humongous_regions_removed;
1834   HRRSCleanupTask* _hrrs_cleanup_task;
1835   double _claimed_region_time;
1836   double _max_region_time;
1837 
1838 public:
1839   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1840                              FreeRegionList* local_cleanup_list,
1841                              HRRSCleanupTask* hrrs_cleanup_task) :
1842     _g1(g1),
1843     _max_live_bytes(0), _regions_claimed(0),
1844     _freed_bytes(0),
1845     _claimed_region_time(0.0), _max_region_time(0.0),
1846     _local_cleanup_list(local_cleanup_list),
1847     _old_regions_removed(),
1848     _humongous_regions_removed(),
1849     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1850 
1851   size_t freed_bytes() { return _freed_bytes; }
1852   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1853   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1854 
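  // Records the end of marking for the given region. A non-young region
  // that is used but contains no live data is entirely garbage, so it is
  // freed into the local cleanup list; otherwise the region's remembered
  // set is registered for cleanup work.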
1855   bool doHeapRegion(HeapRegion *hr) {
1856     if (hr->continuesHumongous()) {
1857       return false;
1858     }
1859     // We use a claim value of zero here because all regions
1860     // were claimed with value 1 in the FinalCount task.
1861     _g1->reset_gc_time_stamps(hr);
1862     double start = os::elapsedTime();
1863     _regions_claimed++;
1864     hr->note_end_of_marking();
1865     _max_live_bytes += hr->max_live_bytes();
1866 
1867     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1868       _freed_bytes += hr->used();
1869       hr->set_containing_set(NULL);
1870       if (hr->isHumongous()) {
1871         assert(hr->startsHumongous(), "we should only see starts humongous");
1872         _humongous_regions_removed.increment(1u, hr->capacity());
1873         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1874       } else {
1875         _old_regions_removed.increment(1u, hr->capacity());
1876         _g1->free_region(hr, _local_cleanup_list, true);
1877       }
1878     } else {
1879       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1880     }
1881 
1882     double region_time = (os::elapsedTime() - start);
1883     _claimed_region_time += region_time;
1884     if (region_time > _max_region_time) {
1885       _max_region_time = region_time;
1886     }
1887     return false;
1888   }
1889 
1890   size_t max_live_bytes() { return _max_live_bytes; }
1891   uint regions_claimed() { return _regions_claimed; }
1892   double claimed_region_time_sec() { return _claimed_region_time; }
1893   double max_region_time_sec() { return _max_region_time; }
1894 };
1895 
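// Gang task that applies G1NoteEndOfConcMarkClosure to the heap regions
// and then, under the ParGCRareEvent_lock, folds each worker's local
// cleanup list and statistics into the global state.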
1896 class G1ParNoteEndTask: public AbstractGangTask {
1897   friend class G1NoteEndOfConcMarkClosure;
1898 
1899 protected:
1900   G1CollectedHeap* _g1h;
1901   size_t _max_live_bytes;
1902   size_t _freed_bytes;
1903   FreeRegionList* _cleanup_list;
1904 
1905 public:
1906   G1ParNoteEndTask(G1CollectedHeap* g1h,
1907                    FreeRegionList* cleanup_list) :
1908     AbstractGangTask("G1 note end"), _g1h(g1h),
1909     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1910 
1911   void work(uint worker_id) {
1912     double start = os::elapsedTime();
1913     FreeRegionList local_cleanup_list("Local Cleanup List");
1914     HRRSCleanupTask hrrs_cleanup_task;
1915     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1916                                            &hrrs_cleanup_task);
1917     if (G1CollectedHeap::use_parallel_gc_threads()) {
1918       _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1919                                             _g1h->workers()->active_workers(),
1920                                             HeapRegion::NoteEndClaimValue);
1921     } else {
1922       _g1h->heap_region_iterate(&g1_note_end);
1923     }
1924     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1925 
1926     // Now update the lists
1927     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1928     {
1929       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1930       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1931       _max_live_bytes += g1_note_end.max_live_bytes();
1932       _freed_bytes += g1_note_end.freed_bytes();
1933 
1934       // If we iterate over the global cleanup list at the end of
1935       // cleanup to do this printing there is no guarantee that we only
1936       // generate output for the newly-reclaimed regions (the list
1937       // might not be empty at the beginning of cleanup; we might
1938       // still be working on its previous contents). So we do the
1939       // printing here, before we append the new regions to the global
1940       // cleanup list.
1941 
1942       G1HRPrinter* hr_printer = _g1h->hr_printer();
1943       if (hr_printer->is_active()) {
1944         FreeRegionListIterator iter(&local_cleanup_list);
1945         while (iter.more_available()) {
1946           HeapRegion* hr = iter.get_next();
1947           hr_printer->cleanup(hr);
1948         }
1949       }
1950 
1951       _cleanup_list->add_ordered(&local_cleanup_list);
1952       assert(local_cleanup_list.is_empty(), "post-condition");
1953 
1954       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1955     }
1956   }
1957   size_t max_live_bytes() { return _max_live_bytes; }
1958   size_t freed_bytes() { return _freed_bytes; }
1959 };
1960 
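// Gang task that scrubs the remembered sets using the region and card
// liveness bitmaps, removing entries for cards and regions that are
// known to contain no live data.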
1961 class G1ParScrubRemSetTask: public AbstractGangTask {
1962 protected:
1963   G1RemSet* _g1rs;
1964   BitMap* _region_bm;
1965   BitMap* _card_bm;
1966 public:
1967   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1968                        BitMap* region_bm, BitMap* card_bm) :
1969     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1970     _region_bm(region_bm), _card_bm(card_bm) { }
1971 
1972   void work(uint worker_id) {
1973     if (G1CollectedHeap::use_parallel_gc_threads()) {
1974       _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1975                        HeapRegion::ScrubRemSetClaimValue);
1976     } else {
1977       _g1rs->scrub(_region_bm, _card_bm);
1978     }
1979   }
1980 
1981 };
1982 
1983 void ConcurrentMark::cleanup() {
1984   // world is stopped at this checkpoint
1985   assert(SafepointSynchronize::is_at_safepoint(),
1986          "world should be stopped");
1987   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1988 
1989   // If a full collection has happened, we shouldn't do this.
1990   if (has_aborted()) {
1991     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1992     return;
1993   }
1994 
1995   g1h->verify_region_sets_optional();
1996 
1997   if (VerifyDuringGC) {
1998     HandleMark hm;  // handle scope
1999     Universe::heap()->prepare_for_verify();
2000     Universe::verify(VerifyOption_G1UsePrevMarking,
2001                      " VerifyDuringGC:(before)");
2002   }
2003   g1h->check_bitmaps("Cleanup Start");
2004 
2005   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
2006   g1p->record_concurrent_mark_cleanup_start();
2007 
2008   double start = os::elapsedTime();
2009 
2010   HeapRegionRemSet::reset_for_cleanup_tasks();
2011 
2012   uint n_workers;
2013 
2014   // Do counting once more with the world stopped for good measure.
2015   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
2016 
2017   if (G1CollectedHeap::use_parallel_gc_threads()) {
2018     assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
2019            "sanity check");
2020 
2021     g1h->set_par_threads();
2022     n_workers = g1h->n_par_threads();
2023     assert(g1h->n_par_threads() == n_workers,
2024            "Should not have been reset");
2025     g1h->workers()->run_task(&g1_par_count_task);
2026     // Done with the parallel phase so reset to 0.
2027     g1h->set_par_threads(0);
2028 
2029     assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
2030            "sanity check");
2031   } else {
2032     n_workers = 1;
2033     g1_par_count_task.work(0);
2034   }
2035 
2036   if (VerifyDuringGC) {
2037     // Verify that the counting data accumulated during marking matches
2038     // that calculated by walking the marking bitmap.
2039 
2040     // Bitmaps to hold expected values
2041     BitMap expected_region_bm(_region_bm.size(), true);
2042     BitMap expected_card_bm(_card_bm.size(), true);
2043 
2044     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2045                                                  &_region_bm,
2046                                                  &_card_bm,
2047                                                  &expected_region_bm,
2048                                                  &expected_card_bm);
2049 
2050     if (G1CollectedHeap::use_parallel_gc_threads()) {
2051       g1h->set_par_threads((int)n_workers);
2052       g1h->workers()->run_task(&g1_par_verify_task);
2053       // Done with the parallel phase so reset to 0.
2054       g1h->set_par_threads(0);
2055 
2056       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
2057              "sanity check");
2058     } else {
2059       g1_par_verify_task.work(0);
2060     }
2061 
2062     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2063   }
2064 
2065   size_t start_used_bytes = g1h->used();
2066   g1h->set_marking_complete();
2067 
2068   double count_end = os::elapsedTime();
2069   double this_final_counting_time = (count_end - start);
2070   _total_counting_time += this_final_counting_time;
2071 
2072   if (G1PrintRegionLivenessInfo) {
2073     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2074     _g1h->heap_region_iterate(&cl);
2075   }
2076 
2077   // Install the newly completed mark bitmap as "prev".
2078   swapMarkBitMaps();
2079 
2080   g1h->reset_gc_time_stamp();
2081 
2082   // Note end of marking in all heap regions.
2083   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
2084   if (G1CollectedHeap::use_parallel_gc_threads()) {
2085     g1h->set_par_threads((int)n_workers);
2086     g1h->workers()->run_task(&g1_par_note_end_task);
2087     g1h->set_par_threads(0);
2088 
2089     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
2090            "sanity check");
2091   } else {
2092     g1_par_note_end_task.work(0);
2093   }
2094   g1h->check_gc_time_stamps();
2095 
2096   if (!cleanup_list_is_empty()) {
2097     // The cleanup list is not empty, so we'll have to process it
2098     // concurrently. Notify anyone else that might be wanting free
2099     // regions that there will be more free regions coming soon.
2100     g1h->set_free_regions_coming();
2101   }
2102 
2103   // Do the RSet scrubbing before the record_concurrent_mark_cleanup_end()
2104   // call below, since it affects the metric by which we sort the heap regions.
2105   if (G1ScrubRemSets) {
2106     double rs_scrub_start = os::elapsedTime();
2107     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
2108     if (G1CollectedHeap::use_parallel_gc_threads()) {
2109       g1h->set_par_threads((int)n_workers);
2110       g1h->workers()->run_task(&g1_par_scrub_rs_task);
2111       g1h->set_par_threads(0);
2112 
2113       assert(g1h->check_heap_region_claim_values(
2114                                             HeapRegion::ScrubRemSetClaimValue),
2115              "sanity check");
2116     } else {
2117       g1_par_scrub_rs_task.work(0);
2118     }
2119 
2120     double rs_scrub_end = os::elapsedTime();
2121     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2122     _total_rs_scrub_time += this_rs_scrub_time;
2123   }
2124 
2125   // this will also free any regions totally full of garbage objects,
2126   // and sort the regions.
2127   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2128 
2129   // Statistics.
2130   double end = os::elapsedTime();
2131   _cleanup_times.add((end - start) * 1000.0);
2132 
2133   if (G1Log::fine()) {
2134     g1h->print_size_transition(gclog_or_tty,
2135                                start_used_bytes,
2136                                g1h->used(),
2137                                g1h->capacity());
2138   }
2139 
2140   // Clean up will have freed any regions completely full of garbage.
2141   // Update the soft reference policy with the new heap occupancy.
2142   Universe::update_heap_info_at_gc();
2143 
2144   // We need to make this be a "collection" so any collection pause that
2145   // races with it goes around and waits for completeCleanup to finish.
2146   g1h->increment_total_collections();
2147 
2148   // We reclaimed old regions so we should calculate the sizes to make
2149   // sure we update the old gen/space data.
2150   g1h->g1mm()->update_sizes();
2151 
2152   if (VerifyDuringGC) {
2153     HandleMark hm;  // handle scope
2154     Universe::heap()->prepare_for_verify();
2155     Universe::verify(VerifyOption_G1UsePrevMarking,
2156                      " VerifyDuringGC:(after)");
2157   }
2158   g1h->check_bitmaps("Cleanup End");
2159 
2160   g1h->verify_region_sets_optional();
2161   g1h->trace_heap_after_concurrent_cycle();
2162 }
2163 
2164 void ConcurrentMark::completeCleanup() {
2165   if (has_aborted()) return;
2166 
2167   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2168 
2169   _cleanup_list.verify_optional();
2170   FreeRegionList tmp_free_list("Tmp Free List");
2171 
2172   if (G1ConcRegionFreeingVerbose) {
2173     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2174                            "cleanup list has %u entries",
2175                            _cleanup_list.length());
2176   }
2177 
2178   // No one else should be accessing the _cleanup_list at this point,
2179   // so it's not necessary to take any locks.
2180   while (!_cleanup_list.is_empty()) {
2181     HeapRegion* hr = _cleanup_list.remove_head();
2182     assert(hr != NULL, "Got NULL from a non-empty list");
2183     hr->par_clear();
2184     tmp_free_list.add_ordered(hr);
2185 
2186     // Instead of adding one region at a time to the secondary_free_list,
2187     // we accumulate them in the local list and move them a few at a
2188     // time. This also cuts down on the number of notify_all() calls
2189     // we do during this process. We'll also append the local list when
2190     // _cleanup_list is empty (which means we just removed the last
2191     // region from the _cleanup_list).
2192     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2193         _cleanup_list.is_empty()) {
2194       if (G1ConcRegionFreeingVerbose) {
2195         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2196                                "appending %u entries to the secondary_free_list, "
2197                                "cleanup list still has %u entries",
2198                                tmp_free_list.length(),
2199                                _cleanup_list.length());
2200       }
2201 
2202       {
2203         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2204         g1h->secondary_free_list_add(&tmp_free_list);
2205         SecondaryFreeList_lock->notify_all();
2206       }
2207 
2208       if (G1StressConcRegionFreeing) {
2209         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2210           os::sleep(Thread::current(), (jlong) 1, false);
2211         }
2212       }
2213     }
2214   }
2215   assert(tmp_free_list.is_empty(), "post-condition");
2216 }
2217 
2218 // Supporting Object and Oop closures for reference discovery
2219 // and processing during marking
2220 
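// An object is considered alive if it lies outside the G1 reserved heap
// or if the marking information gathered so far has not shown it to be
// dead ("ill").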
2221 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2222   HeapWord* addr = (HeapWord*)obj;
2223   return addr != NULL &&
2224          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2225 }
2226 
2227 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2228 // Uses the CMTask associated with a worker thread (for serial reference
2229 // processing the CMTask for worker 0 is used) to preserve (mark) and
2230 // trace referent objects.
2231 //
2232 // Using the CMTask and embedded local queues avoids having the worker
2233 // threads operating on the global mark stack. This reduces the risk
2234 // of overflowing the stack - which we would rather avoid at this late
2235 // stage. Also using the tasks' local queues removes the potential
2236 // of the workers interfering with each other that could occur if
2237 // operating on the global stack.
2238 
2239 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2240   ConcurrentMark* _cm;
2241   CMTask*         _task;
2242   int             _ref_counter_limit;
2243   int             _ref_counter;
2244   bool            _is_serial;
2245  public:
2246   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2247     _cm(cm), _task(task), _is_serial(is_serial),
2248     _ref_counter_limit(G1RefProcDrainInterval) {
2249     assert(_ref_counter_limit > 0, "sanity");
2250     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2251     _ref_counter = _ref_counter_limit;
2252   }
2253 
2254   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2255   virtual void do_oop(      oop* p) { do_oop_work(p); }
2256 
2257   template <class T> void do_oop_work(T* p) {
2258     if (!_cm->has_overflown()) {
2259       oop obj = oopDesc::load_decode_heap_oop(p);
2260       if (_cm->verbose_high()) {
2261         gclog_or_tty->print_cr("\t[%u] we're looking at location "
2262                                "*"PTR_FORMAT" = "PTR_FORMAT,
2263                                _task->worker_id(), p2i(p), p2i((void*) obj));
2264       }
2265 
2266       _task->deal_with_reference(obj);
2267       _ref_counter--;
2268 
2269       if (_ref_counter == 0) {
2270         // We have dealt with _ref_counter_limit references, pushing them
2271         // and objects reachable from them on to the local stack (and
2272         // possibly the global stack). Call CMTask::do_marking_step() to
2273         // process these entries.
2274         //
2275         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2276         // there's nothing more to do (i.e. we're done with the entries that
2277         // were pushed as a result of the CMTask::deal_with_reference() calls
2278         // above) or we overflow.
2279         //
2280         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2281         // flag while there may still be some work to do. (See the comment at
2282         // the beginning of CMTask::do_marking_step() for those conditions -
2283         // one of which is reaching the specified time target.) It is only
2284         // when CMTask::do_marking_step() returns without setting the
2285         // has_aborted() flag that the marking step has completed.
2286         do {
2287           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2288           _task->do_marking_step(mark_step_duration_ms,
2289                                  false      /* do_termination */,
2290                                  _is_serial);
2291         } while (_task->has_aborted() && !_cm->has_overflown());
2292         _ref_counter = _ref_counter_limit;
2293       }
2294     } else {
2295       if (_cm->verbose_high()) {
2296          gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2297       }
2298     }
2299   }
2300 };
2301 
2302 // 'Drain' closure used by both serial and parallel reference processing.
2303 // Uses the CMTask associated with a given worker thread (for serial
2304 // reference processing the CMTask for worker 0 is used). Calls the
2305 // do_marking_step routine, with an unbelievably large timeout value,
2306 // to drain the marking data structures of the remaining entries
2307 // added by the 'keep alive' oop closure above.
2308 
2309 class G1CMDrainMarkingStackClosure: public VoidClosure {
2310   ConcurrentMark* _cm;
2311   CMTask*         _task;
2312   bool            _is_serial;
2313  public:
2314   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2315     _cm(cm), _task(task), _is_serial(is_serial) {
2316     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2317   }
2318 
2319   void do_void() {
2320     do {
2321       if (_cm->verbose_high()) {
2322         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2323                                _task->worker_id(), BOOL_TO_STR(_is_serial));
2324       }
2325 
2326       // We call CMTask::do_marking_step() to completely drain the local
2327       // and global marking stacks of entries pushed by the 'keep alive'
2328       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2329       //
2330       // CMTask::do_marking_step() is called in a loop, which we'll exit
2331       // if there's nothing more to do (i.e. we've completely drained the
2332       // entries that were pushed as a result of applying the 'keep alive'
2333       // closure to the entries on the discovered ref lists) or we overflow
2334       // the global marking stack.
2335       //
2336       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2337       // flag while there may still be some work to do. (See the comment at
2338       // the beginning of CMTask::do_marking_step() for those conditions -
2339       // one of which is reaching the specified time target.) It is only
2340       // when CMTask::do_marking_step() returns without setting the
2341       // has_aborted() flag that the marking step has completed.
2342 
2343       _task->do_marking_step(1000000000.0 /* something very large */,
2344                              true         /* do_termination */,
2345                              _is_serial);
2346     } while (_task->has_aborted() && !_cm->has_overflown());
2347   }
2348 };
2349 
2350 // Implementation of AbstractRefProcTaskExecutor for parallel
2351 // reference processing at the end of G1 concurrent marking
2352 
2353 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2354 private:
2355   G1CollectedHeap* _g1h;
2356   ConcurrentMark*  _cm;
2357   WorkGang*        _workers;
2358   int              _active_workers;
2359 
2360 public:
2361   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2362                         ConcurrentMark* cm,
2363                         WorkGang* workers,
2364                         int n_workers) :
2365     _g1h(g1h), _cm(cm),
2366     _workers(workers), _active_workers(n_workers) { }
2367 
2368   // Executes the given task using concurrent marking worker threads.
2369   virtual void execute(ProcessTask& task);
2370   virtual void execute(EnqueueTask& task);
2371 };
2372 
2373 class G1CMRefProcTaskProxy: public AbstractGangTask {
2374   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2375   ProcessTask&     _proc_task;
2376   G1CollectedHeap* _g1h;
2377   ConcurrentMark*  _cm;
2378 
2379 public:
2380   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2381                      G1CollectedHeap* g1h,
2382                      ConcurrentMark* cm) :
2383     AbstractGangTask("Process reference objects in parallel"),
2384     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2385     ReferenceProcessor* rp = _g1h->ref_processor_cm();
2386     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2387   }
2388 
2389   virtual void work(uint worker_id) {
2390     CMTask* task = _cm->task(worker_id);
2391     G1CMIsAliveClosure g1_is_alive(_g1h);
2392     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2393     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2394 
2395     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2396   }
2397 };
2398 
2399 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2400   assert(_workers != NULL, "Need parallel worker threads.");
2401   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2402 
2403   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2404 
2405   // We need to reset the concurrency level before each
2406   // proxy task execution, so that the termination protocol
2407   // and overflow handling in CMTask::do_marking_step() know
2408   // how many workers to wait for.
2409   _cm->set_concurrency(_active_workers);
2410   _g1h->set_par_threads(_active_workers);
2411   _workers->run_task(&proc_task_proxy);
2412   _g1h->set_par_threads(0);
2413 }
2414 
2415 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2416   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2417   EnqueueTask& _enq_task;
2418 
2419 public:
2420   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2421     AbstractGangTask("Enqueue reference objects in parallel"),
2422     _enq_task(enq_task) { }
2423 
2424   virtual void work(uint worker_id) {
2425     _enq_task.work(worker_id);
2426   }
2427 };
2428 
2429 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2430   assert(_workers != NULL, "Need parallel worker threads.");
2431   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2432 
2433   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2434 
2435   // Not strictly necessary but...
2436   //
2437   // We need to reset the concurrency level before each
2438   // proxy task execution, so that the termination protocol
2439   // and overflow handling in CMTask::do_marking_step() know
2440   // how many workers to wait for.
2441   _cm->set_concurrency(_active_workers);
2442   _g1h->set_par_threads(_active_workers);
2443   _workers->run_task(&enq_task_proxy);
2444   _g1h->set_par_threads(0);
2445 }
2446 
2447 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2448   if (has_overflown()) {
2449     // Skip processing the discovered references if we have
2450     // overflown the global marking stack. Reference objects
2451     // only get discovered once so it is OK to not
2452     // de-populate the discovered reference lists. We could have,
2453     // but the only benefit would be that, when marking restarts,
2454     // fewer reference objects are discovered.
2455     return;
2456   }
2457 
2458   ResourceMark rm;
2459   HandleMark   hm;
2460 
2461   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2462 
2463   // Is alive closure.
2464   G1CMIsAliveClosure g1_is_alive(g1h);
2465 
2466   // Inner scope to exclude the cleaning of the string and symbol
2467   // tables from the displayed time.
2468   {
2469     if (G1Log::finer()) {
2470       gclog_or_tty->put(' ');
2471     }
2472     GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm());
2473 
2474     ReferenceProcessor* rp = g1h->ref_processor_cm();
2475 
2476     // See the comment in G1CollectedHeap::ref_processing_init()
2477     // about how reference processing currently works in G1.
2478 
2479     // Set the soft reference policy
2480     rp->setup_policy(clear_all_soft_refs);
2481     assert(_markStack.isEmpty(), "mark stack should be empty");
2482 
2483     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2484     // in serial reference processing. Note these closures are also
2485     // used for serially processing (by the current thread) the
2486     // JNI references during parallel reference processing.
2487     //
2488     // These closures do not need to synchronize with the worker
2489     // threads involved in parallel reference processing as these
2490     // instances are executed serially by the current thread (e.g.
2491     // reference processing is not multi-threaded and is thus
2492     // performed by the current thread instead of a gang worker).
2493     //
2494     // The gang tasks involved in parallel reference processing create
2495     // their own instances of these closures, which do their own
2496     // synchronization among themselves.
2497     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2498     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2499 
2500     // We need at least one active thread. If reference processing
2501     // is not multi-threaded we use the current (VMThread) thread,
2502     // otherwise we use the work gang from the G1CollectedHeap and
2503     // we utilize all the worker threads we can.
2504     bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
2505     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2506     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2507 
2508     // Parallel processing task executor.
2509     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2510                                               g1h->workers(), active_workers);
2511     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2512 
2513     // Set the concurrency level. The phase was already set prior to
2514     // executing the remark task.
2515     set_concurrency(active_workers);
2516 
2517     // Set the degree of MT processing here.  If the discovery was done MT,
2518     // the number of threads involved during discovery could differ from
2519     // the number of active workers.  This is OK as long as the discovered
2520     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2521     rp->set_active_mt_degree(active_workers);
2522 
2523     // Process the weak references.
2524     const ReferenceProcessorStats& stats =
2525         rp->process_discovered_references(&g1_is_alive,
2526                                           &g1_keep_alive,
2527                                           &g1_drain_mark_stack,
2528                                           executor,
2529                                           g1h->gc_timer_cm());
2530     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2531 
2532     // The do_oop work routines of the keep_alive and drain_marking_stack
2533     // oop closures will set the has_overflown flag if we overflow the
2534     // global marking stack.
2535 
2536     assert(_markStack.overflow() || _markStack.isEmpty(),
2537             "mark stack should be empty (unless it overflowed)");
2538 
2539     if (_markStack.overflow()) {
2540       // This should have been done already when we tried to push an
2541       // entry on to the global mark stack. But let's do it again.
2542       set_has_overflown();
2543     }
2544 
2545     assert(rp->num_q() == active_workers, "why not");
2546 
2547     rp->enqueue_discovered_references(executor);
2548 
2549     rp->verify_no_references_recorded();
2550     assert(!rp->discovery_enabled(), "Post condition");
2551   }
2552 
2553   if (has_overflown()) {
2554     // We can not trust g1_is_alive if the marking stack overflowed
2555     return;
2556   }
2557 
2558   g1h->unlink_string_and_symbol_table(&g1_is_alive,
2559                                       /* process_strings */ false, // currently strings are always roots
2560                                       /* process_symbols */ true);
2561 }
2562 
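// The "next" bitmap now holds complete liveness information for the cycle
// that has just finished, so it becomes the new "prev" bitmap; the old
// "prev" bitmap will be reused as the "next" bitmap for the next cycle.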
2563 void ConcurrentMark::swapMarkBitMaps() {
2564   CMBitMapRO* temp = _prevMarkBitMap;
2565   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2566   _nextMarkBitMap  = (CMBitMap*)  temp;
2567 }
2568 
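// Gang task used for the final (remark) marking step: each active CMTask
// repeatedly calls do_marking_step() until it completes without aborting,
// unless the global marking stack has overflown.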
2569 class CMRemarkTask: public AbstractGangTask {
2570 private:
2571   ConcurrentMark* _cm;
2572   bool            _is_serial;
2573 public:
2574   void work(uint worker_id) {
2575     // Since all available tasks are actually started, we should
2576     // only proceed if we're supposed to be active.
2577     if (worker_id < _cm->active_tasks()) {
2578       CMTask* task = _cm->task(worker_id);
2579       task->record_start_time();
2580       do {
2581         task->do_marking_step(1000000000.0 /* something very large */,
2582                               true         /* do_termination       */,
2583                               _is_serial);
2584       } while (task->has_aborted() && !_cm->has_overflown());
2585       // If we overflow, then we do not want to restart. We instead
2586       // want to abort remark and do concurrent marking again.
2587       task->record_end_time();
2588     }
2589   }
2590 
2591   CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
2592     AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
2593     _cm->terminator()->reset_for_reuse(active_workers);
2594   }
2595 };
2596 
2597 void ConcurrentMark::checkpointRootsFinalWork() {
2598   ResourceMark rm;
2599   HandleMark   hm;
2600   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2601 
2602   g1h->ensure_parsability(false);
2603 
2604   if (G1CollectedHeap::use_parallel_gc_threads()) {
2605     G1CollectedHeap::StrongRootsScope srs(g1h);
2606     // this is remark, so we'll use up all active threads
2607     uint active_workers = g1h->workers()->active_workers();
2608     if (active_workers == 0) {
2609       assert(active_workers > 0, "Should have been set earlier");
2610       active_workers = (uint) ParallelGCThreads;
2611       g1h->workers()->set_active_workers(active_workers);
2612     }
2613     set_concurrency_and_phase(active_workers, false /* concurrent */);
2614     // Leave _parallel_marking_threads at its
2615     // value originally calculated in the ConcurrentMark
2616     // constructor and pass the values of the active workers
2617     // through the gang in the task.
2618 
2619     CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
2620     // We will start all available threads, even if we decide that the
2621     // active_workers will be fewer. The extra ones will just bail out
2622     // immediately.
2623     g1h->set_par_threads(active_workers);
2624     g1h->workers()->run_task(&remarkTask);
2625     g1h->set_par_threads(0);
2626   } else {
2627     G1CollectedHeap::StrongRootsScope srs(g1h);
2628     uint active_workers = 1;
2629     set_concurrency_and_phase(active_workers, false /* concurrent */);
2630 
2631     // Note - if there's no work gang then the VMThread will be
2632     // the thread to execute the remark - serially. We have
2633     // to pass true for the is_serial parameter so that
2634     // CMTask::do_marking_step() doesn't enter the sync
2635     // barriers in the event of an overflow. Doing so will
2636     // cause an assert that the current thread is not a
2637     // concurrent GC thread.
2638     CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/);
2639     remarkTask.work(0);
2640   }
2641   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2642   guarantee(has_overflown() ||
2643             satb_mq_set.completed_buffers_num() == 0,
2644             err_msg("Invariant: has_overflown = %s, num buffers = %d",
2645                     BOOL_TO_STR(has_overflown()),
2646                     satb_mq_set.completed_buffers_num()));
2647 
2648   print_stats();
2649 }
2650 
2651 #ifndef PRODUCT
2652 
2653 class PrintReachableOopClosure: public OopClosure {
2654 private:
2655   G1CollectedHeap* _g1h;
2656   outputStream*    _out;
2657   VerifyOption     _vo;
2658   bool             _all;
2659 
2660 public:
2661   PrintReachableOopClosure(outputStream* out,
2662                            VerifyOption  vo,
2663                            bool          all) :
2664     _g1h(G1CollectedHeap::heap()),
2665     _out(out), _vo(vo), _all(all) { }
2666 
2667   void do_oop(narrowOop* p) { do_oop_work(p); }
2668   void do_oop(      oop* p) { do_oop_work(p); }
2669 
2670   template <class T> void do_oop_work(T* p) {
2671     oop         obj = oopDesc::load_decode_heap_oop(p);
2672     const char* str = NULL;
2673     const char* str2 = "";
2674 
2675     if (obj == NULL) {
2676       str = "";
2677     } else if (!_g1h->is_in_g1_reserved(obj)) {
2678       str = " O";
2679     } else {
2680       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2681       bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2682       bool marked = _g1h->is_marked(obj, _vo);
2683 
2684       if (over_tams) {
2685         str = " >";
2686         if (marked) {
2687           str2 = " AND MARKED";
2688         }
2689       } else if (marked) {
2690         str = " M";
2691       } else {
2692         str = " NOT";
2693       }
2694     }
2695 
2696     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2697                    p2i(p), p2i((void*) obj), str, str2);
2698   }
2699 };
2700 
2701 class PrintReachableObjectClosure : public ObjectClosure {
2702 private:
2703   G1CollectedHeap* _g1h;
2704   outputStream*    _out;
2705   VerifyOption     _vo;
2706   bool             _all;
2707   HeapRegion*      _hr;
2708 
2709 public:
2710   PrintReachableObjectClosure(outputStream* out,
2711                               VerifyOption  vo,
2712                               bool          all,
2713                               HeapRegion*   hr) :
2714     _g1h(G1CollectedHeap::heap()),
2715     _out(out), _vo(vo), _all(all), _hr(hr) { }
2716 
2717   void do_object(oop o) {
2718     bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2719     bool marked = _g1h->is_marked(o, _vo);
2720     bool print_it = _all || over_tams || marked;
2721 
2722     if (print_it) {
2723       _out->print_cr(" "PTR_FORMAT"%s",
2724                      p2i((void *)o), (over_tams) ? " >" : (marked) ? " M" : "");
2725       PrintReachableOopClosure oopCl(_out, _vo, _all);
2726       o->oop_iterate_no_header(&oopCl);
2727     }
2728   }
2729 };
2730 
2731 class PrintReachableRegionClosure : public HeapRegionClosure {
2732 private:
2733   G1CollectedHeap* _g1h;
2734   outputStream*    _out;
2735   VerifyOption     _vo;
2736   bool             _all;
2737 
2738 public:
2739   bool doHeapRegion(HeapRegion* hr) {
2740     HeapWord* b = hr->bottom();
2741     HeapWord* e = hr->end();
2742     HeapWord* t = hr->top();
2743     HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2744     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2745                    "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p));
2746     _out->cr();
2747 
2748     HeapWord* from = b;
2749     HeapWord* to   = t;
2750 
2751     if (to > from) {
2752       _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to));
2753       _out->cr();
2754       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2755       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2756       _out->cr();
2757     }
2758 
2759     return false;
2760   }
2761 
2762   PrintReachableRegionClosure(outputStream* out,
2763                               VerifyOption  vo,
2764                               bool          all) :
2765     _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2766 };
2767 
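// Dumps, to a file named <G1PrintReachableBaseFile>.<str>, the marking
// and reachability information for every region in the heap.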
2768 void ConcurrentMark::print_reachable(const char* str,
2769                                      VerifyOption vo,
2770                                      bool all) {
2771   gclog_or_tty->cr();
2772   gclog_or_tty->print_cr("== Doing heap dump... ");
2773 
2774   if (G1PrintReachableBaseFile == NULL) {
2775     gclog_or_tty->print_cr("  #### error: no base file defined");
2776     return;
2777   }
2778 
2779   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2780       (JVM_MAXPATHLEN - 1)) {
2781     gclog_or_tty->print_cr("  #### error: file name too long");
2782     return;
2783   }
2784 
2785   char file_name[JVM_MAXPATHLEN];
2786   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2787   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2788 
2789   fileStream fout(file_name);
2790   if (!fout.is_open()) {
2791     gclog_or_tty->print_cr("  #### error: could not open file");
2792     return;
2793   }
2794 
2795   outputStream* out = &fout;
2796   out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2797   out->cr();
2798 
2799   out->print_cr("--- ITERATING OVER REGIONS");
2800   out->cr();
2801   PrintReachableRegionClosure rcl(out, vo, all);
2802   _g1h->heap_region_iterate(&rcl);
2803   out->cr();
2804 
2805   gclog_or_tty->print_cr("  done");
2806   gclog_or_tty->flush();
2807 }
2808 
2809 #endif // PRODUCT
2810 
2811 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2812   // Note we are overriding the read-only view of the prev map here, via
2813   // the cast.
2814   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2815 }
2816 
2817 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2818   _nextMarkBitMap->clearRange(mr);
2819 }
2820 
2821 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
2822   clearRangePrevBitmap(mr);
2823   clearRangeNextBitmap(mr);
2824 }
2825 
2826 HeapRegion*
2827 ConcurrentMark::claim_region(uint worker_id) {
2828   // "checkpoint" the finger
2829   HeapWord* finger = _finger;
2830 
2831   // _heap_end will not change underneath our feet; it only changes at
2832   // yield points.
2833   while (finger < _heap_end) {
2834     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2835 
2836     // Note on how this code handles humongous regions. In the
2837     // normal case the finger will reach the start of a "starts
2838     // humongous" (SH) region. Its end will either be the end of the
2839     // last "continues humongous" (CH) region in the sequence, or the
2840     // standard end of the SH region (if the SH is the only region in
2841     // the sequence). That way claim_region() will skip over the CH
2842     // regions. However, there is a subtle race between a CM thread
2843     // executing this method and a mutator thread doing a humongous
2844     // object allocation. The two are not mutually exclusive as the CM
2845     // thread does not need to hold the Heap_lock when it gets
2846     // here. So there is a chance that claim_region() will come across
2847     // a free region that's in the process of becoming a SH or a CH
2848     // region. In the former case, it will either
2849     //   a) Miss the update to the region's end, in which case it will
2850     //      visit every subsequent CH region, will find their bitmaps
2851     //      empty, and do nothing, or
2852     //   b) Will observe the update of the region's end (in which case
2853     //      it will skip the subsequent CH regions).
2854     // If it comes across a region that suddenly becomes CH, the
2855     // scenario will be similar to b). So, the race between
2856     // claim_region() and a humongous object allocation might force us
2857     // to do a bit of unnecessary work (due to some unnecessary bitmap
2858     // iterations) but it should not introduce any correctness issues.
2859     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
2860     HeapWord*   bottom        = curr_region->bottom();
2861     HeapWord*   end           = curr_region->end();
2862     HeapWord*   limit         = curr_region->next_top_at_mark_start();
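    // limit is the region's NTAMS: objects allocated at or above it since
    // marking started are considered implicitly live and are not scanned
    // via the bitmap during this cycle.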
2863 
2864     if (verbose_low()) {
2865       gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2866                              "["PTR_FORMAT", "PTR_FORMAT"), "
2867                              "limit = "PTR_FORMAT,
2868                              worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2869     }
2870 
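    // Attempt to claim [finger, end) by atomically moving the global finger
    // from our local snapshot up to this region's end. If the cmpxchg below
    // returns our snapshot we won the race; otherwise another worker moved
    // the finger first and we will retry with its new value.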
2871     // Is the gap between reading the finger and doing the CAS too long?
2872     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2873     if (res == finger) {
2874       // we succeeded
2875 
2876       // Note that _finger == end cannot be guaranteed here since
2877       // someone else might have moved the finger even further.
2878       assert(_finger >= end, "the finger should have moved forward");
2879 
2880       if (verbose_low()) {
2881         gclog_or_tty->print_cr("[%u] we were successful with region = "
2882                                PTR_FORMAT, worker_id, p2i(curr_region));
2883       }
2884 
2885       if (limit > bottom) {
2886         if (verbose_low()) {
2887           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
2888                                  "returning it ", worker_id, p2i(curr_region));
2889         }
2890         return curr_region;
2891       } else {
2892         assert(limit == bottom,
2893                "the region limit should be at bottom");
2894         if (verbose_low()) {
2895           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
2896                                  "returning NULL", worker_id, p2i(curr_region));
2897         }
2898         // we return NULL and the caller should try calling
2899         // claim_region() again.
2900         return NULL;
2901       }
2902     } else {
2903       assert(_finger > finger, "the finger should have moved forward");
2904       if (verbose_low()) {
2905         gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
2906                                "global finger = "PTR_FORMAT", "
2907                                "our finger = "PTR_FORMAT,
2908                                worker_id, p2i(_finger), p2i(finger));
2909       }
2910 
2911       // read it again
2912       finger = _finger;
2913     }
2914   }
2915 
2916   return NULL;
2917 }
2918 
2919 #ifndef PRODUCT
2920 enum VerifyNoCSetOopsPhase {
2921   VerifyNoCSetOopsStack,
2922   VerifyNoCSetOopsQueues,
2923   VerifyNoCSetOopsSATBCompleted,
2924   VerifyNoCSetOopsSATBThread
2925 };
2926 
2927 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
2928 private:
2929   G1CollectedHeap* _g1h;
2930   VerifyNoCSetOopsPhase _phase;
2931   int _info;
2932 
2933   const char* phase_str() {
2934     switch (_phase) {
2935     case VerifyNoCSetOopsStack:         return "Stack";
2936     case VerifyNoCSetOopsQueues:        return "Queue";
2937     case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
2938     case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
2939     default:                            ShouldNotReachHere();
2940     }
2941     return NULL;
2942   }
2943 
2944   void do_object_work(oop obj) {
2945     guarantee(!_g1h->obj_in_cs(obj),
2946               err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2947                       p2i((void*) obj), phase_str(), _info));
2948   }
2949 
2950 public:
2951   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2952 
2953   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2954     _phase = phase;
2955     _info = info;
2956   }
2957 
2958   virtual void do_oop(oop* p) {
2959     oop obj = oopDesc::load_decode_heap_oop(p);
2960     do_object_work(obj);
2961   }
2962 
2963   virtual void do_oop(narrowOop* p) {
2964     // We should not come across narrow oops while scanning marking
2965     // stacks and SATB buffers.
2966     ShouldNotReachHere();
2967   }
2968 
2969   virtual void do_object(oop obj) {
2970     do_object_work(obj);
2971   }
2972 };
2973 
2974 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
2975                                          bool verify_enqueued_buffers,
2976                                          bool verify_thread_buffers,
2977                                          bool verify_fingers) {
2978   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2979   if (!G1CollectedHeap::heap()->mark_in_progress()) {
2980     return;
2981   }
2982 
2983   VerifyNoCSetOopsClosure cl;
2984 
2985   if (verify_stacks) {
2986     // Verify entries on the global mark stack
2987     cl.set_phase(VerifyNoCSetOopsStack);
2988     _markStack.oops_do(&cl);
2989 
2990     // Verify entries on the task queues
2991     for (uint i = 0; i < _max_worker_id; i += 1) {
2992       cl.set_phase(VerifyNoCSetOopsQueues, i);
2993       CMTaskQueue* queue = _task_queues->queue(i);
2994       queue->oops_do(&cl);
2995     }
2996   }
2997 
2998   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
2999 
3000   // Verify entries on the enqueued SATB buffers
3001   if (verify_enqueued_buffers) {
3002     cl.set_phase(VerifyNoCSetOopsSATBCompleted);
3003     satb_qs.iterate_completed_buffers_read_only(&cl);
3004   }
3005 
3006   // Verify entries on the per-thread SATB buffers
3007   if (verify_thread_buffers) {
3008     cl.set_phase(VerifyNoCSetOopsSATBThread);
3009     satb_qs.iterate_thread_buffers_read_only(&cl);
3010   }
3011 
3012   if (verify_fingers) {
3013     // Verify the global finger
3014     HeapWord* global_finger = finger();
3015     if (global_finger != NULL && global_finger < _heap_end) {
3016       // The global finger always points to a heap region boundary. We
3017       // use heap_region_containing_raw() to get the containing region
3018       // given that the global finger could be pointing to a free region
3019       // which subsequently becomes continues humongous. If that
3020       // happens, heap_region_containing() will return the bottom of the
3021       // corresponding starts humongous region and the check below will
3022       // not hold any more.
3023       HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
3024       guarantee(global_finger == global_hr->bottom(),
3025                 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
3026                         p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
3027     }
3028 
3029     // Verify the task fingers
3030     assert(parallel_marking_threads() <= _max_worker_id, "sanity");
3031     for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
3032       CMTask* task = _tasks[i];
3033       HeapWord* task_finger = task->finger();
3034       if (task_finger != NULL && task_finger < _heap_end) {
3035         // See above note on the global finger verification.
3036         HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
3037         guarantee(task_finger == task_hr->bottom() ||
3038                   !task_hr->in_collection_set(),
3039                   err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
3040                           p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
3041       }
3042     }
3043   }
3044 }
3045 #endif // PRODUCT
3046 
3047 // Aggregate the counting data that was constructed concurrently
3048 // with marking.
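// Each worker accumulated, per region, its own marked-byte count and card
// bitmap during marking; this closure folds those per-worker "stripes" into
// the region's marked_bytes and into the single global card bitmap.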
3049 class AggregateCountDataHRClosure: public HeapRegionClosure {
3050   G1CollectedHeap* _g1h;
3051   ConcurrentMark* _cm;
3052   CardTableModRefBS* _ct_bs;
3053   BitMap* _cm_card_bm;
3054   uint _max_worker_id;
3055 
3056  public:
3057   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
3058                               BitMap* cm_card_bm,
3059                               uint max_worker_id) :
3060     _g1h(g1h), _cm(g1h->concurrent_mark()),
3061     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
3062     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
3063 
3064   bool doHeapRegion(HeapRegion* hr) {
3065     if (hr->continuesHumongous()) {
3066       // We will ignore these here and process them when their
3067       // associated "starts humongous" region is processed.
3068       // Note that we cannot rely on their associated
3069       // "starts humongous" region to have its bit set to 1
3070       // since, due to the region chunking in the parallel region
3071       // iteration, a "continues humongous" region might be visited
3072       // before its associated "starts humongous".
3073       return false;
3074     }
3075 
3076     HeapWord* start = hr->bottom();
3077     HeapWord* limit = hr->next_top_at_mark_start();
3078     HeapWord* end = hr->end();
3079 
3080     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
3081            err_msg("Preconditions not met - "
3082                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
3083                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
3084                    p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));
3085 
3086     assert(hr->next_marked_bytes() == 0, "Precondition");
3087 
3088     if (start == limit) {
3089       // NTAMS of this region has not been set so nothing to do.
3090       return false;
3091     }
3092 
3093     // 'start' should be in the heap.
3094     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
3095     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
3096     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
3097 
3098     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
3099     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
3100     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
3101 
3102     // If ntams is not card aligned then we bump the card bitmap index
3103     // for limit so that we get all the cards spanned by
3104     // the object ending at ntams.
3105     // Note: if this is the last region in the heap then ntams
3106     // could actually be just beyond the end of the heap;
3107     // limit_idx will then correspond to a (non-existent) card
3108     // that is also outside the heap.
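    // Example: if ntams falls in the middle of a card, the object ending at
    // ntams also spans that card; without this bump the half-open range
    // [start_idx, limit_idx) used below would miss that final card.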
3109     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
3110       limit_idx += 1;
3111     }
3112 
3113     assert(limit_idx <= end_idx, "or else use atomics");
3114 
3115     // Aggregate the "stripe" in the count data associated with hr.
3116     uint hrs_index = hr->hrs_index();
3117     size_t marked_bytes = 0;
3118 
3119     for (uint i = 0; i < _max_worker_id; i += 1) {
3120       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3121       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3122 
3123       // Fetch the marked_bytes in this region for task i and
3124       // add it to the running total for this region.
3125       marked_bytes += marked_bytes_array[hrs_index];
3126 
3127       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
3128       // into the global card bitmap.
3129       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3130 
3131       while (scan_idx < limit_idx) {
3132         assert(task_card_bm->at(scan_idx) == true, "should be");
3133         _cm_card_bm->set_bit(scan_idx);
3134         assert(_cm_card_bm->at(scan_idx) == true, "should be");
3135 
3136         // BitMap::get_next_one_offset() can handle the case when
3137         // its left_offset parameter is greater than its right_offset
3138         // parameter. It does, however, have an early exit if
3139         // left_offset == right_offset. So let's limit the value
3140         // passed in for left offset here.
3141         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3142         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3143       }
3144     }
3145 
3146     // Update the marked bytes for this region.
3147     hr->add_to_marked_bytes(marked_bytes);
3148 
3149     // Next heap region
3150     return false;
3151   }
3152 };
3153 
3154 class G1AggregateCountDataTask: public AbstractGangTask {
3155 protected:
3156   G1CollectedHeap* _g1h;
3157   ConcurrentMark* _cm;
3158   BitMap* _cm_card_bm;
3159   uint _max_worker_id;
3160   int _active_workers;
3161 
3162 public:
3163   G1AggregateCountDataTask(G1CollectedHeap* g1h,
3164                            ConcurrentMark* cm,
3165                            BitMap* cm_card_bm,
3166                            uint max_worker_id,
3167                            int n_workers) :
3168     AbstractGangTask("Count Aggregation"),
3169     _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3170     _max_worker_id(max_worker_id),
3171     _active_workers(n_workers) { }
3172 
3173   void work(uint worker_id) {
3174     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
3175 
3176     if (G1CollectedHeap::use_parallel_gc_threads()) {
3177       _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
3178                                             _active_workers,
3179                                             HeapRegion::AggregateCountClaimValue);
3180     } else {
3181       _g1h->heap_region_iterate(&cl);
3182     }
3183   }
3184 };
3185 
3186 
3187 void ConcurrentMark::aggregate_count_data() {
3188   int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3189                         _g1h->workers()->active_workers() :
3190                         1);
3191 
3192   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3193                                            _max_worker_id, n_workers);
3194 
3195   if (G1CollectedHeap::use_parallel_gc_threads()) {
3196     assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3197            "sanity check");
3198     _g1h->set_par_threads(n_workers);
3199     _g1h->workers()->run_task(&g1_par_agg_task);
3200     _g1h->set_par_threads(0);
3201 
3202     assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3203            "sanity check");
3204     _g1h->reset_heap_region_claim_values();
3205   } else {
3206     g1_par_agg_task.work(0);
3207   }
3208 }
3209 
3210 // Clear the per-worker arrays used to store the per-region counting data
3211 void ConcurrentMark::clear_all_count_data() {
3212   // Clear the global card bitmap - it will be filled during
3213   // liveness count aggregation (during remark) and the
3214   // final counting task.
3215   _card_bm.clear();
3216 
3217   // Clear the global region bitmap - it will be filled as part
3218   // of the final counting task.
3219   _region_bm.clear();
3220 
3221   uint max_regions = _g1h->max_regions();
3222   assert(_max_worker_id > 0, "uninitialized");
3223 
3224   for (uint i = 0; i < _max_worker_id; i += 1) {
3225     BitMap* task_card_bm = count_card_bitmap_for(i);
3226     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3227 
3228     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3229     assert(marked_bytes_array != NULL, "uninitialized");
3230 
3231     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3232     task_card_bm->clear();
3233   }
3234 }
3235 
3236 void ConcurrentMark::print_stats() {
3237   if (verbose_stats()) {
3238     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3239     for (size_t i = 0; i < _active_tasks; ++i) {
3240       _tasks[i]->print_stats();
3241       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3242     }
3243   }
3244 }
3245 
3246 // abandon current marking iteration due to a Full GC
3247 void ConcurrentMark::abort() {
3248   // Clear all marks to force marking thread to do nothing
3249   _nextMarkBitMap->clearAll();
3250 
3251   // Note we cannot clear the previous marking bitmap here
3252   // since VerifyDuringGC verifies the objects marked during
3253   // a full GC against the previous bitmap.
3254 
3255   // Clear the liveness counting data
3256   clear_all_count_data();
3257   // Empty mark stack
3258   reset_marking_state();
3259   for (uint i = 0; i < _max_worker_id; ++i) {
3260     _tasks[i]->clear_region_fields();
3261   }
3262   _first_overflow_barrier_sync.abort();
3263   _second_overflow_barrier_sync.abort();
3264   _has_aborted = true;
3265 
3266   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3267   satb_mq_set.abandon_partial_marking();
3268   // This can be called either during or outside marking; we'll read
3269   // the expected_active value from the SATB queue set.
3270   satb_mq_set.set_active_all_threads(
3271                                  false, /* new active value */
3272                                  satb_mq_set.is_active() /* expected_active */);
3273 
3274   _g1h->trace_heap_after_concurrent_cycle();
3275 }
3276 
3277 static void print_ms_time_info(const char* prefix, const char* name,
3278                                NumberSeq& ns) {
3279   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3280                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3281   if (ns.num() > 0) {
3282     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3283                            prefix, ns.sd(), ns.maximum());
3284   }
3285 }
3286 
3287 void ConcurrentMark::print_summary_info() {
3288   gclog_or_tty->print_cr(" Concurrent marking:");
3289   print_ms_time_info("  ", "init marks", _init_times);
3290   print_ms_time_info("  ", "remarks", _remark_times);
3291   {
3292     print_ms_time_info("     ", "final marks", _remark_mark_times);
3293     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3294 
3295   }
3296   print_ms_time_info("  ", "cleanups", _cleanup_times);
3297   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3298                          _total_counting_time,
3299                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3300                           (double)_cleanup_times.num()
3301                          : 0.0));
3302   if (G1ScrubRemSets) {
3303     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3304                            _total_rs_scrub_time,
3305                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3306                             (double)_cleanup_times.num()
3307                            : 0.0));
3308   }
3309   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3310                          (_init_times.sum() + _remark_times.sum() +
3311                           _cleanup_times.sum())/1000.0);
3312   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3313                 "(%8.2f s marking).",
3314                 cmThread()->vtime_accum(),
3315                 cmThread()->vtime_mark_accum());
3316 }
3317 
3318 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3319   if (use_parallel_marking_threads()) {
3320     _parallel_workers->print_worker_threads_on(st);
3321   }
3322 }
3323 
3324 void ConcurrentMark::print_on_error(outputStream* st) const {
3325   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
3326       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
3327   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
3328   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
3329 }
3330 
3331 // We take a break if someone is trying to stop the world.
3332 bool ConcurrentMark::do_yield_check(uint worker_id) {
3333   if (SuspendibleThreadSet::should_yield()) {
3334     if (worker_id == 0) {
3335       _g1h->g1_policy()->record_concurrent_pause();
3336     }
3337     SuspendibleThreadSet::yield();
3338     return true;
3339   } else {
3340     return false;
3341   }
3342 }
3343 
3344 bool ConcurrentMark::containing_card_is_marked(void* p) {
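  // Convert the address into a byte offset from the start of the reserved
  // heap, then into a card index, and test the corresponding bit in the
  // global liveness card bitmap.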
3345   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3346   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3347 }
3348 
3349 bool ConcurrentMark::containing_cards_are_marked(void* start,
3350                                                  void* last) {
3351   return containing_card_is_marked(start) &&
3352          containing_card_is_marked(last);
3353 }
3354 
3355 #ifndef PRODUCT
3356 // for debugging purposes
3357 void ConcurrentMark::print_finger() {
3358   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3359                          p2i(_heap_start), p2i(_heap_end), p2i(_finger));
3360   for (uint i = 0; i < _max_worker_id; ++i) {
3361     gclog_or_tty->print("   %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
3362   }
3363   gclog_or_tty->cr();
3364 }
3365 #endif
3366 
3367 void CMTask::scan_object(oop obj) {
3368   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3369 
3370   if (_cm->verbose_high()) {
3371     gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
3372                            _worker_id, p2i((void*) obj));
3373   }
3374 
3375   size_t obj_size = obj->size();
3376   _words_scanned += obj_size;
3377 
3378   obj->oop_iterate(_cm_oop_closure);
3379   statsOnly( ++_objs_scanned );
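  // check_limits() will invoke the regular clock call if the words scanned
  // or references reached so far have crossed their current limits.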
3380   check_limits();
3381 }
3382 
3383 // Closure for iteration over bitmaps
3384 class CMBitMapClosure : public BitMapClosure {
3385 private:
3386   // the bitmap that is being iterated over
3387   CMBitMap*                   _nextMarkBitMap;
3388   ConcurrentMark*             _cm;
3389   CMTask*                     _task;
3390 
3391 public:
3392   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3393     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3394 
3395   bool do_bit(size_t offset) {
3396     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3397     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3398     assert( addr < _cm->finger(), "invariant");
3399 
3400     statsOnly( _task->increase_objs_found_on_bitmap() );
3401     assert(addr >= _task->finger(), "invariant");
3402 
3403     // We move the task's local finger along.
3404     _task->move_finger_to(addr);
3405 
3406     _task->scan_object(oop(addr));
3407     // we only partially drain the local queue and global stack
3408     _task->drain_local_queue(true);
3409     _task->drain_global_stack(true);
3410 
3411     // if the has_aborted flag has been raised, we need to bail out of
3412     // the iteration
3413     return !_task->has_aborted();
3414   }
3415 };
3416 
3417 // Closure for iterating over objects, currently only used for
3418 // processing SATB buffers.
3419 class CMObjectClosure : public ObjectClosure {
3420 private:
3421   CMTask* _task;
3422 
3423 public:
3424   void do_object(oop obj) {
3425     _task->deal_with_reference(obj);
3426   }
3427 
3428   CMObjectClosure(CMTask* task) : _task(task) { }
3429 };
3430 
3431 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3432                                ConcurrentMark* cm,
3433                                CMTask* task)
3434   : _g1h(g1h), _cm(cm), _task(task) {
3435   assert(_ref_processor == NULL, "should be initialized to NULL");
3436 
3437   if (G1UseConcMarkReferenceProcessing) {
3438     _ref_processor = g1h->ref_processor_cm();
3439     assert(_ref_processor != NULL, "should not be NULL");
3440   }
3441 }
3442 
3443 void CMTask::setup_for_region(HeapRegion* hr) {
3444   assert(hr != NULL,
3445         "claim_region() should have filtered out NULL regions");
3446   assert(!hr->continuesHumongous(),
3447         "claim_region() should have filtered out continues humongous regions");
3448 
3449   if (_cm->verbose_low()) {
3450     gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3451                            _worker_id, p2i(hr));
3452   }
3453 
3454   _curr_region  = hr;
3455   _finger       = hr->bottom();
3456   update_region_limit();
3457 }
3458 
3459 void CMTask::update_region_limit() {
3460   HeapRegion* hr            = _curr_region;
3461   HeapWord* bottom          = hr->bottom();
3462   HeapWord* limit           = hr->next_top_at_mark_start();
3463 
3464   if (limit == bottom) {
3465     if (_cm->verbose_low()) {
3466       gclog_or_tty->print_cr("[%u] found an empty region "
3467                              "["PTR_FORMAT", "PTR_FORMAT")",
3468                              _worker_id, p2i(bottom), p2i(limit));
3469     }
3470     // The region was collected underneath our feet.
3471     // We set the finger to bottom to ensure that the bitmap
3472     // iteration that will follow this will not do anything.
3473     // (this is not a condition that holds when we set the region up,
3474     // as the region is not supposed to be empty in the first place)
3475     _finger = bottom;
3476   } else if (limit >= _region_limit) {
3477     assert(limit >= _finger, "peace of mind");
3478   } else {
3479     assert(limit < _region_limit, "only way to get here");
3480     // This can happen under some pretty unusual circumstances.  An
3481     // evacuation pause empties the region underneath our feet (NTAMS
3482     // at bottom). We then do some allocation in the region (NTAMS
3483     // stays at bottom), followed by the region being used as a GC
3484     // alloc region (NTAMS will move to top() and the objects
3485     // originally below it will be grayed). All objects now marked in
3486     // the region are explicitly grayed, if below the global finger,
3487     // and in fact we do not need to scan anything else. So, we simply
3488     // set _finger to be limit to ensure that the bitmap iteration
3489     // doesn't do anything.
3490     _finger = limit;
3491   }
3492 
3493   _region_limit = limit;
3494 }
3495 
3496 void CMTask::giveup_current_region() {
3497   assert(_curr_region != NULL, "invariant");
3498   if (_cm->verbose_low()) {
3499     gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3500                            _worker_id, p2i(_curr_region));
3501   }
3502   clear_region_fields();
3503 }
3504 
3505 void CMTask::clear_region_fields() {
3506   // Values for these three fields that indicate that we're not
3507   // holding on to a region.
3508   _curr_region   = NULL;
3509   _finger        = NULL;
3510   _region_limit  = NULL;
3511 }
3512 
3513 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3514   if (cm_oop_closure == NULL) {
3515     assert(_cm_oop_closure != NULL, "invariant");
3516   } else {
3517     assert(_cm_oop_closure == NULL, "invariant");
3518   }
3519   _cm_oop_closure = cm_oop_closure;
3520 }
3521 
3522 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3523   guarantee(nextMarkBitMap != NULL, "invariant");
3524 
3525   if (_cm->verbose_low()) {
3526     gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3527   }
3528 
3529   _nextMarkBitMap                = nextMarkBitMap;
3530   clear_region_fields();
3531 
3532   _calls                         = 0;
3533   _elapsed_time_ms               = 0.0;
3534   _termination_time_ms           = 0.0;
3535   _termination_start_time_ms     = 0.0;
3536 
3537 #if _MARKING_STATS_
3538   _local_pushes                  = 0;
3539   _local_pops                    = 0;
3540   _local_max_size                = 0;
3541   _objs_scanned                  = 0;
3542   _global_pushes                 = 0;
3543   _global_pops                   = 0;
3544   _global_max_size               = 0;
3545   _global_transfers_to           = 0;
3546   _global_transfers_from         = 0;
3547   _regions_claimed               = 0;
3548   _objs_found_on_bitmap          = 0;
3549   _satb_buffers_processed        = 0;
3550   _steal_attempts                = 0;
3551   _steals                        = 0;
3552   _aborted                       = 0;
3553   _aborted_overflow              = 0;
3554   _aborted_cm_aborted            = 0;
3555   _aborted_yield                 = 0;
3556   _aborted_timed_out             = 0;
3557   _aborted_satb                  = 0;
3558   _aborted_termination           = 0;
3559 #endif // _MARKING_STATS_
3560 }
3561 
3562 bool CMTask::should_exit_termination() {
3563   regular_clock_call();
3564   // This is called when we are in the termination protocol. We should
3565   // quit if, for some reason, this task wants to abort or the global
3566   // stack is not empty (this means that we can get work from it).
3567   return !_cm->mark_stack_empty() || has_aborted();
3568 }
3569 
3570 void CMTask::reached_limit() {
3571   assert(_words_scanned >= _words_scanned_limit ||
3572          _refs_reached >= _refs_reached_limit,
3573          "shouldn't have been called otherwise");
3574   regular_clock_call();
3575 }
3576 
3577 void CMTask::regular_clock_call() {
3578   if (has_aborted()) return;
3579 
3580   // First, we need to recalculate the words scanned and refs reached
3581   // limits for the next clock call.
3582   recalculate_limits();
3583 
3584   // During the regular clock call we do the following
3585 
3586   // (1) If an overflow has been flagged, then we abort.
3587   if (_cm->has_overflown()) {
3588     set_has_aborted();
3589     return;
3590   }
3591 
3592   // If we are not concurrent (i.e. we're doing remark) we don't need
3593   // to check anything else. The other steps are only needed during
3594   // the concurrent marking phase.
3595   if (!concurrent()) return;
3596 
3597   // (2) If marking has been aborted for Full GC, then we also abort.
3598   if (_cm->has_aborted()) {
3599     set_has_aborted();
3600     statsOnly( ++_aborted_cm_aborted );
3601     return;
3602   }
3603 
3604   double curr_time_ms = os::elapsedVTime() * 1000.0;
3605 
3606   // (3) If marking stats are enabled, then we update the step history.
3607 #if _MARKING_STATS_
3608   if (_words_scanned >= _words_scanned_limit) {
3609     ++_clock_due_to_scanning;
3610   }
3611   if (_refs_reached >= _refs_reached_limit) {
3612     ++_clock_due_to_marking;
3613   }
3614 
3615   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3616   _interval_start_time_ms = curr_time_ms;
3617   _all_clock_intervals_ms.add(last_interval_ms);
3618 
3619   if (_cm->verbose_medium()) {
3620       gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3621                         "scanned = %d%s, refs reached = %d%s",
3622                         _worker_id, last_interval_ms,
3623                         _words_scanned,
3624                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3625                         _refs_reached,
3626                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3627   }
3628 #endif // _MARKING_STATS_
3629 
3630   // (4) We check whether we should yield. If we have to, then we abort.
3631   if (SuspendibleThreadSet::should_yield()) {
3632     // We should yield. To do this we abort the task. The caller is
3633     // responsible for yielding.
3634     set_has_aborted();
3635     statsOnly( ++_aborted_yield );
3636     return;
3637   }
3638 
3639   // (5) We check whether we've reached our time quota. If we have,
3640   // then we abort.
3641   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3642   if (elapsed_time_ms > _time_target_ms) {
3643     set_has_aborted();
3644     _has_timed_out = true;
3645     statsOnly( ++_aborted_timed_out );
3646     return;
3647   }
3648 
3649   // (6) Finally, we check whether there are enough completed SATB
3650   // buffers available for processing. If there are, we abort.
3651   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3652   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3653     if (_cm->verbose_low()) {
3654       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3655                              _worker_id);
3656     }
3657     // we do need to process SATB buffers, we'll abort and restart
3658     // the marking task to do so
3659     set_has_aborted();
3660     statsOnly( ++_aborted_satb );
3661     return;
3662   }
3663 }
3664 
3665 void CMTask::recalculate_limits() {
3666   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3667   _words_scanned_limit      = _real_words_scanned_limit;
3668 
3669   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3670   _refs_reached_limit       = _real_refs_reached_limit;
3671 }
3672 
3673 void CMTask::decrease_limits() {
3674   // This is called when we believe that we're going to do an infrequent
3675   // operation which will increase the per-byte scanning cost (i.e. move
3676   // entries to/from the global stack). It basically tries to decrease the
3677   // scanning limit so that the clock is called earlier.
3678 
3679   if (_cm->verbose_medium()) {
3680     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3681   }
3682 
3683   _words_scanned_limit = _real_words_scanned_limit -
3684     3 * words_scanned_period / 4;
3685   _refs_reached_limit  = _real_refs_reached_limit -
3686     3 * refs_reached_period / 4;
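  // Since each _real_*_limit is the value at the last recalculation plus a
  // full period, this leaves at most a quarter of a period of work before
  // the next regular clock call is triggered.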
3687 }
3688 
3689 void CMTask::move_entries_to_global_stack() {
3690   // local array where we'll store the entries that will be popped
3691   // from the local queue
3692   oop buffer[global_stack_transfer_size];
3693 
3694   int n = 0;
3695   oop obj;
3696   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3697     buffer[n] = obj;
3698     ++n;
3699   }
3700 
3701   if (n > 0) {
3702     // we popped at least one entry from the local queue
3703 
3704     statsOnly( ++_global_transfers_to; _local_pops += n );
3705 
3706     if (!_cm->mark_stack_push(buffer, n)) {
3707       if (_cm->verbose_low()) {
3708         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3709                                _worker_id);
3710       }
3711       set_has_aborted();
3712     } else {
3713       // the transfer was successful
3714 
3715       if (_cm->verbose_medium()) {
3716         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3717                                _worker_id, n);
3718       }
3719       statsOnly( int tmp_size = _cm->mark_stack_size();
3720                  if (tmp_size > _global_max_size) {
3721                    _global_max_size = tmp_size;
3722                  }
3723                  _global_pushes += n );
3724     }
3725   }
3726 
3727   // this operation was quite expensive, so decrease the limits
3728   decrease_limits();
3729 }
3730 
3731 void CMTask::get_entries_from_global_stack() {
3732   // local array where we'll store the entries that will be popped
3733   // from the global stack.
3734   oop buffer[global_stack_transfer_size];
3735   int n;
3736   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3737   assert(n <= global_stack_transfer_size,
3738          "we should not pop more than the given limit");
3739   if (n > 0) {
3740     // yes, we did actually pop at least one entry
3741 
3742     statsOnly( ++_global_transfers_from; _global_pops += n );
3743     if (_cm->verbose_medium()) {
3744       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3745                              _worker_id, n);
3746     }
3747     for (int i = 0; i < n; ++i) {
3748       bool success = _task_queue->push(buffer[i]);
3749       // We only call this when the local queue is empty or under a
3750       // given target limit. So, we do not expect this push to fail.
3751       assert(success, "invariant");
3752     }
3753 
3754     statsOnly( int tmp_size = _task_queue->size();
3755                if (tmp_size > _local_max_size) {
3756                  _local_max_size = tmp_size;
3757                }
3758                _local_pushes += n );
3759   }
3760 
3761   // this operation was quite expensive, so decrease the limits
3762   decrease_limits();
3763 }
3764 
3765 void CMTask::drain_local_queue(bool partially) {
3766   if (has_aborted()) return;
3767 
3768   // Decide what the target size is, depending on whether we're going to
3769   // drain it partially (so that other tasks can steal if they run out
3770   // of things to do) or totally (at the very end).
3771   size_t target_size;
3772   if (partially) {
3773     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3774   } else {
3775     target_size = 0;
3776   }
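  // When draining partially we stop once the queue is down to roughly a
  // third of its capacity (capped by GCDrainStackTargetSize), so that some
  // entries remain available for other tasks to steal.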
3777 
3778   if (_task_queue->size() > target_size) {
3779     if (_cm->verbose_high()) {
3780       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3781                              _worker_id, target_size);
3782     }
3783 
3784     oop obj;
3785     bool ret = _task_queue->pop_local(obj);
3786     while (ret) {
3787       statsOnly( ++_local_pops );
3788 
3789       if (_cm->verbose_high()) {
3790         gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3791                                p2i((void*) obj));
3792       }
3793 
3794       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3795       assert(!_g1h->is_on_master_free_list(
3796                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3797 
3798       scan_object(obj);
3799 
3800       if (_task_queue->size() <= target_size || has_aborted()) {
3801         ret = false;
3802       } else {
3803         ret = _task_queue->pop_local(obj);
3804       }
3805     }
3806 
3807     if (_cm->verbose_high()) {
3808       gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3809                              _worker_id, _task_queue->size());
3810     }
3811   }
3812 }
3813 
3814 void CMTask::drain_global_stack(bool partially) {
3815   if (has_aborted()) return;
3816 
3817   // We have a policy to drain the local queue before we attempt to
3818   // drain the global stack.
3819   assert(partially || _task_queue->size() == 0, "invariant");
3820 
3821   // Decide what the target size is, depending on whether we're going to
3822   // drain it partially (so that other tasks can steal if they run out
3823   // of things to do) or totally (at the very end).  Notice that,
3824   // because we move entries from the global stack in chunks or
3825   // because another task might be doing the same, we might in fact
3826   // drop below the target. But, this is not a problem.
3827   size_t target_size;
3828   if (partially) {
3829     target_size = _cm->partial_mark_stack_size_target();
3830   } else {
3831     target_size = 0;
3832   }
3833 
3834   if (_cm->mark_stack_size() > target_size) {
3835     if (_cm->verbose_low()) {
3836       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3837                              _worker_id, target_size);
3838     }
3839 
3840     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3841       get_entries_from_global_stack();
3842       drain_local_queue(partially);
3843     }
3844 
3845     if (_cm->verbose_low()) {
3846       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3847                              _worker_id, _cm->mark_stack_size());
3848     }
3849   }
3850 }
3851 
3852 // The SATB queue has several assumptions about whether to call the par or
3853 // non-par versions of its methods. This is why some of the code is
3854 // replicated. We should really get rid of the single-threaded version
3855 // of the code to simplify things.
3856 void CMTask::drain_satb_buffers() {
3857   if (has_aborted()) return;
3858 
3859   // We set this so that the regular clock knows that we're in the
3860   // middle of draining buffers and doesn't set the abort flag when it
3861   // notices that SATB buffers are available for draining. It'd be
3862   // very counterproductive if it did that. :-)
3863   _draining_satb_buffers = true;
3864 
3865   CMObjectClosure oc(this);
3866   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3867   if (G1CollectedHeap::use_parallel_gc_threads()) {
3868     satb_mq_set.set_par_closure(_worker_id, &oc);
3869   } else {
3870     satb_mq_set.set_closure(&oc);
3871   }
3872 
3873   // This keeps claiming and applying the closure to completed buffers
3874   // until we run out of buffers or we need to abort.
3875   if (G1CollectedHeap::use_parallel_gc_threads()) {
3876     while (!has_aborted() &&
3877            satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
3878       if (_cm->verbose_medium()) {
3879         gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3880       }
3881       statsOnly( ++_satb_buffers_processed );
3882       regular_clock_call();
3883     }
3884   } else {
3885     while (!has_aborted() &&
3886            satb_mq_set.apply_closure_to_completed_buffer()) {
3887       if (_cm->verbose_medium()) {
3888         gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3889       }
3890       statsOnly( ++_satb_buffers_processed );
3891       regular_clock_call();
3892     }
3893   }
3894 
3895   if (!concurrent() && !has_aborted()) {
3896     // We should only do this during remark.
3897     if (G1CollectedHeap::use_parallel_gc_threads()) {
3898       satb_mq_set.par_iterate_closure_all_threads(_worker_id);
3899     } else {
3900       satb_mq_set.iterate_closure_all_threads();
3901     }
3902   }
3903 
3904   _draining_satb_buffers = false;
3905 
3906   assert(has_aborted() ||
3907          concurrent() ||
3908          satb_mq_set.completed_buffers_num() == 0, "invariant");
3909 
3910   if (G1CollectedHeap::use_parallel_gc_threads()) {
3911     satb_mq_set.set_par_closure(_worker_id, NULL);
3912   } else {
3913     satb_mq_set.set_closure(NULL);
3914   }
3915 
3916   // again, this was a potentially expensive operation, decrease the
3917   // limits to get the regular clock call early
3918   decrease_limits();
3919 }
3920 
3921 void CMTask::print_stats() {
3922   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3923                          _worker_id, _calls);
3924   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3925                          _elapsed_time_ms, _termination_time_ms);
3926   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3927                          _step_times_ms.num(), _step_times_ms.avg(),
3928                          _step_times_ms.sd());
3929   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3930                          _step_times_ms.maximum(), _step_times_ms.sum());
3931 
3932 #if _MARKING_STATS_
3933   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3934                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3935                          _all_clock_intervals_ms.sd());
3936   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3937                          _all_clock_intervals_ms.maximum(),
3938                          _all_clock_intervals_ms.sum());
3939   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
3940                          _clock_due_to_scanning, _clock_due_to_marking);
3941   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
3942                          _objs_scanned, _objs_found_on_bitmap);
3943   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
3944                          _local_pushes, _local_pops, _local_max_size);
3945   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
3946                          _global_pushes, _global_pops, _global_max_size);
3947   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
3948                          _global_transfers_to,_global_transfers_from);
3949   gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);
3950   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
3951   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
3952                          _steal_attempts, _steals);
3953   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
3954   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
3955                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3956   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
3957                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3958 #endif // _MARKING_STATS_
3959 }
3960 
3961 /*****************************************************************************
3962 
3963     The do_marking_step(time_target_ms, ...) method is the building
3964     block of the parallel marking framework. It can be called in parallel
3965     with other invocations of do_marking_step() on different tasks
3966     (but only one per task, obviously) and concurrently with the
3967     mutator threads, or during remark, hence it eliminates the need
3968     for two versions of the code. When called during remark, it will
3969     pick up from where the task left off during the concurrent marking
3970     phase. Interestingly, tasks are also claimable during evacuation
3971     pauses, since do_marking_step() ensures that it aborts before
3972     it needs to yield.
3973 
3974     The data structures that it uses to do marking work are the
3975     following:
3976 
3977       (1) Marking Bitmap. If there are gray objects that appear only
3978       on the bitmap (this happens either when dealing with an overflow
3979       or when the initial marking phase has simply marked the roots
3980       and didn't push them on the stack), then tasks claim heap
3981       regions whose bitmap they then scan to find gray objects. A
3982       global finger indicates where the end of the last claimed region
3983       is. A local finger indicates how far into the region a task has
3984       scanned. The two fingers are used to determine how to gray an
3985       object (i.e. whether simply marking it is OK, as it will be
3986       visited by a task in the future, or whether it needs to be also
3987       pushed on a stack).
3988 
3989       (2) Local Queue. The local queue of the task which is accessed
3990       reasonably efficiently by the task. Other tasks can steal from
3991       it when they run out of work. Throughout the marking phase, a
3992       task attempts to keep its local queue short but not totally
3993       empty, so that entries are available for stealing by other
3994       tasks. Only when there is no more work will a task totally
3995       drain its local queue.
3996 
3997       (3) Global Mark Stack. This handles local queue overflow. During
3998       marking only sets of entries are moved between it and the local
3999       queues, as access to it requires a mutex, and finer-grained
4000       interaction with it might cause contention. If it
4001       overflows, then the marking phase should restart and iterate
4002       over the bitmap to identify gray objects. Throughout the marking
4003       phase, tasks attempt to keep the global mark stack at a small
4004       length but not totally empty, so that entries are available for
4005       popping by other tasks. Only when there is no more work will
4006       tasks totally drain the global mark stack.
4007 
4008       (4) SATB Buffer Queue. This is where completed SATB buffers are
4009       made available. Buffers are regularly removed from this queue
4010       and scanned for roots, so that the queue doesn't get too
4011       long. During remark, all completed buffers are processed, as
4012       well as the filled in parts of any uncompleted buffers.
4013 
4014     The do_marking_step() method tries to abort when the time target
4015     has been reached. There are a few other cases when the
4016     do_marking_step() method also aborts:
4017 
4018       (1) When the marking phase has been aborted (after a Full GC).
4019 
4020       (2) When a global overflow (on the global stack) has been
4021       triggered. Before the task aborts, it will actually sync up with
4022       the other tasks to ensure that all the marking data structures
4023       (local queues, stacks, fingers etc.)  are re-initialized so that
4024       when do_marking_step() completes, the marking phase can
4025       immediately restart.
4026 
4027       (3) When enough completed SATB buffers are available. The
4028       do_marking_step() method only tries to drain SATB buffers right
4029       at the beginning. So, if enough buffers are available, the
4030       marking step aborts and the SATB buffers are processed at
4031       the beginning of the next invocation.
4032 
4033       (4) To yield. When we have to yield, we abort and yield
4034       right at the end of do_marking_step(). This saves us a lot
4035       of hassle as, by yielding, we might allow a Full GC. If this
4036       happens then objects will be compacted underneath our feet, the
4037       heap might shrink, etc. We avoid having to check for this by
4038       just aborting and doing the yield right at the end.
4039 
4040     From the above it follows that the do_marking_step() method should
4041     be called in a loop (or, otherwise, regularly) until it completes.
4042 
4043     If a marking step completes without its has_aborted() flag being
4044     true, it means it has completed the current marking phase (and
4045     also all other marking tasks have done so and have all synced up).
4046 
4047     A method called regular_clock_call() is invoked "regularly" (in
4048     sub ms intervals) throughout marking. It is this clock method that
4049     checks all the abort conditions which were mentioned above and
4050     decides when the task should abort. A work-based scheme is used to
4051     trigger this clock method: when the number of object words the
4052     marking phase has scanned or the number of references the marking
4053     phase has visited reach a given limit. Additional invocations of
4054     the clock method have been planted in a few other strategic places
4055     too. The initial reason for the clock method was to avoid calling
4056     vtime too regularly, as it is quite expensive. So, once it was in
4057     place, it was natural to piggy-back all the other conditions on it
4058     too and not constantly check them throughout the code.
4059 
4060     If do_termination is true then do_marking_step will enter its
4061     termination protocol.
4062 
4063     The value of is_serial must be true when do_marking_step is being
4064     called serially (i.e. by the VMThread) and do_marking_step should
4065     skip any synchronization in the termination and overflow code.
4066     Examples include the serial remark code and the serial reference
4067     processing closures.
4068 
4069     The value of is_serial must be false when do_marking_step is
4070     being called by any of the worker threads in a work gang.
4071     Examples include the concurrent marking code (CMMarkingTask),
4072     the MT remark code, and the MT reference processing closures.
4073 
4074  *****************************************************************************/
4075 
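// A rough sketch of how a concurrent caller is expected to drive this method
// (illustrative only; real callers such as CMConcurrentMarkingTask also
// handle step timing, yielding and back-off between the steps):
//
//   do {
//     task->do_marking_step(G1ConcMarkStepDurationMillis,
//                           true  /* do_termination */,
//                           false /* is_serial */);
//     // yield / sleep here if the task aborted but marking as a whole
//     // has not been aborted
//   } while (!cm->has_aborted() && task->has_aborted());
//
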
4076 void CMTask::do_marking_step(double time_target_ms,
4077                              bool do_termination,
4078                              bool is_serial) {
4079   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4080   assert(concurrent() == _cm->concurrent(), "they should be the same");
4081 
4082   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4083   assert(_task_queues != NULL, "invariant");
4084   assert(_task_queue != NULL, "invariant");
4085   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4086 
4087   assert(!_claimed,
4088          "only one thread should claim this task at any one time");
4089 
4090   // OK, this doesn't safeguard against all possible scenarios, as it is
4091   // possible for two threads to set the _claimed flag at the same
4092   // time. But it is only for debugging purposes anyway and it will
4093   // catch most problems.
4094   _claimed = true;
4095 
4096   _start_time_ms = os::elapsedVTime() * 1000.0;
4097   statsOnly( _interval_start_time_ms = _start_time_ms );
4098 
4099   // If do_stealing is true then do_marking_step will attempt to
4100   // steal work from the other CMTasks. It only makes sense to
4101   // enable stealing when the termination protocol is enabled
4102   // and do_marking_step() is not being called serially.
4103   bool do_stealing = do_termination && !is_serial;
4104 
4105   double diff_prediction_ms =
4106     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4107   _time_target_ms = time_target_ms - diff_prediction_ms;
4108 
4109   // set up the variables that are used in the work-based scheme to
4110   // call the regular clock method
4111   _words_scanned = 0;
4112   _refs_reached  = 0;
4113   recalculate_limits();
4114 
4115   // clear all flags
4116   clear_has_aborted();
4117   _has_timed_out = false;
4118   _draining_satb_buffers = false;
4119 
4120   ++_calls;
4121 
4122   if (_cm->verbose_low()) {
4123     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4124                            "target = %1.2lfms >>>>>>>>>>",
4125                            _worker_id, _calls, _time_target_ms);
4126   }
4127 
4128   // Set up the bitmap and oop closures. Anything that uses them is
4129   // eventually called from this method, so it is OK to allocate them
4130   // on the stack in this frame.
4131   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4132   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
4133   set_cm_oop_closure(&cm_oop_closure);
4134 
4135   if (_cm->has_overflown()) {
4136     // This can happen if the mark stack overflows during a GC pause
4137     // and this task, after a yield point, restarts. We have to abort
4138     // as we need to get into the overflow protocol which happens
4139     // right at the end of this task.
4140     set_has_aborted();
4141   }
4142 
4143   // First drain any available SATB buffers. After this, we will not
4144   // look at SATB buffers before the next invocation of this method.
4145   // If enough completed SATB buffers are queued up, the regular clock
4146   // will abort this task so that it restarts.
4147   drain_satb_buffers();
4148   // ...then partially drain the local queue and the global stack
4149   drain_local_queue(true);
4150   drain_global_stack(true);
4151 
4152   do {
4153     if (!has_aborted() && _curr_region != NULL) {
4154       // This means that we're already holding on to a region.
4155       assert(_finger != NULL, "if region is not NULL, then the finger "
4156              "should not be NULL either");
4157 
4158       // We might have restarted this task after an evacuation pause
4159       // which might have evacuated the region we're holding on to
4160       // underneath our feet. Let's read its limit again to make sure
4161       // that we do not iterate over a region of the heap that
4162       // contains garbage (update_region_limit() will also move
4163       // _finger to the start of the region if it is found empty).
4164       update_region_limit();
4165       // We will start from _finger not from the start of the region,
4166       // as we might be restarting this task after aborting half-way
4167       // through scanning this region. In this case, _finger points to
4168       // the address where we last found a marked object. If this is a
4169       // fresh region, _finger points to start().
4170       MemRegion mr = MemRegion(_finger, _region_limit);
4171 
4172       if (_cm->verbose_low()) {
4173         gclog_or_tty->print_cr("[%u] we're scanning part "
4174                                "["PTR_FORMAT", "PTR_FORMAT") "
4175                                "of region "HR_FORMAT,
4176                                _worker_id, p2i(_finger), p2i(_region_limit),
4177                                HR_FORMAT_PARAMS(_curr_region));
4178       }
4179 
4180       assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(),
4181              "humongous regions should go around loop once only");
4182 
4183       // Some special cases:
4184       // If the memory region is empty, we can just give up the region.
4185       // If the current region is humongous then we only need to check
4186       // the bitmap for the bit associated with the start of the object,
4187       // scan the object if it's live, and give up the region.
4188       // Otherwise, let's iterate over the bitmap of the part of the region
4189       // that is left.
4190       // If the iteration is successful, give up the region.
4191       if (mr.is_empty()) {
4192         giveup_current_region();
4193         regular_clock_call();
4194       } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) {
4195         if (_nextMarkBitMap->isMarked(mr.start())) {
4196           // The object is marked - apply the closure
4197           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
4198           bitmap_closure.do_bit(offset);
4199         }
4200         // Even if this task aborted while scanning the humongous object
4201         // we can (and should) give up the current region.
4202         giveup_current_region();
4203         regular_clock_call();
4204       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4205         giveup_current_region();
4206         regular_clock_call();
4207       } else {
4208         assert(has_aborted(), "currently the only way to do so");
4209         // The only way to abort the bitmap iteration is to return
4210         // false from the do_bit() method. However, inside the
4211         // do_bit() method we move the _finger to point to the
4212         // object currently being looked at. So, if we bail out, we
4213         // have definitely set _finger to something non-null.
4214         assert(_finger != NULL, "invariant");
4215 
4216         // Region iteration was actually aborted. So now _finger
4217         // points to the address of the object we last scanned. If we
4218         // leave it there, when we restart this task, we will rescan
4219         // the object. It is easy to avoid this. We move the finger by
4220         // enough to point to the next possible object header (the
4221         // bitmap knows by how much we need to move it as it knows its
4222         // granularity).
4223         assert(_finger < _region_limit, "invariant");
4224         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
4225         // Check if bitmap iteration was aborted while scanning the last object
4226         if (new_finger >= _region_limit) {
4227           giveup_current_region();
4228         } else {
4229           move_finger_to(new_finger);
4230         }
4231       }
4232     }
4233     // At this point we have either completed iterating over the
4234     // region we were holding on to, or we have aborted.
4235 
4236     // We then partially drain the local queue and the global stack.
4237     // (Do we really need this?)
4238     drain_local_queue(true);
4239     drain_global_stack(true);
4240 
4241     // Read the note on the claim_region() method on why it might
4242     // return NULL with potentially more regions available for
4243     // claiming and why we have to check out_of_regions() to determine
4244     // whether we're done or not.
4245     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4246       // We are going to try to claim a new region. We should have
4247       // given up on the previous one.
4248       // Separated the asserts so that we know which one fires.
4249       assert(_curr_region  == NULL, "invariant");
4250       assert(_finger       == NULL, "invariant");
4251       assert(_region_limit == NULL, "invariant");
4252       if (_cm->verbose_low()) {
4253         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4254       }
4255       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4256       if (claimed_region != NULL) {
4257         // Yes, we managed to claim one
4258         statsOnly( ++_regions_claimed );
4259 
4260         if (_cm->verbose_low()) {
4261           gclog_or_tty->print_cr("[%u] we successfully claimed "
4262                                  "region "PTR_FORMAT,
4263                                  _worker_id, p2i(claimed_region));
4264         }
4265 
4266         setup_for_region(claimed_region);
4267         assert(_curr_region == claimed_region, "invariant");
4268       }
4269       // It is important to call the regular clock here. It might take
4270       // a while to claim a region if, for example, we hit a large
4271       // block of empty regions. So we need to call the regular clock
4272       // method once round the loop to make sure it's called
4273       // frequently enough.
4274       regular_clock_call();
4275     }
4276 
4277     if (!has_aborted() && _curr_region == NULL) {
4278       assert(_cm->out_of_regions(),
4279              "at this point we should be out of regions");
4280     }
4281   } while ( _curr_region != NULL && !has_aborted());
4282 
4283   if (!has_aborted()) {
4284     // We cannot check whether the global stack is empty, since other
4285     // tasks might be pushing objects to it concurrently.
4286     assert(_cm->out_of_regions(),
4287            "at this point we should be out of regions");
4288 
4289     if (_cm->verbose_low()) {
4290       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4291     }
4292 
4293     // Try to reduce the number of available SATB buffers so that
4294     // remark has less work to do.
4295     drain_satb_buffers();
4296   }
4297 
4298   // Since we've done everything else, we can now totally drain the
4299   // local queue and global stack.
4300   drain_local_queue(false);
4301   drain_global_stack(false);
4302 
4303   // Attempt to steal work from other tasks' queues.
4304   if (do_stealing && !has_aborted()) {
4305     // We have not aborted. This means that we have finished all that
4306     // we could. Let's try to do some stealing...
4307 
4308     // We cannot check whether the global stack is empty, since other
4309     // tasks might be pushing objects to it concurrently.
4310     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4311            "only way to reach here");
4312 
4313     if (_cm->verbose_low()) {
4314       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4315     }
4316 
4317     while (!has_aborted()) {
4318       oop obj;
4319       statsOnly( ++_steal_attempts );
4320 
4321       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4322         if (_cm->verbose_medium()) {
4323           gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4324                                  _worker_id, p2i((void*) obj));
4325         }
4326 
4327         statsOnly( ++_steals );
4328 
4329         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4330                "any stolen object should be marked");
4331         scan_object(obj);
4332 
4333         // And since we're towards the end, let's totally drain the
4334         // local queue and global stack.
4335         drain_local_queue(false);
4336         drain_global_stack(false);
4337       } else {
4338         break;
4339       }
4340     }
4341   }
4342 
4343   // If we are about to wrap up and go into termination, check if we
4344   // should raise the overflow flag.
4345   if (do_termination && !has_aborted()) {
4346     if (_cm->force_overflow()->should_force()) {
4347       _cm->set_has_overflown();
4348       regular_clock_call();
4349     }
4350   }
4351 
4352   // We still haven't aborted. Now, let's try to get into the
4353   // termination protocol.
4354   if (do_termination && !has_aborted()) {
4355     // We cannot check whether the global stack is empty, since other
4356     // tasks might be concurrently pushing objects on it.
4357     // Separated the asserts so that we know which one fires.
4358     assert(_cm->out_of_regions(), "only way to reach here");
4359     assert(_task_queue->size() == 0, "only way to reach here");
4360 
4361     if (_cm->verbose_low()) {
4362       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4363     }
4364 
4365     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4366 
4367     // The CMTask class also extends the TerminatorTerminator class,
4368     // so its should_exit_termination() method will also be consulted
4369     // when deciding whether to exit the termination protocol.
4370     bool finished = (is_serial ||
4371                      _cm->terminator()->offer_termination(this));
4372     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4373     _termination_time_ms +=
4374       termination_end_time_ms - _termination_start_time_ms;
4375 
4376     if (finished) {
4377       // We're all done.
4378 
4379       if (_worker_id == 0) {
4380         // let's allow task 0 to do this
4381         if (concurrent()) {
4382           assert(_cm->concurrent_marking_in_progress(), "invariant");
4383           // we need to set this to false before the next
4384           // safepoint. This way we ensure that the marking phase
4385           // doesn't observe any more heap expansions.
4386           _cm->clear_concurrent_marking_in_progress();
4387         }
4388       }
4389 
4390       // We can now guarantee that the global stack is empty, since
4391       // all other tasks have finished. We separated the guarantees so
4392       // that, if a condition is false, we can immediately find out
4393       // which one.
4394       guarantee(_cm->out_of_regions(), "only way to reach here");
4395       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4396       guarantee(_task_queue->size() == 0, "only way to reach here");
4397       guarantee(!_cm->has_overflown(), "only way to reach here");
4398       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4399 
4400       if (_cm->verbose_low()) {
4401         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4402       }
4403     } else {
4404       // Apparently there's more work to do. Let's abort this task; the
4405       // caller will restart it and we can hopefully find more things to do.
4406 
4407       if (_cm->verbose_low()) {
4408         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4409                                _worker_id);
4410       }
4411 
4412       set_has_aborted();
4413       statsOnly( ++_aborted_termination );
4414     }
4415   }
4416 
4417   // Mainly for debugging purposes, to make sure that a pointer to the
4418   // closure that was allocated on the stack in this frame doesn't
4419   // escape it by accident.
4420   set_cm_oop_closure(NULL);
4421   double end_time_ms = os::elapsedVTime() * 1000.0;
4422   double elapsed_time_ms = end_time_ms - _start_time_ms;
4423   // Update the step history.
4424   _step_times_ms.add(elapsed_time_ms);
4425 
4426   if (has_aborted()) {
4427     // The task was aborted for some reason.
4428 
4429     statsOnly( ++_aborted );
4430 
4431     if (_has_timed_out) {
4432       double diff_ms = elapsed_time_ms - _time_target_ms;
4433       // Keep statistics of how well we did with respect to hitting
4434       // our target only if we actually timed out (if we aborted for
4435       // other reasons, then the results might get skewed).
4436       _marking_step_diffs_ms.add(diff_ms);
4437     }
4438 
4439     if (_cm->has_overflown()) {
4440       // This is the interesting one. We aborted because a global
4441       // overflow was raised. This means we have to restart the
4442       // marking phase and start iterating over regions. However, in
4443       // order to do this we have to make sure that all tasks stop
4444       // what they are doing and re-initialize in a safe manner. We
4445       // will achieve this with the use of two barrier sync points.
4446 
4447       if (_cm->verbose_low()) {
4448         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4449       }
4450 
4451       if (!is_serial) {
4452         // We only need to enter the sync barrier if being called
4453         // from a parallel context
4454         _cm->enter_first_sync_barrier(_worker_id);
4455 
4456         // When we exit this sync barrier we know that all tasks have
4457         // stopped doing marking work. So, it's now safe to
4458         // re-initialize our data structures. At the end of this method,
4459         // task 0 will clear the global data structures.
4460       }
4461 
4462       statsOnly( ++_aborted_overflow );
4463 
4464       // We clear the local state of this task...
4465       clear_region_fields();
4466 
4467       if (!is_serial) {
4468         // ...and enter the second barrier.
4469         _cm->enter_second_sync_barrier(_worker_id);
4470       }
4471       // At this point, if we're during the concurrent phase of
4472       // marking, everything has been re-initialized and we're
4473       // ready to restart.
4474     }
4475 
4476     if (_cm->verbose_low()) {
4477       gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4478                              "elapsed = %1.2lfms <<<<<<<<<<",
4479                              _worker_id, _time_target_ms, elapsed_time_ms);
4480       if (_cm->has_aborted()) {
4481         gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4482                                _worker_id);
4483       }
4484     }
4485   } else {
4486     if (_cm->verbose_low()) {
4487       gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4488                              "elapsed = %1.2lfms <<<<<<<<<<",
4489                              _worker_id, _time_target_ms, elapsed_time_ms);
4490     }
4491   }
4492 
4493   _claimed = false;
4494 }
4495 
4496 CMTask::CMTask(uint worker_id,
4497                ConcurrentMark* cm,
4498                size_t* marked_bytes,
4499                BitMap* card_bm,
4500                CMTaskQueue* task_queue,
4501                CMTaskQueueSet* task_queues)
4502   : _g1h(G1CollectedHeap::heap()),
4503     _worker_id(worker_id), _cm(cm),
4504     _claimed(false),
4505     _nextMarkBitMap(NULL), _hash_seed(17),
4506     _task_queue(task_queue),
4507     _task_queues(task_queues),
4508     _cm_oop_closure(NULL),
4509     _marked_bytes_array(marked_bytes),
4510     _card_bm(card_bm) {
4511   guarantee(task_queue != NULL, "invariant");
4512   guarantee(task_queues != NULL, "invariant");
4513 
4514   statsOnly( _clock_due_to_scanning = 0;
4515              _clock_due_to_marking  = 0 );
4516 
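  // Seed the step-time difference predictor so that early predictions have
  // a data point to work from (0.5ms appears to be a heuristic starting value).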
4517   _marking_step_diffs_ms.add(0.5);
4518 }
4519 
4520 // These are formatting macros that are used below to ensure
4521 // consistent formatting. The *_H_* versions are used to format the
4522 // header for a particular value and they should be kept consistent
4523 // with the corresponding macro. Also note that most of the macros add
4524 // the necessary white space (as a prefix) which makes them a bit
4525 // easier to compose.
4526 
4527 // All the output lines are prefixed with this string to be able to
4528 // identify them easily in a large log file.
4529 #define G1PPRL_LINE_PREFIX            "###"
4530 
4531 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
4532 #ifdef _LP64
4533 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4534 #else // _LP64
4535 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4536 #endif // _LP64
4537 
4538 // For per-region info
4539 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4540 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4541 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
4542 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4543 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4544 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4545 
4546 // For summary info
4547 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
4548 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
4549 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
4550 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
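
// For illustration only (values are made up and column alignment is
// approximate), a per-region line built from the formats above looks like:
//   ###  OLD 0x00000000f0000000-0x00000000f0100000    1048576     917504     524288           12.3       2048        128
// and the footer uses the G1PPRL_SUM_* formats for the totals.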
4551 
4552 G1PrintRegionLivenessInfoClosure::
4553 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4554   : _out(out),
4555     _total_used_bytes(0), _total_capacity_bytes(0),
4556     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4557     _hum_used_bytes(0), _hum_capacity_bytes(0),
4558     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
4559     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
4560   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4561   MemRegion g1_committed = g1h->g1_committed();
4562   MemRegion g1_reserved = g1h->g1_reserved();
4563   double now = os::elapsedTime();
4564 
4565   // Print the header of the output.
4566   _out->cr();
4567   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4568   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4569                  G1PPRL_SUM_ADDR_FORMAT("committed")
4570                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4571                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4572                  p2i(g1_committed.start()), p2i(g1_committed.end()),
4573                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4574                  HeapRegion::GrainBytes);
4575   _out->print_cr(G1PPRL_LINE_PREFIX);
4576   _out->print_cr(G1PPRL_LINE_PREFIX
4577                 G1PPRL_TYPE_H_FORMAT
4578                 G1PPRL_ADDR_BASE_H_FORMAT
4579                 G1PPRL_BYTE_H_FORMAT
4580                 G1PPRL_BYTE_H_FORMAT
4581                 G1PPRL_BYTE_H_FORMAT
4582                 G1PPRL_DOUBLE_H_FORMAT
4583                 G1PPRL_BYTE_H_FORMAT
4584                 G1PPRL_BYTE_H_FORMAT,
4585                 "type", "address-range",
4586                 "used", "prev-live", "next-live", "gc-eff",
4587                 "remset", "code-roots");
4588   _out->print_cr(G1PPRL_LINE_PREFIX
4589                 G1PPRL_TYPE_H_FORMAT
4590                 G1PPRL_ADDR_BASE_H_FORMAT
4591                 G1PPRL_BYTE_H_FORMAT
4592                 G1PPRL_BYTE_H_FORMAT
4593                 G1PPRL_BYTE_H_FORMAT
4594                 G1PPRL_DOUBLE_H_FORMAT
4595                 G1PPRL_BYTE_H_FORMAT
4596                 G1PPRL_BYTE_H_FORMAT,
4597                 "", "",
4598                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4599                 "(bytes)", "(bytes)");
4600 }
4601 
4602 // Takes a pointer to one of the _hum_* fields, deduces the
4603 // corresponding value for a region in a humongous region series
4604 // (either the region size, or what's left if the _hum_* field is
4605 // less than the region size), and updates the _hum_* field accordingly.
4606 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4607   size_t bytes = 0;
4608   // The > 0 check is to deal with the prev and next live bytes which
4609   // could be 0.
4610   if (*hum_bytes > 0) {
4611     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4612     *hum_bytes -= bytes;
4613   }
4614   return bytes;
4615 }
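
// Worked example with hypothetical numbers: if *hum_bytes starts at
// 2.5 * HeapRegion::GrainBytes, the first two calls each return GrainBytes
// and the third returns the remaining 0.5 * GrainBytes, leaving the field
// at zero for any further regions in the series.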
4616 
4617 // It deduces the values for a region in a humongous region series
4618 // from the _hum_* fields and updates those accordingly. It assumes
4619 // that the _hum_* fields have already been set up from the "starts
4620 // humongous" region and that we visit the regions in address order.
4621 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4622                                                      size_t* capacity_bytes,
4623                                                      size_t* prev_live_bytes,
4624                                                      size_t* next_live_bytes) {
4625   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4626   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4627   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4628   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4629   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4630 }
4631 
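// Called for each region in the heap (visited in address order): prints one
// line of liveness information for the region and accumulates the totals
// that the destructor prints in the summary footer. Returns false so that
// the heap region iteration continues.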
4632 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4633   const char* type = "";
4634   HeapWord* bottom       = r->bottom();
4635   HeapWord* end          = r->end();
4636   size_t capacity_bytes  = r->capacity();
4637   size_t used_bytes      = r->used();
4638   size_t prev_live_bytes = r->live_bytes();
4639   size_t next_live_bytes = r->next_live_bytes();
4640   double gc_eff          = r->gc_efficiency();
4641   size_t remset_bytes    = r->rem_set()->mem_size();
4642   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4643 
4644   if (r->used() == 0) {
4645     type = "FREE";
4646   } else if (r->is_survivor()) {
4647     type = "SURV";
4648   } else if (r->is_young()) {
4649     type = "EDEN";
4650   } else if (r->startsHumongous()) {
4651     type = "HUMS";
4652 
4653     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4654            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4655            "they should have been zeroed after the last time we used them");
4656     // Set up the _hum_* fields.
4657     _hum_capacity_bytes  = capacity_bytes;
4658     _hum_used_bytes      = used_bytes;
4659     _hum_prev_live_bytes = prev_live_bytes;
4660     _hum_next_live_bytes = next_live_bytes;
4661     get_hum_bytes(&used_bytes, &capacity_bytes,
4662                   &prev_live_bytes, &next_live_bytes);
4663     end = bottom + HeapRegion::GrainWords;
4664   } else if (r->continuesHumongous()) {
4665     type = "HUMC";
4666     get_hum_bytes(&used_bytes, &capacity_bytes,
4667                   &prev_live_bytes, &next_live_bytes);
4668     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4669   } else {
4670     type = "OLD";
4671   }
4672 
4673   _total_used_bytes      += used_bytes;
4674   _total_capacity_bytes  += capacity_bytes;
4675   _total_prev_live_bytes += prev_live_bytes;
4676   _total_next_live_bytes += next_live_bytes;
4677   _total_remset_bytes    += remset_bytes;
4678   _total_strong_code_roots_bytes += strong_code_roots_bytes;
4679 
4680   // Print a line for this particular region.
4681   _out->print_cr(G1PPRL_LINE_PREFIX
4682                  G1PPRL_TYPE_FORMAT
4683                  G1PPRL_ADDR_BASE_FORMAT
4684                  G1PPRL_BYTE_FORMAT
4685                  G1PPRL_BYTE_FORMAT
4686                  G1PPRL_BYTE_FORMAT
4687                  G1PPRL_DOUBLE_FORMAT
4688                  G1PPRL_BYTE_FORMAT
4689                  G1PPRL_BYTE_FORMAT,
4690                  type, p2i(bottom), p2i(end),
4691                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4692                  remset_bytes, strong_code_roots_bytes);
4693 
4694   return false;
4695 }
4696 
4697 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4698   // Add the static memory usage to the total remembered set size.
4699   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
4700   // Print the footer of the output.
4701   _out->print_cr(G1PPRL_LINE_PREFIX);
4702   _out->print_cr(G1PPRL_LINE_PREFIX
4703                  " SUMMARY"
4704                  G1PPRL_SUM_MB_FORMAT("capacity")
4705                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4706                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4707                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
4708                  G1PPRL_SUM_MB_FORMAT("remset")
4709                  G1PPRL_SUM_MB_FORMAT("code-roots"),
4710                  bytes_to_mb(_total_capacity_bytes),
4711                  bytes_to_mb(_total_used_bytes),
4712                  perc(_total_used_bytes, _total_capacity_bytes),
4713                  bytes_to_mb(_total_prev_live_bytes),
4714                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4715                  bytes_to_mb(_total_next_live_bytes),
4716                  perc(_total_next_live_bytes, _total_capacity_bytes),
4717                  bytes_to_mb(_total_remset_bytes),
4718                  bytes_to_mb(_total_strong_code_roots_bytes));
4719   _out->cr();
4720 }