1 /*
   2  * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/symbolTable.hpp"
  27 #include "gc_implementation/g1/concurrentMark.hpp"
  28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  31 #include "gc_implementation/g1/g1RemSet.hpp"
  32 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  33 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
  34 #include "gc_implementation/shared/vmGCOperations.hpp"
  35 #include "memory/genOopClosures.inline.hpp"
  36 #include "memory/referencePolicy.hpp"
  37 #include "memory/resourceArea.hpp"
  38 #include "oops/oop.inline.hpp"
  39 #include "runtime/handles.inline.hpp"
  40 #include "runtime/java.hpp"
  41 
  42 //
  43 // CMS Bit Map Wrapper
  44 
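     // One bit in the mark bitmap covers (1 << _shifter) heap words, so the
     // backing store reserved below needs roughly
     // _bmWordSize >> (_shifter + LogBitsPerByte) bytes (plus one byte for
     // rounding), all of which is committed up front.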
  45 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
  46   _bm((uintptr_t*)NULL,0),
  47   _shifter(shifter) {
  48   _bmStartWord = (HeapWord*)(rs.base());
  49   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  50   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
  51                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  52 
  53   guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  54   // For now we'll just commit all of the bit map up front.
  55   // Later on we'll try to be more parsimonious with swap.
  56   guarantee(_virtual_space.initialize(brs, brs.size()),
  57             "couldn't reserve backing store for CMS bit map");
  58   assert(_virtual_space.committed_size() == brs.size(),
  59          "didn't reserve backing store for all of CMS bit map?");
  60   _bm.set_map((uintptr_t*)_virtual_space.low());
  61   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
  62          _bmWordSize, "inconsistency in bit map sizing");
  63   _bm.set_size(_bmWordSize >> _shifter);
  64 }
  65 
  66 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  67                                                HeapWord* limit) const {
  68   // First we must round addr *up* to a possible object boundary.
  69   addr = (HeapWord*)align_size_up((intptr_t)addr,
  70                                   HeapWordSize << _shifter);
  71   size_t addrOffset = heapWordToOffset(addr);
  72   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  73   size_t limitOffset = heapWordToOffset(limit);
  74   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  75   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  76   assert(nextAddr >= addr, "get_next_one postcondition");
  77   assert(nextAddr == limit || isMarked(nextAddr),
  78          "get_next_one postcondition");
  79   return nextAddr;
  80 }
  81 
  82 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  83                                                  HeapWord* limit) const {
  84   size_t addrOffset = heapWordToOffset(addr);
  85   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  86   size_t limitOffset = heapWordToOffset(limit);
  87   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  88   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  89   assert(nextAddr >= addr, "get_next_zero postcondition");
  90   assert(nextAddr == limit || !isMarked(nextAddr),
  91          "get_next_zero postcondition");
  92   return nextAddr;
  93 }
  94 
  95 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  96   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  97   return (int) (diff >> _shifter);
  98 }
  99 
 100 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
 101   HeapWord* left  = MAX2(_bmStartWord, mr.start());
 102   HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
 103   if (right > left) {
 104     // Right-open interval [leftOffset, rightOffset).
 105     return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
 106   } else {
 107     return true;
 108   }
 109 }
 110 
 111 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
 112                                              size_t    from_start_index,
 113                                              HeapWord* to_start_word,
 114                                              size_t    word_num) {
 115   _bm.mostly_disjoint_range_union(from_bitmap,
 116                                   from_start_index,
 117                                   heapWordToOffset(to_start_word),
 118                                   word_num);
 119 }
 120 
 121 #ifndef PRODUCT
 122 bool CMBitMapRO::covers(ReservedSpace rs) const {
 123   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 124   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
 125          "size inconsistency");
 126   return _bmStartWord == (HeapWord*)(rs.base()) &&
 127          _bmWordSize  == rs.size()>>LogHeapWordSize;
 128 }
 129 #endif
 130 
 131 void CMBitMap::clearAll() {
 132   _bm.clear();
 133   return;
 134 }
 135 
 136 void CMBitMap::markRange(MemRegion mr) {
 137   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 138   assert(!mr.is_empty(), "unexpected empty region");
 139   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 140           ((HeapWord *) mr.end())),
 141          "markRange memory region end is not card aligned");
 142   // convert address range into offset range
 143   _bm.at_put_range(heapWordToOffset(mr.start()),
 144                    heapWordToOffset(mr.end()), true);
 145 }
 146 
 147 void CMBitMap::clearRange(MemRegion mr) {
 148   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 149   assert(!mr.is_empty(), "unexpected empty region");
 150   // convert address range into offset range
 151   _bm.at_put_range(heapWordToOffset(mr.start()),
 152                    heapWordToOffset(mr.end()), false);
 153 }
 154 
 155 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 156                                             HeapWord* end_addr) {
 157   HeapWord* start = getNextMarkedWordAddress(addr);
 158   start = MIN2(start, end_addr);
 159   HeapWord* end   = getNextUnmarkedWordAddress(start);
 160   end = MIN2(end, end_addr);
 161   assert(start <= end, "Consistency check");
 162   MemRegion mr(start, end);
 163   if (!mr.is_empty()) {
 164     clearRange(mr);
 165   }
 166   return mr;
 167 }
 168 
 169 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 170   _base(NULL), _cm(cm)
 171 #ifdef ASSERT
 172   , _drain_in_progress(false)
 173   , _drain_in_progress_yields(false)
 174 #endif
 175 {}
 176 
 177 void CMMarkStack::allocate(size_t size) {
 178   _base = NEW_C_HEAP_ARRAY(oop, size);
 179   if (_base == NULL)
 180     vm_exit_during_initialization("Failed to allocate "
 181                                   "CM mark stack");
 182   _index = 0;
 183   // QQQQ cast ...
 184   _capacity = (jint) size;
 185   _oops_do_bound = -1;
 186   NOT_PRODUCT(_max_depth = 0);
 187 }
 188 
 189 CMMarkStack::~CMMarkStack() {
 190   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
 191 }
 192 
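     // Lock-free push: claim a slot by CAS-ing _index forward, then write the
     // entry into the claimed slot. If the CAS fails another thread got there
     // first, so retry; if the stack is full, just record the overflow and
     // let the caller deal with it.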
 193 void CMMarkStack::par_push(oop ptr) {
 194   while (true) {
 195     if (isFull()) {
 196       _overflow = true;
 197       return;
 198     }
 199     // Otherwise...
 200     jint index = _index;
 201     jint next_index = index+1;
 202     jint res = Atomic::cmpxchg(next_index, &_index, index);
 203     if (res == index) {
 204       _base[index] = ptr;
 205       // Note that we don't maintain this atomically.  We could, but it
 206       // doesn't seem necessary.
 207       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 208       return;
 209     }
 210     // Otherwise, we need to try again.
 211   }
 212 }
 213 
 214 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 215   while (true) {
 216     if (isFull()) {
 217       _overflow = true;
 218       return;
 219     }
 220     // Otherwise...
 221     jint index = _index;
 222     jint next_index = index + n;
 223     if (next_index > _capacity) {
 224       _overflow = true;
 225       return;
 226     }
 227     jint res = Atomic::cmpxchg(next_index, &_index, index);
 228     if (res == index) {
 229       for (int i = 0; i < n; i++) {
 230         int ind = index + i;
 231         assert(ind < _capacity, "By overflow test above.");
 232         _base[ind] = ptr_arr[i];
 233       }
 234       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 235       return;
 236     }
 237     // Otherwise, we need to try again.
 238   }
 239 }
 240 
 241 
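     // Bulk push of n entries while holding ParGCRareEvent_lock; unlike
     // par_adjoin_arr() above, this variant does not need a CAS retry loop
     // because the lock serializes all writers.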
 242 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 243   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 244   jint start = _index;
 245   jint next_index = start + n;
 246   if (next_index > _capacity) {
 247     _overflow = true;
 248     return;
 249   }
 250   // Otherwise.
 251   _index = next_index;
 252   for (int i = 0; i < n; i++) {
 253     int ind = start + i;
 254     assert(ind < _capacity, "By overflow test above.");
 255     _base[ind] = ptr_arr[i];
 256   }
 257 }
 258 
 259 
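     // Pop up to max entries into ptr_arr while holding ParGCRareEvent_lock.
     // Sets *n to the number of entries actually popped and returns false if
     // the stack was empty.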
 260 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 261   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 262   jint index = _index;
 263   if (index == 0) {
 264     *n = 0;
 265     return false;
 266   } else {
 267     int k = MIN2(max, index);
 268     jint new_ind = index - k;
 269     for (int j = 0; j < k; j++) {
 270       ptr_arr[j] = _base[new_ind + j];
 271     }
 272     _index = new_ind;
 273     *n = k;
 274     return true;
 275   }
 276 }
 277 
 278 
 279 CMRegionStack::CMRegionStack() : _base(NULL) {}
 280 
 281 void CMRegionStack::allocate(size_t size) {
 282   _base = NEW_C_HEAP_ARRAY(MemRegion, size);
 283   if (_base == NULL)
 284     vm_exit_during_initialization("Failed to allocate "
 285                                   "CM region mark stack");
 286   _index = 0;
 287   // QQQQ cast ...
 288   _capacity = (jint) size;
 289 }
 290 
 291 CMRegionStack::~CMRegionStack() {
 292   if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
 293 }
 294 
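     // Lock-free push onto the region stack, the counterpart of
     // pop_lock_free() below: claim a slot by CAS-ing _index forward and
     // store the region there, setting the overflow flag if the stack is
     // already full.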
 295 void CMRegionStack::push_lock_free(MemRegion mr) {
 296   assert(mr.word_size() > 0, "Precondition");
 297   while (true) {
 298     jint index = _index;
 299 
 300     if (index >= _capacity) {
 301       _overflow = true;
 302       return;
 303     }
 304     // Otherwise...
 305     jint next_index = index+1;
 306     jint res = Atomic::cmpxchg(next_index, &_index, index);
 307     if (res == index) {
 308       _base[index] = mr;
 309       return;
 310     }
 311     // Otherwise, we need to try again.
 312   }
 313 }
 314 
 315 // Lock-free pop of the region stack. Called during the concurrent
 316 // marking / remark phases. Should only be called in tandem with
 317 // other lock-free pops.
 318 MemRegion CMRegionStack::pop_lock_free() {
 319   while (true) {
 320     jint index = _index;
 321 
 322     if (index == 0) {
 323       return MemRegion();
 324     }
 325     // Otherwise...
 326     jint next_index = index-1;
 327     jint res = Atomic::cmpxchg(next_index, &_index, index);
 328     if (res == index) {
 329       MemRegion mr = _base[next_index];
 330       if (mr.start() != NULL) {
 331         assert(mr.end() != NULL, "invariant");
 332         assert(mr.word_size() > 0, "invariant");
 333         return mr;
 334       } else {
 335         // that entry was invalidated... let's skip it
 336         assert(mr.end() == NULL, "invariant");
 337       }
 338     }
 339     // Otherwise, we need to try again.
 340   }
 341 }
 342 
 343 #if 0
 344 // The routines that manipulate the region stack with a lock are
 345 // not currently used. They should be retained, however, as a
 346 // diagnostic aid.
 347 
 348 void CMRegionStack::push_with_lock(MemRegion mr) {
 349   assert(mr.word_size() > 0, "Precondition");
 350   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 351 
 352   if (isFull()) {
 353     _overflow = true;
 354     return;
 355   }
 356 
 357   _base[_index] = mr;
 358   _index += 1;
 359 }
 360 
 361 MemRegion CMRegionStack::pop_with_lock() {
 362   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 363 
 364   while (true) {
 365     if (_index == 0) {
 366       return MemRegion();
 367     }
 368     _index -= 1;
 369 
 370     MemRegion mr = _base[_index];
 371     if (mr.start() != NULL) {
 372       assert(mr.end() != NULL, "invariant");
 373       assert(mr.word_size() > 0, "invariant");
 374       return mr;
 375     } else {
 376       // that entry was invalidated... let's skip it
 377       assert(mr.end() == NULL, "invariant");
 378     }
 379   }
 380 }
 381 #endif
 382 
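     // Walk the region stack up to _oops_do_bound and invalidate (replace
     // with an empty MemRegion) every entry whose region is now in the
     // collection set. Returns true if at least one entry was invalidated.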
 383 bool CMRegionStack::invalidate_entries_into_cset() {
 384   bool result = false;
 385   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 386   for (int i = 0; i < _oops_do_bound; ++i) {
 387     MemRegion mr = _base[i];
 388     if (mr.start() != NULL) {
 389       assert(mr.end() != NULL, "invariant");
 390       assert(mr.word_size() > 0, "invariant");
 391       HeapRegion* hr = g1h->heap_region_containing(mr.start());
 392       assert(hr != NULL, "invariant");
 393       if (hr->in_collection_set()) {
 394         // The region points into the collection set
 395         _base[i] = MemRegion();
 396         result = true;
 397       }
 398     } else {
 399       // that entry was invalidated... let's skip it
 400       assert(mr.end() == NULL, "invariant");
 401     }
 402   }
 403   return result;
 404 }
 405 
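     // Drain the mark stack, applying cl to each object popped from it (all
     // of which should be grey, i.e. marked in bm). If yield_after is true we
     // offer to yield after each object and return false if we actually
     // yielded before the stack was emptied.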
 406 template<class OopClosureClass>
 407 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 408   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 409          || SafepointSynchronize::is_at_safepoint(),
 410          "Drain recursion must be yield-safe.");
 411   bool res = true;
 412   debug_only(_drain_in_progress = true);
 413   debug_only(_drain_in_progress_yields = yield_after);
 414   while (!isEmpty()) {
 415     oop newOop = pop();
 416     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 417     assert(newOop->is_oop(), "Expected an oop");
 418     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 419            "only grey objects on this stack");
 420     // iterate over the oops in this oop, marking and pushing
 421     // the ones in the G1 heap.
 422     newOop->oop_iterate(cl);
 423     if (yield_after && _cm->do_yield_check()) {
 424       res = false; break;
 425     }
 426   }
 427   debug_only(_drain_in_progress = false);
 428   return res;
 429 }
 430 
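     // Apply f to the first _oops_do_bound entries of the stack. The bound
     // must have been set beforehand; it is reset to -1 once the iteration
     // is done.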
 431 void CMMarkStack::oops_do(OopClosure* f) {
 432   if (_index == 0) return;
 433   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
 434          "Bound must be set.");
 435   for (int i = 0; i < _oops_do_bound; i++) {
 436     f->do_oop(&_base[i]);
 437   }
 438   _oops_do_bound = -1;
 439 }
 440 
 441 bool ConcurrentMark::not_yet_marked(oop obj) const {
 442   return (_g1h->is_obj_ill(obj)
 443           || (_g1h->is_in_permanent(obj)
 444               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
 445 }
 446 
 447 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 448 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 449 #endif // _MSC_VER
 450 
 451 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
 452                                int max_regions) :
 453   _markBitMap1(rs, MinObjAlignment - 1),
 454   _markBitMap2(rs, MinObjAlignment - 1),
 455 
 456   _parallel_marking_threads(0),
 457   _sleep_factor(0.0),
 458   _marking_task_overhead(1.0),
 459   _cleanup_sleep_factor(0.0),
 460   _cleanup_task_overhead(1.0),
 461   _cleanup_list("Cleanup List"),
 462   _region_bm(max_regions, false /* in_resource_area*/),
 463   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
 464            CardTableModRefBS::card_shift,
 465            false /* in_resource_area*/),
 466   _prevMarkBitMap(&_markBitMap1),
 467   _nextMarkBitMap(&_markBitMap2),
 468   _at_least_one_mark_complete(false),
 469 
 470   _markStack(this),
 471   _regionStack(),
 472   // _finger set in set_non_marking_state
 473 
 474   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
 475   // _active_tasks set in set_non_marking_state
 476   // _tasks set inside the constructor
 477   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
 478   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
 479 
 480   _has_overflown(false),
 481   _concurrent(false),
 482   _has_aborted(false),
 483   _restart_for_overflow(false),
 484   _concurrent_marking_in_progress(false),
 485   _should_gray_objects(false),
 486 
 487   // _verbose_level set below
 488 
 489   _init_times(),
 490   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 491   _cleanup_times(),
 492   _total_counting_time(0.0),
 493   _total_rs_scrub_time(0.0),
 494 
 495   _parallel_workers(NULL)
 496 {
 497   CMVerboseLevel verbose_level =
 498     (CMVerboseLevel) G1MarkingVerboseLevel;
 499   if (verbose_level < no_verbose)
 500     verbose_level = no_verbose;
 501   if (verbose_level > high_verbose)
 502     verbose_level = high_verbose;
 503   _verbose_level = verbose_level;
 504 
 505   if (verbose_low())
 506     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 507                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
 508 
 509   _markStack.allocate(MarkStackSize);
 510   _regionStack.allocate(G1MarkRegionStackSize);
 511 
 512   // Create & start a ConcurrentMark thread.
 513   _cmThread = new ConcurrentMarkThread(this);
 514   assert(cmThread() != NULL, "CM Thread should have been created");
 515   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 516 
 517   _g1h = G1CollectedHeap::heap();
 518   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 519   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
 520   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
 521 
 522   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 523   satb_qs.set_buffer_size(G1SATBBufferSize);
 524 
 525   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
 526   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
 527 
 528   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 529   _active_tasks = _max_task_num;
 530   for (int i = 0; i < (int) _max_task_num; ++i) {
 531     CMTaskQueue* task_queue = new CMTaskQueue();
 532     task_queue->initialize();
 533     _task_queues->register_queue(i, task_queue);
 534 
 535     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
 536     _accum_task_vtime[i] = 0.0;
 537   }
 538 
 539   if (ConcGCThreads > ParallelGCThreads) {
 540     vm_exit_during_initialization("Can't have more ConcGCThreads "
 541                                   "than ParallelGCThreads.");
 542   }
 543   if (ParallelGCThreads == 0) {
 544     // if we are not running with any parallel GC threads we will not
 545     // spawn any marking threads either
 546     _parallel_marking_threads =   0;
 547     _sleep_factor             = 0.0;
 548     _marking_task_overhead    = 1.0;
 549   } else {
 550     if (ConcGCThreads > 0) {
 551       // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
 552       // if both are set
 553 
 554       _parallel_marking_threads = ConcGCThreads;
 555       _sleep_factor             = 0.0;
 556       _marking_task_overhead    = 1.0;
 557     } else if (G1MarkingOverheadPercent > 0) {
 558       // we will calculate the number of parallel marking threads
 559       // based on a target overhead with respect to the soft real-time
 560       // goal
 561 
 562       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 563       double overall_cm_overhead =
 564         (double) MaxGCPauseMillis * marking_overhead /
 565         (double) GCPauseIntervalMillis;
 566       double cpu_ratio = 1.0 / (double) os::processor_count();
 567       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 568       double marking_task_overhead =
 569         overall_cm_overhead / marking_thread_num *
 570                                                 (double) os::processor_count();
 571       double sleep_factor =
 572                          (1.0 - marking_task_overhead) / marking_task_overhead;
 573 
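           // Worked example with hypothetical flag values: with
           // G1MarkingOverheadPercent=10, MaxGCPauseMillis=200 and
           // GCPauseIntervalMillis=1000, overall_cm_overhead = 200 * 0.10 /
           // 1000 = 0.02. On an 8-CPU machine cpu_ratio = 0.125, so
           // marking_thread_num = ceil(0.02 / 0.125) = 1,
           // marking_task_overhead = 0.02 / 1 * 8 = 0.16, and sleep_factor =
           // (1 - 0.16) / 0.16 = 5.25, i.e. a single marking thread that
           // sleeps 5.25x as long as it runs.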
 574       _parallel_marking_threads = (size_t) marking_thread_num;
 575       _sleep_factor             = sleep_factor;
 576       _marking_task_overhead    = marking_task_overhead;
 577     } else {
 578       _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
 579       _sleep_factor             = 0.0;
 580       _marking_task_overhead    = 1.0;
 581     }
 582 
 583     if (parallel_marking_threads() > 1)
 584       _cleanup_task_overhead = 1.0;
 585     else
 586       _cleanup_task_overhead = marking_task_overhead();
 587     _cleanup_sleep_factor =
 588                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 589 
 590 #if 0
 591     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 592     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 593     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 594     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 595     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 596 #endif
 597 
 598     guarantee(parallel_marking_threads() > 0, "peace of mind");
 599     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
 600          (int) _parallel_marking_threads, false, true);
 601     if (_parallel_workers == NULL) {
 602       vm_exit_during_initialization("Failed necessary allocation.");
 603     } else {
 604       _parallel_workers->initialize_workers();
 605     }
 606   }
 607 
 608   // so that the call below can read a sensible value
 609   _heap_start = (HeapWord*) rs.base();
 610   set_non_marking_state();
 611 }
 612 
 613 void ConcurrentMark::update_g1_committed(bool force) {
 614   // If concurrent marking is not in progress, then we do not need to
 615   // update _heap_end. This has a subtle and important
 616   // side-effect. Imagine that two evacuation pauses happen between
 617   // marking completion and remark. The first one can grow the
 618   // heap (hence now the finger is below the heap end). Then, the
 619   // second one could unnecessarily push regions on the region
 620   // stack. This causes the invariant that the region stack is empty
 621   // at the beginning of remark to be false. By ensuring that we do
 622   // not observe heap expansions after marking is complete, we avoid
 623   // this problem.
 624   if (!concurrent_marking_in_progress() && !force)
 625     return;
 626 
 627   MemRegion committed = _g1h->g1_committed();
 628   assert(committed.start() == _heap_start, "start shouldn't change");
 629   HeapWord* new_end = committed.end();
 630   if (new_end > _heap_end) {
 631     // The heap has been expanded.
 632 
 633     _heap_end = new_end;
 634   }
 635   // Notice that the heap can also shrink. However, this only happens
 636   // during a Full GC (at least currently) and the entire marking
 637   // phase will bail out and the task will not be restarted. So, let's
 638   // do nothing.
 639 }
 640 
 641 void ConcurrentMark::reset() {
 642   // Starting values for these two. This should be called in a STW
 643   // phase. CM will be notified of any future g1_committed expansions
 644   // at the end of evacuation pauses, when tasks are
 645   // inactive.
 646   MemRegion committed = _g1h->g1_committed();
 647   _heap_start = committed.start();
 648   _heap_end   = committed.end();
 649 
 650   // Separated the asserts so that we know which one fires.
 651   assert(_heap_start != NULL, "heap bounds should look ok");
 652   assert(_heap_end != NULL, "heap bounds should look ok");
 653   assert(_heap_start < _heap_end, "heap bounds should look ok");
 654 
 655   // reset all the marking data structures and any necessary flags
 656   clear_marking_state();
 657 
 658   if (verbose_low())
 659     gclog_or_tty->print_cr("[global] resetting");
 660 
 661   // We do reset all of them, since different phases will use
 662   // a different number of active threads. So, it's easiest to have all
 663   // of them ready.
 664   for (int i = 0; i < (int) _max_task_num; ++i) {
 665     _tasks[i]->reset(_nextMarkBitMap);
 666   }
 667 
 668   // we need this to make sure that the flag is on during the evac
 669   // pause with initial mark piggy-backed
 670   set_concurrent_marking_in_progress();
 671 }
 672 
 673 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
 674   assert(active_tasks <= _max_task_num, "we should not have more");
 675 
 676   _active_tasks = active_tasks;
 677   // Need to update the three data structures below according to the
 678   // number of active threads for this phase.
 679   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 680   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 681   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 682 
 683   _concurrent = concurrent;
 684   // We propagate this to all tasks, not just the active ones.
 685   for (int i = 0; i < (int) _max_task_num; ++i)
 686     _tasks[i]->set_concurrent(concurrent);
 687 
 688   if (concurrent) {
 689     set_concurrent_marking_in_progress();
 690   } else {
 691     // We currently assume that the concurrent flag has been set to
 692     // false before we start remark. At this point we should also be
 693     // in a STW phase.
 694     assert(!concurrent_marking_in_progress(), "invariant");
 695     assert(_finger == _heap_end, "only way to get here");
 696     update_g1_committed(true);
 697   }
 698 }
 699 
 700 void ConcurrentMark::set_non_marking_state() {
 701   // We set the global marking state to some default values when we're
 702   // not doing marking.
 703   clear_marking_state();
 704   _active_tasks = 0;
 705   clear_concurrent_marking_in_progress();
 706 }
 707 
 708 ConcurrentMark::~ConcurrentMark() {
 709   for (int i = 0; i < (int) _max_task_num; ++i) {
 710     delete _task_queues->queue(i);
 711     delete _tasks[i];
 712   }
 713   delete _task_queues;
 714   FREE_C_HEAP_ARRAY(CMTask*, _tasks);
 715 }
 716 
 717 // This closure is used to mark refs into the g1 generation
 718 // from external roots in the CMS bit map.
 719 // Called at the first checkpoint.
 720 //
 721 
 722 void ConcurrentMark::clearNextBitmap() {
 723   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 724   G1CollectorPolicy* g1p = g1h->g1_policy();
 725 
 726   // Make sure that the concurrent mark thread looks to still be in
 727   // the current cycle.
 728   guarantee(cmThread()->during_cycle(), "invariant");
 729 
 730   // We are finishing up the current cycle by clearing the next
 731   // marking bitmap and getting it ready for the next cycle. During
 732   // this time no other cycle can start. So, let's make sure that this
 733   // is the case.
 734   guarantee(!g1h->mark_in_progress(), "invariant");
 735 
 736   // clear the mark bitmap (no grey objects to start with).
 737   // We need to do this in chunks and offer to yield in between
 738   // each chunk.
 739   HeapWord* start  = _nextMarkBitMap->startWord();
 740   HeapWord* end    = _nextMarkBitMap->endWord();
 741   HeapWord* cur    = start;
 742   size_t chunkSize = M;
 743   while (cur < end) {
 744     HeapWord* next = cur + chunkSize;
 745     if (next > end)
 746       next = end;
 747     MemRegion mr(cur,next);
 748     _nextMarkBitMap->clearRange(mr);
 749     cur = next;
 750     do_yield_check();
 751 
 752     // Repeat the asserts from above. We'll do them as asserts here to
 753     // minimize their overhead on the product. However, we'll have
 754     // them as guarantees at the beginning / end of the bitmap
 755     // clearing to get some checking in the product.
 756     assert(cmThread()->during_cycle(), "invariant");
 757     assert(!g1h->mark_in_progress(), "invariant");
 758   }
 759 
 760   // Repeat the asserts from above.
 761   guarantee(cmThread()->during_cycle(), "invariant");
 762   guarantee(!g1h->mark_in_progress(), "invariant");
 763 }
 764 
 765 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 766 public:
 767   bool doHeapRegion(HeapRegion* r) {
 768     if (!r->continuesHumongous()) {
 769       r->note_start_of_marking(true);
 770     }
 771     return false;
 772   }
 773 };
 774 
 775 void ConcurrentMark::checkpointRootsInitialPre() {
 776   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 777   G1CollectorPolicy* g1p = g1h->g1_policy();
 778 
 779   _has_aborted = false;
 780 
 781 #ifndef PRODUCT
 782   if (G1PrintReachableAtInitialMark) {
 783     print_reachable("at-cycle-start",
 784                     true /* use_prev_marking */, true /* all */);
 785   }
 786 #endif
 787 
 788   // Initialise marking structures. This has to be done in a STW phase.
 789   reset();
 790 }
 791 
 792 class CMMarkRootsClosure: public OopsInGenClosure {
 793 private:
 794   ConcurrentMark*  _cm;
 795   G1CollectedHeap* _g1h;
 796   bool             _do_barrier;
 797 
 798 public:
 799   CMMarkRootsClosure(ConcurrentMark* cm,
 800                      G1CollectedHeap* g1h,
 801                      bool do_barrier) : _cm(cm), _g1h(g1h),
 802                                         _do_barrier(do_barrier) { }
 803 
 804   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
 805   virtual void do_oop(      oop* p) { do_oop_work(p); }
 806 
 807   template <class T> void do_oop_work(T* p) {
 808     T heap_oop = oopDesc::load_heap_oop(p);
 809     if (!oopDesc::is_null(heap_oop)) {
 810       oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
 811       assert(obj->is_oop() || obj->mark() == NULL,
 812              "expected an oop, possibly with mark word displaced");
 813       HeapWord* addr = (HeapWord*)obj;
 814       if (_g1h->is_in_g1_reserved(addr)) {
 815         _cm->grayRoot(obj);
 816       }
 817     }
 818     if (_do_barrier) {
 819       assert(!_g1h->is_in_g1_reserved(p),
 820              "Should be called on external roots");
 821       do_barrier(p);
 822     }
 823   }
 824 };
 825 
 826 void ConcurrentMark::checkpointRootsInitialPost() {
 827   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 828 
 829   // If we force an overflow during remark, the remark operation will
 830   // actually abort and we'll restart concurrent marking. If we always
 831   // force an overflow during remark we'll never actually complete the
 832   // marking phase. So, we initialize this here, at the start of the
 833   // cycle, so that the remaining overflow count will decrease at
 834   // every remark and we'll eventually not need to cause one.
 835   force_overflow_stw()->init();
 836 
 837   // For each region note start of marking.
 838   NoteStartOfMarkHRClosure startcl;
 839   g1h->heap_region_iterate(&startcl);
 840 
 841   // Start weak-reference discovery.
 842   ReferenceProcessor* rp = g1h->ref_processor();
 843   rp->verify_no_references_recorded();
 844   rp->enable_discovery(); // enable ("weak") refs discovery
 845   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 846 
 847   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 848   // This is the start of the marking cycle; we expect all
 849   // threads to have SATB queues with active set to false.
 850   satb_mq_set.set_active_all_threads(true, /* new active value */
 851                                      false /* expected_active */);
 852 
 853   // update_g1_committed() will be called at the end of an evac pause
 854   // when marking is on. So, it's also called at the end of the
 855   // initial-mark pause to update the heap end, if the heap expands
 856   // during it. No need to call it here.
 857 }
 858 
 859 // Checkpoint the roots into this generation from outside
 860 // this generation. [Note this initial checkpoint need only
 861 // be approximate -- we'll do a catch up phase subsequently.]
 862 void ConcurrentMark::checkpointRootsInitial() {
 863   assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
 864   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 865 
 866   double start = os::elapsedTime();
 867 
 868   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
 869   g1p->record_concurrent_mark_init_start();
 870   checkpointRootsInitialPre();
 871 
 872   // YSR: when concurrent precleaning is in place, we'll
 873   // need to clear the cached card table here
 874 
 875   ResourceMark rm;
 876   HandleMark  hm;
 877 
 878   g1h->ensure_parsability(false);
 879   g1h->perm_gen()->save_marks();
 880 
 881   CMMarkRootsClosure notOlder(this, g1h, false);
 882   CMMarkRootsClosure older(this, g1h, true);
 883 
 884   g1h->set_marking_started();
 885   g1h->rem_set()->prepare_for_younger_refs_iterate(false);
 886 
 887   g1h->process_strong_roots(true,    // activate StrongRootsScope
 888                             false,   // fake perm gen collection
 889                             SharedHeap::SO_AllClasses,
 890                             &notOlder, // Regular roots
 891                             NULL,     // do not visit active blobs
 892                             &older    // Perm Gen Roots
 893                             );
 894   checkpointRootsInitialPost();
 895 
 896   // Statistics.
 897   double end = os::elapsedTime();
 898   _init_times.add((end - start) * 1000.0);
 899 
 900   g1p->record_concurrent_mark_init_end();
 901 }
 902 
 903 /*
 904  * Notice that in the next two methods, we actually leave the STS
 905  * during the barrier sync and join it immediately afterwards. If we
 906  * do not do this, the following deadlock can occur: one thread could
 907  * be in the barrier sync code, waiting for the other thread to also
 908  * sync up, whereas another one could be trying to yield, while also
 909  * waiting for the other threads to sync up too.
 910  *
 911  * Note, however, that this code is also used during remark and in
 912  * this case we should not attempt to leave / enter the STS, otherwise
 913  * we'll either hit an assert (debug / fastdebug) or deadlock
 914  * (product). So we should only leave / enter the STS if we are
 915  * operating concurrently.
 916  *
 917  * Because the thread that does the sync barrier has left the STS, it
 918  * is possible for it to be suspended for a Full GC, or an evacuation
 919  * pause could occur. This is actually safe, since entering the sync
 920  * barrier is one of the last things do_marking_step() does, and it
 921  * doesn't manipulate any data structures afterwards.
 922  */
 923 
 924 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
 925   if (verbose_low())
 926     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
 927 
 928   if (concurrent()) {
 929     ConcurrentGCThread::stsLeave();
 930   }
 931   _first_overflow_barrier_sync.enter();
 932   if (concurrent()) {
 933     ConcurrentGCThread::stsJoin();
 934   }
 935   // at this point everyone should have synced up and not be doing any
 936   // more work
 937 
 938   if (verbose_low())
 939     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
 940 
 941   // let task 0 do this
 942   if (task_num == 0) {
 943     // task 0 is responsible for clearing the global data structures
 944     // We should be here because of an overflow. During STW we should
 945     // not clear the overflow flag since we rely on it being true when
 946     // we exit this method to abort the pause and restart concurrent
 947     // marking.
 948     clear_marking_state(concurrent() /* clear_overflow */);
 949     force_overflow()->update();
 950 
 951     if (PrintGC) {
 952       gclog_or_tty->date_stamp(PrintGCDateStamps);
 953       gclog_or_tty->stamp(PrintGCTimeStamps);
 954       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 955     }
 956   }
 957 
 958   // after this, each task should reset its own data structures and
 959   // then go into the second barrier
 960 }
 961 
 962 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
 963   if (verbose_low())
 964     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
 965 
 966   if (concurrent()) {
 967     ConcurrentGCThread::stsLeave();
 968   }
 969   _second_overflow_barrier_sync.enter();
 970   if (concurrent()) {
 971     ConcurrentGCThread::stsJoin();
 972   }
 973   // at this point everything should be re-initialised and ready to go
 974 
 975   if (verbose_low())
 976     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
 977 }
 978 
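     // Debugging aid, only built into non-product binaries: force a fixed
     // number of artificial marking overflows so that the overflow /
     // restart-for-overflow path gets exercised. init() arms
     // G1ConcMarkForceOverflow attempts, update() consumes one attempt per
     // overflow, and should_force() reports (and clears) whether the next
     // overflow should be forced.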
 979 #ifndef PRODUCT
 980 void ForceOverflowSettings::init() {
 981   _num_remaining = G1ConcMarkForceOverflow;
 982   _force = false;
 983   update();
 984 }
 985 
 986 void ForceOverflowSettings::update() {
 987   if (_num_remaining > 0) {
 988     _num_remaining -= 1;
 989     _force = true;
 990   } else {
 991     _force = false;
 992   }
 993 }
 994 
 995 bool ForceOverflowSettings::should_force() {
 996   if (_force) {
 997     _force = false;
 998     return true;
 999   } else {
1000     return false;
1001   }
1002 }
1003 #endif // !PRODUCT
1004 
1005 void ConcurrentMark::grayRoot(oop p) {
1006   HeapWord* addr = (HeapWord*) p;
1007   // We can't really check against _heap_start and _heap_end, since it
1008   // is possible during an evacuation pause with piggy-backed
1009   // initial-mark that the committed space is expanded during the
1010   // pause without CM observing this change. So the assertion below
1011   // is a bit conservative, but better than nothing.
1012   assert(_g1h->g1_committed().contains(addr),
1013          "address should be within the heap bounds");
1014 
1015   if (!_nextMarkBitMap->isMarked(addr))
1016     _nextMarkBitMap->parMark(addr);
1017 }
1018 
1019 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
1020   // The objects on the region have already been marked "in bulk" by
1021   // the caller. We only need to decide whether to push the region on
1022   // the region stack or not.
1023 
1024   if (!concurrent_marking_in_progress() || !_should_gray_objects)
1025     // We're done with marking and waiting for remark. We do not need to
1026     // push anything else on the region stack.
1027     return;
1028 
1029   HeapWord* finger = _finger;
1030 
1031   if (verbose_low())
1032     gclog_or_tty->print_cr("[global] attempting to push "
1033                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
1034                            PTR_FORMAT, mr.start(), mr.end(), finger);
1035 
1036   if (mr.start() < finger) {
1037     // The finger is always heap region aligned and it is not possible
1038     // for mr to span heap regions.
1039     assert(mr.end() <= finger, "invariant");
1040 
1041     // Separated the asserts so that we know which one fires.
1042     assert(mr.start() <= mr.end(),
1043            "region boundaries should fall within the committed space");
1044     assert(_heap_start <= mr.start(),
1045            "region boundaries should fall within the committed space");
1046     assert(mr.end() <= _heap_end,
1047            "region boundaries should fall within the committed space");
1048     if (verbose_low())
1049       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
1050                              "below the finger, pushing it",
1051                              mr.start(), mr.end());
1052 
1053     if (!region_stack_push_lock_free(mr)) {
1054       if (verbose_low())
1055         gclog_or_tty->print_cr("[global] region stack has overflown.");
1056     }
1057   }
1058 }
1059 
1060 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1061   // The object is not marked by the caller. We need to at least mark
1062   // it and maybe push it on the stack.
1063 
1064   HeapWord* addr = (HeapWord*)p;
1065   if (!_nextMarkBitMap->isMarked(addr)) {
1066     // We definitely need to mark it, irrespective of whether we bail out
1067     // because we're done with marking.
1068     if (_nextMarkBitMap->parMark(addr)) {
1069       if (!concurrent_marking_in_progress() || !_should_gray_objects)
1070         // If we're done with concurrent marking and we're waiting for
1071         // remark, then we're not pushing anything on the stack.
1072         return;
1073 
1074       // No OrderAccess::store_load() is needed. It is implicit in the
1075       // CAS done in parMark(addr) above
1076       HeapWord* finger = _finger;
1077 
1078       if (addr < finger) {
1079         if (!mark_stack_push(oop(addr))) {
1080           if (verbose_low())
1081             gclog_or_tty->print_cr("[global] global stack overflow "
1082                                    "during parMark");
1083         }
1084       }
1085     }
1086   }
1087 }
1088 
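     // The gang task that performs the actual concurrent marking: each worker
     // repeatedly calls do_marking_step() on its CMTask and, when the step
     // aborts (e.g. because its time budget expired), sleeps for a period
     // derived from the sleep factor so that the configured marking overhead
     // is respected.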
1089 class CMConcurrentMarkingTask: public AbstractGangTask {
1090 private:
1091   ConcurrentMark*       _cm;
1092   ConcurrentMarkThread* _cmt;
1093 
1094 public:
1095   void work(int worker_i) {
1096     assert(Thread::current()->is_ConcurrentGC_thread(),
1097            "this should only be done by a conc GC thread");
1098     ResourceMark rm;
1099 
1100     double start_vtime = os::elapsedVTime();
1101 
1102     ConcurrentGCThread::stsJoin();
1103 
1104     assert((size_t) worker_i < _cm->active_tasks(), "invariant");
1105     CMTask* the_task = _cm->task(worker_i);
1106     the_task->record_start_time();
1107     if (!_cm->has_aborted()) {
1108       do {
1109         double start_vtime_sec = os::elapsedVTime();
1110         double start_time_sec = os::elapsedTime();
1111         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1112 
1113         the_task->do_marking_step(mark_step_duration_ms,
1114                                   true /* do_stealing    */,
1115                                   true /* do_termination */);
1116 
1117         double end_time_sec = os::elapsedTime();
1118         double end_vtime_sec = os::elapsedVTime();
1119         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1120         double elapsed_time_sec = end_time_sec - start_time_sec;
1121         _cm->clear_has_overflown();
1122 
1123         bool ret = _cm->do_yield_check(worker_i);
1124 
1125         jlong sleep_time_ms;
1126         if (!_cm->has_aborted() && the_task->has_aborted()) {
1127           sleep_time_ms =
1128             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1129           ConcurrentGCThread::stsLeave();
1130           os::sleep(Thread::current(), sleep_time_ms, false);
1131           ConcurrentGCThread::stsJoin();
1132         }
1133         double end_time2_sec = os::elapsedTime();
1134         double elapsed_time2_sec = end_time2_sec - start_time_sec;
1135 
1136 #if 0
1137           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1138                                  "overhead %1.4lf",
1139                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1140                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
1141           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1142                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1143 #endif
1144       } while (!_cm->has_aborted() && the_task->has_aborted());
1145     }
1146     the_task->record_end_time();
1147     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1148 
1149     ConcurrentGCThread::stsLeave();
1150 
1151     double end_vtime = os::elapsedVTime();
1152     _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1153   }
1154 
1155   CMConcurrentMarkingTask(ConcurrentMark* cm,
1156                           ConcurrentMarkThread* cmt) :
1157       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1158 
1159   ~CMConcurrentMarkingTask() { }
1160 };
1161 
1162 void ConcurrentMark::markFromRoots() {
1163   // we might be tempted to assert that:
1164   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1165   //        "inconsistent argument?");
1166   // However that wouldn't be right, because it's possible that
1167   // a safepoint is indeed in progress as a younger generation
1168   // stop-the-world GC happens even as we mark in this generation.
1169 
1170   _restart_for_overflow = false;
1171 
1172   size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
1173   force_overflow_conc()->init();
1174   set_phase(active_workers, true /* concurrent */);
1175 
1176   CMConcurrentMarkingTask markingTask(this, cmThread());
1177   if (parallel_marking_threads() > 0)
1178     _parallel_workers->run_task(&markingTask);
1179   else
1180     markingTask.work(0);
1181   print_stats();
1182 }
1183 
1184 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1185   // world is stopped at this checkpoint
1186   assert(SafepointSynchronize::is_at_safepoint(),
1187          "world should be stopped");
1188   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1189 
1190   // If a full collection has happened, we shouldn't do this.
1191   if (has_aborted()) {
1192     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1193     return;
1194   }
1195 
1196   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1197 
1198   if (VerifyDuringGC) {
1199     HandleMark hm;  // handle scope
1200     gclog_or_tty->print(" VerifyDuringGC:(before)");
1201     Universe::heap()->prepare_for_verify();
1202     Universe::verify(true, false, true);
1203   }
1204 
1205   G1CollectorPolicy* g1p = g1h->g1_policy();
1206   g1p->record_concurrent_mark_remark_start();
1207 
1208   double start = os::elapsedTime();
1209 
1210   checkpointRootsFinalWork();
1211 
1212   double mark_work_end = os::elapsedTime();
1213 
1214   weakRefsWork(clear_all_soft_refs);
1215 
1216   if (has_overflown()) {
1217     // Oops.  We overflowed.  Restart concurrent marking.
1218     _restart_for_overflow = true;
1219     // Clear the flag. We do not need it any more.
1220     clear_has_overflown();
1221     if (G1TraceMarkStackOverflow)
1222       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1223   } else {
1224     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1225     // We're done with marking.
1226     // This is the end of the marking cycle; we expect all
1227     // threads to have SATB queues with active set to true.
1228     satb_mq_set.set_active_all_threads(false, /* new active value */
1229                                        true /* expected_active */);
1230 
1231     if (VerifyDuringGC) {
1232       HandleMark hm;  // handle scope
1233       gclog_or_tty->print(" VerifyDuringGC:(after)");
1234       Universe::heap()->prepare_for_verify();
1235       Universe::heap()->verify(/* allow_dirty */      true,
1236                                /* silent */           false,
1237                                /* use_prev_marking */ false);
1238     }
1239     assert(!restart_for_overflow(), "sanity");
1240   }
1241 
1242   // Reset the marking state if marking completed
1243   if (!restart_for_overflow()) {
1244     set_non_marking_state();
1245   }
1246 
1247 #if VERIFY_OBJS_PROCESSED
1248   _scan_obj_cl.objs_processed = 0;
1249   ThreadLocalObjQueue::objs_enqueued = 0;
1250 #endif
1251 
1252   // Statistics
1253   double now = os::elapsedTime();
1254   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1255   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1256   _remark_times.add((now - start) * 1000.0);
1257 
1258   g1p->record_concurrent_mark_remark_end();
1259 }
1260 
1261 #define CARD_BM_TEST_MODE 0
1262 
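     // Closure that walks the marking bitmap of each region to compute the
     // amount of live data, setting the corresponding bits in the region and
     // card liveness bitmaps as it goes. It is used both for the concurrent
     // counting pass (_final == false, with yielding and throttling) and for
     // the final counting at remark (_final == true).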
1263 class CalcLiveObjectsClosure: public HeapRegionClosure {
1264 
1265   CMBitMapRO* _bm;
1266   ConcurrentMark* _cm;
1267   bool _changed;
1268   bool _yield;
1269   size_t _words_done;
1270   size_t _tot_live;
1271   size_t _tot_used;
1272   size_t _regions_done;
1273   double _start_vtime_sec;
1274 
1275   BitMap* _region_bm;
1276   BitMap* _card_bm;
1277   intptr_t _bottom_card_num;
1278   bool _final;
1279 
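       // Set, in the card bitmap, the bits for every card in the inclusive
       // range [start_card_num, last_card_num], translating card numbers into
       // bitmap indices relative to the card number of the bottom of the heap.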
1280   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1281     for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1282 #if CARD_BM_TEST_MODE
1283       guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
1284 #else
1285       _card_bm->par_at_put(i - _bottom_card_num, 1);
1286 #endif
1287     }
1288   }
1289 
1290 public:
1291   CalcLiveObjectsClosure(bool final,
1292                          CMBitMapRO *bm, ConcurrentMark *cm,
1293                          BitMap* region_bm, BitMap* card_bm) :
1294     _bm(bm), _cm(cm), _changed(false), _yield(true),
1295     _words_done(0), _tot_live(0), _tot_used(0),
1296     _region_bm(region_bm), _card_bm(card_bm),_final(final),
1297     _regions_done(0), _start_vtime_sec(0.0)
1298   {
1299     _bottom_card_num =
1300       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1301                CardTableModRefBS::card_shift);
1302   }
1303 
1304   // It takes a region that's not empty (i.e., it has at least one
1305   // live object in it) and sets its corresponding bit on the region
1306   // bitmap to 1. If the region is "starts humongous" it will also set
1307   // to 1 the bits on the region bitmap that correspond to its
1308   // associated "continues humongous" regions.
1309   void set_bit_for_region(HeapRegion* hr) {
1310     assert(!hr->continuesHumongous(), "should have filtered those out");
1311 
1312     size_t index = hr->hrs_index();
1313     if (!hr->startsHumongous()) {
1314       // Normal (non-humongous) case: just set the bit.
1315       _region_bm->par_at_put((BitMap::idx_t) index, true);
1316     } else {
1317       // Starts humongous case: calculate how many regions are part of
1318       // this humongous region and then set the bit range. It might
1319       // have been a bit more efficient to look at the object that
1320       // spans these humongous regions to calculate their number from
1321       // the object's size. However, it's a good idea to calculate
1322       // this based on the metadata itself, and not the region
1323       // contents, so that this code is not aware of what goes into
1324       // the humongous regions (in case this changes in the future).
1325       G1CollectedHeap* g1h = G1CollectedHeap::heap();
1326       size_t end_index = index + 1;
1327       while (end_index < g1h->n_regions()) {
1328         HeapRegion* chr = g1h->region_at(end_index);
1329         if (!chr->continuesHumongous()) {
1330           break;
1331         }
1332         end_index += 1;
1333       }
1334       _region_bm->par_at_put_range((BitMap::idx_t) index,
1335                                    (BitMap::idx_t) end_index, true);
1336     }
1337   }
1338 
1339   bool doHeapRegion(HeapRegion* hr) {
1340     if (!_final && _regions_done == 0)
1341       _start_vtime_sec = os::elapsedVTime();
1342 
1343     if (hr->continuesHumongous()) {
1344       // We will ignore these here and process them when their
1345       // associated "starts humongous" region is processed (see
1346       // set_bit_for_heap_region()). Note that we cannot rely on their
1347       // associated "starts humongous" region to have their bit set to
1348       // 1 since, due to the region chunking in the parallel region
1349       // iteration, a "continues humongous" region might be visited
1350       // before its associated "starts humongous".
1351       return false;
1352     }
1353 
1354     HeapWord* nextTop = hr->next_top_at_mark_start();
1355     HeapWord* start   = hr->top_at_conc_mark_count();
1356     assert(hr->bottom() <= start && start <= hr->end() &&
1357            hr->bottom() <= nextTop && nextTop <= hr->end() &&
1358            start <= nextTop,
1359            "Preconditions.");
1360     // Otherwise, record the number of words we'll examine.
1361     size_t words_done = (nextTop - start);
1362     // Find the first marked object at or after "start".
1363     start = _bm->getNextMarkedWordAddress(start, nextTop);
1364     size_t marked_bytes = 0;
1365 
1366     // Below, the term "card num" means the result of shifting an address
1367     // by the card shift -- address 0 corresponds to card number 0.  One
1368     // must subtract the card num of the bottom of the heap to obtain a
1369     // card table index.
1370     // The first card num of the sequence of live cards currently being
1371     // constructed.  -1 ==> no sequence.
1372     intptr_t start_card_num = -1;
1373     // The last card num of the sequence of live cards currently being
1374     // constructed.  -1 ==> no sequence.
1375     intptr_t last_card_num = -1;
1376 
1377     while (start < nextTop) {
1378       if (_yield && _cm->do_yield_check()) {
1379         // We yielded.  It might be for a full collection, in which case
1380         // all bets are off; terminate the traversal.
1381         if (_cm->has_aborted()) {
1382           _changed = false;
1383           return true;
1384         } else {
1385           // Otherwise, it might be a collection pause, and the region
1386           // we're looking at might be in the collection set.  We'll
1387           // abandon this region.
1388           return false;
1389         }
1390       }
1391       oop obj = oop(start);
1392       int obj_sz = obj->size();
1393       // The card num of the start of the current object.
1394       intptr_t obj_card_num =
1395         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1396 
1397       HeapWord* obj_last = start + obj_sz - 1;
1398       intptr_t obj_last_card_num =
1399         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1400 
1401       if (obj_card_num != last_card_num) {
1402         if (start_card_num == -1) {
1403           assert(last_card_num == -1, "Both or neither.");
1404           start_card_num = obj_card_num;
1405         } else {
1406           assert(last_card_num != -1, "Both or neither.");
1407           assert(obj_card_num >= last_card_num, "Inv");
1408           if ((obj_card_num - last_card_num) > 1) {
1409             // Mark the last run, and start a new one.
1410             mark_card_num_range(start_card_num, last_card_num);
1411             start_card_num = obj_card_num;
1412           }
1413         }
1414 #if CARD_BM_TEST_MODE
1415         /*
1416         gclog_or_tty->print_cr("Setting bits from %d/%d.",
1417                                obj_card_num - _bottom_card_num,
1418                                obj_last_card_num - _bottom_card_num);
1419         */
1420         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1421           _card_bm->par_at_put(j - _bottom_card_num, 1);
1422         }
1423 #endif
1424       }
1425       // In any case, we set the last card num.
1426       last_card_num = obj_last_card_num;
1427 
1428       marked_bytes += (size_t)obj_sz * HeapWordSize;
1429       // Find the next marked object after this one.
1430       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1431       _changed = true;
1432     }
1433     // Handle the last range, if any.
1434     if (start_card_num != -1)
1435       mark_card_num_range(start_card_num, last_card_num);
1436     if (_final) {
1437       // Mark the allocated-since-marking portion...
1438       HeapWord* tp = hr->top();
1439       if (nextTop < tp) {
1440         start_card_num =
1441           intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1442         last_card_num =
1443           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1444         mark_card_num_range(start_card_num, last_card_num);
1445         // This definitely means the region has live objects.
1446         set_bit_for_region(hr);
1447       }
1448     }
1449 
1450     hr->add_to_marked_bytes(marked_bytes);
1451     // Update the live region bitmap.
1452     if (marked_bytes > 0) {
1453       set_bit_for_region(hr);
1454     }
1455     hr->set_top_at_conc_mark_count(nextTop);
1456     _tot_live += hr->next_live_bytes();
1457     _tot_used += hr->used();
1458     _words_done = words_done;
1459 
1460     if (!_final) {
1461       ++_regions_done;
1462       if (_regions_done % 10 == 0) {
1463         double end_vtime_sec = os::elapsedVTime();
1464         double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1465         if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1466           jlong sleep_time_ms =
1467             (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
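                    // Illustrative throttling arithmetic (cleanup_sleep_factor()
                    // == 0.5 is an assumed value): 20 ms of consumed vtime yields
                    // a ~10 ms sleep, so this thread uses roughly two thirds of a
                    // CPU while counting.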
1468           os::sleep(Thread::current(), sleep_time_ms, false);
1469           _start_vtime_sec = end_vtime_sec;
1470         }
1471       }
1472     }
1473 
1474     return false;
1475   }
1476 
1477   bool changed() { return _changed;  }
1478   void reset()   { _changed = false; _words_done = 0; }
1479   void no_yield() { _yield = false; }
1480   size_t words_done() { return _words_done; }
1481   size_t tot_live() { return _tot_live; }
1482   size_t tot_used() { return _tot_used; }
1483 };
1484 
1485 
1486 void ConcurrentMark::calcDesiredRegions() {
1487   _region_bm.clear();
1488   _card_bm.clear();
1489   CalcLiveObjectsClosure calccl(false /*final*/,
1490                                 nextMarkBitMap(), this,
1491                                 &_region_bm, &_card_bm);
1492   G1CollectedHeap *g1h = G1CollectedHeap::heap();
1493   g1h->heap_region_iterate(&calccl);
1494 
1495   do {
1496     calccl.reset();
1497     g1h->heap_region_iterate(&calccl);
1498   } while (calccl.changed());
1499 }
1500 
1501 class G1ParFinalCountTask: public AbstractGangTask {
1502 protected:
1503   G1CollectedHeap* _g1h;
1504   CMBitMap* _bm;
1505   size_t _n_workers;
1506   size_t *_live_bytes;
1507   size_t *_used_bytes;
1508   BitMap* _region_bm;
1509   BitMap* _card_bm;
1510 public:
1511   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1512                       BitMap* region_bm, BitMap* card_bm) :
1513     AbstractGangTask("G1 final counting"), _g1h(g1h),
1514     _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
1515   {
1516     if (ParallelGCThreads > 0)
1517       _n_workers = _g1h->workers()->total_workers();
1518     else
1519       _n_workers = 1;
1520     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1521     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1522   }
1523 
1524   ~G1ParFinalCountTask() {
1525     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1526     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1527   }
1528 
1529   void work(int i) {
1530     CalcLiveObjectsClosure calccl(true /*final*/,
1531                                   _bm, _g1h->concurrent_mark(),
1532                                   _region_bm, _card_bm);
1533     calccl.no_yield();
1534     if (G1CollectedHeap::use_parallel_gc_threads()) {
1535       _g1h->heap_region_par_iterate_chunked(&calccl, i,
1536                                             HeapRegion::FinalCountClaimValue);
1537     } else {
1538       _g1h->heap_region_iterate(&calccl);
1539     }
1540     assert(calccl.complete(), "Shouldn't have yielded!");
1541 
1542     assert((size_t) i < _n_workers, "invariant");
1543     _live_bytes[i] = calccl.tot_live();
1544     _used_bytes[i] = calccl.tot_used();
1545   }
1546   size_t live_bytes()  {
1547     size_t live_bytes = 0;
1548     for (size_t i = 0; i < _n_workers; ++i)
1549       live_bytes += _live_bytes[i];
1550     return live_bytes;
1551   }
1552   size_t used_bytes()  {
1553     size_t used_bytes = 0;
1554     for (size_t i = 0; i < _n_workers; ++i)
1555       used_bytes += _used_bytes[i];
1556     return used_bytes;
1557   }
1558 };
1559 
1560 class G1ParNoteEndTask;
1561 
1562 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1563   G1CollectedHeap* _g1;
1564   int _worker_num;
1565   size_t _max_live_bytes;
1566   size_t _regions_claimed;
1567   size_t _freed_bytes;
1568   FreeRegionList* _local_cleanup_list;
1569   HumongousRegionSet* _humongous_proxy_set;
1570   HRRSCleanupTask* _hrrs_cleanup_task;
1571   double _claimed_region_time;
1572   double _max_region_time;
1573 
1574 public:
1575   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1576                              int worker_num,
1577                              FreeRegionList* local_cleanup_list,
1578                              HumongousRegionSet* humongous_proxy_set,
1579                              HRRSCleanupTask* hrrs_cleanup_task);
1580   size_t freed_bytes() { return _freed_bytes; }
1581 
1582   bool doHeapRegion(HeapRegion *r);
1583 
1584   size_t max_live_bytes() { return _max_live_bytes; }
1585   size_t regions_claimed() { return _regions_claimed; }
1586   double claimed_region_time_sec() { return _claimed_region_time; }
1587   double max_region_time_sec() { return _max_region_time; }
1588 };
1589 
1590 class G1ParNoteEndTask: public AbstractGangTask {
1591   friend class G1NoteEndOfConcMarkClosure;
1592 
1593 protected:
1594   G1CollectedHeap* _g1h;
1595   size_t _max_live_bytes;
1596   size_t _freed_bytes;
1597   FreeRegionList* _cleanup_list;
1598 
1599 public:
1600   G1ParNoteEndTask(G1CollectedHeap* g1h,
1601                    FreeRegionList* cleanup_list) :
1602     AbstractGangTask("G1 note end"), _g1h(g1h),
1603     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1604 
1605   void work(int i) {
1606     double start = os::elapsedTime();
1607     FreeRegionList local_cleanup_list("Local Cleanup List");
1608     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1609     HRRSCleanupTask hrrs_cleanup_task;
1610     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
1611                                            &humongous_proxy_set,
1612                                            &hrrs_cleanup_task);
1613     if (G1CollectedHeap::use_parallel_gc_threads()) {
1614       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1615                                             HeapRegion::NoteEndClaimValue);
1616     } else {
1617       _g1h->heap_region_iterate(&g1_note_end);
1618     }
1619     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1620 
1621     // Now update the lists
1622     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1623                                             NULL /* free_list */,
1624                                             &humongous_proxy_set,
1625                                             true /* par */);
1626     {
1627       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1628       _max_live_bytes += g1_note_end.max_live_bytes();
1629       _freed_bytes += g1_note_end.freed_bytes();
1630 
1631       _cleanup_list->add_as_tail(&local_cleanup_list);
1632       assert(local_cleanup_list.is_empty(), "post-condition");
1633 
1634       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1635     }
1636     double end = os::elapsedTime();
1637     if (G1PrintParCleanupStats) {
1638       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1639                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1640                           i, start, end, (end-start)*1000.0,
1641                           g1_note_end.regions_claimed(),
1642                           g1_note_end.claimed_region_time_sec()*1000.0,
1643                           g1_note_end.max_region_time_sec()*1000.0);
1644     }
1645   }
1646   size_t max_live_bytes() { return _max_live_bytes; }
1647   size_t freed_bytes() { return _freed_bytes; }
1648 };
1649 
1650 class G1ParScrubRemSetTask: public AbstractGangTask {
1651 protected:
1652   G1RemSet* _g1rs;
1653   BitMap* _region_bm;
1654   BitMap* _card_bm;
1655 public:
1656   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1657                        BitMap* region_bm, BitMap* card_bm) :
1658     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1659     _region_bm(region_bm), _card_bm(card_bm)
1660   {}
1661 
1662   void work(int i) {
1663     if (G1CollectedHeap::use_parallel_gc_threads()) {
1664       _g1rs->scrub_par(_region_bm, _card_bm, i,
1665                        HeapRegion::ScrubRemSetClaimValue);
1666     } else {
1667       _g1rs->scrub(_region_bm, _card_bm);
1668     }
1669   }
1670 
1671 };
1672 
1673 G1NoteEndOfConcMarkClosure::
1674 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1675                            int worker_num,
1676                            FreeRegionList* local_cleanup_list,
1677                            HumongousRegionSet* humongous_proxy_set,
1678                            HRRSCleanupTask* hrrs_cleanup_task)
1679   : _g1(g1), _worker_num(worker_num),
1680     _max_live_bytes(0), _regions_claimed(0),
1681     _freed_bytes(0),
1682     _claimed_region_time(0.0), _max_region_time(0.0),
1683     _local_cleanup_list(local_cleanup_list),
1684     _humongous_proxy_set(humongous_proxy_set),
1685     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1686 
1687 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) {
1688   // We use a claim value of zero here because all regions
1689   // were claimed with value 1 in the FinalCount task.
1690   hr->reset_gc_time_stamp();
1691   if (!hr->continuesHumongous()) {
1692     double start = os::elapsedTime();
1693     _regions_claimed++;
1694     hr->note_end_of_marking();
1695     _max_live_bytes += hr->max_live_bytes();
1696     _g1->free_region_if_empty(hr,
1697                               &_freed_bytes,
1698                               _local_cleanup_list,
1699                               _humongous_proxy_set,
1700                               _hrrs_cleanup_task,
1701                               true /* par */);
1702     double region_time = (os::elapsedTime() - start);
1703     _claimed_region_time += region_time;
1704     if (region_time > _max_region_time) _max_region_time = region_time;
1705   }
1706   return false;
1707 }
1708 
1709 void ConcurrentMark::cleanup() {
1710   // world is stopped at this checkpoint
1711   assert(SafepointSynchronize::is_at_safepoint(),
1712          "world should be stopped");
1713   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1714 
1715   // If a full collection has happened, we shouldn't do this.
1716   if (has_aborted()) {
1717     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1718     return;
1719   }
1720 
1721   g1h->verify_region_sets_optional();
1722 
1723   if (VerifyDuringGC) {
1724     HandleMark hm;  // handle scope
1725     gclog_or_tty->print(" VerifyDuringGC:(before)");
1726     Universe::heap()->prepare_for_verify();
1727     Universe::verify(/* allow dirty  */ true,
1728                      /* silent       */ false,
1729                      /* prev marking */ true);
1730   }
1731 
1732   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1733   g1p->record_concurrent_mark_cleanup_start();
1734 
1735   double start = os::elapsedTime();
1736 
1737   HeapRegionRemSet::reset_for_cleanup_tasks();
1738 
1739   // Do counting once more with the world stopped for good measure.
1740   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1741                                         &_region_bm, &_card_bm);
1742   if (G1CollectedHeap::use_parallel_gc_threads()) {
1743     assert(g1h->check_heap_region_claim_values(
1744                                                HeapRegion::InitialClaimValue),
1745            "sanity check");
1746 
1747     int n_workers = g1h->workers()->total_workers();
1748     g1h->set_par_threads(n_workers);
1749     g1h->workers()->run_task(&g1_par_count_task);
1750     g1h->set_par_threads(0);
1751 
1752     assert(g1h->check_heap_region_claim_values(
1753                                              HeapRegion::FinalCountClaimValue),
1754            "sanity check");
1755   } else {
1756     g1_par_count_task.work(0);
1757   }
1758 
1759   size_t known_garbage_bytes =
1760     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1761 #if 0
1762   gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
1763                          (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
1764                          (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
1765                          (double) known_garbage_bytes / (double) (1024 * 1024));
1766 #endif // 0
1767   g1p->set_known_garbage_bytes(known_garbage_bytes);
1768 
1769   size_t start_used_bytes = g1h->used();
1770   _at_least_one_mark_complete = true;
1771   g1h->set_marking_complete();
1772 
1773   double count_end = os::elapsedTime();
1774   double this_final_counting_time = (count_end - start);
1775   if (G1PrintParCleanupStats) {
1776     gclog_or_tty->print_cr("Cleanup:");
1777     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
1778                            this_final_counting_time*1000.0);
1779   }
1780   _total_counting_time += this_final_counting_time;
1781 
1782   if (G1PrintRegionLivenessInfo) {
1783     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1784     _g1h->heap_region_iterate(&cl);
1785   }
1786 
1787   // Install newly created mark bitMap as "prev".
1788   swapMarkBitMaps();
1789 
1790   g1h->reset_gc_time_stamp();
1791 
1792   // Note end of marking in all heap regions.
1793   double note_end_start = os::elapsedTime();
1794   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1795   if (G1CollectedHeap::use_parallel_gc_threads()) {
1796     int n_workers = g1h->workers()->total_workers();
1797     g1h->set_par_threads(n_workers);
1798     g1h->workers()->run_task(&g1_par_note_end_task);
1799     g1h->set_par_threads(0);
1800 
1801     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1802            "sanity check");
1803   } else {
1804     g1_par_note_end_task.work(0);
1805   }
1806 
1807   if (!cleanup_list_is_empty()) {
1808     // The cleanup list is not empty, so we'll have to process it
1809     // concurrently. Notify anyone else that might be wanting free
1810     // regions that there will be more free regions coming soon.
1811     g1h->set_free_regions_coming();
1812   }
1813   double note_end_end = os::elapsedTime();
1814   if (G1PrintParCleanupStats) {
1815     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
1816                            (note_end_end - note_end_start)*1000.0);
1817   }
1818 
1819 
1820   // The remembered set scrubbing must happen before the record_concurrent_mark_cleanup_end()
1821   // call below, since it affects the metric by which we sort the heap regions.
1822   if (G1ScrubRemSets) {
1823     double rs_scrub_start = os::elapsedTime();
1824     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1825     if (G1CollectedHeap::use_parallel_gc_threads()) {
1826       int n_workers = g1h->workers()->total_workers();
1827       g1h->set_par_threads(n_workers);
1828       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1829       g1h->set_par_threads(0);
1830 
1831       assert(g1h->check_heap_region_claim_values(
1832                                             HeapRegion::ScrubRemSetClaimValue),
1833              "sanity check");
1834     } else {
1835       g1_par_scrub_rs_task.work(0);
1836     }
1837 
1838     double rs_scrub_end = os::elapsedTime();
1839     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1840     _total_rs_scrub_time += this_rs_scrub_time;
1841   }
1842 
1843   // this will also free any regions totally full of garbage objects,
1844   // and sort the regions.
1845   g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1846                         g1_par_note_end_task.freed_bytes(),
1847                         g1_par_note_end_task.max_live_bytes());
1848 
1849   // Statistics.
1850   double end = os::elapsedTime();
1851   _cleanup_times.add((end - start) * 1000.0);
1852 
1853   // G1CollectedHeap::heap()->print();
1854   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1855   // G1CollectedHeap::heap()->get_gc_time_stamp());
1856 
1857   if (PrintGC || PrintGCDetails) {
1858     g1h->print_size_transition(gclog_or_tty,
1859                                start_used_bytes,
1860                                g1h->used(),
1861                                g1h->capacity());
1862   }
1863 
1864   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1865   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1866 
1867   // We need to make this be a "collection" so any collection pause that
1868   // races with it goes around and waits for completeCleanup to finish.
1869   g1h->increment_total_collections();
1870 
1871   if (VerifyDuringGC) {
1872     HandleMark hm;  // handle scope
1873     gclog_or_tty->print(" VerifyDuringGC:(after)");
1874     Universe::heap()->prepare_for_verify();
1875     Universe::verify(/* allow dirty  */ true,
1876                      /* silent       */ false,
1877                      /* prev marking */ true);
1878   }
1879 
1880   g1h->verify_region_sets_optional();
1881 }
1882 
1883 void ConcurrentMark::completeCleanup() {
1884   if (has_aborted()) return;
1885 
1886   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1887 
1888   _cleanup_list.verify_optional();
1889   FreeRegionList tmp_free_list("Tmp Free List");
1890 
1891   if (G1ConcRegionFreeingVerbose) {
1892     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1893                            "cleanup list has "SIZE_FORMAT" entries",
1894                            _cleanup_list.length());
1895   }
1896 
1897   // No one else should be accessing the _cleanup_list at this point,
1898   // so it's not necessary to take any locks.
1899   while (!_cleanup_list.is_empty()) {
1900     HeapRegion* hr = _cleanup_list.remove_head();
1901     assert(hr != NULL, "the list was not empty");
1902     hr->par_clear();
1903     tmp_free_list.add_as_tail(hr);
1904 
1905     // Instead of adding one region at a time to the secondary_free_list,
1906     // we accumulate them in the local list and move them a few at a
1907     // time. This also cuts down on the number of notify_all() calls
1908     // we do during this process. We'll also append the local list when
1909     // _cleanup_list is empty (which means we just removed the last
1910     // region from the _cleanup_list).
1911     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1912         _cleanup_list.is_empty()) {
1913       if (G1ConcRegionFreeingVerbose) {
1914         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1915                                "appending "SIZE_FORMAT" entries to the "
1916                                "secondary_free_list, clean list still has "
1917                                SIZE_FORMAT" entries",
1918                                tmp_free_list.length(),
1919                                _cleanup_list.length());
1920       }
1921 
1922       {
1923         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1924         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
1925         SecondaryFreeList_lock->notify_all();
1926       }
1927 
1928       if (G1StressConcRegionFreeing) {
1929         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1930           os::sleep(Thread::current(), (jlong) 1, false);
1931         }
1932       }
1933     }
1934   }
1935   assert(tmp_free_list.is_empty(), "post-condition");
1936 }
1937 
1938 // Support closures for reference processing in G1
1939 
1940 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1941   HeapWord* addr = (HeapWord*)obj;
1942   return addr != NULL &&
1943          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1944 }
1945 
1946 class G1CMKeepAliveClosure: public OopClosure {
1947   G1CollectedHeap* _g1;
1948   ConcurrentMark*  _cm;
1949   CMBitMap*        _bitMap;
1950  public:
1951   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1952                        CMBitMap* bitMap) :
1953     _g1(g1), _cm(cm),
1954     _bitMap(bitMap) {}
1955 
1956   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1957   virtual void do_oop(      oop* p) { do_oop_work(p); }
1958 
1959   template <class T> void do_oop_work(T* p) {
1960     oop obj = oopDesc::load_decode_heap_oop(p);
1961     HeapWord* addr = (HeapWord*)obj;
1962 
1963     if (_cm->verbose_high())
1964       gclog_or_tty->print_cr("\t[0] we're looking at location "
1965                                "*"PTR_FORMAT" = "PTR_FORMAT,
1966                                p, (void*) obj);
1967 
1968     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
1969       _bitMap->mark(addr);
1970       _cm->mark_stack_push(obj);
1971     }
1972   }
1973 };
1974 
1975 class G1CMDrainMarkingStackClosure: public VoidClosure {
1976   CMMarkStack*                  _markStack;
1977   CMBitMap*                     _bitMap;
1978   G1CMKeepAliveClosure*         _oopClosure;
1979  public:
1980   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1981                                G1CMKeepAliveClosure* oopClosure) :
1982     _bitMap(bitMap),
1983     _markStack(markStack),
1984     _oopClosure(oopClosure)
1985   {}
1986 
1987   void do_void() {
1988     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1989   }
1990 };
1991 
1992 // 'Keep Alive' closure used by parallel reference processing.
1993 // An instance of this closure is used in the parallel reference processing
1994 // code rather than an instance of G1CMKeepAliveClosure. We could have used
1995 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are
1996 // placed onto discovered ref lists once so we can mark and push with no
1997 // need to check whether the object has already been marked. Using the
1998 // G1CMKeepAliveClosure would mean, however, having all the worker threads
1999 // operating on the global mark stack. This means that an individual
2000 // worker would be doing lock-free pushes while it processes its own
2001 // discovered ref list followed by a drain call. If the discovered ref lists
2002 // are unbalanced then this could cause interference with the other
2003 // workers. Using a CMTask (and its embedded local data structures)
2004 // avoids that potential interference.
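     // A rough sketch of how this closure is driven (illustrative only; the
     // actual wiring is in G1RefProcTaskProxy::work() below):
     //   CMTask* task = cm->task(worker_i);
     //   G1CMParKeepAliveAndDrainClosure keep_alive(cm, task, bitmap);
     //   G1CMParDrainMarkingStackClosure drain(cm, task);
     //   proc_task.work(worker_i, is_alive, keep_alive, drain);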
2005 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2006   ConcurrentMark*  _cm;
2007   CMTask*          _task;
2008   CMBitMap*        _bitMap;
2009   int              _ref_counter_limit;
2010   int              _ref_counter;
2011  public:
2012   G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm,
2013                                   CMTask* task,
2014                                   CMBitMap* bitMap) :
2015     _cm(cm), _task(task), _bitMap(bitMap),
2016     _ref_counter_limit(G1RefProcDrainInterval)
2017   {
2018     assert(_ref_counter_limit > 0, "sanity");
2019     _ref_counter = _ref_counter_limit;
2020   }
2021 
2022   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2023   virtual void do_oop(      oop* p) { do_oop_work(p); }
2024 
2025   template <class T> void do_oop_work(T* p) {
2026     if (!_cm->has_overflown()) {
2027       oop obj = oopDesc::load_decode_heap_oop(p);
2028       if (_cm->verbose_high())
2029         gclog_or_tty->print_cr("\t[%d] we're looking at location "
2030                                "*"PTR_FORMAT" = "PTR_FORMAT,
2031                                _task->task_id(), p, (void*) obj);
2032 
2033       _task->deal_with_reference(obj);
2034       _ref_counter--;
2035 
2036       if (_ref_counter == 0) {
2037         // We have dealt with _ref_counter_limit references, pushing them and objects
2038         // reachable from them on to the local stack (and possibly the global stack).
2039         // Call do_marking_step() to process these entries. We call the routine in a
2040         // loop, which we'll exit if there's nothing more to do (i.e. we're done
2041         // with the entries that we've pushed as a result of the deal_with_reference
2042         // calls above) or we overflow.
2043         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2044         // while there may still be some work to do. (See the comment at the
2045         // beginning of CMTask::do_marking_step() for those conditions - one of which
2046         // is reaching the specified time target.) It is only when
2047         // CMTask::do_marking_step() returns without setting the has_aborted() flag
2048         // that the marking has completed.
2049         do {
2050           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2051           _task->do_marking_step(mark_step_duration_ms,
2052                                  false /* do_stealing    */,
2053                                  false /* do_termination */);
2054         } while (_task->has_aborted() && !_cm->has_overflown());
2055         _ref_counter = _ref_counter_limit;
2056       }
2057     } else {
2058        if (_cm->verbose_high())
2059          gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2060     }
2061   }
2062 };
2063 
2064 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2065   ConcurrentMark* _cm;
2066   CMTask* _task;
2067  public:
2068   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2069     _cm(cm), _task(task)
2070   {}
2071 
2072   void do_void() {
2073     do {
2074       if (_cm->verbose_high())
2075         gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step", _task->task_id());
2076 
2077       // We call CMTask::do_marking_step() to completely drain the local and
2078       // global marking stacks. The routine is called in a loop, which we'll
2079       // exit if there's nothing more to do (i.e. we've completely drained the
2080       // entries that were pushed as a result of applying the
2081       // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2082       // lists above) or we overflow the global marking stack.
2083       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2084       // while there may still be some work to do. (See the comment at the
2085       // beginning of CMTask::do_marking_step() for those conditions - one of which
2086       // is reaching the specified time target.) It is only when
2087       // CMTask::do_marking_step() returns without setting the has_aborted() flag
2088       // that the marking has completed.
2089 
2090       _task->do_marking_step(1000000000.0 /* something very large */,
2091                              true /* do_stealing    */,
2092                              true /* do_termination */);
2093     } while (_task->has_aborted() && !_cm->has_overflown());
2094   }
2095 };
2096 
2097 // Implementation of AbstractRefProcTaskExecutor for G1
2098 class G1RefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2099 private:
2100   G1CollectedHeap* _g1h;
2101   ConcurrentMark*  _cm;
2102   CMBitMap*        _bitmap;
2103   WorkGang*        _workers;
2104   int              _active_workers;
2105 
2106 public:
2107   G1RefProcTaskExecutor(G1CollectedHeap* g1h,
2108                         ConcurrentMark* cm,
2109                         CMBitMap* bitmap,
2110                         WorkGang* workers,
2111                         int n_workers) :
2112     _g1h(g1h), _cm(cm), _bitmap(bitmap),
2113     _workers(workers), _active_workers(n_workers)
2114   { }
2115 
2116   // Executes the given task using concurrent marking worker threads.
2117   virtual void execute(ProcessTask& task);
2118   virtual void execute(EnqueueTask& task);
2119 };
2120 
2121 class G1RefProcTaskProxy: public AbstractGangTask {
2122   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2123   ProcessTask&     _proc_task;
2124   G1CollectedHeap* _g1h;
2125   ConcurrentMark*  _cm;
2126   CMBitMap*        _bitmap;
2127 
2128 public:
2129   G1RefProcTaskProxy(ProcessTask& proc_task,
2130                      G1CollectedHeap* g1h,
2131                      ConcurrentMark* cm,
2132                      CMBitMap* bitmap) :
2133     AbstractGangTask("Process reference objects in parallel"),
2134     _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap)
2135   {}
2136 
2137   virtual void work(int i) {
2138     CMTask* marking_task = _cm->task(i);
2139     G1CMIsAliveClosure g1_is_alive(_g1h);
2140     G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap);
2141     G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2142 
2143     _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2144   }
2145 };
2146 
2147 void G1RefProcTaskExecutor::execute(ProcessTask& proc_task) {
2148   assert(_workers != NULL, "Need parallel worker threads.");
2149 
2150   G1RefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap);
2151 
2152   // We need to reset the phase for each task execution so that
2153   // the termination protocol of CMTask::do_marking_step works.
2154   _cm->set_phase(_active_workers, false /* concurrent */);
2155   _g1h->set_par_threads(_active_workers);
2156   _workers->run_task(&proc_task_proxy);
2157   _g1h->set_par_threads(0);
2158 }
2159 
2160 class G1RefEnqueueTaskProxy: public AbstractGangTask {
2161   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2162   EnqueueTask& _enq_task;
2163 
2164 public:
2165   G1RefEnqueueTaskProxy(EnqueueTask& enq_task) :
2166     AbstractGangTask("Enqueue reference objects in parallel"),
2167     _enq_task(enq_task)
2168   { }
2169 
2170   virtual void work(int i) {
2171     _enq_task.work(i);
2172   }
2173 };
2174 
2175 void G1RefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2176   assert(_workers != NULL, "Need parallel worker threads.");
2177 
2178   G1RefEnqueueTaskProxy enq_task_proxy(enq_task);
2179 
2180   _g1h->set_par_threads(_active_workers);
2181   _workers->run_task(&enq_task_proxy);
2182   _g1h->set_par_threads(0);
2183 }
2184 
2185 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2186   ResourceMark rm;
2187   HandleMark   hm;
2188   G1CollectedHeap* g1h   = G1CollectedHeap::heap();
2189   ReferenceProcessor* rp = g1h->ref_processor();
2190 
2191   // See the comment in G1CollectedHeap::ref_processing_init()
2192   // about how reference processing currently works in G1.
2193 
2194   // Process weak references.
2195   rp->setup_policy(clear_all_soft_refs);
2196   assert(_markStack.isEmpty(), "mark stack should be empty");
2197 
2198   G1CMIsAliveClosure   g1_is_alive(g1h);
2199   G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
2200   G1CMDrainMarkingStackClosure
2201     g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
2202   // We use the work gang from the G1CollectedHeap and we utilize all
2203   // the worker threads.
2204   int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1;
2205   active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
2206 
2207   G1RefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(),
2208                                           g1h->workers(), active_workers);
2209 
2210 
2211   if (rp->processing_is_mt()) {
2212     // Set the degree of MT here.  If the discovery is done MT, there
2213     // may have been a different number of threads doing the discovery
2214     // and a different number of discovered lists may have Ref objects.
2215     // That is OK as long as the Reference lists are balanced (see
2216     // balance_all_queues() and balance_queues()).
2217     rp->set_active_mt_degree(active_workers);
2218 
2219     rp->process_discovered_references(&g1_is_alive,
2220                                       &g1_keep_alive,
2221                                       &g1_drain_mark_stack,
2222                                       &par_task_executor);
2223 
2224     // The work routines of the parallel keep_alive and drain_marking_stack
2225     // will set the has_overflown flag if we overflow the global marking
2226     // stack.
2227   } else {
2228     rp->process_discovered_references(&g1_is_alive,
2229                                       &g1_keep_alive,
2230                                       &g1_drain_mark_stack,
2231                                       NULL);
2232 
2233   }
2234 
2235   assert(_markStack.overflow() || _markStack.isEmpty(),
2236       "mark stack should be empty (unless it overflowed)");
2237   if (_markStack.overflow()) {
2238     // Should have been done already when we tried to push an
2239     // entry on to the global mark stack. But let's do it again.
2240     set_has_overflown();
2241   }
2242 
2243   if (rp->processing_is_mt()) {
2244     assert(rp->num_q() == active_workers, "why not");
2245     rp->enqueue_discovered_references(&par_task_executor);
2246   } else {
2247     rp->enqueue_discovered_references();
2248   }
2249 
2250   rp->verify_no_references_recorded();
2251   assert(!rp->discovery_enabled(), "should have been disabled");
2252 
2253   // Now clean up stale oops in StringTable
2254   StringTable::unlink(&g1_is_alive);
2255   // Clean up unreferenced symbols in symbol table.
2256   SymbolTable::unlink();
2257 }
2258 
2259 void ConcurrentMark::swapMarkBitMaps() {
2260   CMBitMapRO* temp = _prevMarkBitMap;
2261   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2262   _nextMarkBitMap  = (CMBitMap*)  temp;
2263 }
2264 
2265 class CMRemarkTask: public AbstractGangTask {
2266 private:
2267   ConcurrentMark *_cm;
2268 
2269 public:
2270   void work(int worker_i) {
2271     // Since all available tasks are actually started, we should
2272     // only proceed if we're supposed to be active.
2273     if ((size_t)worker_i < _cm->active_tasks()) {
2274       CMTask* task = _cm->task(worker_i);
2275       task->record_start_time();
2276       do {
2277         task->do_marking_step(1000000000.0 /* something very large */,
2278                               true /* do_stealing    */,
2279                               true /* do_termination */);
2280       } while (task->has_aborted() && !_cm->has_overflown());
2281       // If we overflow, then we do not want to restart. We instead
2282       // want to abort remark and do concurrent marking again.
2283       task->record_end_time();
2284     }
2285   }
2286 
2287   CMRemarkTask(ConcurrentMark* cm) :
2288     AbstractGangTask("Par Remark"), _cm(cm) { }
2289 };
2290 
2291 void ConcurrentMark::checkpointRootsFinalWork() {
2292   ResourceMark rm;
2293   HandleMark   hm;
2294   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2295 
2296   g1h->ensure_parsability(false);
2297 
2298   if (G1CollectedHeap::use_parallel_gc_threads()) {
2299     G1CollectedHeap::StrongRootsScope srs(g1h);
2300     // this is remark, so we'll use up all available threads
2301     int active_workers = ParallelGCThreads;
2302     set_phase(active_workers, false /* concurrent */);
2303 
2304     CMRemarkTask remarkTask(this);
2305     // We will start all available threads, even if we decide that the
2306     // active_workers will be fewer. The extra ones will just bail out
2307     // immediately.
2308     int n_workers = g1h->workers()->total_workers();
2309     g1h->set_par_threads(n_workers);
2310     g1h->workers()->run_task(&remarkTask);
2311     g1h->set_par_threads(0);
2312   } else {
2313     G1CollectedHeap::StrongRootsScope srs(g1h);
2314     // this is remark, so we'll use up all available threads
2315     int active_workers = 1;
2316     set_phase(active_workers, false /* concurrent */);
2317 
2318     CMRemarkTask remarkTask(this);
2319     // In the serial case we simply run the remark task on the current thread.
2322     remarkTask.work(0);
2323   }
2324   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2325   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2326 
2327   print_stats();
2328 
2329 #if VERIFY_OBJS_PROCESSED
2330   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2331     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2332                            _scan_obj_cl.objs_processed,
2333                            ThreadLocalObjQueue::objs_enqueued);
2334     guarantee(_scan_obj_cl.objs_processed ==
2335               ThreadLocalObjQueue::objs_enqueued,
2336               "Different number of objs processed and enqueued.");
2337   }
2338 #endif
2339 }
2340 
2341 #ifndef PRODUCT
2342 
2343 class PrintReachableOopClosure: public OopClosure {
2344 private:
2345   G1CollectedHeap* _g1h;
2346   CMBitMapRO*      _bitmap;
2347   outputStream*    _out;
2348   bool             _use_prev_marking;
2349   bool             _all;
2350 
2351 public:
2352   PrintReachableOopClosure(CMBitMapRO*   bitmap,
2353                            outputStream* out,
2354                            bool          use_prev_marking,
2355                            bool          all) :
2356     _g1h(G1CollectedHeap::heap()),
2357     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
2358 
2359   void do_oop(narrowOop* p) { do_oop_work(p); }
2360   void do_oop(      oop* p) { do_oop_work(p); }
2361 
2362   template <class T> void do_oop_work(T* p) {
2363     oop         obj = oopDesc::load_decode_heap_oop(p);
2364     const char* str = NULL;
2365     const char* str2 = "";
2366 
2367     if (obj == NULL) {
2368       str = "";
2369     } else if (!_g1h->is_in_g1_reserved(obj)) {
2370       str = " O";
2371     } else {
2372       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2373       guarantee(hr != NULL, "invariant");
2374       bool over_tams = false;
2375       if (_use_prev_marking) {
2376         over_tams = hr->obj_allocated_since_prev_marking(obj);
2377       } else {
2378         over_tams = hr->obj_allocated_since_next_marking(obj);
2379       }
2380       bool marked = _bitmap->isMarked((HeapWord*) obj);
2381 
2382       if (over_tams) {
2383         str = " >";
2384         if (marked) {
2385           str2 = " AND MARKED";
2386         }
2387       } else if (marked) {
2388         str = " M";
2389       } else {
2390         str = " NOT";
2391       }
2392     }
2393 
2394     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2395                    p, (void*) obj, str, str2);
2396   }
2397 };
2398 
2399 class PrintReachableObjectClosure : public ObjectClosure {
2400 private:
2401   CMBitMapRO*   _bitmap;
2402   outputStream* _out;
2403   bool          _use_prev_marking;
2404   bool          _all;
2405   HeapRegion*   _hr;
2406 
2407 public:
2408   PrintReachableObjectClosure(CMBitMapRO*   bitmap,
2409                               outputStream* out,
2410                               bool          use_prev_marking,
2411                               bool          all,
2412                               HeapRegion*   hr) :
2413     _bitmap(bitmap), _out(out),
2414     _use_prev_marking(use_prev_marking), _all(all), _hr(hr) { }
2415 
2416   void do_object(oop o) {
2417     bool over_tams;
2418     if (_use_prev_marking) {
2419       over_tams = _hr->obj_allocated_since_prev_marking(o);
2420     } else {
2421       over_tams = _hr->obj_allocated_since_next_marking(o);
2422     }
2423     bool marked = _bitmap->isMarked((HeapWord*) o);
2424     bool print_it = _all || over_tams || marked;
2425 
2426     if (print_it) {
2427       _out->print_cr(" "PTR_FORMAT"%s",
2428                      o, (over_tams) ? " >" : (marked) ? " M" : "");
2429       PrintReachableOopClosure oopCl(_bitmap, _out, _use_prev_marking, _all);
2430       o->oop_iterate(&oopCl);
2431     }
2432   }
2433 };
2434 
2435 class PrintReachableRegionClosure : public HeapRegionClosure {
2436 private:
2437   CMBitMapRO*   _bitmap;
2438   outputStream* _out;
2439   bool          _use_prev_marking;
2440   bool          _all;
2441 
2442 public:
2443   bool doHeapRegion(HeapRegion* hr) {
2444     HeapWord* b = hr->bottom();
2445     HeapWord* e = hr->end();
2446     HeapWord* t = hr->top();
2447     HeapWord* p = NULL;
2448     if (_use_prev_marking) {
2449       p = hr->prev_top_at_mark_start();
2450     } else {
2451       p = hr->next_top_at_mark_start();
2452     }
2453     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2454                    "TAMS: "PTR_FORMAT, b, e, t, p);
2455     _out->cr();
2456 
2457     HeapWord* from = b;
2458     HeapWord* to   = t;
2459 
2460     if (to > from) {
2461       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2462       _out->cr();
2463       PrintReachableObjectClosure ocl(_bitmap, _out,
2464                                       _use_prev_marking, _all, hr);
2465       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2466       _out->cr();
2467     }
2468 
2469     return false;
2470   }
2471 
2472   PrintReachableRegionClosure(CMBitMapRO*   bitmap,
2473                               outputStream* out,
2474                               bool          use_prev_marking,
2475                               bool          all) :
2476     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
2477 };
2478 
2479 void ConcurrentMark::print_reachable(const char* str,
2480                                      bool use_prev_marking,
2481                                      bool all) {
2482   gclog_or_tty->cr();
2483   gclog_or_tty->print_cr("== Doing heap dump... ");
2484 
2485   if (G1PrintReachableBaseFile == NULL) {
2486     gclog_or_tty->print_cr("  #### error: no base file defined");
2487     return;
2488   }
2489 
2490   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2491       (JVM_MAXPATHLEN - 1)) {
2492     gclog_or_tty->print_cr("  #### error: file name too long");
2493     return;
2494   }
2495 
2496   char file_name[JVM_MAXPATHLEN];
2497   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2498   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2499 
2500   fileStream fout(file_name);
2501   if (!fout.is_open()) {
2502     gclog_or_tty->print_cr("  #### error: could not open file");
2503     return;
2504   }
2505 
2506   outputStream* out = &fout;
2507 
2508   CMBitMapRO* bitmap = NULL;
2509   if (use_prev_marking) {
2510     bitmap = _prevMarkBitMap;
2511   } else {
2512     bitmap = _nextMarkBitMap;
2513   }
2514 
2515   out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS");
2516   out->cr();
2517 
2518   out->print_cr("--- ITERATING OVER REGIONS");
2519   out->cr();
2520   PrintReachableRegionClosure rcl(bitmap, out, use_prev_marking, all);
2521   _g1h->heap_region_iterate(&rcl);
2522   out->cr();
2523 
2524   gclog_or_tty->print_cr("  done");
2525   gclog_or_tty->flush();
2526 }
2527 
2528 #endif // PRODUCT
2529 
2530 // This note is for drainAllSATBBuffers and the code in between.
2531 // In the future we could reuse a task to do this work during an
2532 // evacuation pause (since now tasks are not active and can be claimed
2533 // during an evacuation pause). This was a late change to the code and
2534 // is currently not being taken advantage of.
2535 
2536 class CMGlobalObjectClosure : public ObjectClosure {
2537 private:
2538   ConcurrentMark* _cm;
2539 
2540 public:
2541   void do_object(oop obj) {
2542     _cm->deal_with_reference(obj);
2543   }
2544 
2545   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2546 };
2547 
2548 void ConcurrentMark::deal_with_reference(oop obj) {
2549   if (verbose_high())
2550     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2551                            (void*) obj);
2552 
2553 
2554   HeapWord* objAddr = (HeapWord*) obj;
2555   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2556   if (_g1h->is_in_g1_reserved(objAddr)) {
2557     assert(obj != NULL, "is_in_g1_reserved should ensure this");
2558     HeapRegion* hr = _g1h->heap_region_containing(obj);
2559     if (_g1h->is_obj_ill(obj, hr)) {
2560       if (verbose_high())
2561         gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2562                                "marked", (void*) obj);
2563 
2564       // we need to mark it first
2565       if (_nextMarkBitMap->parMark(objAddr)) {
2566         // No OrderAccess:store_load() is needed. It is implicit in the
2567         // CAS done in parMark(objAddr) above
2568         HeapWord* finger = _finger;
2569         if (objAddr < finger) {
2570           if (verbose_high())
2571             gclog_or_tty->print_cr("[global] below the global finger "
2572                                    "("PTR_FORMAT"), pushing it", finger);
2573           if (!mark_stack_push(obj)) {
2574             if (verbose_low())
2575               gclog_or_tty->print_cr("[global] global stack overflow during "
2576                                      "deal_with_reference");
2577           }
2578         }
2579       }
2580     }
2581   }
2582 }
2583 
2584 void ConcurrentMark::drainAllSATBBuffers() {
2585   CMGlobalObjectClosure oc(this);
2586   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2587   satb_mq_set.set_closure(&oc);
2588 
2589   while (satb_mq_set.apply_closure_to_completed_buffer()) {
2590     if (verbose_medium())
2591       gclog_or_tty->print_cr("[global] processed an SATB buffer");
2592   }
2593 
2594   // no need to check whether we should do this, as this is only
2595   // called during an evacuation pause
2596   satb_mq_set.iterate_closure_all_threads();
2597 
2598   satb_mq_set.set_closure(NULL);
2599   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2600 }
2601 
2602 void ConcurrentMark::markPrev(oop p) {
2603   // Note we are overriding the read-only view of the prev map here, via
2604   // the cast.
2605   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2606 }
2607 
2608 void ConcurrentMark::clear(oop p) {
2609   assert(p != NULL && p->is_oop(), "expected an oop");
2610   HeapWord* addr = (HeapWord*)p;
2611   assert(addr >= _nextMarkBitMap->startWord() &&
2612          addr < _nextMarkBitMap->endWord(), "in a region");
2613 
2614   _nextMarkBitMap->clear(addr);
2615 }
2616 
2617 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2618   // Note we are overriding the read-only view of the prev map here, via
2619   // the cast.
2620   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2621   _nextMarkBitMap->clearRange(mr);
2622 }
2623 
2624 HeapRegion*
2625 ConcurrentMark::claim_region(int task_num) {
2626   // "checkpoint" the finger
2627   HeapWord* finger = _finger;
2628 
2629   // _heap_end will not change underneath our feet; it only changes at
2630   // yield points.
2631   while (finger < _heap_end) {
2632     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2633 
2634     // is the gap between reading the finger and doing the CAS too long?
2635 
2636     HeapRegion* curr_region   = _g1h->heap_region_containing(finger);
2637     HeapWord*   bottom        = curr_region->bottom();
2638     HeapWord*   end           = curr_region->end();
2639     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2640 
2641     if (verbose_low())
2642       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2643                              "["PTR_FORMAT", "PTR_FORMAT"), "
2644                              "limit = "PTR_FORMAT,
2645                              task_num, curr_region, bottom, end, limit);
2646 
2647     HeapWord* res =
2648       (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2649     if (res == finger) {
2650       // we succeeded
2651 
2652       // notice that _finger == end cannot be guaranteed here since
2653       // someone else might have moved the finger even further
2654       assert(_finger >= end, "the finger should have moved forward");
2655 
2656       if (verbose_low())
2657         gclog_or_tty->print_cr("[%d] we were successful with region = "
2658                                PTR_FORMAT, task_num, curr_region);
2659 
2660       if (limit > bottom) {
2661         if (verbose_low())
2662           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2663                                  "returning it ", task_num, curr_region);
2664         return curr_region;
2665       } else {
2666         assert(limit == bottom,
2667                "the region limit should be at bottom");
2668         if (verbose_low())
2669           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2670                                  "returning NULL", task_num, curr_region);
2671         // we return NULL and the caller should try calling
2672         // claim_region() again.
2673         return NULL;
2674       }
2675     } else {
2676       assert(_finger > finger, "the finger should have moved forward");
2677       if (verbose_low())
2678         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2679                                "global finger = "PTR_FORMAT", "
2680                                "our finger = "PTR_FORMAT,
2681                                task_num, _finger, finger);
2682 
2683       // read it again
2684       finger = _finger;
2685     }
2686   }
2687 
2688   return NULL;
2689 }
2690 
2691 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
2692   bool result = false;
2693   for (int i = 0; i < (int)_max_task_num; ++i) {
2694     CMTask* the_task = _tasks[i];
2695     MemRegion mr = the_task->aborted_region();
2696     if (mr.start() != NULL) {
2697       assert(mr.end() != NULL, "invariant");
2698       assert(mr.word_size() > 0, "invariant");
2699       HeapRegion* hr = _g1h->heap_region_containing(mr.start());
2700       assert(hr != NULL, "invariant");
2701       if (hr->in_collection_set()) {
2702         // The region points into the collection set
2703         the_task->set_aborted_region(MemRegion());
2704         result = true;
2705       }
2706     }
2707   }
2708   return result;
2709 }
2710 
2711 bool ConcurrentMark::has_aborted_regions() {
2712   for (int i = 0; i < (int)_max_task_num; ++i) {
2713     CMTask* the_task = _tasks[i];
2714     MemRegion mr = the_task->aborted_region();
2715     if (mr.start() != NULL) {
2716       assert(mr.end() != NULL, "invariant");
2717       assert(mr.word_size() > 0, "invariant");
2718       return true;
2719     }
2720   }
2721   return false;
2722 }
2723 
2724 void ConcurrentMark::oops_do(OopClosure* cl) {
2725   if (_markStack.size() > 0 && verbose_low())
2726     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2727                            "size = %d", _markStack.size());
2728   // we first iterate over the contents of the mark stack...
2729   _markStack.oops_do(cl);
2730 
2731   for (int i = 0; i < (int)_max_task_num; ++i) {
2732     OopTaskQueue* queue = _task_queues->queue((int)i);
2733 
2734     if (queue->size() > 0 && verbose_low())
2735       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2736                              "size = %d", i, queue->size());
2737 
2738     // ...then over the contents of all the task queues.
2739     queue->oops_do(cl);
2740   }
2741 
2742   // Invalidate any entries in the region stack that point into the
2743   // collection set.
2744   if (_regionStack.invalidate_entries_into_cset()) {
2745     // otherwise, any gray objects copied during the evacuation pause
2746     // might not be visited.
2747     assert(_should_gray_objects, "invariant");
2748   }
2749 
2750   // Invalidate any aborted regions, recorded in the individual CM
2751   // tasks, that point into the collection set.
2752   if (invalidate_aborted_regions_in_cset()) {
2753     // otherwise, any gray objects copied during the evacuation pause
2754     // might not be visited.
2755     assert(_should_gray_objects, "invariant");
2756   }
2757 
2758 }
2759 
2760 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2761   _markStack.setEmpty();
2762   _markStack.clear_overflow();
2763   _regionStack.setEmpty();
2764   _regionStack.clear_overflow();
2765   if (clear_overflow) {
2766     clear_has_overflown();
2767   } else {
2768     assert(has_overflown(), "pre-condition");
2769   }
2770   _finger = _heap_start;
2771 
2772   for (int i = 0; i < (int)_max_task_num; ++i) {
2773     OopTaskQueue* queue = _task_queues->queue(i);
2774     queue->set_empty();
2775     // Clear any partial regions from the CMTasks
2776     _tasks[i]->clear_aborted_region();
2777   }
2778 }
2779 
2780 void ConcurrentMark::print_stats() {
2781   if (verbose_stats()) {
2782     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2783     for (size_t i = 0; i < _active_tasks; ++i) {
2784       _tasks[i]->print_stats();
2785       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2786     }
2787   }
2788 }
2789 
2790 class CSMarkOopClosure: public OopClosure {
2791   friend class CSMarkBitMapClosure;
2792 
2793   G1CollectedHeap* _g1h;
2794   CMBitMap*        _bm;
2795   ConcurrentMark*  _cm;
2796   oop*             _ms;
2797   jint*            _array_ind_stack;
2798   int              _ms_size;
2799   int              _ms_ind;
2800   int              _array_increment;
2801 
2802   bool push(oop obj, int arr_ind = 0) {
2803     if (_ms_ind == _ms_size) {
2804       gclog_or_tty->print_cr("Mark stack is full.");
2805       return false;
2806     }
2807     _ms[_ms_ind] = obj;
2808     if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
2809     _ms_ind++;
2810     return true;
2811   }
2812 
2813   oop pop() {
2814     if (_ms_ind == 0) return NULL;
2815     else {
2816       _ms_ind--;
2817       return _ms[_ms_ind];
2818     }
2819   }
2820 
2821   template <class T> bool drain() {
2822     while (_ms_ind > 0) {
2823       oop obj = pop();
2824       assert(obj != NULL, "Since index was non-zero.");
2825       if (obj->is_objArray()) {
2826         jint arr_ind = _array_ind_stack[_ms_ind];
2827         objArrayOop aobj = objArrayOop(obj);
2828         jint len = aobj->length();
2829         jint next_arr_ind = arr_ind + _array_increment;
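             // Chunking sketch (illustrative numbers): with _array_increment ==
             // 16 and an array of length 40, the elements are scanned in chunks
             // [0,16), [16,32) and [32,40), with the array re-pushed between
             // chunks so that large arrays are processed incrementally rather
             // than all at once.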
2830         if (next_arr_ind < len) {
2831           push(obj, next_arr_ind);
2832         }
2833         // Now process this portion of this one.
2834         int lim = MIN2(next_arr_ind, len);
2835         for (int j = arr_ind; j < lim; j++) {
2836           do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2837         }
2838 
2839       } else {
2840         obj->oop_iterate(this);
2841       }
2842       if (abort()) return false;
2843     }
2844     return true;
2845   }
2846 
2847 public:
2848   CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2849     _g1h(G1CollectedHeap::heap()),
2850     _cm(cm),
2851     _bm(cm->nextMarkBitMap()),
2852     _ms_size(ms_size), _ms_ind(0),
2853     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2854     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2855     _array_increment(MAX2(ms_size/8, 16))
2856   {}
2857 
2858   ~CSMarkOopClosure() {
2859     FREE_C_HEAP_ARRAY(oop, _ms);
2860     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2861   }
2862 
2863   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2864   virtual void do_oop(      oop* p) { do_oop_work(p); }
2865 
2866   template <class T> void do_oop_work(T* p) {
2867     T heap_oop = oopDesc::load_heap_oop(p);
2868     if (oopDesc::is_null(heap_oop)) return;
2869     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2870     if (obj->is_forwarded()) {
2871       // If the object has already been forwarded, we have to make sure
2872       // that it's marked.  So follow the forwarding pointer.  Note that
2873       // this does the right thing for self-forwarding pointers in the
2874       // evacuation failure case.
2875       obj = obj->forwardee();
2876     }
2877     HeapRegion* hr = _g1h->heap_region_containing(obj);
2878     if (hr != NULL) {
2879       if (hr->in_collection_set()) {
2880         if (_g1h->is_obj_ill(obj)) {
2881           _bm->mark((HeapWord*)obj);
2882           if (!push(obj)) {
2883             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2884             set_abort();
2885           }
2886         }
2887       } else {
2888         // Outside the collection set; we need to gray it
2889         _cm->deal_with_reference(obj);
2890       }
2891     }
2892   }
2893 };
2894 
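// Visits each marked bit in a range of the next-mark bitmap; unforwarded
// objects are pushed on the CSMarkOopClosure's mark stack and the stack is
// then drained. Returns false from do_bit() if the mark stack overflows.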
2895 class CSMarkBitMapClosure: public BitMapClosure {
2896   G1CollectedHeap* _g1h;
2897   CMBitMap*        _bitMap;
2898   ConcurrentMark*  _cm;
2899   CSMarkOopClosure _oop_cl;
2900 public:
2901   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2902     _g1h(G1CollectedHeap::heap()),
2903     _bitMap(cm->nextMarkBitMap()),
2904     _oop_cl(cm, ms_size)
2905   {}
2906 
2907   ~CSMarkBitMapClosure() {}
2908 
2909   bool do_bit(size_t offset) {
2910     // convert offset into a HeapWord*
2911     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
    assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
2913            "address out of range");
2914     assert(_bitMap->isMarked(addr), "tautology");
2915     oop obj = oop(addr);
2916     if (!obj->is_forwarded()) {
2917       if (!_oop_cl.push(obj)) return false;
2918       if (UseCompressedOops) {
2919         if (!_oop_cl.drain<narrowOop>()) return false;
2920       } else {
2921         if (!_oop_cl.drain<oop>()) return false;
2922       }
2923     }
2924     // Otherwise...
2925     return true;
2926   }
2927 };
2928 
2929 
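// Iterates over the next-mark bitmap of each collection set region, up to
// its NTAMS, using a CSMarkBitMapClosure (regions that failed evacuation are
// skipped). Records whether the whole collection set was processed without
// the fixed-size mark stack overflowing.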
2930 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2931   CMBitMap* _bm;
2932   CSMarkBitMapClosure _bit_cl;
2933   enum SomePrivateConstants {
2934     MSSize = 1000
2935   };
2936   bool _completed;
2937 public:
2938   CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2939     _bm(cm->nextMarkBitMap()),
2940     _bit_cl(cm, MSSize),
2941     _completed(true)
2942   {}
2943 
2944   ~CompleteMarkingInCSHRClosure() {}
2945 
2946   bool doHeapRegion(HeapRegion* r) {
2947     if (!r->evacuation_failed()) {
2948       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2949       if (!mr.is_empty()) {
2950         if (!_bm->iterate(&_bit_cl, mr)) {
2951           _completed = false;
2952           return true;
2953         }
2954       }
2955     }
2956     return false;
2957   }
2958 
2959   bool completed() { return _completed; }
2960 };
2961 
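// Clears the marks in the used portion of every region that is non-empty
// and did not fail evacuation.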
2962 class ClearMarksInHRClosure: public HeapRegionClosure {
2963   CMBitMap* _bm;
2964 public:
2965   ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2966 
2967   bool doHeapRegion(HeapRegion* r) {
    if (!r->used_region().is_empty() && !r->evacuation_failed()) {
      _bm->clearRange(r->used_region());
2971     }
2972     return false;
2973   }
2974 };
2975 
2976 void ConcurrentMark::complete_marking_in_collection_set() {
2977   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
2978 
2979   if (!g1h->mark_in_progress()) {
2980     g1h->g1_policy()->record_mark_closure_time(0.0);
2981     return;
2982   }
2983 
2984   int i = 1;
2985   double start = os::elapsedTime();
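  // The closure uses a fixed-size mark stack and aborts the iteration when
  // it fills up, so keep re-scanning the collection set until a pass
  // completes without aborting.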
2986   while (true) {
2987     i++;
2988     CompleteMarkingInCSHRClosure cmplt(this);
2989     g1h->collection_set_iterate(&cmplt);
2990     if (cmplt.completed()) break;
2991   }
2992   double end_time = os::elapsedTime();
2993   double elapsed_time_ms = (end_time - start) * 1000.0;
2994   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2995 
2996   ClearMarksInHRClosure clr(nextMarkBitMap());
2997   g1h->collection_set_iterate(&clr);
2998 }
2999 
// The next two methods deal with the following optimisation. Some
// objects are gray by being marked and located above the finger. If
// they are copied, during an evacuation pause, below the finger then
// they need to be pushed on the stack. The observation is that, if
// there are no regions in the collection set located above the
// finger, then the above cannot happen, hence we do not need to
// explicitly gray any objects when copying them to below the
// finger. The global stack will be scanned to ensure that, if it
// points to objects being copied, their location is updated. There
// is a tricky situation with gray objects on the region stack that
// are being copied, however. See the comment in newCSet().
3012 
3013 void ConcurrentMark::newCSet() {
3014   if (!concurrent_marking_in_progress())
3015     // nothing to do if marking is not in progress
3016     return;
3017 
3018   // find what the lowest finger is among the global and local fingers
3019   _min_finger = _finger;
3020   for (int i = 0; i < (int)_max_task_num; ++i) {
3021     CMTask* task = _tasks[i];
3022     HeapWord* task_finger = task->finger();
3023     if (task_finger != NULL && task_finger < _min_finger)
3024       _min_finger = task_finger;
3025   }
3026 
3027   _should_gray_objects = false;
3028 
  // This fixes a very subtle and frustrating bug. It might be the case
  // that, during an evacuation pause, heap regions that contain
  // objects that are gray (by being in regions contained in the
  // region stack) are included in the collection set. Since such gray
  // objects will be moved, and because it's not easy to redirect
  // region stack entries to point to a new location (because objects
  // in one region might be scattered to multiple regions after they
  // are copied), one option is to ensure that all marked objects
  // copied during a pause are pushed on the stack. Notice, however,
  // that this problem can only happen when the region stack is not
  // empty during an evacuation pause. So, we make the fix a bit less
  // conservative and ensure that objects are pushed on the stack,
  // irrespective of whether all collection set regions are below the
  // finger, if the region stack is not empty. This is expected to be
  // a rare case, so I don't think it's necessary to be smarter about it.
3044   if (!region_stack_empty() || has_aborted_regions())
3045     _should_gray_objects = true;
3046 }
3047 
3048 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
3049   if (!concurrent_marking_in_progress())
3050     return;
3051 
3052   HeapWord* region_end = hr->end();
3053   if (region_end > _min_finger)
3054     _should_gray_objects = true;
3055 }
3056 
3057 // Resets the region fields of active CMTasks whose values point
3058 // into the collection set.
3059 void ConcurrentMark::reset_active_task_region_fields_in_cset() {
3060   assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
3061   assert(parallel_marking_threads() <= _max_task_num, "sanity");
3062 
3063   for (int i = 0; i < (int)parallel_marking_threads(); i += 1) {
3064     CMTask* task = _tasks[i];
3065     HeapWord* task_finger = task->finger();
3066     if (task_finger != NULL) {
3067       assert(_g1h->is_in_g1_reserved(task_finger), "not in heap");
3068       HeapRegion* finger_region = _g1h->heap_region_containing(task_finger);
3069       if (finger_region->in_collection_set()) {
3070         // The task's current region is in the collection set.
3071         // This region will be evacuated in the current GC and
3072         // the region fields in the task will be stale.
3073         task->giveup_current_region();
3074       }
3075     }
3076   }
3077 }
3078 
3079 // abandon current marking iteration due to a Full GC
3080 void ConcurrentMark::abort() {
3081   // Clear all marks to force marking thread to do nothing
3082   _nextMarkBitMap->clearAll();
3083   // Empty mark stack
3084   clear_marking_state();
3085   for (int i = 0; i < (int)_max_task_num; ++i) {
3086     _tasks[i]->clear_region_fields();
3087   }
3088   _has_aborted = true;
3089 
3090   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3091   satb_mq_set.abandon_partial_marking();
3092   // This can be called either during or outside marking, we'll read
3093   // the expected_active value from the SATB queue set.
3094   satb_mq_set.set_active_all_threads(
3095                                  false, /* new active value */
3096                                  satb_mq_set.is_active() /* expected_active */);
3097 }
3098 
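// Prints a one-line summary (count, total and average time) for the given
// phase timing sequence, plus standard deviation and maximum when there is
// at least one sample.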
3099 static void print_ms_time_info(const char* prefix, const char* name,
3100                                NumberSeq& ns) {
3101   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3102                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3103   if (ns.num() > 0) {
3104     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3105                            prefix, ns.sd(), ns.maximum());
3106   }
3107 }
3108 
3109 void ConcurrentMark::print_summary_info() {
3110   gclog_or_tty->print_cr(" Concurrent marking:");
3111   print_ms_time_info("  ", "init marks", _init_times);
3112   print_ms_time_info("  ", "remarks", _remark_times);
3113   {
3114     print_ms_time_info("     ", "final marks", _remark_mark_times);
3115     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3116 
3117   }
3118   print_ms_time_info("  ", "cleanups", _cleanup_times);
3119   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3120                          _total_counting_time,
3121                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3122                           (double)_cleanup_times.num()
3123                          : 0.0));
3124   if (G1ScrubRemSets) {
3125     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3126                            _total_rs_scrub_time,
3127                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3128                             (double)_cleanup_times.num()
3129                            : 0.0));
3130   }
3131   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3132                          (_init_times.sum() + _remark_times.sum() +
3133                           _cleanup_times.sum())/1000.0);
3134   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3135                 "(%8.2f s marking, %8.2f s counting).",
3136                 cmThread()->vtime_accum(),
3137                 cmThread()->vtime_mark_accum(),
3138                 cmThread()->vtime_count_accum());
3139 }
3140 
3141 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3142   _parallel_workers->print_worker_threads_on(st);
3143 }
3144 
// Closures
// XXX: there seems to be a lot of code duplication here;
// should refactor and consolidate the shared code.

// We take a break if someone is trying to stop the world.
3153 bool ConcurrentMark::do_yield_check(int worker_i) {
3154   if (should_yield()) {
3155     if (worker_i == 0)
3156       _g1h->g1_policy()->record_concurrent_pause();
3157     cmThread()->yield();
3158     if (worker_i == 0)
3159       _g1h->g1_policy()->record_concurrent_pause_end();
3160     return true;
3161   } else {
3162     return false;
3163   }
3164 }
3165 
3166 bool ConcurrentMark::should_yield() {
3167   return cmThread()->should_yield();
3168 }
3169 
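// Returns whether the card spanning address p is marked in the card bitmap.
// The card index is the byte offset of p from the start of the reserved
// heap, shifted right by CardTableModRefBS::card_shift.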
3170 bool ConcurrentMark::containing_card_is_marked(void* p) {
3171   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3172   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3173 }
3174 
3175 bool ConcurrentMark::containing_cards_are_marked(void* start,
3176                                                  void* last) {
3177   return
3178     containing_card_is_marked(start) &&
3179     containing_card_is_marked(last);
3180 }
3181 
3182 #ifndef PRODUCT
3183 // for debugging purposes
3184 void ConcurrentMark::print_finger() {
3185   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3186                          _heap_start, _heap_end, _finger);
3187   for (int i = 0; i < (int) _max_task_num; ++i) {
3188     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
3189   }
3190   gclog_or_tty->print_cr("");
3191 }
3192 #endif
3193 
3194 // Closure for iteration over bitmaps
3195 class CMBitMapClosure : public BitMapClosure {
3196 private:
3197   // the bitmap that is being iterated over
3198   CMBitMap*                   _nextMarkBitMap;
3199   ConcurrentMark*             _cm;
3200   CMTask*                     _task;
3201   // true if we're scanning a heap region claimed by the task (so that
3202   // we move the finger along), false if we're not, i.e. currently when
3203   // scanning a heap region popped from the region stack (so that we
3204   // do not move the task finger along; it'd be a mistake if we did so).
3205   bool                        _scanning_heap_region;
3206 
3207 public:
3208   CMBitMapClosure(CMTask *task,
3209                   ConcurrentMark* cm,
3210                   CMBitMap* nextMarkBitMap)
3211     :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3212 
3213   void set_scanning_heap_region(bool scanning_heap_region) {
3214     _scanning_heap_region = scanning_heap_region;
3215   }
3216 
3217   bool do_bit(size_t offset) {
3218     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3219     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3220     assert( addr < _cm->finger(), "invariant");
3221 
3222     if (_scanning_heap_region) {
3223       statsOnly( _task->increase_objs_found_on_bitmap() );
3224       assert(addr >= _task->finger(), "invariant");
3225       // We move that task's local finger along.
3226       _task->move_finger_to(addr);
3227     } else {
3228       // We move the task's region finger along.
3229       _task->move_region_finger_to(addr);
3230     }
3231 
3232     _task->scan_object(oop(addr));
3233     // we only partially drain the local queue and global stack
3234     _task->drain_local_queue(true);
3235     _task->drain_global_stack(true);
3236 
3237     // if the has_aborted flag has been raised, we need to bail out of
3238     // the iteration
3239     return !_task->has_aborted();
3240   }
3241 };
3242 
3243 // Closure for iterating over objects, currently only used for
3244 // processing SATB buffers.
3245 class CMObjectClosure : public ObjectClosure {
3246 private:
3247   CMTask* _task;
3248 
3249 public:
3250   void do_object(oop obj) {
3251     _task->deal_with_reference(obj);
3252   }
3253 
3254   CMObjectClosure(CMTask* task) : _task(task) { }
3255 };
3256 
3257 // Closure for iterating over object fields
3258 class CMOopClosure : public OopClosure {
3259 private:
3260   G1CollectedHeap*   _g1h;
3261   ConcurrentMark*    _cm;
3262   CMTask*            _task;
3263 
3264 public:
3265   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
3266   virtual void do_oop(      oop* p) { do_oop_work(p); }
3267 
3268   template <class T> void do_oop_work(T* p) {
3269     assert( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
3270     assert(!_g1h->is_on_master_free_list(
3271                     _g1h->heap_region_containing((HeapWord*) p)), "invariant");
3272 
3273     oop obj = oopDesc::load_decode_heap_oop(p);
3274     if (_cm->verbose_high())
3275       gclog_or_tty->print_cr("[%d] we're looking at location "
3276                              "*"PTR_FORMAT" = "PTR_FORMAT,
3277                              _task->task_id(), p, (void*) obj);
3278     _task->deal_with_reference(obj);
3279   }
3280 
3281   CMOopClosure(G1CollectedHeap* g1h,
3282                ConcurrentMark* cm,
3283                CMTask* task)
3284     : _g1h(g1h), _cm(cm), _task(task)
3285   {
3286     assert(_ref_processor == NULL, "should be initialized to NULL");
3287 
3288     if (G1UseConcMarkReferenceProcessing) {
3289       _ref_processor = g1h->ref_processor();
3290       assert(_ref_processor != NULL, "should not be NULL");
3291     }
3292   }
3293 };
3294 
3295 void CMTask::setup_for_region(HeapRegion* hr) {
3296   // Separated the asserts so that we know which one fires.
3297   assert(hr != NULL,
3298         "claim_region() should have filtered out continues humongous regions");
3299   assert(!hr->continuesHumongous(),
3300         "claim_region() should have filtered out continues humongous regions");
3301 
3302   if (_cm->verbose_low())
3303     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3304                            _task_id, hr);
3305 
3306   _curr_region  = hr;
3307   _finger       = hr->bottom();
3308   update_region_limit();
3309 }
3310 
3311 void CMTask::update_region_limit() {
3312   HeapRegion* hr            = _curr_region;
3313   HeapWord* bottom          = hr->bottom();
3314   HeapWord* limit           = hr->next_top_at_mark_start();
3315 
3316   if (limit == bottom) {
3317     if (_cm->verbose_low())
3318       gclog_or_tty->print_cr("[%d] found an empty region "
3319                              "["PTR_FORMAT", "PTR_FORMAT")",
3320                              _task_id, bottom, limit);
3321     // The region was collected underneath our feet.
3322     // We set the finger to bottom to ensure that the bitmap
3323     // iteration that will follow this will not do anything.
3324     // (this is not a condition that holds when we set the region up,
3325     // as the region is not supposed to be empty in the first place)
3326     _finger = bottom;
3327   } else if (limit >= _region_limit) {
3328     assert(limit >= _finger, "peace of mind");
3329   } else {
3330     assert(limit < _region_limit, "only way to get here");
3331     // This can happen under some pretty unusual circumstances.  An
3332     // evacuation pause empties the region underneath our feet (NTAMS
3333     // at bottom). We then do some allocation in the region (NTAMS
3334     // stays at bottom), followed by the region being used as a GC
3335     // alloc region (NTAMS will move to top() and the objects
3336     // originally below it will be grayed). All objects now marked in
3337     // the region are explicitly grayed, if below the global finger,
3338     // and we do not need in fact to scan anything else. So, we simply
3339     // set _finger to be limit to ensure that the bitmap iteration
3340     // doesn't do anything.
3341     _finger = limit;
3342   }
3343 
3344   _region_limit = limit;
3345 }
3346 
3347 void CMTask::giveup_current_region() {
3348   assert(_curr_region != NULL, "invariant");
3349   if (_cm->verbose_low())
3350     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3351                            _task_id, _curr_region);
3352   clear_region_fields();
3353 }
3354 
3355 void CMTask::clear_region_fields() {
3356   // Values for these three fields that indicate that we're not
3357   // holding on to a region.
3358   _curr_region   = NULL;
3359   _finger        = NULL;
3360   _region_limit  = NULL;
3361 
3362   _region_finger = NULL;
3363 }
3364 
3365 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3366   guarantee(nextMarkBitMap != NULL, "invariant");
3367 
3368   if (_cm->verbose_low())
3369     gclog_or_tty->print_cr("[%d] resetting", _task_id);
3370 
3371   _nextMarkBitMap                = nextMarkBitMap;
3372   clear_region_fields();
3373   assert(_aborted_region.is_empty(), "should have been cleared");
3374 
3375   _calls                         = 0;
3376   _elapsed_time_ms               = 0.0;
3377   _termination_time_ms           = 0.0;
3378   _termination_start_time_ms     = 0.0;
3379 
3380 #if _MARKING_STATS_
3381   _local_pushes                  = 0;
3382   _local_pops                    = 0;
3383   _local_max_size                = 0;
3384   _objs_scanned                  = 0;
3385   _global_pushes                 = 0;
3386   _global_pops                   = 0;
3387   _global_max_size               = 0;
3388   _global_transfers_to           = 0;
3389   _global_transfers_from         = 0;
3390   _region_stack_pops             = 0;
3391   _regions_claimed               = 0;
3392   _objs_found_on_bitmap          = 0;
3393   _satb_buffers_processed        = 0;
3394   _steal_attempts                = 0;
3395   _steals                        = 0;
3396   _aborted                       = 0;
3397   _aborted_overflow              = 0;
3398   _aborted_cm_aborted            = 0;
3399   _aborted_yield                 = 0;
3400   _aborted_timed_out             = 0;
3401   _aborted_satb                  = 0;
3402   _aborted_termination           = 0;
3403 #endif // _MARKING_STATS_
3404 }
3405 
3406 bool CMTask::should_exit_termination() {
3407   regular_clock_call();
3408   // This is called when we are in the termination protocol. We should
3409   // quit if, for some reason, this task wants to abort or the global
3410   // stack is not empty (this means that we can get work from it).
3411   return !_cm->mark_stack_empty() || has_aborted();
3412 }
3413 
3414 // This determines whether the method below will check both the local
3415 // and global fingers when determining whether to push on the stack a
3416 // gray object (value 1) or whether it will only check the global one
// (value 0). The tradeoff is that the former will be a bit more
// accurate and possibly push fewer entries on the stack, but it might
// also be a little bit slower.
3420 
3421 #define _CHECK_BOTH_FINGERS_      1
3422 
3423 void CMTask::deal_with_reference(oop obj) {
3424   if (_cm->verbose_high())
3425     gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
3426                            _task_id, (void*) obj);
3427 
3428   ++_refs_reached;
3429 
3430   HeapWord* objAddr = (HeapWord*) obj;
3431   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
3432   if (_g1h->is_in_g1_reserved(objAddr)) {
3433     assert(obj != NULL, "is_in_g1_reserved should ensure this");
3434     HeapRegion* hr =  _g1h->heap_region_containing(obj);
3435     if (_g1h->is_obj_ill(obj, hr)) {
3436       if (_cm->verbose_high())
3437         gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
3438                                _task_id, (void*) obj);
3439 
3440       // we need to mark it first
3441       if (_nextMarkBitMap->parMark(objAddr)) {
        // No OrderAccess::store_load() is needed. It is implicit in the
        // CAS done in parMark(objAddr) above.
3444         HeapWord* global_finger = _cm->finger();
3445 
3446 #if _CHECK_BOTH_FINGERS_
3447         // we will check both the local and global fingers
3448 
3449         if (_finger != NULL && objAddr < _finger) {
3450           if (_cm->verbose_high())
3451             gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
3452                                    "pushing it", _task_id, _finger);
3453           push(obj);
3454         } else if (_curr_region != NULL && objAddr < _region_limit) {
3455           // do nothing
3456         } else if (objAddr < global_finger) {
3457           // Notice that the global finger might be moving forward
3458           // concurrently. This is not a problem. In the worst case, we
3459           // mark the object while it is above the global finger and, by
3460           // the time we read the global finger, it has moved forward
          // past this object. In this case, the object will probably
3462           // be visited when a task is scanning the region and will also
3463           // be pushed on the stack. So, some duplicate work, but no
3464           // correctness problems.
3465 
3466           if (_cm->verbose_high())
3467             gclog_or_tty->print_cr("[%d] below the global finger "
3468                                    "("PTR_FORMAT"), pushing it",
3469                                    _task_id, global_finger);
3470           push(obj);
3471         } else {
3472           // do nothing
3473         }
3474 #else // _CHECK_BOTH_FINGERS_
3475         // we will only check the global finger
3476 
3477         if (objAddr < global_finger) {
3478           // see long comment above
3479 
3480           if (_cm->verbose_high())
3481             gclog_or_tty->print_cr("[%d] below the global finger "
3482                                    "("PTR_FORMAT"), pushing it",
3483                                    _task_id, global_finger);
3484           push(obj);
3485         }
3486 #endif // _CHECK_BOTH_FINGERS_
3487       }
3488     }
3489   }
3490 }
3491 
3492 void CMTask::push(oop obj) {
3493   HeapWord* objAddr = (HeapWord*) obj;
3494   assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
3495   assert(!_g1h->is_on_master_free_list(
3496               _g1h->heap_region_containing((HeapWord*) objAddr)), "invariant");
3497   assert(!_g1h->is_obj_ill(obj), "invariant");
3498   assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
3499 
3500   if (_cm->verbose_high())
3501     gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
3502 
3503   if (!_task_queue->push(obj)) {
3504     // The local task queue looks full. We need to push some entries
3505     // to the global stack.
3506 
3507     if (_cm->verbose_medium())
3508       gclog_or_tty->print_cr("[%d] task queue overflow, "
3509                              "moving entries to the global stack",
3510                              _task_id);
3511     move_entries_to_global_stack();
3512 
3513     // this should succeed since, even if we overflow the global
3514     // stack, we should have definitely removed some entries from the
3515     // local queue. So, there must be space on it.
3516     bool success = _task_queue->push(obj);
3517     assert(success, "invariant");
3518   }
3519 
3520   statsOnly( int tmp_size = _task_queue->size();
3521              if (tmp_size > _local_max_size)
3522                _local_max_size = tmp_size;
3523              ++_local_pushes );
3524 }
3525 
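// Called when either the words-scanned or the refs-reached limit has been
// hit; it simply fires the regular clock so the abort conditions are
// re-checked.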
3526 void CMTask::reached_limit() {
3527   assert(_words_scanned >= _words_scanned_limit ||
3528          _refs_reached >= _refs_reached_limit ,
3529          "shouldn't have been called otherwise");
3530   regular_clock_call();
3531 }
3532 
3533 void CMTask::regular_clock_call() {
3534   if (has_aborted())
3535     return;
3536 
3537   // First, we need to recalculate the words scanned and refs reached
3538   // limits for the next clock call.
3539   recalculate_limits();
3540 
  // During the regular clock call we do the following:
3542 
3543   // (1) If an overflow has been flagged, then we abort.
3544   if (_cm->has_overflown()) {
3545     set_has_aborted();
3546     return;
3547   }
3548 
3549   // If we are not concurrent (i.e. we're doing remark) we don't need
3550   // to check anything else. The other steps are only needed during
3551   // the concurrent marking phase.
3552   if (!concurrent())
3553     return;
3554 
3555   // (2) If marking has been aborted for Full GC, then we also abort.
3556   if (_cm->has_aborted()) {
3557     set_has_aborted();
3558     statsOnly( ++_aborted_cm_aborted );
3559     return;
3560   }
3561 
3562   double curr_time_ms = os::elapsedVTime() * 1000.0;
3563 
  // (3) If marking stats are enabled, we record the length of this clock
  // interval and what triggered the clock call.
3565 #if _MARKING_STATS_
3566   if (_words_scanned >= _words_scanned_limit)
3567     ++_clock_due_to_scanning;
3568   if (_refs_reached >= _refs_reached_limit)
3569     ++_clock_due_to_marking;
3570 
3571   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3572   _interval_start_time_ms = curr_time_ms;
3573   _all_clock_intervals_ms.add(last_interval_ms);
3574 
3575   if (_cm->verbose_medium()) {
3576     gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3577                            "scanned = %d%s, refs reached = %d%s",
3578                            _task_id, last_interval_ms,
3579                            _words_scanned,
3580                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3581                            _refs_reached,
3582                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3583   }
3584 #endif // _MARKING_STATS_
3585 
3586   // (4) We check whether we should yield. If we have to, then we abort.
3587   if (_cm->should_yield()) {
3588     // We should yield. To do this we abort the task. The caller is
3589     // responsible for yielding.
3590     set_has_aborted();
3591     statsOnly( ++_aborted_yield );
3592     return;
3593   }
3594 
3595   // (5) We check whether we've reached our time quota. If we have,
3596   // then we abort.
3597   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3598   if (elapsed_time_ms > _time_target_ms) {
3599     set_has_aborted();
3600     _has_timed_out = true;
3601     statsOnly( ++_aborted_timed_out );
3602     return;
3603   }
3604 
  // (6) Finally, we check whether there are enough completed SATB
3606   // buffers available for processing. If there are, we abort.
3607   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3608   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3609     if (_cm->verbose_low())
3610       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3611                              _task_id);
3612     // we do need to process SATB buffers, we'll abort and restart
3613     // the marking task to do so
3614     set_has_aborted();
3615     statsOnly( ++_aborted_satb );
3616     return;
3617   }
3618 }
3619 
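// Resets the scan and refs limits to one full period beyond the current
// counts; when either counter crosses its limit, reached_limit() invokes
// the regular clock.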
3620 void CMTask::recalculate_limits() {
3621   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3622   _words_scanned_limit      = _real_words_scanned_limit;
3623 
3624   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3625   _refs_reached_limit       = _real_refs_reached_limit;
3626 }
3627 
3628 void CMTask::decrease_limits() {
3629   // This is called when we believe that we're going to do an infrequent
3630   // operation which will increase the per byte scanned cost (i.e. move
3631   // entries to/from the global stack). It basically tries to decrease the
3632   // scanning limit so that the clock is called earlier.
3633 
3634   if (_cm->verbose_medium())
3635     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3636 
3637   _words_scanned_limit = _real_words_scanned_limit -
3638     3 * words_scanned_period / 4;
3639   _refs_reached_limit  = _real_refs_reached_limit -
3640     3 * refs_reached_period / 4;
3641 }
3642 
3643 void CMTask::move_entries_to_global_stack() {
3644   // local array where we'll store the entries that will be popped
3645   // from the local queue
3646   oop buffer[global_stack_transfer_size];
3647 
3648   int n = 0;
3649   oop obj;
3650   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3651     buffer[n] = obj;
3652     ++n;
3653   }
3654 
3655   if (n > 0) {
3656     // we popped at least one entry from the local queue
3657 
3658     statsOnly( ++_global_transfers_to; _local_pops += n );
3659 
3660     if (!_cm->mark_stack_push(buffer, n)) {
3661       if (_cm->verbose_low())
3662         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
3663       set_has_aborted();
3664     } else {
3665       // the transfer was successful
3666 
3667       if (_cm->verbose_medium())
3668         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3669                                _task_id, n);
3670       statsOnly( int tmp_size = _cm->mark_stack_size();
3671                  if (tmp_size > _global_max_size)
3672                    _global_max_size = tmp_size;
3673                  _global_pushes += n );
3674     }
3675   }
3676 
3677   // this operation was quite expensive, so decrease the limits
3678   decrease_limits();
3679 }
3680 
3681 void CMTask::get_entries_from_global_stack() {
3682   // local array where we'll store the entries that will be popped
3683   // from the global stack.
3684   oop buffer[global_stack_transfer_size];
3685   int n;
3686   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3687   assert(n <= global_stack_transfer_size,
3688          "we should not pop more than the given limit");
3689   if (n > 0) {
3690     // yes, we did actually pop at least one entry
3691 
3692     statsOnly( ++_global_transfers_from; _global_pops += n );
3693     if (_cm->verbose_medium())
3694       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3695                              _task_id, n);
3696     for (int i = 0; i < n; ++i) {
3697       bool success = _task_queue->push(buffer[i]);
3698       // We only call this when the local queue is empty or under a
3699       // given target limit. So, we do not expect this push to fail.
3700       assert(success, "invariant");
3701     }
3702 
3703     statsOnly( int tmp_size = _task_queue->size();
3704                if (tmp_size > _local_max_size)
3705                  _local_max_size = tmp_size;
3706                _local_pushes += n );
3707   }
3708 
3709   // this operation was quite expensive, so decrease the limits
3710   decrease_limits();
3711 }
3712 
3713 void CMTask::drain_local_queue(bool partially) {
3714   if (has_aborted())
3715     return;
3716 
  // Decide what the target size is, depending on whether we're going to
3718   // drain it partially (so that other tasks can steal if they run out
3719   // of things to do) or totally (at the very end).
3720   size_t target_size;
3721   if (partially)
3722     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3723   else
3724     target_size = 0;
3725 
3726   if (_task_queue->size() > target_size) {
3727     if (_cm->verbose_high())
3728       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3729                              _task_id, target_size);
3730 
3731     oop obj;
3732     bool ret = _task_queue->pop_local(obj);
3733     while (ret) {
3734       statsOnly( ++_local_pops );
3735 
3736       if (_cm->verbose_high())
3737         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3738                                (void*) obj);
3739 
3740       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3741       assert(!_g1h->is_on_master_free_list(
3742                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3743 
3744       scan_object(obj);
3745 
3746       if (_task_queue->size() <= target_size || has_aborted())
3747         ret = false;
3748       else
3749         ret = _task_queue->pop_local(obj);
3750     }
3751 
3752     if (_cm->verbose_high())
3753       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3754                              _task_id, _task_queue->size());
3755   }
3756 }
3757 
3758 void CMTask::drain_global_stack(bool partially) {
3759   if (has_aborted())
3760     return;
3761 
3762   // We have a policy to drain the local queue before we attempt to
3763   // drain the global stack.
3764   assert(partially || _task_queue->size() == 0, "invariant");
3765 
  // Decide what the target size is, depending on whether we're going to
3767   // drain it partially (so that other tasks can steal if they run out
3768   // of things to do) or totally (at the very end).  Notice that,
3769   // because we move entries from the global stack in chunks or
3770   // because another task might be doing the same, we might in fact
3771   // drop below the target. But, this is not a problem.
3772   size_t target_size;
3773   if (partially)
3774     target_size = _cm->partial_mark_stack_size_target();
3775   else
3776     target_size = 0;
3777 
3778   if (_cm->mark_stack_size() > target_size) {
3779     if (_cm->verbose_low())
3780       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3781                              _task_id, target_size);
3782 
3783     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3784       get_entries_from_global_stack();
3785       drain_local_queue(partially);
3786     }
3787 
3788     if (_cm->verbose_low())
3789       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3790                              _task_id, _cm->mark_stack_size());
3791   }
3792 }
3793 
// The SATB queue has several assumptions on whether to call the par or
// non-par versions of the methods. This is why some of the code is
3796 // replicated. We should really get rid of the single-threaded version
3797 // of the code to simplify things.
3798 void CMTask::drain_satb_buffers() {
3799   if (has_aborted())
3800     return;
3801 
3802   // We set this so that the regular clock knows that we're in the
3803   // middle of draining buffers and doesn't set the abort flag when it
3804   // notices that SATB buffers are available for draining. It'd be
  // very counterproductive if it did that. :-)
3806   _draining_satb_buffers = true;
3807 
3808   CMObjectClosure oc(this);
3809   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3810   if (G1CollectedHeap::use_parallel_gc_threads())
3811     satb_mq_set.set_par_closure(_task_id, &oc);
3812   else
3813     satb_mq_set.set_closure(&oc);
3814 
3815   // This keeps claiming and applying the closure to completed buffers
3816   // until we run out of buffers or we need to abort.
3817   if (G1CollectedHeap::use_parallel_gc_threads()) {
3818     while (!has_aborted() &&
3819            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3820       if (_cm->verbose_medium())
3821         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3822       statsOnly( ++_satb_buffers_processed );
3823       regular_clock_call();
3824     }
3825   } else {
3826     while (!has_aborted() &&
3827            satb_mq_set.apply_closure_to_completed_buffer()) {
3828       if (_cm->verbose_medium())
3829         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3830       statsOnly( ++_satb_buffers_processed );
3831       regular_clock_call();
3832     }
3833   }
3834 
3835   if (!concurrent() && !has_aborted()) {
3836     // We should only do this during remark.
3837     if (G1CollectedHeap::use_parallel_gc_threads())
3838       satb_mq_set.par_iterate_closure_all_threads(_task_id);
3839     else
3840       satb_mq_set.iterate_closure_all_threads();
3841   }
3842 
3843   _draining_satb_buffers = false;
3844 
3845   assert(has_aborted() ||
3846          concurrent() ||
3847          satb_mq_set.completed_buffers_num() == 0, "invariant");
3848 
3849   if (G1CollectedHeap::use_parallel_gc_threads())
3850     satb_mq_set.set_par_closure(_task_id, NULL);
3851   else
3852     satb_mq_set.set_closure(NULL);
3853 
3854   // again, this was a potentially expensive operation, decrease the
3855   // limits to get the regular clock call early
3856   decrease_limits();
3857 }
3858 
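// Pops regions (bitmap ranges) off the global region stack, including any
// region this task previously aborted, and iterates over the corresponding
// part of the next-mark bitmap with the given closure.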
3859 void CMTask::drain_region_stack(BitMapClosure* bc) {
3860   if (has_aborted())
3861     return;
3862 
3863   assert(_region_finger == NULL,
3864          "it should be NULL when we're not scanning a region");
3865 
3866   if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
3867     if (_cm->verbose_low())
3868       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3869                              _task_id, _cm->region_stack_size());
3870 
3871     MemRegion mr;
3872 
3873     if (!_aborted_region.is_empty()) {
3874       mr = _aborted_region;
3875       _aborted_region = MemRegion();
3876 
3877       if (_cm->verbose_low())
3878         gclog_or_tty->print_cr("[%d] scanning aborted region [ " PTR_FORMAT ", " PTR_FORMAT " )",
3879                              _task_id, mr.start(), mr.end());
3880     } else {
3881       mr = _cm->region_stack_pop_lock_free();
3882       // it returns MemRegion() if the pop fails
3883       statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3884     }
3885 
3886     while (mr.start() != NULL) {
3887       if (_cm->verbose_medium())
3888         gclog_or_tty->print_cr("[%d] we are scanning region "
3889                                "["PTR_FORMAT", "PTR_FORMAT")",
3890                                _task_id, mr.start(), mr.end());
3891 
3892       assert(mr.end() <= _cm->finger(),
3893              "otherwise the region shouldn't be on the stack");
3894       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3895       if (_nextMarkBitMap->iterate(bc, mr)) {
3896         assert(!has_aborted(),
3897                "cannot abort the task without aborting the bitmap iteration");
3898 
3899         // We finished iterating over the region without aborting.
3900         regular_clock_call();
3901         if (has_aborted())
3902           mr = MemRegion();
3903         else {
3904           mr = _cm->region_stack_pop_lock_free();
3905           // it returns MemRegion() if the pop fails
3906           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3907         }
3908       } else {
3909         assert(has_aborted(), "currently the only way to do so");
3910 
3911         // The only way to abort the bitmap iteration is to return
3912         // false from the do_bit() method. However, inside the
3913         // do_bit() method we move the _region_finger to point to the
3914         // object currently being looked at. So, if we bail out, we
3915         // have definitely set _region_finger to something non-null.
3916         assert(_region_finger != NULL, "invariant");
3917 
3918         // Make sure that any previously aborted region has been
3919         // cleared.
3920         assert(_aborted_region.is_empty(), "aborted region not cleared");
3921 
3922         // The iteration was actually aborted. So now _region_finger
3923         // points to the address of the object we last scanned. If we
3924         // leave it there, when we restart this task, we will rescan
3925         // the object. It is easy to avoid this. We move the finger by
3926         // enough to point to the next possible object header (the
3927         // bitmap knows by how much we need to move it as it knows its
3928         // granularity).
3929         MemRegion newRegion =
3930           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3931 
3932         if (!newRegion.is_empty()) {
3933           if (_cm->verbose_low()) {
3934             gclog_or_tty->print_cr("[%d] recording unscanned region"
3935                                    "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
3936                                    _task_id,
3937                                    newRegion.start(), newRegion.end());
3938           }
3939           // Now record the part of the region we didn't scan to
3940           // make sure this task scans it later.
3941           _aborted_region = newRegion;
3942         }
3943         // break from while
3944         mr = MemRegion();
3945       }
3946       _region_finger = NULL;
3947     }
3948 
3949     if (_cm->verbose_low())
3950       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3951                              _task_id, _cm->region_stack_size());
3952   }
3953 }
3954 
3955 void CMTask::print_stats() {
3956   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3957                          _task_id, _calls);
3958   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3959                          _elapsed_time_ms, _termination_time_ms);
3960   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3961                          _step_times_ms.num(), _step_times_ms.avg(),
3962                          _step_times_ms.sd());
3963   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3964                          _step_times_ms.maximum(), _step_times_ms.sum());
3965 
3966 #if _MARKING_STATS_
3967   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3968                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3969                          _all_clock_intervals_ms.sd());
3970   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3971                          _all_clock_intervals_ms.maximum(),
3972                          _all_clock_intervals_ms.sum());
3973   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
3974                          _clock_due_to_scanning, _clock_due_to_marking);
3975   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
3976                          _objs_scanned, _objs_found_on_bitmap);
3977   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
3978                          _local_pushes, _local_pops, _local_max_size);
3979   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
3980                          _global_pushes, _global_pops, _global_max_size);
3981   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
3982                          _global_transfers_to,_global_transfers_from);
3983   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
3984                          _regions_claimed, _region_stack_pops);
3985   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
3986   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
3987                          _steal_attempts, _steals);
3988   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
3989   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
3990                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3991   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
3992                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3993 #endif // _MARKING_STATS_
3994 }
3995 
3996 /*****************************************************************************
3997 
3998     The do_marking_step(time_target_ms) method is the building block
3999     of the parallel marking framework. It can be called in parallel
4000     with other invocations of do_marking_step() on different tasks
4001     (but only one per task, obviously) and concurrently with the
4002     mutator threads, or during remark, hence it eliminates the need
4003     for two versions of the code. When called during remark, it will
4004     pick up from where the task left off during the concurrent marking
4005     phase. Interestingly, tasks are also claimable during evacuation
    pauses, since do_marking_step() ensures that it aborts before
4007     it needs to yield.
4008 
    The data structures that it uses to do marking work are the
4010     following:
4011 
4012       (1) Marking Bitmap. If there are gray objects that appear only
4013       on the bitmap (this happens either when dealing with an overflow
4014       or when the initial marking phase has simply marked the roots
4015       and didn't push them on the stack), then tasks claim heap
4016       regions whose bitmap they then scan to find gray objects. A
4017       global finger indicates where the end of the last claimed region
4018       is. A local finger indicates how far into the region a task has
4019       scanned. The two fingers are used to determine how to gray an
4020       object (i.e. whether simply marking it is OK, as it will be
4021       visited by a task in the future, or whether it needs to be also
4022       pushed on a stack).
4023 
4024       (2) Local Queue. The local queue of the task which is accessed
4025       reasonably efficiently by the task. Other tasks can steal from
4026       it when they run out of work. Throughout the marking phase, a
4027       task attempts to keep its local queue short but not totally
4028       empty, so that entries are available for stealing by other
4029       tasks. Only when there is no more work, a task will totally
4030       drain its local queue.
4031 
4032       (3) Global Mark Stack. This handles local queue overflow. During
4033       marking only sets of entries are moved between it and the local
      queues, as access to it requires a mutex and more fine-grained
      interaction with it might cause contention. If it
4036       overflows, then the marking phase should restart and iterate
4037       over the bitmap to identify gray objects. Throughout the marking
4038       phase, tasks attempt to keep the global mark stack at a small
4039       length but not totally empty, so that entries are available for
4040       popping by other tasks. Only when there is no more work, tasks
4041       will totally drain the global mark stack.
4042 
4043       (4) Global Region Stack. Entries on it correspond to areas of
4044       the bitmap that need to be scanned since they contain gray
4045       objects. Pushes on the region stack only happen during
4046       evacuation pauses and typically correspond to areas covered by
      GCLABs. If it overflows, then the marking phase should restart
4048       and iterate over the bitmap to identify gray objects. Tasks will
4049       try to totally drain the region stack as soon as possible.
4050 
4051       (5) SATB Buffer Queue. This is where completed SATB buffers are
4052       made available. Buffers are regularly removed from this queue
4053       and scanned for roots, so that the queue doesn't get too
4054       long. During remark, all completed buffers are processed, as
4055       well as the filled in parts of any uncompleted buffers.
4056 
4057     The do_marking_step() method tries to abort when the time target
4058     has been reached. There are a few other cases when the
4059     do_marking_step() method also aborts:
4060 
4061       (1) When the marking phase has been aborted (after a Full GC).
4062 
4063       (2) When a global overflow (either on the global stack or the
4064       region stack) has been triggered. Before the task aborts, it
4065       will actually sync up with the other tasks to ensure that all
4066       the marking data structures (local queues, stacks, fingers etc.)
4067       are re-initialised so that when do_marking_step() completes,
4068       the marking phase can immediately restart.
4069 
4070       (3) When enough completed SATB buffers are available. The
4071       do_marking_step() method only tries to drain SATB buffers right
4072       at the beginning. So, if enough buffers are available, the
4073       marking step aborts and the SATB buffers are processed at
4074       the beginning of the next invocation.
4075 
      (4) To yield. When we have to yield then we abort and yield
      right at the end of do_marking_step(). This saves us from a lot
      of hassle as, by yielding, we might allow a Full GC. If this
4079       happens then objects will be compacted underneath our feet, the
4080       heap might shrink, etc. We save checking for this by just
4081       aborting and doing the yield right at the end.
4082 
4083     From the above it follows that the do_marking_step() method should
4084     be called in a loop (or, otherwise, regularly) until it completes.
4085 
4086     If a marking step completes without its has_aborted() flag being
4087     true, it means it has completed the current marking phase (and
4088     also all other marking tasks have done so and have all synced up).
4089 
4090     A method called regular_clock_call() is invoked "regularly" (in
4091     sub ms intervals) throughout marking. It is this clock method that
4092     checks all the abort conditions which were mentioned above and
4093     decides when the task should abort. A work-based scheme is used to
4094     trigger this clock method: when the number of object words the
4095     marking phase has scanned or the number of references the marking
    phase has visited reach a given limit. Additional invocations of
    the clock method have been planted in a few other strategic places
4098     too. The initial reason for the clock method was to avoid calling
4099     vtime too regularly, as it is quite expensive. So, once it was in
4100     place, it was natural to piggy-back all the other conditions on it
4101     too and not constantly check them throughout the code.
4102 
4103  *****************************************************************************/
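
// As a rough illustration only (hypothetical driver code, not part of this
// file): callers are expected to re-invoke do_marking_step() until it
// finishes without having aborted, along the lines of
//
//   do {
//     the_task->do_marking_step(mark_step_duration_ms,
//                               true /* do_stealing */,
//                               true /* do_termination */);
//     // ...yield and/or handle overflow here if the step aborted...
//   } while (the_task->has_aborted() && !the_cm->has_aborted());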
4104 
4105 void CMTask::do_marking_step(double time_target_ms,
4106                              bool do_stealing,
4107                              bool do_termination) {
4108   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4109   assert(concurrent() == _cm->concurrent(), "they should be the same");
4110 
4111   assert(concurrent() || _cm->region_stack_empty(),
4112          "the region stack should have been cleared before remark");
4113   assert(concurrent() || !_cm->has_aborted_regions(),
4114          "aborted regions should have been cleared before remark");
4115   assert(_region_finger == NULL,
4116          "this should be non-null only when a region is being scanned");
4117 
4118   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4119   assert(_task_queues != NULL, "invariant");
4120   assert(_task_queue != NULL, "invariant");
4121   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
4122 
4123   assert(!_claimed,
4124          "only one thread should claim this task at any one time");
4125 
  // OK, this doesn't safeguard against all possible scenarios, as it is
4127   // possible for two threads to set the _claimed flag at the same
4128   // time. But it is only for debugging purposes anyway and it will
4129   // catch most problems.
4130   _claimed = true;
4131 
4132   _start_time_ms = os::elapsedVTime() * 1000.0;
4133   statsOnly( _interval_start_time_ms = _start_time_ms );
4134 
4135   double diff_prediction_ms =
4136     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4137   _time_target_ms = time_target_ms - diff_prediction_ms;
4138 
4139   // set up the variables that are used in the work-based scheme to
4140   // call the regular clock method
4141   _words_scanned = 0;
4142   _refs_reached  = 0;
4143   recalculate_limits();
4144 
4145   // clear all flags
4146   clear_has_aborted();
4147   _has_timed_out = false;
4148   _draining_satb_buffers = false;
4149 
4150   ++_calls;
4151 
4152   if (_cm->verbose_low())
4153     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
4154                            "target = %1.2lfms >>>>>>>>>>",
4155                            _task_id, _calls, _time_target_ms);
4156 
4157   // Set up the bitmap and oop closures. Anything that uses them is
4158   // eventually called from this method, so it is OK to allocate these
4159   // statically.
4160   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4161   CMOopClosure    oop_closure(_g1h, _cm, this);
4162   set_oop_closure(&oop_closure);
4163 
4164   if (_cm->has_overflown()) {
4165     // This can happen if the region stack or the mark stack overflows
4166     // during a GC pause and this task, after a yield point,
4167     // restarts. We have to abort as we need to get into the overflow
4168     // protocol which happens right at the end of this task.
4169     set_has_aborted();
4170   }
4171 
4172   // First drain any available SATB buffers. After this, we will not
4173   // look at SATB buffers before the next invocation of this method.
4174   // If enough completed SATB buffers are queued up, the regular clock
4175   // will abort this task so that it restarts.
4176   drain_satb_buffers();
4177   // ...then partially drain the local queue and the global stack
4178   drain_local_queue(true);
4179   drain_global_stack(true);
4180 
4181   // Then totally drain the region stack.  We will not look at
4182   // it again before the next invocation of this method. Entries on
4183   // the region stack are only added during evacuation pauses, for
4184   // which we have to yield. When we do, we abort the task anyway so
4185   // it will look at the region stack again when it restarts.
4186   bitmap_closure.set_scanning_heap_region(false);
4187   drain_region_stack(&bitmap_closure);
4188   // ...then partially drain the local queue and the global stack
4189   drain_local_queue(true);
4190   drain_global_stack(true);
4191 
4192   do {
4193     if (!has_aborted() && _curr_region != NULL) {
4194       // This means that we're already holding on to a region.
4195       assert(_finger != NULL, "if region is not NULL, then the finger "
4196              "should not be NULL either");
4197 
4198       // We might have restarted this task after an evacuation pause
4199       // which might have evacuated the region we're holding on to
4200       // underneath our feet. Let's read its limit again to make sure
4201       // that we do not iterate over a region of the heap that
4202       // contains garbage (update_region_limit() will also move
4203       // _finger to the start of the region if it is found empty).
4204       update_region_limit();
4205       // We will start from _finger not from the start of the region,
4206       // as we might be restarting this task after aborting half-way
4207       // through scanning this region. In this case, _finger points to
4208       // the address where we last found a marked object. If this is a
4209       // fresh region, _finger points to start().
4210       MemRegion mr = MemRegion(_finger, _region_limit);
4211 
4212       if (_cm->verbose_low())
4213         gclog_or_tty->print_cr("[%d] we're scanning part "
4214                                "["PTR_FORMAT", "PTR_FORMAT") "
4215                                "of region "PTR_FORMAT,
4216                                _task_id, _finger, _region_limit, _curr_region);
4217 
4218       // Let's iterate over the bitmap of the part of the
4219       // region that is left.
4220       bitmap_closure.set_scanning_heap_region(true);
4221       if (mr.is_empty() ||
4222           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4223         // We successfully completed iterating over the region. Now,
4224         // let's give up the region.
4225         giveup_current_region();
4226         regular_clock_call();
4227       } else {
4228         assert(has_aborted(), "currently the only way to do so");
4229         // The only way to abort the bitmap iteration is to return
4230         // false from the do_bit() method. However, inside the
4231         // do_bit() method we move the _finger to point to the
4232         // object currently being looked at. So, if we bail out, we
4233         // have definitely set _finger to something non-null.
4234         assert(_finger != NULL, "invariant");
4235 
4236         // Region iteration was actually aborted. So now _finger
4237         // points to the address of the object we last scanned. If we
4238         // leave it there, when we restart this task, we will rescan
4239         // the object. It is easy to avoid this. We move the finger by
4240         // enough to point to the next possible object header (the
4241         // bitmap knows by how much we need to move it as it knows its
4242         // granularity).
4243         assert(_finger < _region_limit, "invariant");
4244         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
4245         // Check if bitmap iteration was aborted while scanning the last object
4246         if (new_finger >= _region_limit) {
4247             giveup_current_region();
4248         } else {
4249             move_finger_to(new_finger);
4250         }
4251       }
4252     }
4253     // At this point we have either completed iterating over the
4254     // region we were holding on to, or we have aborted.
4255 
4256     // We then partially drain the local queue and the global stack.
4257     // (Do we really need this?)
4258     drain_local_queue(true);
4259     drain_global_stack(true);
4260 
4261     // Read the note on the claim_region() method on why it might
4262     // return NULL with potentially more regions available for
4263     // claiming and why we have to check out_of_regions() to determine
4264     // whether we're done or not.
4265     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4266       // We are going to try to claim a new region. We should have
4267       // given up on the previous one.
4268       // Separated the asserts so that we know which one fires.
4269       assert(_curr_region  == NULL, "invariant");
4270       assert(_finger       == NULL, "invariant");
4271       assert(_region_limit == NULL, "invariant");
4272       if (_cm->verbose_low())
4273         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
4274       HeapRegion* claimed_region = _cm->claim_region(_task_id);
4275       if (claimed_region != NULL) {
4276         // Yes, we managed to claim one
4277         statsOnly( ++_regions_claimed );
4278 
4279         if (_cm->verbose_low())
4280           gclog_or_tty->print_cr("[%d] we successfully claimed "
4281                                  "region "PTR_FORMAT,
4282                                  _task_id, claimed_region);
4283 
4284         setup_for_region(claimed_region);
4285         assert(_curr_region == claimed_region, "invariant");
4286       }
4287       // It is important to call the regular clock here. It might take
4288       // a while to claim a region if, for example, we hit a large
4289       // block of empty regions. So we need to call the regular clock
4290       // method once round the loop to make sure it's called
4291       // frequently enough.
4292       regular_clock_call();
4293     }
4294 
4295     if (!has_aborted() && _curr_region == NULL) {
4296       assert(_cm->out_of_regions(),
4297              "at this point we should be out of regions");
4298     }
4299   } while ( _curr_region != NULL && !has_aborted());
4300 
4301   if (!has_aborted()) {
4302     // We cannot check whether the global stack is empty, since other
4303     // tasks might be pushing objects to it concurrently. We also cannot
4304     // check if the region stack is empty because if a thread is aborting
4305     // it can push a partially done region back.
4306     assert(_cm->out_of_regions(),
4307            "at this point we should be out of regions");
4308 
4309     if (_cm->verbose_low())
4310       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4311 
4312     // Try to reduce the number of available SATB buffers so that
4313     // remark has less work to do.
4314     drain_satb_buffers();
4315   }
4316 
4317   // Since we've done everything else, we can now totally drain the
4318   // local queue and global stack.
4319   drain_local_queue(false);
4320   drain_global_stack(false);
4321 
4322   // Attempt to steal work from other tasks' queues.
4323   if (do_stealing && !has_aborted()) {
4324     // We have not aborted. This means that we have finished all that
4325     // we could. Let's try to do some stealing...
4326 
4327     // We cannot check whether the global stack is empty, since other
4328     // tasks might be pushing objects to it concurrently. We also cannot
4329     // check if the region stack is empty because if a thread is aborting
4330     // it can push a partially done region back.
4331     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4332            "only way to reach here");
4333 
4334     if (_cm->verbose_low())
4335       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4336 
4337     while (!has_aborted()) {
4338       oop obj;
4339       statsOnly( ++_steal_attempts );
4340 
4341       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4342         if (_cm->verbose_medium())
4343           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4344                                  _task_id, (void*) obj);
4345 
4346         statsOnly( ++_steals );
4347 
4348         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4349                "any stolen object should be marked");
4350         scan_object(obj);
4351 
4352         // And since we're towards the end, let's totally drain the
4353         // local queue and global stack.
4354         drain_local_queue(false);
4355         drain_global_stack(false);
4356       } else {
4357         break;
4358       }
4359     }
4360   }
4361 
4362   // If we are about to wrap up and go into termination, check if we
4363   // should raise the overflow flag.
4364   if (do_termination && !has_aborted()) {
4365     if (_cm->force_overflow()->should_force()) {
4366       _cm->set_has_overflown();
4367       regular_clock_call();
4368     }
4369   }
4370 
4371   // We still haven't aborted. Now, let's try to get into the
4372   // termination protocol.
4373   if (do_termination && !has_aborted()) {
4374     // We cannot check whether the global stack is empty, since other
4375     // tasks might be concurrently pushing objects on it. We also cannot
4376     // check if the region stack is empty because if a thread is aborting
4377     // it can push a partially done region back.
4378     // Separated the asserts so that we know which one fires.
4379     assert(_cm->out_of_regions(), "only way to reach here");
4380     assert(_task_queue->size() == 0, "only way to reach here");
4381 
4382     if (_cm->verbose_low())
4383       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4384 
4385     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4386     // The CMTask class also extends the TerminatorTerminator class,
4387     // hence its should_exit_termination() method will also decide
4388     // whether to exit the termination protocol or not.
4389     bool finished = _cm->terminator()->offer_termination(this);
4390     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4391     _termination_time_ms +=
4392       termination_end_time_ms - _termination_start_time_ms;
4393 
4394     if (finished) {
4395       // We're all done.
4396 
4397       if (_task_id == 0) {
4398         // let's allow task 0 to do this
4399         if (concurrent()) {
4400           assert(_cm->concurrent_marking_in_progress(), "invariant");
4401           // we need to set this to false before the next
4402           // safepoint. This way we ensure that the marking phase
4403           // doesn't observe any more heap expansions.
4404           _cm->clear_concurrent_marking_in_progress();
4405         }
4406       }
4407 
4408       // We can now guarantee that the global stack is empty, since
4409       // all other tasks have finished. We separated the guarantees so
4410       // that, if a condition is false, we can immediately find out
4411       // which one.
4412       guarantee(_cm->out_of_regions(), "only way to reach here");
4413       guarantee(_aborted_region.is_empty(), "only way to reach here");
4414       guarantee(_cm->region_stack_empty(), "only way to reach here");
4415       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4416       guarantee(_task_queue->size() == 0, "only way to reach here");
4417       guarantee(!_cm->has_overflown(), "only way to reach here");
4418       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4419       guarantee(!_cm->region_stack_overflow(), "only way to reach here");
4420 
4421       if (_cm->verbose_low())
4422         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4423     } else {
4424       // Apparently there's more work to do. Let's abort this task; the
4425       // caller will restart it and we can hopefully find more things to do.
4426 
4427       if (_cm->verbose_low())
4428         gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
4429 
4430       set_has_aborted();
4431       statsOnly( ++_aborted_termination );
4432     }
4433   }
4434 
4435   // Mainly for debugging purposes: make sure that the pointer to the
4436   // closure that was allocated on this frame's stack doesn't
4437   // escape it by accident.
4438   set_oop_closure(NULL);
4439   double end_time_ms = os::elapsedVTime() * 1000.0;
4440   double elapsed_time_ms = end_time_ms - _start_time_ms;
4441   // Update the step history.
4442   _step_times_ms.add(elapsed_time_ms);
4443 
4444   if (has_aborted()) {
4445     // The task was aborted for some reason.
4446 
4447     statsOnly( ++_aborted );
4448 
4449     if (_has_timed_out) {
4450       double diff_ms = elapsed_time_ms - _time_target_ms;
4451       // Keep statistics of how well we did with respect to hitting
4452       // our target only if we actually timed out (if we aborted for
4453       // other reasons, then the results might get skewed).
4454       _marking_step_diffs_ms.add(diff_ms);
4455     }
4456 
4457     if (_cm->has_overflown()) {
4458       // This is the interesting one. We aborted because a global
4459       // overflow was raised. This means we have to restart the
4460       // marking phase and start iterating over regions. However, in
4461       // order to do this we have to make sure that all tasks stop
4462       // what they are doing and re-initialise in a safe manner. We
4463       // will achieve this with the use of two barrier sync points.
4464 
4465       if (_cm->verbose_low())
4466         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4467 
4468       _cm->enter_first_sync_barrier(_task_id);
4469       // When we exit this sync barrier we know that all tasks have
4470       // stopped doing marking work. So, it's now safe to
4471       // re-initialise our data structures. At the end of this method,
4472       // task 0 will clear the global data structures.
4473 
4474       statsOnly( ++_aborted_overflow );
4475 
4476       // We clear the local state of this task...
4477       clear_region_fields();
4478 
4479       // ...and enter the second barrier.
4480       _cm->enter_second_sync_barrier(_task_id);
4481       // At this point everything has been re-initialised and we're
4482       // ready to restart.
4483     }
4484 
4485     if (_cm->verbose_low()) {
4486       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4487                              "elapsed = %1.2lfms <<<<<<<<<<",
4488                              _task_id, _time_target_ms, elapsed_time_ms);
4489       if (_cm->has_aborted())
4490         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4491                                _task_id);
4492     }
4493   } else {
4494     if (_cm->verbose_low())
4495       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4496                              "elapsed = %1.2lfms <<<<<<<<<<",
4497                              _task_id, _time_target_ms, elapsed_time_ms);
4498   }
4499 
4500   _claimed = false;
4501 }
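
     // Illustrative sketch (not the actual call site): a marking worker
     // drives a task by repeatedly invoking the step with a time budget,
     // roughly
     //
     //   the_task->do_marking_step(10.0 /* time_target_ms, illustrative */,
     //                             true /* do_stealing    */,
     //                             true /* do_termination */);
     //
     // re-invoking it while the step keeps aborting.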
4502 
4503 CMTask::CMTask(int task_id,
4504                ConcurrentMark* cm,
4505                CMTaskQueue* task_queue,
4506                CMTaskQueueSet* task_queues)
4507   : _g1h(G1CollectedHeap::heap()),
4508     _task_id(task_id), _cm(cm),
4509     _claimed(false),
4510     _nextMarkBitMap(NULL), _hash_seed(17),
4511     _task_queue(task_queue),
4512     _task_queues(task_queues),
4513     _oop_closure(NULL),
4514     _aborted_region(MemRegion()) {
4515   guarantee(task_queue != NULL, "invariant");
4516   guarantee(task_queues != NULL, "invariant");
4517 
4518   statsOnly( _clock_due_to_scanning = 0;
4519              _clock_due_to_marking  = 0 );
4520 
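       // Seed the step-time difference sequence with an initial sample so
       // the first predictions are not based on an empty history.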
4521   _marking_step_diffs_ms.add(0.5);
4522 }
4523 
4524 // These are formatting macros that are used below to ensure
4525 // consistent formatting. The *_H_* versions are used to format the
4526 // header for a particular value and they should be kept consistent
4527 // with the corresponding macro. Also note that most of the macros add
4528 // the necessary white space (as a prefix) which makes them a bit
4529 // easier to compose.
4530 
4531 // All the output lines are prefixed with this string to be able to
4532 // identify them easily in a large log file.
4533 #define G1PPRL_LINE_PREFIX            "###"
4534 
4535 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
4536 #ifdef _LP64
4537 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4538 #else // _LP64
4539 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4540 #endif // _LP64
4541 
4542 // For per-region info
4543 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4544 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4545 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
4546 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4547 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4548 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4549 
4550 // For summary info
4551 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
4552 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
4553 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
4554 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
4555 
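     // Illustrative usage (a sketch, not the actual call site): the closure
     // prints the header on construction, one line per visited region, and
     // the summary footer on destruction, e.g.
     //
     //   G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
     //   G1CollectedHeap::heap()->heap_region_iterate(&cl);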
4556 G1PrintRegionLivenessInfoClosure::
4557 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4558   : _out(out),
4559     _total_used_bytes(0), _total_capacity_bytes(0),
4560     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4561     _hum_used_bytes(0), _hum_capacity_bytes(0),
4562     _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4563   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4564   MemRegion g1_committed = g1h->g1_committed();
4565   MemRegion g1_reserved = g1h->g1_reserved();
4566   double now = os::elapsedTime();
4567 
4568   // Print the header of the output.
4569   _out->cr();
4570   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4571   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4572                  G1PPRL_SUM_ADDR_FORMAT("committed")
4573                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4574                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4575                  g1_committed.start(), g1_committed.end(),
4576                  g1_reserved.start(), g1_reserved.end(),
4577                  HeapRegion::GrainBytes);
4578   _out->print_cr(G1PPRL_LINE_PREFIX);
4579   _out->print_cr(G1PPRL_LINE_PREFIX
4580                  G1PPRL_TYPE_H_FORMAT
4581                  G1PPRL_ADDR_BASE_H_FORMAT
4582                  G1PPRL_BYTE_H_FORMAT
4583                  G1PPRL_BYTE_H_FORMAT
4584                  G1PPRL_BYTE_H_FORMAT
4585                  G1PPRL_DOUBLE_H_FORMAT,
4586                  "type", "address-range",
4587                  "used", "prev-live", "next-live", "gc-eff");
4588 }
4589 
4590 // It takes as a parameter a pointer to one of the _hum_* fields, it
4591 // deduces the corresponding value for a region in a humongous region
4592 // series (either the region size, or what's left if the _hum_* field
4593 // is < the region size), and updates the _hum_* field accordingly.
4594 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4595   size_t bytes = 0;
4596   // The > 0 check is to deal with the prev and next live bytes which
4597   // could be 0.
4598   if (*hum_bytes > 0) {
4599     bytes = MIN2((size_t) HeapRegion::GrainBytes, *hum_bytes);
4600     *hum_bytes -= bytes;
4601   }
4602   return bytes;
4603 }
4604 
4605 // It deduces the values for a region in a humongous region series
4606 // from the _hum_* fields and updates those accordingly. It assumes
4607 // that the _hum_* fields have already been set up from the "starts
4608 // humongous" region and that we visit the regions in address order.
4609 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4610                                                      size_t* capacity_bytes,
4611                                                      size_t* prev_live_bytes,
4612                                                      size_t* next_live_bytes) {
4613   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4614   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4615   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4616   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4617   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4618 }
4619 
4620 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4621   const char* type = "";
4622   HeapWord* bottom       = r->bottom();
4623   HeapWord* end          = r->end();
4624   size_t capacity_bytes  = r->capacity();
4625   size_t used_bytes      = r->used();
4626   size_t prev_live_bytes = r->live_bytes();
4627   size_t next_live_bytes = r->next_live_bytes();
4628   double gc_eff          = r->gc_efficiency();
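       // Classify the region. For a humongous series the byte counts are
       // recorded on the "starts humongous" region, so they are spread
       // across the series one region's worth at a time via the _hum_*
       // fields.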
4629   if (r->used() == 0) {
4630     type = "FREE";
4631   } else if (r->is_survivor()) {
4632     type = "SURV";
4633   } else if (r->is_young()) {
4634     type = "EDEN";
4635   } else if (r->startsHumongous()) {
4636     type = "HUMS";
4637 
4638     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4639            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4640            "they should have been zeroed after the last time we used them");
4641     // Set up the _hum_* fields.
4642     _hum_capacity_bytes  = capacity_bytes;
4643     _hum_used_bytes      = used_bytes;
4644     _hum_prev_live_bytes = prev_live_bytes;
4645     _hum_next_live_bytes = next_live_bytes;
4646     get_hum_bytes(&used_bytes, &capacity_bytes,
4647                   &prev_live_bytes, &next_live_bytes);
4648     end = bottom + HeapRegion::GrainWords;
4649   } else if (r->continuesHumongous()) {
4650     type = "HUMC";
4651     get_hum_bytes(&used_bytes, &capacity_bytes,
4652                   &prev_live_bytes, &next_live_bytes);
4653     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4654   } else {
4655     type = "OLD";
4656   }
4657 
4658   _total_used_bytes      += used_bytes;
4659   _total_capacity_bytes  += capacity_bytes;
4660   _total_prev_live_bytes += prev_live_bytes;
4661   _total_next_live_bytes += next_live_bytes;
4662 
4663   // Print a line for this particular region.
4664   _out->print_cr(G1PPRL_LINE_PREFIX
4665                  G1PPRL_TYPE_FORMAT
4666                  G1PPRL_ADDR_BASE_FORMAT
4667                  G1PPRL_BYTE_FORMAT
4668                  G1PPRL_BYTE_FORMAT
4669                  G1PPRL_BYTE_FORMAT
4670                  G1PPRL_DOUBLE_FORMAT,
4671                  type, bottom, end,
4672                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4673 
4674   return false;
4675 }
4676 
4677 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4678   // Print the footer of the output.
4679   _out->print_cr(G1PPRL_LINE_PREFIX);
4680   _out->print_cr(G1PPRL_LINE_PREFIX
4681                  " SUMMARY"
4682                  G1PPRL_SUM_MB_FORMAT("capacity")
4683                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4684                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4685                  G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4686                  bytes_to_mb(_total_capacity_bytes),
4687                  bytes_to_mb(_total_used_bytes),
4688                  perc(_total_used_bytes, _total_capacity_bytes),
4689                  bytes_to_mb(_total_prev_live_bytes),
4690                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4691                  bytes_to_mb(_total_next_live_bytes),
4692                  perc(_total_next_live_bytes, _total_capacity_bytes));
4693   _out->cr();
4694 }