1 /*
   2  * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/symbolTable.hpp"
  27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
  28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  31 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
  32 #include "gc_implementation/g1/g1RemSet.hpp"
  33 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  34 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
  35 #include "gc_implementation/shared/vmGCOperations.hpp"
  36 #include "memory/genOopClosures.inline.hpp"
  37 #include "memory/referencePolicy.hpp"
  38 #include "memory/resourceArea.hpp"
  39 #include "oops/oop.inline.hpp"
  40 #include "runtime/handles.inline.hpp"
  41 #include "runtime/java.hpp"
  42 
  43 //
  44 // CMS Bit Map Wrapper
  45 
  46 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
  47   _bm((uintptr_t*)NULL,0),
  48   _shifter(shifter) {
  49   _bmStartWord = (HeapWord*)(rs.base());
  50   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  51   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
  52                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  53 
  54   guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for CMS bit map");
  59   assert(_virtual_space.committed_size() == brs.size(),
  60          "didn't reserve backing store for all of CMS bit map?");
  61   _bm.set_map((uintptr_t*)_virtual_space.low());
  62   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
  63          _bmWordSize, "inconsistency in bit map sizing");
  64   _bm.set_size(_bmWordSize >> _shifter);
  65 }
  66 
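// Starting from addr (first rounded up to a possible object boundary),
// return the address of the first marked word before limit; if no such
// word exists, limit is returned. A NULL limit means the end of the
// covered space.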
  67 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  68                                                HeapWord* limit) const {
  69   // First we must round addr *up* to a possible object boundary.
  70   addr = (HeapWord*)align_size_up((intptr_t)addr,
  71                                   HeapWordSize << _shifter);
  72   size_t addrOffset = heapWordToOffset(addr);
  73   if (limit == NULL) {
  74     limit = _bmStartWord + _bmWordSize;
  75   }
  76   size_t limitOffset = heapWordToOffset(limit);
  77   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  78   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  79   assert(nextAddr >= addr, "get_next_one postcondition");
  80   assert(nextAddr == limit || isMarked(nextAddr),
  81          "get_next_one postcondition");
  82   return nextAddr;
  83 }
  84 
  85 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  86                                                  HeapWord* limit) const {
  87   size_t addrOffset = heapWordToOffset(addr);
  88   if (limit == NULL) {
  89     limit = _bmStartWord + _bmWordSize;
  90   }
  91   size_t limitOffset = heapWordToOffset(limit);
  92   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  93   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  94   assert(nextAddr >= addr, "get_next_one postcondition");
  95   assert(nextAddr == limit || !isMarked(nextAddr),
  96          "get_next_one postcondition");
  97   return nextAddr;
  98 }
  99 
 100 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 101   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 102   return (int) (diff >> _shifter);
 103 }
 104 
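// Clip mr to the part of the heap covered by this bitmap and iterate
// over the corresponding right-open bit range with cl; an empty
// intersection trivially succeeds.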
 105 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
 106   HeapWord* left  = MAX2(_bmStartWord, mr.start());
 107   HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
 108   if (right > left) {
 109     // Right-open interval [leftOffset, rightOffset).
 110     return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
 111   } else {
 112     return true;
 113   }
 114 }
 115 
 116 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
 117                                              size_t    from_start_index,
 118                                              HeapWord* to_start_word,
 119                                              size_t    word_num) {
 120   _bm.mostly_disjoint_range_union(from_bitmap,
 121                                   from_start_index,
 122                                   heapWordToOffset(to_start_word),
 123                                   word_num);
 124 }
 125 
 126 #ifndef PRODUCT
 127 bool CMBitMapRO::covers(ReservedSpace rs) const {
 128   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 129   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
 130          "size inconsistency");
 131   return _bmStartWord == (HeapWord*)(rs.base()) &&
 132          _bmWordSize  == rs.size()>>LogHeapWordSize;
 133 }
 134 #endif
 135 
 136 void CMBitMap::clearAll() {
 137   _bm.clear();
 138   return;
 139 }
 140 
 141 void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 143   assert(!mr.is_empty(), "unexpected empty region");
 144   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 145           ((HeapWord *) mr.end())),
 146          "markRange memory region end is not card aligned");
 147   // convert address range into offset range
 148   _bm.at_put_range(heapWordToOffset(mr.start()),
 149                    heapWordToOffset(mr.end()), true);
 150 }
 151 
 152 void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 154   assert(!mr.is_empty(), "unexpected empty region");
 155   // convert address range into offset range
 156   _bm.at_put_range(heapWordToOffset(mr.start()),
 157                    heapWordToOffset(mr.end()), false);
 158 }
 159 
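// Return the first contiguous marked range at or after addr (both ends
// clamped to end_addr), clearing it in the bitmap before returning it.
// The returned region may be empty.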
 160 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 161                                             HeapWord* end_addr) {
 162   HeapWord* start = getNextMarkedWordAddress(addr);
 163   start = MIN2(start, end_addr);
 164   HeapWord* end   = getNextUnmarkedWordAddress(start);
 165   end = MIN2(end, end_addr);
 166   assert(start <= end, "Consistency check");
 167   MemRegion mr(start, end);
 168   if (!mr.is_empty()) {
 169     clearRange(mr);
 170   }
 171   return mr;
 172 }
 173 
 174 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 175   _base(NULL), _cm(cm)
 176 #ifdef ASSERT
 177   , _drain_in_progress(false)
 178   , _drain_in_progress_yields(false)
 179 #endif
 180 {}
 181 
 182 void CMMarkStack::allocate(size_t size) {
 183   _base = NEW_C_HEAP_ARRAY(oop, size);
 184   if (_base == NULL) {
 185     vm_exit_during_initialization("Failed to allocate "
 186                                   "CM region mark stack");
 187   }
 188   _index = 0;
 189   _capacity = (jint) size;
 190   _oops_do_bound = -1;
 191   NOT_PRODUCT(_max_depth = 0);
 192 }
 193 
 194 CMMarkStack::~CMMarkStack() {
 195   if (_base != NULL) {
 196     FREE_C_HEAP_ARRAY(oop, _base);
 197   }
 198 }
 199 
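// Lock-free push of ptr onto the mark stack: claim a slot by CAS-ing
// _index forward and then store into the claimed slot. If the stack is
// full we just set _overflow and return.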
 200 void CMMarkStack::par_push(oop ptr) {
 201   while (true) {
 202     if (isFull()) {
 203       _overflow = true;
 204       return;
 205     }
 206     // Otherwise...
 207     jint index = _index;
 208     jint next_index = index+1;
 209     jint res = Atomic::cmpxchg(next_index, &_index, index);
 210     if (res == index) {
 211       _base[index] = ptr;
 212       // Note that we don't maintain this atomically.  We could, but it
 213       // doesn't seem necessary.
 214       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 215       return;
 216     }
 217     // Otherwise, we need to try again.
 218   }
 219 }
 220 
 221 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 222   while (true) {
 223     if (isFull()) {
 224       _overflow = true;
 225       return;
 226     }
 227     // Otherwise...
 228     jint index = _index;
 229     jint next_index = index + n;
 230     if (next_index > _capacity) {
 231       _overflow = true;
 232       return;
 233     }
 234     jint res = Atomic::cmpxchg(next_index, &_index, index);
 235     if (res == index) {
 236       for (int i = 0; i < n; i++) {
 237         int ind = index + i;
 238         assert(ind < _capacity, "By overflow test above.");
 239         _base[ind] = ptr_arr[i];
 240       }
 241       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 242       return;
 243     }
 244     // Otherwise, we need to try again.
 245   }
 246 }
 247 
 248 
 249 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 250   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 251   jint start = _index;
 252   jint next_index = start + n;
 253   if (next_index > _capacity) {
 254     _overflow = true;
 255     return;
 256   }
 257   // Otherwise.
 258   _index = next_index;
 259   for (int i = 0; i < n; i++) {
 260     int ind = start + i;
 261     assert(ind < _capacity, "By overflow test above.");
 262     _base[ind] = ptr_arr[i];
 263   }
 264 }
 265 
 266 
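// Pop up to max entries from the mark stack into ptr_arr, recording how
// many were popped in *n. Returns false if the stack was empty. Unlike
// par_push(), this is done under the ParGCRareEvent_lock.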
 267 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 268   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 269   jint index = _index;
 270   if (index == 0) {
 271     *n = 0;
 272     return false;
 273   } else {
 274     int k = MIN2(max, index);
 275     jint new_ind = index - k;
 276     for (int j = 0; j < k; j++) {
 277       ptr_arr[j] = _base[new_ind + j];
 278     }
 279     _index = new_ind;
 280     *n = k;
 281     return true;
 282   }
 283 }
 284 
 285 
 286 CMRegionStack::CMRegionStack() : _base(NULL) {}
 287 
 288 void CMRegionStack::allocate(size_t size) {
 289   _base = NEW_C_HEAP_ARRAY(MemRegion, size);
 290   if (_base == NULL) {
 291     vm_exit_during_initialization("Failed to allocate CM region mark stack");
 292   }
 293   _index = 0;
 294   _capacity = (jint) size;
 295 }
 296 
 297 CMRegionStack::~CMRegionStack() {
 298   if (_base != NULL) {
    FREE_C_HEAP_ARRAY(MemRegion, _base);
 300   }
 301 }
 302 
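// Lock-free push of mr onto the region stack: claim a slot with a CAS
// on _index and store the region there. If the stack is full, set
// _overflow and return.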
 303 void CMRegionStack::push_lock_free(MemRegion mr) {
 304   assert(mr.word_size() > 0, "Precondition");
 305   while (true) {
 306     jint index = _index;
 307 
 308     if (index >= _capacity) {
 309       _overflow = true;
 310       return;
 311     }
 312     // Otherwise...
 313     jint next_index = index+1;
 314     jint res = Atomic::cmpxchg(next_index, &_index, index);
 315     if (res == index) {
 316       _base[index] = mr;
 317       return;
 318     }
 319     // Otherwise, we need to try again.
 320   }
 321 }
 322 
 323 // Lock-free pop of the region stack. Called during the concurrent
 324 // marking / remark phases. Should only be called in tandem with
 325 // other lock-free pops.
 326 MemRegion CMRegionStack::pop_lock_free() {
 327   while (true) {
 328     jint index = _index;
 329 
 330     if (index == 0) {
 331       return MemRegion();
 332     }
 333     // Otherwise...
 334     jint next_index = index-1;
 335     jint res = Atomic::cmpxchg(next_index, &_index, index);
 336     if (res == index) {
 337       MemRegion mr = _base[next_index];
 338       if (mr.start() != NULL) {
 339         assert(mr.end() != NULL, "invariant");
 340         assert(mr.word_size() > 0, "invariant");
 341         return mr;
 342       } else {
 343         // that entry was invalidated... let's skip it
 344         assert(mr.end() == NULL, "invariant");
 345       }
 346     }
 347     // Otherwise, we need to try again.
 348   }
 349 }
 350 
 351 #if 0
 352 // The routines that manipulate the region stack with a lock are
 353 // not currently used. They should be retained, however, as a
 354 // diagnostic aid.
 355 
 356 void CMRegionStack::push_with_lock(MemRegion mr) {
 357   assert(mr.word_size() > 0, "Precondition");
 358   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 359 
 360   if (isFull()) {
 361     _overflow = true;
 362     return;
 363   }
 364 
 365   _base[_index] = mr;
 366   _index += 1;
 367 }
 368 
 369 MemRegion CMRegionStack::pop_with_lock() {
 370   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 371 
 372   while (true) {
 373     if (_index == 0) {
 374       return MemRegion();
 375     }
 376     _index -= 1;
 377 
 378     MemRegion mr = _base[_index];
 379     if (mr.start() != NULL) {
 380       assert(mr.end() != NULL, "invariant");
 381       assert(mr.word_size() > 0, "invariant");
 382       return mr;
 383     } else {
 384       // that entry was invalidated... let's skip it
 385       assert(mr.end() == NULL, "invariant");
 386     }
 387   }
 388 }
 389 #endif
 390 
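// Walk the region stack entries up to _oops_do_bound and invalidate
// (replace with an empty MemRegion) any entry whose start address lies
// in a region that is now in the collection set. Returns true if at
// least one entry was invalidated.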
 391 bool CMRegionStack::invalidate_entries_into_cset() {
 392   bool result = false;
 393   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 394   for (int i = 0; i < _oops_do_bound; ++i) {
 395     MemRegion mr = _base[i];
 396     if (mr.start() != NULL) {
 397       assert(mr.end() != NULL, "invariant");
 398       assert(mr.word_size() > 0, "invariant");
 399       HeapRegion* hr = g1h->heap_region_containing(mr.start());
 400       assert(hr != NULL, "invariant");
 401       if (hr->in_collection_set()) {
 402         // The region points into the collection set
 403         _base[i] = MemRegion();
 404         result = true;
 405       }
 406     } else {
 407       // that entry was invalidated... let's skip it
 408       assert(mr.end() == NULL, "invariant");
 409     }
 410   }
 411   return result;
 412 }
 413 
 414 template<class OopClosureClass>
 415 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 416   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 417          || SafepointSynchronize::is_at_safepoint(),
 418          "Drain recursion must be yield-safe.");
 419   bool res = true;
 420   debug_only(_drain_in_progress = true);
 421   debug_only(_drain_in_progress_yields = yield_after);
 422   while (!isEmpty()) {
 423     oop newOop = pop();
 424     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 425     assert(newOop->is_oop(), "Expected an oop");
 426     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 427            "only grey objects on this stack");
 428     // iterate over the oops in this oop, marking and pushing
 429     // the ones in CMS generation.
 430     newOop->oop_iterate(cl);
 431     if (yield_after && _cm->do_yield_check()) {
 432       res = false;
 433       break;
 434     }
 435   }
 436   debug_only(_drain_in_progress = false);
 437   return res;
 438 }
 439 
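// Apply the closure to the first _oops_do_bound entries of the mark
// stack; the bound must have been set beforehand and is reset here.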
 440 void CMMarkStack::oops_do(OopClosure* f) {
 441   if (_index == 0) return;
 442   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
 443          "Bound must be set.");
 444   for (int i = 0; i < _oops_do_bound; i++) {
 445     f->do_oop(&_base[i]);
 446   }
 447   _oops_do_bound = -1;
 448 }
 449 
 450 bool ConcurrentMark::not_yet_marked(oop obj) const {
 451   return (_g1h->is_obj_ill(obj)
 452           || (_g1h->is_in_permanent(obj)
 453               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
 454 }
 455 
 456 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 457 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 458 #endif // _MSC_VER
 459 
 460 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
 461                                int max_regions) :
 462   _markBitMap1(rs, MinObjAlignment - 1),
 463   _markBitMap2(rs, MinObjAlignment - 1),
 464 
 465   _parallel_marking_threads(0),
 466   _sleep_factor(0.0),
 467   _marking_task_overhead(1.0),
 468   _cleanup_sleep_factor(0.0),
 469   _cleanup_task_overhead(1.0),
 470   _cleanup_list("Cleanup List"),
 471   _region_bm(max_regions, false /* in_resource_area*/),
 472   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
 473            CardTableModRefBS::card_shift,
 474            false /* in_resource_area*/),
 475   _prevMarkBitMap(&_markBitMap1),
 476   _nextMarkBitMap(&_markBitMap2),
 477   _at_least_one_mark_complete(false),
 478 
 479   _markStack(this),
 480   _regionStack(),
 481   // _finger set in set_non_marking_state
 482 
 483   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
 484   // _active_tasks set in set_non_marking_state
 485   // _tasks set inside the constructor
 486   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
 487   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
 488 
 489   _has_overflown(false),
 490   _concurrent(false),
 491   _has_aborted(false),
 492   _restart_for_overflow(false),
 493   _concurrent_marking_in_progress(false),
 494   _should_gray_objects(false),
 495 
 496   // _verbose_level set below
 497 
 498   _init_times(),
 499   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 500   _cleanup_times(),
 501   _total_counting_time(0.0),
 502   _total_rs_scrub_time(0.0),
 503 
 504   _parallel_workers(NULL) {
 505   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 506   if (verbose_level < no_verbose) {
 507     verbose_level = no_verbose;
 508   }
 509   if (verbose_level > high_verbose) {
 510     verbose_level = high_verbose;
 511   }
 512   _verbose_level = verbose_level;
 513 
 514   if (verbose_low()) {
 515     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 516                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
 517   }
 518 
 519   _markStack.allocate(MarkStackSize);
 520   _regionStack.allocate(G1MarkRegionStackSize);
 521 
 522   // Create & start a ConcurrentMark thread.
 523   _cmThread = new ConcurrentMarkThread(this);
 524   assert(cmThread() != NULL, "CM Thread should have been created");
 525   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 526 
 527   _g1h = G1CollectedHeap::heap();
 528   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 529   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
 530   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
 531 
 532   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 533   satb_qs.set_buffer_size(G1SATBBufferSize);
 534 
 535   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
 536   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
 537 
 538   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 539   _active_tasks = _max_task_num;
 540   for (int i = 0; i < (int) _max_task_num; ++i) {
 541     CMTaskQueue* task_queue = new CMTaskQueue();
 542     task_queue->initialize();
 543     _task_queues->register_queue(i, task_queue);
 544 
 545     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
 546     _accum_task_vtime[i] = 0.0;
 547   }
 548 
 549   if (ConcGCThreads > ParallelGCThreads) {
 550     vm_exit_during_initialization("Can't have more ConcGCThreads "
 551                                   "than ParallelGCThreads.");
 552   }
 553   if (ParallelGCThreads == 0) {
 554     // if we are not running with any parallel GC threads we will not
 555     // spawn any marking threads either
    _parallel_marking_threads = 0;
 557     _sleep_factor             = 0.0;
 558     _marking_task_overhead    = 1.0;
 559   } else {
 560     if (ConcGCThreads > 0) {
      // notice that ConcGCThreads overrides G1MarkingOverheadPercent
 562       // if both are set
 563 
 564       _parallel_marking_threads = ConcGCThreads;
 565       _sleep_factor             = 0.0;
 566       _marking_task_overhead    = 1.0;
 567     } else if (G1MarkingOverheadPercent > 0) {
 568       // we will calculate the number of parallel marking threads
 569       // based on a target overhead with respect to the soft real-time
 570       // goal
 571 
 572       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 573       double overall_cm_overhead =
 574         (double) MaxGCPauseMillis * marking_overhead /
 575         (double) GCPauseIntervalMillis;
 576       double cpu_ratio = 1.0 / (double) os::processor_count();
 577       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 578       double marking_task_overhead =
 579         overall_cm_overhead / marking_thread_num *
 580                                                 (double) os::processor_count();
 581       double sleep_factor =
 582                          (1.0 - marking_task_overhead) / marking_task_overhead;
 583 
 584       _parallel_marking_threads = (size_t) marking_thread_num;
 585       _sleep_factor             = sleep_factor;
 586       _marking_task_overhead    = marking_task_overhead;
 587     } else {
 588       _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
 589       _sleep_factor             = 0.0;
 590       _marking_task_overhead    = 1.0;
 591     }
 592 
 593     if (parallel_marking_threads() > 1) {
 594       _cleanup_task_overhead = 1.0;
 595     } else {
 596       _cleanup_task_overhead = marking_task_overhead();
 597     }
 598     _cleanup_sleep_factor =
 599                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 600 
 601 #if 0
 602     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 603     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 604     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 605     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 606     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 607 #endif
 608 
 609     guarantee(parallel_marking_threads() > 0, "peace of mind");
 610     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
 611          (int) _parallel_marking_threads, false, true);
 612     if (_parallel_workers == NULL) {
 613       vm_exit_during_initialization("Failed necessary allocation.");
 614     } else {
 615       _parallel_workers->initialize_workers();
 616     }
 617   }
 618 
 619   // so that the call below can read a sensible value
 620   _heap_start = (HeapWord*) rs.base();
 621   set_non_marking_state();
 622 }
 623 
 624 void ConcurrentMark::update_g1_committed(bool force) {
 625   // If concurrent marking is not in progress, then we do not need to
 626   // update _heap_end. This has a subtle and important
 627   // side-effect. Imagine that two evacuation pauses happen between
 628   // marking completion and remark. The first one can grow the
 629   // heap (hence now the finger is below the heap end). Then, the
 630   // second one could unnecessarily push regions on the region
 631   // stack. This causes the invariant that the region stack is empty
  // at the beginning of remark to be false. By ensuring that we do
  // not observe heap expansions after marking is complete, we avoid
  // this problem.
 635   if (!concurrent_marking_in_progress() && !force) return;
 636 
 637   MemRegion committed = _g1h->g1_committed();
 638   assert(committed.start() == _heap_start, "start shouldn't change");
 639   HeapWord* new_end = committed.end();
 640   if (new_end > _heap_end) {
 641     // The heap has been expanded.
 642 
 643     _heap_end = new_end;
 644   }
 645   // Notice that the heap can also shrink. However, this only happens
 646   // during a Full GC (at least currently) and the entire marking
 647   // phase will bail out and the task will not be restarted. So, let's
 648   // do nothing.
 649 }
 650 
 651 void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
 656   MemRegion committed = _g1h->g1_committed();
 657   _heap_start = committed.start();
 658   _heap_end   = committed.end();
 659 
 660   // Separated the asserts so that we know which one fires.
 661   assert(_heap_start != NULL, "heap bounds should look ok");
 662   assert(_heap_end != NULL, "heap bounds should look ok");
 663   assert(_heap_start < _heap_end, "heap bounds should look ok");
 664 
 665   // reset all the marking data structures and any necessary flags
 666   clear_marking_state();
 667 
 668   if (verbose_low()) {
 669     gclog_or_tty->print_cr("[global] resetting");
 670   }
 671 
 672   // We do reset all of them, since different phases will use
 673   // different number of active threads. So, it's easiest to have all
 674   // of them ready.
 675   for (int i = 0; i < (int) _max_task_num; ++i) {
 676     _tasks[i]->reset(_nextMarkBitMap);
 677   }
 678 
 679   // we need this to make sure that the flag is on during the evac
 680   // pause with initial mark piggy-backed
 681   set_concurrent_marking_in_progress();
 682 }
 683 
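// Set the number of active tasks for the upcoming phase, resize the
// terminator and the two overflow barriers accordingly, and record
// whether the phase runs concurrently (marking) or at a safepoint
// (remark).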
 684 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
 685   assert(active_tasks <= _max_task_num, "we should not have more");
 686 
 687   _active_tasks = active_tasks;
 688   // Need to update the three data structures below according to the
 689   // number of active threads for this phase.
 690   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 691   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 692   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 693 
 694   _concurrent = concurrent;
 695   // We propagate this to all tasks, not just the active ones.
 696   for (int i = 0; i < (int) _max_task_num; ++i)
 697     _tasks[i]->set_concurrent(concurrent);
 698 
 699   if (concurrent) {
 700     set_concurrent_marking_in_progress();
 701   } else {
 702     // We currently assume that the concurrent flag has been set to
 703     // false before we start remark. At this point we should also be
 704     // in a STW phase.
 705     assert(!concurrent_marking_in_progress(), "invariant");
 706     assert(_finger == _heap_end, "only way to get here");
 707     update_g1_committed(true);
 708   }
 709 }
 710 
 711 void ConcurrentMark::set_non_marking_state() {
 712   // We set the global marking state to some default values when we're
 713   // not doing marking.
 714   clear_marking_state();
 715   _active_tasks = 0;
 716   clear_concurrent_marking_in_progress();
 717 }
 718 
 719 ConcurrentMark::~ConcurrentMark() {
 720   for (int i = 0; i < (int) _max_task_num; ++i) {
 721     delete _task_queues->queue(i);
 722     delete _tasks[i];
 723   }
 724   delete _task_queues;
 725   FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
 726 }
 727 
 728 // This closure is used to mark refs into the g1 generation
 729 // from external roots in the CMS bit map.
 730 // Called at the first checkpoint.
 731 //
 732 
 733 void ConcurrentMark::clearNextBitmap() {
 734   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 735   G1CollectorPolicy* g1p = g1h->g1_policy();
 736 
 737   // Make sure that the concurrent mark thread looks to still be in
 738   // the current cycle.
 739   guarantee(cmThread()->during_cycle(), "invariant");
 740 
 741   // We are finishing up the current cycle by clearing the next
 742   // marking bitmap and getting it ready for the next cycle. During
 743   // this time no other cycle can start. So, let's make sure that this
 744   // is the case.
 745   guarantee(!g1h->mark_in_progress(), "invariant");
 746 
 747   // clear the mark bitmap (no grey objects to start with).
 748   // We need to do this in chunks and offer to yield in between
 749   // each chunk.
 750   HeapWord* start  = _nextMarkBitMap->startWord();
 751   HeapWord* end    = _nextMarkBitMap->endWord();
 752   HeapWord* cur    = start;
 753   size_t chunkSize = M;
 754   while (cur < end) {
 755     HeapWord* next = cur + chunkSize;
 756     if (next > end) {
 757       next = end;
 758     }
 759     MemRegion mr(cur,next);
 760     _nextMarkBitMap->clearRange(mr);
 761     cur = next;
 762     do_yield_check();
 763 
 764     // Repeat the asserts from above. We'll do them as asserts here to
 765     // minimize their overhead on the product. However, we'll have
 766     // them as guarantees at the beginning / end of the bitmap
 767     // clearing to get some checking in the product.
 768     assert(cmThread()->during_cycle(), "invariant");
 769     assert(!g1h->mark_in_progress(), "invariant");
 770   }
 771 
 772   // Repeat the asserts from above.
 773   guarantee(cmThread()->during_cycle(), "invariant");
 774   guarantee(!g1h->mark_in_progress(), "invariant");
 775 }
 776 
 777 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 778 public:
 779   bool doHeapRegion(HeapRegion* r) {
 780     if (!r->continuesHumongous()) {
 781       r->note_start_of_marking(true);
 782     }
 783     return false;
 784   }
 785 };
 786 
 787 void ConcurrentMark::checkpointRootsInitialPre() {
 788   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 789   G1CollectorPolicy* g1p = g1h->g1_policy();
 790 
 791   _has_aborted = false;
 792 
 793 #ifndef PRODUCT
 794   if (G1PrintReachableAtInitialMark) {
 795     print_reachable("at-cycle-start",
 796                     VerifyOption_G1UsePrevMarking, true /* all */);
 797   }
 798 #endif
 799 
 800   // Initialise marking structures. This has to be done in a STW phase.
 801   reset();
 802 }
 803 
 804 class CMMarkRootsClosure: public OopsInGenClosure {
 805 private:
 806   ConcurrentMark*  _cm;
 807   G1CollectedHeap* _g1h;
 808   bool             _do_barrier;
 809 
 810 public:
 811   CMMarkRootsClosure(ConcurrentMark* cm,
 812                      G1CollectedHeap* g1h,
 813                      bool do_barrier) : _cm(cm), _g1h(g1h),
 814                                         _do_barrier(do_barrier) { }
 815 
 816   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
 817   virtual void do_oop(      oop* p) { do_oop_work(p); }
 818 
 819   template <class T> void do_oop_work(T* p) {
 820     T heap_oop = oopDesc::load_heap_oop(p);
 821     if (!oopDesc::is_null(heap_oop)) {
 822       oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
 823       assert(obj->is_oop() || obj->mark() == NULL,
 824              "expected an oop, possibly with mark word displaced");
 825       HeapWord* addr = (HeapWord*)obj;
 826       if (_g1h->is_in_g1_reserved(addr)) {
 827         _cm->grayRoot(obj);
 828       }
 829     }
 830     if (_do_barrier) {
 831       assert(!_g1h->is_in_g1_reserved(p),
 832              "Should be called on external roots");
 833       do_barrier(p);
 834     }
 835   }
 836 };
 837 
 838 void ConcurrentMark::checkpointRootsInitialPost() {
 839   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 840 
 841   // If we force an overflow during remark, the remark operation will
 842   // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow count will decrease at
  // every remark and we'll eventually not need to cause one.
 847   force_overflow_stw()->init();
 848 
 849   // For each region note start of marking.
 850   NoteStartOfMarkHRClosure startcl;
 851   g1h->heap_region_iterate(&startcl);
 852 
 853   // Start Concurrent Marking weak-reference discovery.
 854   ReferenceProcessor* rp = g1h->ref_processor_cm();
 855   assert(!rp->discovery_enabled(), "Precondition");
 856   rp->verify_no_references_recorded();
 857   rp->enable_discovery(); // enable ("weak") refs discovery
 858   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 859 
 860   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
 863   satb_mq_set.set_active_all_threads(true, /* new active value */
 864                                      false /* expected_active */);
 865 
 866   // update_g1_committed() will be called at the end of an evac pause
 867   // when marking is on. So, it's also called at the end of the
 868   // initial-mark pause to update the heap end, if the heap expands
 869   // during it. No need to call it here.
 870 }
 871 
 872 // Checkpoint the roots into this generation from outside
 873 // this generation. [Note this initial checkpoint need only
 874 // be approximate -- we'll do a catch up phase subsequently.]
 875 void ConcurrentMark::checkpointRootsInitial() {
 876   assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
 877   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 878 
 879   double start = os::elapsedTime();
 880 
 881   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
 882   g1p->record_concurrent_mark_init_start();
 883   checkpointRootsInitialPre();
 884 
 885   // YSR: when concurrent precleaning is in place, we'll
 886   // need to clear the cached card table here
 887 
 888   ResourceMark rm;
 889   HandleMark  hm;
 890 
 891   g1h->ensure_parsability(false);
 892   g1h->perm_gen()->save_marks();
 893 
 894   CMMarkRootsClosure notOlder(this, g1h, false);
 895   CMMarkRootsClosure older(this, g1h, true);
 896 
 897   g1h->set_marking_started();
 898   g1h->rem_set()->prepare_for_younger_refs_iterate(false);
 899 
 900   g1h->process_strong_roots(true,    // activate StrongRootsScope
 901                             false,   // fake perm gen collection
 902                             SharedHeap::SO_AllClasses,
 903                             &notOlder, // Regular roots
 904                             NULL,     // do not visit active blobs
 905                             &older    // Perm Gen Roots
 906                             );
 907   checkpointRootsInitialPost();
 908 
 909   // Statistics.
 910   double end = os::elapsedTime();
 911   _init_times.add((end - start) * 1000.0);
 912 
 913   g1p->record_concurrent_mark_init_end();
 914 }
 915 
 916 /*
 917  * Notice that in the next two methods, we actually leave the STS
 918  * during the barrier sync and join it immediately afterwards. If we
 919  * do not do this, the following deadlock can occur: one thread could
 920  * be in the barrier sync code, waiting for the other thread to also
 921  * sync up, whereas another one could be trying to yield, while also
 922  * waiting for the other threads to sync up too.
 923  *
 924  * Note, however, that this code is also used during remark and in
 925  * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 927  * (product). So we should only leave / enter the STS if we are
 928  * operating concurrently.
 929  *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 935  */
 936 
 937 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
 938   if (verbose_low()) {
 939     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
 940   }
 941 
 942   if (concurrent()) {
 943     ConcurrentGCThread::stsLeave();
 944   }
 945   _first_overflow_barrier_sync.enter();
 946   if (concurrent()) {
 947     ConcurrentGCThread::stsJoin();
 948   }
 949   // at this point everyone should have synced up and not be doing any
 950   // more work
 951 
 952   if (verbose_low()) {
 953     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
 954   }
 955 
 956   // let task 0 do this
 957   if (task_num == 0) {
 958     // task 0 is responsible for clearing the global data structures
 959     // We should be here because of an overflow. During STW we should
 960     // not clear the overflow flag since we rely on it being true when
    // we exit this method to abort the pause and restart concurrent
 962     // marking.
 963     clear_marking_state(concurrent() /* clear_overflow */);
 964     force_overflow()->update();
 965 
 966     if (PrintGC) {
 967       gclog_or_tty->date_stamp(PrintGCDateStamps);
 968       gclog_or_tty->stamp(PrintGCTimeStamps);
 969       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 970     }
 971   }
 972 
  // after this, each task should reset its own data structures and
  // then go into the second barrier
 975 }
 976 
 977 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
 978   if (verbose_low()) {
 979     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
 980   }
 981 
 982   if (concurrent()) {
 983     ConcurrentGCThread::stsLeave();
 984   }
 985   _second_overflow_barrier_sync.enter();
 986   if (concurrent()) {
 987     ConcurrentGCThread::stsJoin();
 988   }
 989   // at this point everything should be re-initialised and ready to go
 990 
 991   if (verbose_low()) {
 992     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
 993   }
 994 }
 995 
 996 #ifndef PRODUCT
 997 void ForceOverflowSettings::init() {
 998   _num_remaining = G1ConcMarkForceOverflow;
 999   _force = false;
1000   update();
1001 }
1002 
1003 void ForceOverflowSettings::update() {
1004   if (_num_remaining > 0) {
1005     _num_remaining -= 1;
1006     _force = true;
1007   } else {
1008     _force = false;
1009   }
1010 }
1011 
1012 bool ForceOverflowSettings::should_force() {
1013   if (_force) {
1014     _force = false;
1015     return true;
1016   } else {
1017     return false;
1018   }
1019 }
1020 #endif // !PRODUCT
1021 
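// Mark the given object on the next marking bitmap if it is not
// already marked (e.g. called from the initial-mark root closure to
// gray objects in the G1 heap).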
1022 void ConcurrentMark::grayRoot(oop p) {
1023   HeapWord* addr = (HeapWord*) p;
1024   // We can't really check against _heap_start and _heap_end, since it
1025   // is possible during an evacuation pause with piggy-backed
1026   // initial-mark that the committed space is expanded during the
  // pause without CM observing this change. So the assertion below
  // is a bit conservative; but better than nothing.
1029   assert(_g1h->g1_committed().contains(addr),
1030          "address should be within the heap bounds");
1031 
1032   if (!_nextMarkBitMap->isMarked(addr)) {
1033     _nextMarkBitMap->parMark(addr);
1034   }
1035 }
1036 
1037 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
1038   // The objects on the region have already been marked "in bulk" by
1039   // the caller. We only need to decide whether to push the region on
1040   // the region stack or not.
1041 
1042   if (!concurrent_marking_in_progress() || !_should_gray_objects) {
1043     // We're done with marking and waiting for remark. We do not need to
1044     // push anything else on the region stack.
1045     return;
1046   }
1047 
1048   HeapWord* finger = _finger;
1049 
1050   if (verbose_low()) {
1051     gclog_or_tty->print_cr("[global] attempting to push "
1052                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
1053                            PTR_FORMAT, mr.start(), mr.end(), finger);
1054   }
1055 
1056   if (mr.start() < finger) {
1057     // The finger is always heap region aligned and it is not possible
1058     // for mr to span heap regions.
1059     assert(mr.end() <= finger, "invariant");
1060 
1061     // Separated the asserts so that we know which one fires.
1062     assert(mr.start() <= mr.end(),
1063            "region boundaries should fall within the committed space");
1064     assert(_heap_start <= mr.start(),
1065            "region boundaries should fall within the committed space");
1066     assert(mr.end() <= _heap_end,
1067            "region boundaries should fall within the committed space");
1068     if (verbose_low()) {
1069       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
1070                              "below the finger, pushing it",
1071                              mr.start(), mr.end());
1072     }
1073 
1074     if (!region_stack_push_lock_free(mr)) {
1075       if (verbose_low()) {
1076         gclog_or_tty->print_cr("[global] region stack has overflown.");
1077       }
1078     }
1079   }
1080 }
1081 
1082 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1083   // The object is not marked by the caller. We need to at least mark
  // it and maybe push it on the stack.
1085 
1086   HeapWord* addr = (HeapWord*)p;
1087   if (!_nextMarkBitMap->isMarked(addr)) {
    // We definitely need to mark it, irrespective of whether we bail
    // out because we're done with marking.
1090     if (_nextMarkBitMap->parMark(addr)) {
1091       if (!concurrent_marking_in_progress() || !_should_gray_objects) {
1092         // If we're done with concurrent marking and we're waiting for
1093         // remark, then we're not pushing anything on the stack.
1094         return;
1095       }
1096 
      // No OrderAccess::store_load() is needed. It is implicit in the
1098       // CAS done in parMark(addr) above
1099       HeapWord* finger = _finger;
1100 
1101       if (addr < finger) {
1102         if (!mark_stack_push(oop(addr))) {
1103           if (verbose_low()) {
1104             gclog_or_tty->print_cr("[global] global stack overflow "
1105                                    "during parMark");
1106           }
1107         }
1108       }
1109     }
1110   }
1111 }
1112 
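// The gang task run by the parallel marking threads: each worker
// repeatedly invokes do_marking_step() on its CMTask. If a step aborts
// while marking as a whole has not, the worker sleeps for a period
// proportional to the elapsed virtual time and the sleep factor before
// retrying.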
1113 class CMConcurrentMarkingTask: public AbstractGangTask {
1114 private:
1115   ConcurrentMark*       _cm;
1116   ConcurrentMarkThread* _cmt;
1117 
1118 public:
1119   void work(int worker_i) {
1120     assert(Thread::current()->is_ConcurrentGC_thread(),
1121            "this should only be done by a conc GC thread");
1122     ResourceMark rm;
1123 
1124     double start_vtime = os::elapsedVTime();
1125 
1126     ConcurrentGCThread::stsJoin();
1127 
1128     assert((size_t) worker_i < _cm->active_tasks(), "invariant");
1129     CMTask* the_task = _cm->task(worker_i);
1130     the_task->record_start_time();
1131     if (!_cm->has_aborted()) {
1132       do {
1133         double start_vtime_sec = os::elapsedVTime();
1134         double start_time_sec = os::elapsedTime();
1135         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1136 
1137         the_task->do_marking_step(mark_step_duration_ms,
1138                                   true /* do_stealing    */,
1139                                   true /* do_termination */);
1140 
1141         double end_time_sec = os::elapsedTime();
1142         double end_vtime_sec = os::elapsedVTime();
1143         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1144         double elapsed_time_sec = end_time_sec - start_time_sec;
1145         _cm->clear_has_overflown();
1146 
1147         bool ret = _cm->do_yield_check(worker_i);
1148 
1149         jlong sleep_time_ms;
1150         if (!_cm->has_aborted() && the_task->has_aborted()) {
1151           sleep_time_ms =
1152             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1153           ConcurrentGCThread::stsLeave();
1154           os::sleep(Thread::current(), sleep_time_ms, false);
1155           ConcurrentGCThread::stsJoin();
1156         }
1157         double end_time2_sec = os::elapsedTime();
1158         double elapsed_time2_sec = end_time2_sec - start_time_sec;
1159 
1160 #if 0
1161           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1162                                  "overhead %1.4lf",
1163                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1164                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
1165           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1166                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1167 #endif
1168       } while (!_cm->has_aborted() && the_task->has_aborted());
1169     }
1170     the_task->record_end_time();
1171     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1172 
1173     ConcurrentGCThread::stsLeave();
1174 
1175     double end_vtime = os::elapsedVTime();
1176     _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1177   }
1178 
1179   CMConcurrentMarkingTask(ConcurrentMark* cm,
1180                           ConcurrentMarkThread* cmt) :
1181       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1182 
1183   ~CMConcurrentMarkingTask() { }
1184 };
1185 
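// Run the concurrent marking phase proper: size the set of active
// workers, reset the overflow-forcing state, and execute the
// CMConcurrentMarkingTask on the parallel workers (or inline in this
// thread if there are none).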
1186 void ConcurrentMark::markFromRoots() {
1187   // we might be tempted to assert that:
1188   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1189   //        "inconsistent argument?");
1190   // However that wouldn't be right, because it's possible that
1191   // a safepoint is indeed in progress as a younger generation
1192   // stop-the-world GC happens even as we mark in this generation.
1193 
1194   _restart_for_overflow = false;
1195 
1196   size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
1197   force_overflow_conc()->init();
1198   set_phase(active_workers, true /* concurrent */);
1199 
1200   CMConcurrentMarkingTask markingTask(this, cmThread());
1201   if (parallel_marking_threads() > 0) {
1202     _parallel_workers->run_task(&markingTask);
1203   } else {
1204     markingTask.work(0);
1205   }
1206   print_stats();
1207 }
1208 
1209 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1210   // world is stopped at this checkpoint
1211   assert(SafepointSynchronize::is_at_safepoint(),
1212          "world should be stopped");
1213 
1214   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1215 
1216   // If a full collection has happened, we shouldn't do this.
1217   if (has_aborted()) {
1218     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1219     return;
1220   }
1221 
1222   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1223 
1224   if (VerifyDuringGC) {
1225     HandleMark hm;  // handle scope
1226     gclog_or_tty->print(" VerifyDuringGC:(before)");
1227     Universe::heap()->prepare_for_verify();
1228     Universe::verify(/* allow dirty */ true,
1229                      /* silent      */ false,
1230                      /* option      */ VerifyOption_G1UsePrevMarking);
1231   }
1232 
1233   G1CollectorPolicy* g1p = g1h->g1_policy();
1234   g1p->record_concurrent_mark_remark_start();
1235 
1236   double start = os::elapsedTime();
1237 
1238   checkpointRootsFinalWork();
1239 
1240   double mark_work_end = os::elapsedTime();
1241 
1242   weakRefsWork(clear_all_soft_refs);
1243 
1244   if (has_overflown()) {
1245     // Oops.  We overflowed.  Restart concurrent marking.
1246     _restart_for_overflow = true;
1247     // Clear the flag. We do not need it any more.
1248     clear_has_overflown();
1249     if (G1TraceMarkStackOverflow) {
1250       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1251     }
1252   } else {
1253     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1254     // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
1257     satb_mq_set.set_active_all_threads(false, /* new active value */
1258                                        true /* expected_active */);
1259 
1260     if (VerifyDuringGC) {
1261       HandleMark hm;  // handle scope
1262       gclog_or_tty->print(" VerifyDuringGC:(after)");
1263       Universe::heap()->prepare_for_verify();
1264       Universe::verify(/* allow dirty */ true,
1265                        /* silent      */ false,
1266                        /* option      */ VerifyOption_G1UseNextMarking);
1267     }
1268     assert(!restart_for_overflow(), "sanity");
1269   }
1270 
1271   // Reset the marking state if marking completed
1272   if (!restart_for_overflow()) {
1273     set_non_marking_state();
1274   }
1275 
1276 #if VERIFY_OBJS_PROCESSED
1277   _scan_obj_cl.objs_processed = 0;
1278   ThreadLocalObjQueue::objs_enqueued = 0;
1279 #endif
1280 
1281   // Statistics
1282   double now = os::elapsedTime();
1283   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1284   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1285   _remark_times.add((now - start) * 1000.0);
1286 
1287   g1p->record_concurrent_mark_remark_end();
1288 }
1289 
1290 #define CARD_BM_TEST_MODE 0
1291 
1292 class CalcLiveObjectsClosure: public HeapRegionClosure {
1293 
1294   CMBitMapRO* _bm;
1295   ConcurrentMark* _cm;
1296   bool _changed;
1297   bool _yield;
1298   size_t _words_done;
1299   size_t _tot_live;
1300   size_t _tot_used;
1301   size_t _regions_done;
1302   double _start_vtime_sec;
1303 
1304   BitMap* _region_bm;
1305   BitMap* _card_bm;
1306   intptr_t _bottom_card_num;
1307   bool _final;
1308 
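  // Set, in the card bitmap, the bits for the inclusive range of card
  // numbers [start_card_num, last_card_num]; card numbers are absolute
  // and are translated to bitmap indices by subtracting the card
  // number of the bottom of the heap (in CARD_BM_TEST_MODE the bits
  // are only verified, not set).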
1309   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1310     for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1311 #if CARD_BM_TEST_MODE
1312       guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
1313 #else
1314       _card_bm->par_at_put(i - _bottom_card_num, 1);
1315 #endif
1316     }
1317   }
1318 
1319 public:
1320   CalcLiveObjectsClosure(bool final,
1321                          CMBitMapRO *bm, ConcurrentMark *cm,
1322                          BitMap* region_bm, BitMap* card_bm) :
1323     _bm(bm), _cm(cm), _changed(false), _yield(true),
1324     _words_done(0), _tot_live(0), _tot_used(0),
1325     _region_bm(region_bm), _card_bm(card_bm),_final(final),
1326     _regions_done(0), _start_vtime_sec(0.0)
1327   {
1328     _bottom_card_num =
1329       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1330                CardTableModRefBS::card_shift);
1331   }
1332 
  // It takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
1335   // bitmap to 1. If the region is "starts humongous" it will also set
1336   // to 1 the bits on the region bitmap that correspond to its
1337   // associated "continues humongous" regions.
1338   void set_bit_for_region(HeapRegion* hr) {
1339     assert(!hr->continuesHumongous(), "should have filtered those out");
1340 
1341     size_t index = hr->hrs_index();
1342     if (!hr->startsHumongous()) {
1343       // Normal (non-humongous) case: just set the bit.
1344       _region_bm->par_at_put((BitMap::idx_t) index, true);
1345     } else {
1346       // Starts humongous case: calculate how many regions are part of
1347       // this humongous region and then set the bit range. It might
1348       // have been a bit more efficient to look at the object that
1349       // spans these humongous regions to calculate their number from
1350       // the object's size. However, it's a good idea to calculate
1351       // this based on the metadata itself, and not the region
1352       // contents, so that this code is not aware of what goes into
1353       // the humongous regions (in case this changes in the future).
1354       G1CollectedHeap* g1h = G1CollectedHeap::heap();
1355       size_t end_index = index + 1;
1356       while (end_index < g1h->n_regions()) {
1357         HeapRegion* chr = g1h->region_at(end_index);
1358         if (!chr->continuesHumongous()) break;
1359         end_index += 1;
1360       }
1361       _region_bm->par_at_put_range((BitMap::idx_t) index,
1362                                    (BitMap::idx_t) end_index, true);
1363     }
1364   }
1365 
1366   bool doHeapRegion(HeapRegion* hr) {
1367     if (!_final && _regions_done == 0) {
1368       _start_vtime_sec = os::elapsedVTime();
1369     }
1370 
1371     if (hr->continuesHumongous()) {
1372       // We will ignore these here and process them when their
1373       // associated "starts humongous" region is processed (see
      // set_bit_for_region()). Note that we cannot rely on their
1375       // associated "starts humongous" region to have their bit set to
1376       // 1 since, due to the region chunking in the parallel region
1377       // iteration, a "continues humongous" region might be visited
1378       // before its associated "starts humongous".
1379       return false;
1380     }
1381 
1382     HeapWord* nextTop = hr->next_top_at_mark_start();
1383     HeapWord* start   = hr->top_at_conc_mark_count();
1384     assert(hr->bottom() <= start && start <= hr->end() &&
1385            hr->bottom() <= nextTop && nextTop <= hr->end() &&
1386            start <= nextTop,
1387            "Preconditions.");
    // Record the number of words we'll examine.
1389     size_t words_done = (nextTop - start);
1390     // Find the first marked object at or after "start".
1391     start = _bm->getNextMarkedWordAddress(start, nextTop);
1392     size_t marked_bytes = 0;
1393 
1394     // Below, the term "card num" means the result of shifting an address
1395     // by the card shift -- address 0 corresponds to card number 0.  One
1396     // must subtract the card num of the bottom of the heap to obtain a
1397     // card table index.
1398     // The first card num of the sequence of live cards currently being
1399     // constructed.  -1 ==> no sequence.
1400     intptr_t start_card_num = -1;
1401     // The last card num of the sequence of live cards currently being
1402     // constructed.  -1 ==> no sequence.
1403     intptr_t last_card_num = -1;
1404 
1405     while (start < nextTop) {
1406       if (_yield && _cm->do_yield_check()) {
1407         // We yielded.  It might be for a full collection, in which case
1408         // all bets are off; terminate the traversal.
1409         if (_cm->has_aborted()) {
1410           _changed = false;
1411           return true;
1412         } else {
1413           // Otherwise, it might be a collection pause, and the region
1414           // we're looking at might be in the collection set.  We'll
1415           // abandon this region.
1416           return false;
1417         }
1418       }
1419       oop obj = oop(start);
1420       int obj_sz = obj->size();
1421       // The card num of the start of the current object.
1422       intptr_t obj_card_num =
1423         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1424 
1425       HeapWord* obj_last = start + obj_sz - 1;
1426       intptr_t obj_last_card_num =
1427         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1428 
1429       if (obj_card_num != last_card_num) {
1430         if (start_card_num == -1) {
1431           assert(last_card_num == -1, "Both or neither.");
1432           start_card_num = obj_card_num;
1433         } else {
1434           assert(last_card_num != -1, "Both or neither.");
1435           assert(obj_card_num >= last_card_num, "Inv");
1436           if ((obj_card_num - last_card_num) > 1) {
1437             // Mark the last run, and start a new one.
1438             mark_card_num_range(start_card_num, last_card_num);
1439             start_card_num = obj_card_num;
1440           }
1441         }
1442 #if CARD_BM_TEST_MODE
1443         /*
1444         gclog_or_tty->print_cr("Setting bits from %d/%d.",
1445                                obj_card_num - _bottom_card_num,
1446                                obj_last_card_num - _bottom_card_num);
1447         */
1448         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1449           _card_bm->par_at_put(j - _bottom_card_num, 1);
1450         }
1451 #endif
1452       }
1453       // In any case, we set the last card num.
1454       last_card_num = obj_last_card_num;
1455 
1456       marked_bytes += (size_t)obj_sz * HeapWordSize;
1457       // Find the next marked object after this one.
1458       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1459       _changed = true;
1460     }
1461     // Handle the last range, if any.
1462     if (start_card_num != -1) {
1463       mark_card_num_range(start_card_num, last_card_num);
1464     }
1465     if (_final) {
1466       // Mark the allocated-since-marking portion...
1467       HeapWord* tp = hr->top();
1468       if (nextTop < tp) {
1469         start_card_num =
1470           intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1471         last_card_num =
1472           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1473         mark_card_num_range(start_card_num, last_card_num);
1474         // This definitely means the region has live objects.
1475         set_bit_for_region(hr);
1476       }
1477     }
1478 
1479     hr->add_to_marked_bytes(marked_bytes);
1480     // Update the live region bitmap.
1481     if (marked_bytes > 0) {
1482       set_bit_for_region(hr);
1483     }
1484     hr->set_top_at_conc_mark_count(nextTop);
1485     _tot_live += hr->next_live_bytes();
1486     _tot_used += hr->used();
1487     _words_done = words_done;
1488 
1489     if (!_final) {
1490       ++_regions_done;
1491       if (_regions_done % 10 == 0) {
1492         double end_vtime_sec = os::elapsedVTime();
1493         double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1494         if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1495           jlong sleep_time_ms =
1496             (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1497           os::sleep(Thread::current(), sleep_time_ms, false);
1498           _start_vtime_sec = end_vtime_sec;
1499         }
1500       }
1501     }
1502 
1503     return false;
1504   }
1505 
1506   bool changed() { return _changed;  }
1507   void reset()   { _changed = false; _words_done = 0; }
1508   void no_yield() { _yield = false; }
1509   size_t words_done() { return _words_done; }
1510   size_t tot_live() { return _tot_live; }
1511   size_t tot_used() { return _tot_used; }
1512 };
1513 
1514 
1515 void ConcurrentMark::calcDesiredRegions() {
1516   _region_bm.clear();
1517   _card_bm.clear();
1518   CalcLiveObjectsClosure calccl(false /*final*/,
1519                                 nextMarkBitMap(), this,
1520                                 &_region_bm, &_card_bm);
1521   G1CollectedHeap *g1h = G1CollectedHeap::heap();
1522   g1h->heap_region_iterate(&calccl);
1523 
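       // Repeat the counting until a full pass over the heap observes no
       // further progress (i.e. the closure reports no change).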
1524   do {
1525     calccl.reset();
1526     g1h->heap_region_iterate(&calccl);
1527   } while (calccl.changed());
1528 }
1529 
1530 class G1ParFinalCountTask: public AbstractGangTask {
1531 protected:
1532   G1CollectedHeap* _g1h;
1533   CMBitMap* _bm;
1534   size_t _n_workers;
1535   size_t *_live_bytes;
1536   size_t *_used_bytes;
1537   BitMap* _region_bm;
1538   BitMap* _card_bm;
1539 public:
1540   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1541                       BitMap* region_bm, BitMap* card_bm)
1542     : AbstractGangTask("G1 final counting"), _g1h(g1h),
1543       _bm(bm), _region_bm(region_bm), _card_bm(card_bm) {
1544     if (ParallelGCThreads > 0) {
1545       _n_workers = _g1h->workers()->total_workers();
1546     } else {
1547       _n_workers = 1;
1548     }
1549     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1550     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1551   }
1552 
1553   ~G1ParFinalCountTask() {
1554     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1555     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1556   }
1557 
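       // Each worker claims regions (in chunks when running in parallel) and
       // performs the final, non-yielding liveness count, recording its own
       // live/used byte totals in the per-worker arrays.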
1558   void work(int i) {
1559     CalcLiveObjectsClosure calccl(true /*final*/,
1560                                   _bm, _g1h->concurrent_mark(),
1561                                   _region_bm, _card_bm);
1562     calccl.no_yield();
1563     if (G1CollectedHeap::use_parallel_gc_threads()) {
1564       _g1h->heap_region_par_iterate_chunked(&calccl, i,
1565                                             HeapRegion::FinalCountClaimValue);
1566     } else {
1567       _g1h->heap_region_iterate(&calccl);
1568     }
1569     assert(calccl.complete(), "Shouldn't have yielded!");
1570 
1571     assert((size_t) i < _n_workers, "invariant");
1572     _live_bytes[i] = calccl.tot_live();
1573     _used_bytes[i] = calccl.tot_used();
1574   }
1575   size_t live_bytes()  {
1576     size_t live_bytes = 0;
1577     for (size_t i = 0; i < _n_workers; ++i)
1578       live_bytes += _live_bytes[i];
1579     return live_bytes;
1580   }
1581   size_t used_bytes()  {
1582     size_t used_bytes = 0;
1583     for (size_t i = 0; i < _n_workers; ++i)
1584       used_bytes += _used_bytes[i];
1585     return used_bytes;
1586   }
1587 };
1588 
1589 class G1ParNoteEndTask;
1590 
1591 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1592   G1CollectedHeap* _g1;
1593   int _worker_num;
1594   size_t _max_live_bytes;
1595   size_t _regions_claimed;
1596   size_t _freed_bytes;
1597   FreeRegionList* _local_cleanup_list;
1598   HumongousRegionSet* _humongous_proxy_set;
1599   HRRSCleanupTask* _hrrs_cleanup_task;
1600   double _claimed_region_time;
1601   double _max_region_time;
1602 
1603 public:
1604   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1605                              int worker_num,
1606                              FreeRegionList* local_cleanup_list,
1607                              HumongousRegionSet* humongous_proxy_set,
1608                              HRRSCleanupTask* hrrs_cleanup_task);
1609   size_t freed_bytes() { return _freed_bytes; }
1610 
1611   bool doHeapRegion(HeapRegion *r);
1612 
1613   size_t max_live_bytes() { return _max_live_bytes; }
1614   size_t regions_claimed() { return _regions_claimed; }
1615   double claimed_region_time_sec() { return _claimed_region_time; }
1616   double max_region_time_sec() { return _max_region_time; }
1617 };
1618 
1619 class G1ParNoteEndTask: public AbstractGangTask {
1620   friend class G1NoteEndOfConcMarkClosure;
1621 
1622 protected:
1623   G1CollectedHeap* _g1h;
1624   size_t _max_live_bytes;
1625   size_t _freed_bytes;
1626   FreeRegionList* _cleanup_list;
1627 
1628 public:
1629   G1ParNoteEndTask(G1CollectedHeap* g1h,
1630                    FreeRegionList* cleanup_list) :
1631     AbstractGangTask("G1 note end"), _g1h(g1h),
1632     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1633 
1634   void work(int i) {
1635     double start = os::elapsedTime();
1636     FreeRegionList local_cleanup_list("Local Cleanup List");
1637     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1638     HRRSCleanupTask hrrs_cleanup_task;
1639     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
1640                                            &humongous_proxy_set,
1641                                            &hrrs_cleanup_task);
1642     if (G1CollectedHeap::use_parallel_gc_threads()) {
1643       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1644                                             HeapRegion::NoteEndClaimValue);
1645     } else {
1646       _g1h->heap_region_iterate(&g1_note_end);
1647     }
1648     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1649 
1650     // Now update the lists
1651     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1652                                             NULL /* free_list */,
1653                                             &humongous_proxy_set,
1654                                             true /* par */);
1655     {
1656       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1657       _max_live_bytes += g1_note_end.max_live_bytes();
1658       _freed_bytes += g1_note_end.freed_bytes();
1659 
1660       // If we iterate over the global cleanup list at the end of
1661       // cleanup to do this printing, we cannot guarantee that we will only
1662       // generate output for the newly-reclaimed regions (the list
1663       // might not be empty at the beginning of cleanup; we might
1664       // still be working on its previous contents). So we do the
1665       // printing here, before we append the new regions to the global
1666       // cleanup list.
1667 
1668       G1HRPrinter* hr_printer = _g1h->hr_printer();
1669       if (hr_printer->is_active()) {
1670         HeapRegionLinkedListIterator iter(&local_cleanup_list);
1671         while (iter.more_available()) {
1672           HeapRegion* hr = iter.get_next();
1673           hr_printer->cleanup(hr);
1674         }
1675       }
1676 
1677       _cleanup_list->add_as_tail(&local_cleanup_list);
1678       assert(local_cleanup_list.is_empty(), "post-condition");
1679 
1680       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1681     }
1682     double end = os::elapsedTime();
1683     if (G1PrintParCleanupStats) {
1684       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1685                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1686                           i, start, end, (end-start)*1000.0,
1687                           g1_note_end.regions_claimed(),
1688                           g1_note_end.claimed_region_time_sec()*1000.0,
1689                           g1_note_end.max_region_time_sec()*1000.0);
1690     }
1691   }
1692   size_t max_live_bytes() { return _max_live_bytes; }
1693   size_t freed_bytes() { return _freed_bytes; }
1694 };
1695 
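     // Task that scrubs the remembered sets in parallel: using the region and
     // card liveness bitmaps produced by counting, entries that refer to
     // regions/cards with no live data are removed from each region's
     // remembered set.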
1696 class G1ParScrubRemSetTask: public AbstractGangTask {
1697 protected:
1698   G1RemSet* _g1rs;
1699   BitMap* _region_bm;
1700   BitMap* _card_bm;
1701 public:
1702   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1703                        BitMap* region_bm, BitMap* card_bm) :
1704     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1705     _region_bm(region_bm), _card_bm(card_bm)
1706   {}
1707 
1708   void work(int i) {
1709     if (G1CollectedHeap::use_parallel_gc_threads()) {
1710       _g1rs->scrub_par(_region_bm, _card_bm, i,
1711                        HeapRegion::ScrubRemSetClaimValue);
1712     } else {
1713       _g1rs->scrub(_region_bm, _card_bm);
1714     }
1715   }
1716 
1717 };
1718 
1719 G1NoteEndOfConcMarkClosure::
1720 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1721                            int worker_num,
1722                            FreeRegionList* local_cleanup_list,
1723                            HumongousRegionSet* humongous_proxy_set,
1724                            HRRSCleanupTask* hrrs_cleanup_task)
1725   : _g1(g1), _worker_num(worker_num),
1726     _max_live_bytes(0), _regions_claimed(0),
1727     _freed_bytes(0),
1728     _claimed_region_time(0.0), _max_region_time(0.0),
1729     _local_cleanup_list(local_cleanup_list),
1730     _humongous_proxy_set(humongous_proxy_set),
1731     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1732 
1733 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) {
1734   // We use a claim value of zero here because all regions
1735   // were claimed with value 1 in the FinalCount task.
1736   hr->reset_gc_time_stamp();
1737   if (!hr->continuesHumongous()) {
1738     double start = os::elapsedTime();
1739     _regions_claimed++;
1740     hr->note_end_of_marking();
1741     _max_live_bytes += hr->max_live_bytes();
1742     _g1->free_region_if_empty(hr,
1743                               &_freed_bytes,
1744                               _local_cleanup_list,
1745                               _humongous_proxy_set,
1746                               _hrrs_cleanup_task,
1747                               true /* par */);
1748     double region_time = (os::elapsedTime() - start);
1749     _claimed_region_time += region_time;
1750     if (region_time > _max_region_time) {
1751       _max_region_time = region_time;
1752     }
1753   }
1754   return false;
1755 }
1756 
1757 void ConcurrentMark::cleanup() {
1758   // world is stopped at this checkpoint
1759   assert(SafepointSynchronize::is_at_safepoint(),
1760          "world should be stopped");
1761   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1762 
1763   // If a full collection has happened, we shouldn't do this.
1764   if (has_aborted()) {
1765     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1766     return;
1767   }
1768 
1769   g1h->verify_region_sets_optional();
1770 
1771   if (VerifyDuringGC) {
1772     HandleMark hm;  // handle scope
1773     gclog_or_tty->print(" VerifyDuringGC:(before)");
1774     Universe::heap()->prepare_for_verify();
1775     Universe::verify(/* allow dirty */ true,
1776                      /* silent      */ false,
1777                      /* option      */ VerifyOption_G1UsePrevMarking);
1778   }
1779 
1780   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1781   g1p->record_concurrent_mark_cleanup_start();
1782 
1783   double start = os::elapsedTime();
1784 
1785   HeapRegionRemSet::reset_for_cleanup_tasks();
1786 
1787   // Do counting once more with the world stopped for good measure.
1788   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1789                                         &_region_bm, &_card_bm);
1790   if (G1CollectedHeap::use_parallel_gc_threads()) {
1791     assert(g1h->check_heap_region_claim_values(
1792                                                HeapRegion::InitialClaimValue),
1793            "sanity check");
1794 
1795     int n_workers = g1h->workers()->total_workers();
1796     g1h->set_par_threads(n_workers);
1797     g1h->workers()->run_task(&g1_par_count_task);
1798     g1h->set_par_threads(0);
1799 
1800     assert(g1h->check_heap_region_claim_values(
1801                                              HeapRegion::FinalCountClaimValue),
1802            "sanity check");
1803   } else {
1804     g1_par_count_task.work(0);
1805   }
1806 
1807   size_t known_garbage_bytes =
1808     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1809 #if 0
1810   gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
1811                          (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
1812                          (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
1813                          (double) known_garbage_bytes / (double) (1024 * 1024));
1814 #endif // 0
1815   g1p->set_known_garbage_bytes(known_garbage_bytes);
1816 
1817   size_t start_used_bytes = g1h->used();
1818   _at_least_one_mark_complete = true;
1819   g1h->set_marking_complete();
1820 
1821   double count_end = os::elapsedTime();
1822   double this_final_counting_time = (count_end - start);
1823   if (G1PrintParCleanupStats) {
1824     gclog_or_tty->print_cr("Cleanup:");
1825     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
1826                            this_final_counting_time*1000.0);
1827   }
1828   _total_counting_time += this_final_counting_time;
1829 
1830   if (G1PrintRegionLivenessInfo) {
1831     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1832     _g1h->heap_region_iterate(&cl);
1833   }
1834 
1835   // Install newly created mark bitMap as "prev".
1836   swapMarkBitMaps();
1837 
1838   g1h->reset_gc_time_stamp();
1839 
1840   // Note end of marking in all heap regions.
1841   double note_end_start = os::elapsedTime();
1842   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1843   if (G1CollectedHeap::use_parallel_gc_threads()) {
1844     int n_workers = g1h->workers()->total_workers();
1845     g1h->set_par_threads(n_workers);
1846     g1h->workers()->run_task(&g1_par_note_end_task);
1847     g1h->set_par_threads(0);
1848 
1849     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1850            "sanity check");
1851   } else {
1852     g1_par_note_end_task.work(0);
1853   }
1854 
1855   if (!cleanup_list_is_empty()) {
1856     // The cleanup list is not empty, so we'll have to process it
1857     // concurrently. Notify anyone else that might be wanting free
1858     // regions that there will be more free regions coming soon.
1859     g1h->set_free_regions_coming();
1860   }
1861   double note_end_end = os::elapsedTime();
1862   if (G1PrintParCleanupStats) {
1863     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
1864                            (note_end_end - note_end_start)*1000.0);
1865   }
1866 
1867   // Scrub the remembered sets. This must be done before the
1868   // record_concurrent_mark_cleanup_end() call below, since it affects
1869   // the metric by which we sort the heap regions.
1870   if (G1ScrubRemSets) {
1871     double rs_scrub_start = os::elapsedTime();
1872     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1873     if (G1CollectedHeap::use_parallel_gc_threads()) {
1874       int n_workers = g1h->workers()->total_workers();
1875       g1h->set_par_threads(n_workers);
1876       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1877       g1h->set_par_threads(0);
1878 
1879       assert(g1h->check_heap_region_claim_values(
1880                                             HeapRegion::ScrubRemSetClaimValue),
1881              "sanity check");
1882     } else {
1883       g1_par_scrub_rs_task.work(0);
1884     }
1885 
1886     double rs_scrub_end = os::elapsedTime();
1887     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1888     _total_rs_scrub_time += this_rs_scrub_time;
1889   }
1890 
1891   // this will also free any regions totally full of garbage objects,
1892   // and sort the regions.
1893   g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1894                         g1_par_note_end_task.freed_bytes(),
1895                         g1_par_note_end_task.max_live_bytes());
1896 
1897   // Statistics.
1898   double end = os::elapsedTime();
1899   _cleanup_times.add((end - start) * 1000.0);
1900 
1901   // G1CollectedHeap::heap()->print();
1902   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1903   // G1CollectedHeap::heap()->get_gc_time_stamp());
1904 
1905   if (PrintGC || PrintGCDetails) {
1906     g1h->print_size_transition(gclog_or_tty,
1907                                start_used_bytes,
1908                                g1h->used(),
1909                                g1h->capacity());
1910   }
1911 
1912   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1913   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1914 
1915   // Clean up will have freed any regions completely full of garbage.
1916   // Update the soft reference policy with the new heap occupancy.
1917   Universe::update_heap_info_at_gc();
1918 
1919   // We need to make this be a "collection" so any collection pause that
1920   // races with it goes around and waits for completeCleanup to finish.
1921   g1h->increment_total_collections();
1922 
1923   if (VerifyDuringGC) {
1924     HandleMark hm;  // handle scope
1925     gclog_or_tty->print(" VerifyDuringGC:(after)");
1926     Universe::heap()->prepare_for_verify();
1927     Universe::verify(/* allow dirty */ true,
1928                      /* silent      */ false,
1929                      /* option      */ VerifyOption_G1UsePrevMarking);
1930   }
1931 
1932   g1h->verify_region_sets_optional();
1933 }
1934 
1935 void ConcurrentMark::completeCleanup() {
1936   if (has_aborted()) return;
1937 
1938   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1939 
1940   _cleanup_list.verify_optional();
1941   FreeRegionList tmp_free_list("Tmp Free List");
1942 
1943   if (G1ConcRegionFreeingVerbose) {
1944     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1945                            "cleanup list has "SIZE_FORMAT" entries",
1946                            _cleanup_list.length());
1947   }
1948 
1949   // No one else should be accessing the _cleanup_list at this point,
1950   // so it's not necessary to take any locks.
1951   while (!_cleanup_list.is_empty()) {
1952     HeapRegion* hr = _cleanup_list.remove_head();
1953     assert(hr != NULL, "the list was not empty");
1954     hr->par_clear();
1955     tmp_free_list.add_as_tail(hr);
1956 
1957     // Instead of adding one region at a time to the secondary_free_list,
1958     // we accumulate them in the local list and move them a few at a
1959     // time. This also cuts down on the number of notify_all() calls
1960     // we do during this process. We'll also append the local list when
1961     // _cleanup_list is empty (which means we just removed the last
1962     // region from the _cleanup_list).
1963     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1964         _cleanup_list.is_empty()) {
1965       if (G1ConcRegionFreeingVerbose) {
1966         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1967                                "appending "SIZE_FORMAT" entries to the "
1968                                "secondary_free_list, clean list still has "
1969                                SIZE_FORMAT" entries",
1970                                tmp_free_list.length(),
1971                                _cleanup_list.length());
1972       }
1973 
1974       {
1975         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1976         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
1977         SecondaryFreeList_lock->notify_all();
1978       }
1979 
1980       if (G1StressConcRegionFreeing) {
1981         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1982           os::sleep(Thread::current(), (jlong) 1, false);
1983         }
1984       }
1985     }
1986   }
1987   assert(tmp_free_list.is_empty(), "post-condition");
1988 }
1989 
1990 // Support closures for reference processing in G1
1991 
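     // An object is considered live here if it lies outside the G1 reserved
     // space, or if is_obj_ill() reports that it is not dead with respect to
     // the marking currently in progress.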
1992 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1993   HeapWord* addr = (HeapWord*)obj;
1994   return addr != NULL &&
1995          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1996 }
1997 
1998 class G1CMKeepAliveClosure: public OopClosure {
1999   G1CollectedHeap* _g1;
2000   ConcurrentMark*  _cm;
2001   CMBitMap*        _bitMap;
2002  public:
2003   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
2004                        CMBitMap* bitMap) :
2005     _g1(g1), _cm(cm),
2006     _bitMap(bitMap) {}
2007 
2008   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2009   virtual void do_oop(      oop* p) { do_oop_work(p); }
2010 
2011   template <class T> void do_oop_work(T* p) {
2012     oop obj = oopDesc::load_decode_heap_oop(p);
2013     HeapWord* addr = (HeapWord*)obj;
2014 
2015     if (_cm->verbose_high()) {
2016       gclog_or_tty->print_cr("\t[0] we're looking at location "
2017                              "*"PTR_FORMAT" = "PTR_FORMAT,
2018                              p, (void*) obj);
2019     }
2020 
2021     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2022       _bitMap->mark(addr);
2023       _cm->mark_stack_push(obj);
2024     }
2025   }
2026 };
2027 
2028 class G1CMDrainMarkingStackClosure: public VoidClosure {
2029   CMMarkStack*                  _markStack;
2030   CMBitMap*                     _bitMap;
2031   G1CMKeepAliveClosure*         _oopClosure;
2032  public:
2033   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
2034                                G1CMKeepAliveClosure* oopClosure) :
2035     _markStack(markStack),
2036     _bitMap(bitMap),
2037     _oopClosure(oopClosure)
2038   {}
2039 
2040   void do_void() {
2041     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
2042   }
2043 };
2044 
2045 // 'Keep Alive' closure used by parallel reference processing.
2046 // An instance of this closure is used in the parallel reference processing
2047 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2048 // the G1CMKeepAliveClosure as it is MT-safe. Also, reference objects are
2049 // only placed on the discovered ref lists once, so we can mark and push with no
2050 // need to check whether the object has already been marked. Using the
2051 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2052 // operating on the global mark stack. This means that an individual
2053 // worker would be doing lock-free pushes while it processes its own
2054 // discovered ref list followed by a drain call. If the discovered ref lists
2055 // are unbalanced then this could cause interference with the other
2056 // workers. Using a CMTask (and its embedded local data structures)
2057 // avoids that potential interference.
2058 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2059   ConcurrentMark*  _cm;
2060   CMTask*          _task;
2061   CMBitMap*        _bitMap;
2062   int              _ref_counter_limit;
2063   int              _ref_counter;
2064  public:
2065   G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm,
2066                                   CMTask* task,
2067                                   CMBitMap* bitMap) :
2068     _cm(cm), _task(task), _bitMap(bitMap),
2069     _ref_counter_limit(G1RefProcDrainInterval)
2070   {
2071     assert(_ref_counter_limit > 0, "sanity");
2072     _ref_counter = _ref_counter_limit;
2073   }
2074 
2075   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2076   virtual void do_oop(      oop* p) { do_oop_work(p); }
2077 
2078   template <class T> void do_oop_work(T* p) {
2079     if (!_cm->has_overflown()) {
2080       oop obj = oopDesc::load_decode_heap_oop(p);
2081       if (_cm->verbose_high()) {
2082         gclog_or_tty->print_cr("\t[%d] we're looking at location "
2083                                "*"PTR_FORMAT" = "PTR_FORMAT,
2084                                _task->task_id(), p, (void*) obj);
2085       }
2086 
2087       _task->deal_with_reference(obj);
2088       _ref_counter--;
2089 
2090       if (_ref_counter == 0) {
2091         // We have dealt with _ref_counter_limit references, pushing them and objects
2092         // reachable from them on to the local stack (and possibly the global stack).
2093         // Call do_marking_step() to process these entries. We call the routine in a
2094         // loop, which we'll exit if there's nothing more to do (i.e. we're done
2095         // with the entries that we've pushed as a result of the deal_with_reference
2096         // calls above) or we overflow.
2097         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2098         // while there may still be some work to do. (See the comment at the
2099         // beginning of CMTask::do_marking_step() for those conditions - one of which
2100         // is reaching the specified time target.) It is only when
2101         // CMTask::do_marking_step() returns without setting the has_aborted() flag
2102         // that the marking has completed.
2103         do {
2104           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2105           _task->do_marking_step(mark_step_duration_ms,
2106                                  false /* do_stealing    */,
2107                                  false /* do_termination */);
2108         } while (_task->has_aborted() && !_cm->has_overflown());
2109         _ref_counter = _ref_counter_limit;
2110       }
2111     } else {
2112       if (_cm->verbose_high()) {
2113          gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2114       }
2115     }
2116   }
2117 };
2118 
2119 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2120   ConcurrentMark* _cm;
2121   CMTask* _task;
2122  public:
2123   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2124     _cm(cm), _task(task)
2125   {}
2126 
2127   void do_void() {
2128     do {
2129       if (_cm->verbose_high()) {
2130         gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2131                                _task->task_id());
2132       }
2133 
2134       // We call CMTask::do_marking_step() to completely drain the local and
2135       // global marking stacks. The routine is called in a loop, which we'll
2136       // exit if there's nothing more to do (i.e. we've completely drained the
2137       // entries that were pushed as a result of applying the
2138       // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2139       // lists above) or we overflow the global marking stack.
2140       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2141       // while there may still be some work to do. (See the comment at the
2142       // beginning of CMTask::do_marking_step() for those conditions - one of which
2143       // is reaching the specified time target.) It is only when
2144       // CMTask::do_marking_step() returns without setting the has_aborted() flag
2145       // that the marking has completed.
2146 
2147       _task->do_marking_step(1000000000.0 /* something very large */,
2148                              true /* do_stealing    */,
2149                              true /* do_termination */);
2150     } while (_task->has_aborted() && !_cm->has_overflown());
2151   }
2152 };
2153 
2154 // Implementation of AbstractRefProcTaskExecutor for parallel
2155 // reference processing at the end of G1 concurrent marking
2156 
2157 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2158 private:
2159   G1CollectedHeap* _g1h;
2160   ConcurrentMark*  _cm;
2161   CMBitMap*        _bitmap;
2162   WorkGang*        _workers;
2163   int              _active_workers;
2164 
2165 public:
2166   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2167                         ConcurrentMark* cm,
2168                         CMBitMap* bitmap,
2169                         WorkGang* workers,
2170                         int n_workers) :
2171     _g1h(g1h), _cm(cm), _bitmap(bitmap),
2172     _workers(workers), _active_workers(n_workers)
2173   { }
2174 
2175   // Executes the given task using concurrent marking worker threads.
2176   virtual void execute(ProcessTask& task);
2177   virtual void execute(EnqueueTask& task);
2178 };
2179 
2180 class G1CMRefProcTaskProxy: public AbstractGangTask {
2181   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2182   ProcessTask&     _proc_task;
2183   G1CollectedHeap* _g1h;
2184   ConcurrentMark*  _cm;
2185   CMBitMap*        _bitmap;
2186 
2187 public:
2188   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2189                      G1CollectedHeap* g1h,
2190                      ConcurrentMark* cm,
2191                      CMBitMap* bitmap) :
2192     AbstractGangTask("Process reference objects in parallel"),
2193     _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap)
2194   {}
2195 
2196   virtual void work(int i) {
2197     CMTask* marking_task = _cm->task(i);
2198     G1CMIsAliveClosure g1_is_alive(_g1h);
2199     G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap);
2200     G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2201 
2202     _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2203   }
2204 };
2205 
2206 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2207   assert(_workers != NULL, "Need parallel worker threads.");
2208 
2209   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap);
2210 
2211   // We need to reset the phase for each task execution so that
2212   // the termination protocol of CMTask::do_marking_step works.
2213   _cm->set_phase(_active_workers, false /* concurrent */);
2214   _g1h->set_par_threads(_active_workers);
2215   _workers->run_task(&proc_task_proxy);
2216   _g1h->set_par_threads(0);
2217 }
2218 
2219 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2220   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2221   EnqueueTask& _enq_task;
2222 
2223 public:
2224   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2225     AbstractGangTask("Enqueue reference objects in parallel"),
2226     _enq_task(enq_task)
2227   { }
2228 
2229   virtual void work(int i) {
2230     _enq_task.work(i);
2231   }
2232 };
2233 
2234 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2235   assert(_workers != NULL, "Need parallel worker threads.");
2236 
2237   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2238 
2239   _g1h->set_par_threads(_active_workers);
2240   _workers->run_task(&enq_task_proxy);
2241   _g1h->set_par_threads(0);
2242 }
2243 
2244 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2245   ResourceMark rm;
2246   HandleMark   hm;
2247   G1CollectedHeap* g1h   = G1CollectedHeap::heap();
2248   ReferenceProcessor* rp = g1h->ref_processor_cm();
2249 
2250   // See the comment in G1CollectedHeap::ref_processing_init()
2251   // about how reference processing currently works in G1.
2252 
2253   // Process weak references.
2254   rp->setup_policy(clear_all_soft_refs);
2255   assert(_markStack.isEmpty(), "mark stack should be empty");
2256 
2257   G1CMIsAliveClosure   g1_is_alive(g1h);
2258   G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
2259   G1CMDrainMarkingStackClosure
2260     g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
2261   // We use the work gang from the G1CollectedHeap and we utilize all
2262   // the worker threads.
2263   int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1;
2264   active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
2265 
2266   G1CMRefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(),
2267                                           g1h->workers(), active_workers);
2268 
2269 
2270   if (rp->processing_is_mt()) {
2271     // Set the degree of MT here.  If the discovery is done MT, there
2272     // may have been a different number of threads doing the discovery
2273     // and a different number of discovered lists may have Ref objects.
2274     // That is OK as long as the Reference lists are balanced (see
2275     // balance_all_queues() and balance_queues()).
2276     rp->set_active_mt_degree(active_workers);
2277 
2278     rp->process_discovered_references(&g1_is_alive,
2279                                       &g1_keep_alive,
2280                                       &g1_drain_mark_stack,
2281                                       &par_task_executor);
2282 
2283     // The work routines of the parallel keep_alive and drain_marking_stack
2284     // will set the has_overflown flag if we overflow the global marking
2285     // stack.
2286   } else {
2287     rp->process_discovered_references(&g1_is_alive,
2288                                       &g1_keep_alive,
2289                                       &g1_drain_mark_stack,
2290                                       NULL);
2291 
2292   }
2293 
2294   assert(_markStack.overflow() || _markStack.isEmpty(),
2295       "mark stack should be empty (unless it overflowed)");
2296   if (_markStack.overflow()) {
2297     // Should have been done already when we tried to push an
2298     // entry on to the global mark stack. But let's do it again.
2299     set_has_overflown();
2300   }
2301 
2302   if (rp->processing_is_mt()) {
2303     assert(rp->num_q() == active_workers, "why not");
2304     rp->enqueue_discovered_references(&par_task_executor);
2305   } else {
2306     rp->enqueue_discovered_references();
2307   }
2308 
2309   rp->verify_no_references_recorded();
2310   assert(!rp->discovery_enabled(), "Post condition");
2311 
2312   // Now clean up stale oops in StringTable
2313   StringTable::unlink(&g1_is_alive);
2314   // Clean up unreferenced symbols in symbol table.
2315   SymbolTable::unlink();
2316 }
2317 
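     // After marking completes, the "next" bitmap (which now holds the
     // results of the marking we just finished) becomes the "prev" bitmap;
     // the old "prev" bitmap is reused as "next" (and cleared) before the
     // following marking cycle.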
2318 void ConcurrentMark::swapMarkBitMaps() {
2319   CMBitMapRO* temp = _prevMarkBitMap;
2320   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2321   _nextMarkBitMap  = (CMBitMap*)  temp;
2322 }
2323 
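     // Gang task used during remark: each worker runs its CMTask to
     // completion, repeatedly calling do_marking_step() with stealing and
     // termination enabled until the task finishes or the global mark stack
     // overflows.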
2324 class CMRemarkTask: public AbstractGangTask {
2325 private:
2326   ConcurrentMark *_cm;
2327 
2328 public:
2329   void work(int worker_i) {
2330     // Since all available tasks are actually started, we should
2331     // only proceed if we're supposed to be active.
2332     if ((size_t)worker_i < _cm->active_tasks()) {
2333       CMTask* task = _cm->task(worker_i);
2334       task->record_start_time();
2335       do {
2336         task->do_marking_step(1000000000.0 /* something very large */,
2337                               true /* do_stealing    */,
2338                               true /* do_termination */);
2339       } while (task->has_aborted() && !_cm->has_overflown());
2340       // If we overflow, then we do not want to restart. We instead
2341       // want to abort remark and do concurrent marking again.
2342       task->record_end_time();
2343     }
2344   }
2345 
2346   CMRemarkTask(ConcurrentMark* cm) :
2347     AbstractGangTask("Par Remark"), _cm(cm) { }
2348 };
2349 
2350 void ConcurrentMark::checkpointRootsFinalWork() {
2351   ResourceMark rm;
2352   HandleMark   hm;
2353   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2354 
2355   g1h->ensure_parsability(false);
2356 
2357   if (G1CollectedHeap::use_parallel_gc_threads()) {
2358     G1CollectedHeap::StrongRootsScope srs(g1h);
2359     // this is remark, so we'll use up all available threads
2360     int active_workers = ParallelGCThreads;
2361     set_phase(active_workers, false /* concurrent */);
2362 
2363     CMRemarkTask remarkTask(this);
2364     // We will start all available threads, even if we decide that the
2365     // active_workers will be fewer. The extra ones will just bail out
2366     // immediately.
2367     int n_workers = g1h->workers()->total_workers();
2368     g1h->set_par_threads(n_workers);
2369     g1h->workers()->run_task(&remarkTask);
2370     g1h->set_par_threads(0);
2371   } else {
2372     G1CollectedHeap::StrongRootsScope srs(g1h);
2373     // this is remark, so we'll use up all available threads
2374     int active_workers = 1;
2375     set_phase(active_workers, false /* concurrent */);
2376 
2377     CMRemarkTask remarkTask(this);
2378     // We will start all available threads, even if we decide that the
2379     // active_workers will be fewer. The extra ones will just bail out
2380     // immediately.
2381     remarkTask.work(0);
2382   }
2383   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2384   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2385 
2386   print_stats();
2387 
2388 #if VERIFY_OBJS_PROCESSED
2389   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2390     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2391                            _scan_obj_cl.objs_processed,
2392                            ThreadLocalObjQueue::objs_enqueued);
2393     guarantee(_scan_obj_cl.objs_processed ==
2394               ThreadLocalObjQueue::objs_enqueued,
2395               "Different number of objs processed and enqueued.");
2396   }
2397 #endif
2398 }
2399 
2400 #ifndef PRODUCT
2401 
2402 class PrintReachableOopClosure: public OopClosure {
2403 private:
2404   G1CollectedHeap* _g1h;
2405   outputStream*    _out;
2406   VerifyOption     _vo;
2407   bool             _all;
2408 
2409 public:
2410   PrintReachableOopClosure(outputStream* out,
2411                            VerifyOption  vo,
2412                            bool          all) :
2413     _g1h(G1CollectedHeap::heap()),
2414     _out(out), _vo(vo), _all(all) { }
2415 
2416   void do_oop(narrowOop* p) { do_oop_work(p); }
2417   void do_oop(      oop* p) { do_oop_work(p); }
2418 
2419   template <class T> void do_oop_work(T* p) {
2420     oop         obj = oopDesc::load_decode_heap_oop(p);
2421     const char* str = NULL;
2422     const char* str2 = "";
2423 
2424     if (obj == NULL) {
2425       str = "";
2426     } else if (!_g1h->is_in_g1_reserved(obj)) {
2427       str = " O";
2428     } else {
2429       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2430       guarantee(hr != NULL, "invariant");
2431       bool over_tams = false;
2432       bool marked = false;
2433 
2434       switch (_vo) {
2435         case VerifyOption_G1UsePrevMarking:
2436           over_tams = hr->obj_allocated_since_prev_marking(obj);
2437           marked = _g1h->isMarkedPrev(obj);
2438           break;
2439         case VerifyOption_G1UseNextMarking:
2440           over_tams = hr->obj_allocated_since_next_marking(obj);
2441           marked = _g1h->isMarkedNext(obj);
2442           break;
2443         case VerifyOption_G1UseMarkWord:
2444           marked = obj->is_gc_marked();
2445           break;
2446         default:
2447           ShouldNotReachHere();
2448       }
2449 
2450       if (over_tams) {
2451         str = " >";
2452         if (marked) {
2453           str2 = " AND MARKED";
2454         }
2455       } else if (marked) {
2456         str = " M";
2457       } else {
2458         str = " NOT";
2459       }
2460     }
2461 
2462     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2463                    p, (void*) obj, str, str2);
2464   }
2465 };
2466 
2467 class PrintReachableObjectClosure : public ObjectClosure {
2468 private:
2469   G1CollectedHeap* _g1h;
2470   outputStream*    _out;
2471   VerifyOption     _vo;
2472   bool             _all;
2473   HeapRegion*      _hr;
2474 
2475 public:
2476   PrintReachableObjectClosure(outputStream* out,
2477                               VerifyOption  vo,
2478                               bool          all,
2479                               HeapRegion*   hr) :
2480     _g1h(G1CollectedHeap::heap()),
2481     _out(out), _vo(vo), _all(all), _hr(hr) { }
2482 
2483   void do_object(oop o) {
2484     bool over_tams = false;
2485     bool marked = false;
2486 
2487     switch (_vo) {
2488       case VerifyOption_G1UsePrevMarking:
2489         over_tams = _hr->obj_allocated_since_prev_marking(o);
2490         marked = _g1h->isMarkedPrev(o);
2491         break;
2492       case VerifyOption_G1UseNextMarking:
2493         over_tams = _hr->obj_allocated_since_next_marking(o);
2494         marked = _g1h->isMarkedNext(o);
2495         break;
2496       case VerifyOption_G1UseMarkWord:
2497         marked = o->is_gc_marked();
2498         break;
2499       default:
2500         ShouldNotReachHere();
2501     }
2502     bool print_it = _all || over_tams || marked;
2503 
2504     if (print_it) {
2505       _out->print_cr(" "PTR_FORMAT"%s",
2506                      o, (over_tams) ? " >" : (marked) ? " M" : "");
2507       PrintReachableOopClosure oopCl(_out, _vo, _all);
2508       o->oop_iterate(&oopCl);
2509     }
2510   }
2511 };
2512 
2513 class PrintReachableRegionClosure : public HeapRegionClosure {
2514 private:
2515   outputStream* _out;
2516   VerifyOption  _vo;
2517   bool          _all;
2518 
2519 public:
2520   bool doHeapRegion(HeapRegion* hr) {
2521     HeapWord* b = hr->bottom();
2522     HeapWord* e = hr->end();
2523     HeapWord* t = hr->top();
2524     HeapWord* p = NULL;
2525 
2526     switch (_vo) {
2527       case VerifyOption_G1UsePrevMarking:
2528         p = hr->prev_top_at_mark_start();
2529         break;
2530       case VerifyOption_G1UseNextMarking:
2531         p = hr->next_top_at_mark_start();
2532         break;
2533       case VerifyOption_G1UseMarkWord:
2534         // When we are verifying marking using the mark word
2535         // TAMS has no relevance.
2536         assert(p == NULL, "post-condition");
2537         break;
2538       default:
2539         ShouldNotReachHere();
2540     }
2541     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2542                    "TAMS: "PTR_FORMAT, b, e, t, p);
2543     _out->cr();
2544 
2545     HeapWord* from = b;
2546     HeapWord* to   = t;
2547 
2548     if (to > from) {
2549       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2550       _out->cr();
2551       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2552       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2553       _out->cr();
2554     }
2555 
2556     return false;
2557   }
2558 
2559   PrintReachableRegionClosure(outputStream* out,
2560                               VerifyOption  vo,
2561                               bool          all) :
2562     _out(out), _vo(vo), _all(all) { }
2563 };
2564 
2565 static const char* verify_option_to_tams(VerifyOption vo) {
2566   switch (vo) {
2567     case VerifyOption_G1UsePrevMarking:
2568       return "PTAMS";
2569     case VerifyOption_G1UseNextMarking:
2570       return "NTAMS";
2571     default:
2572       return "NONE";
2573   }
2574 }
2575 
2576 void ConcurrentMark::print_reachable(const char* str,
2577                                      VerifyOption vo,
2578                                      bool all) {
2579   gclog_or_tty->cr();
2580   gclog_or_tty->print_cr("== Doing heap dump... ");
2581 
2582   if (G1PrintReachableBaseFile == NULL) {
2583     gclog_or_tty->print_cr("  #### error: no base file defined");
2584     return;
2585   }
2586 
2587   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2588       (JVM_MAXPATHLEN - 1)) {
2589     gclog_or_tty->print_cr("  #### error: file name too long");
2590     return;
2591   }
2592 
2593   char file_name[JVM_MAXPATHLEN];
2594   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2595   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2596 
2597   fileStream fout(file_name);
2598   if (!fout.is_open()) {
2599     gclog_or_tty->print_cr("  #### error: could not open file");
2600     return;
2601   }
2602 
2603   outputStream* out = &fout;
2604   out->print_cr("-- USING %s", verify_option_to_tams(vo));
2605   out->cr();
2606 
2607   out->print_cr("--- ITERATING OVER REGIONS");
2608   out->cr();
2609   PrintReachableRegionClosure rcl(out, vo, all);
2610   _g1h->heap_region_iterate(&rcl);
2611   out->cr();
2612 
2613   gclog_or_tty->print_cr("  done");
2614   gclog_or_tty->flush();
2615 }
2616 
2617 #endif // PRODUCT
2618 
2619 // This note is for drainAllSATBBuffers and the code in between.
2620 // In the future we could reuse a task to do this work during an
2621 // evacuation pause (since now tasks are not active and can be claimed
2622 // during an evacuation pause). This was a late change to the code and
2623 // is currently not being taken advantage of.
2624 
2625 class CMGlobalObjectClosure : public ObjectClosure {
2626 private:
2627   ConcurrentMark* _cm;
2628 
2629 public:
2630   void do_object(oop obj) {
2631     _cm->deal_with_reference(obj);
2632   }
2633 
2634   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2635 };
2636 
2637 void ConcurrentMark::deal_with_reference(oop obj) {
2638   if (verbose_high()) {
2639     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2640                            (void*) obj);
2641   }
2642 
2643   HeapWord* objAddr = (HeapWord*) obj;
2644   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2645   if (_g1h->is_in_g1_reserved(objAddr)) {
2646     assert(obj != NULL, "null check is implicit");
2647     if (!_nextMarkBitMap->isMarked(objAddr)) {
2648       // Only get the containing region if the object is not marked on the
2649       // bitmap (otherwise, it's a waste of time since we won't do
2650       // anything with it).
2651       HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
2652       if (!hr->obj_allocated_since_next_marking(obj)) {
2653         if (verbose_high()) {
2654           gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2655                                  "marked", (void*) obj);
2656         }
2657 
2658         // we need to mark it first
2659         if (_nextMarkBitMap->parMark(objAddr)) {
2660           // No OrderAccess::store_load() is needed. It is implicit in the
2661           // CAS done in parMark(objAddr) above.
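               // An object at or beyond the global finger will be visited when
               // its region is claimed and scanned, so only objects below the
               // finger need to be pushed on the mark stack explicitly.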
2662           HeapWord* finger = _finger;
2663           if (objAddr < finger) {
2664             if (verbose_high()) {
2665               gclog_or_tty->print_cr("[global] below the global finger "
2666                                      "("PTR_FORMAT"), pushing it", finger);
2667             }
2668             if (!mark_stack_push(obj)) {
2669               if (verbose_low()) {
2670                 gclog_or_tty->print_cr("[global] global stack overflow during "
2671                                        "deal_with_reference");
2672               }
2673             }
2674           }
2675         }
2676       }
2677     }
2678   }
2679 }
2680 
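     // Apply the global marking closure to every object recorded in the
     // completed SATB buffers, and then to the per-thread buffers that are
     // still being filled.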
2681 void ConcurrentMark::drainAllSATBBuffers() {
2682   CMGlobalObjectClosure oc(this);
2683   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2684   satb_mq_set.set_closure(&oc);
2685 
2686   while (satb_mq_set.apply_closure_to_completed_buffer()) {
2687     if (verbose_medium()) {
2688       gclog_or_tty->print_cr("[global] processed an SATB buffer");
2689     }
2690   }
2691 
2692   // no need to check whether we should do this, as this is only
2693   // called during an evacuation pause
2694   satb_mq_set.iterate_closure_all_threads();
2695 
2696   satb_mq_set.set_closure(NULL);
2697   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2698 }
2699 
2700 void ConcurrentMark::markPrev(oop p) {
2701   // Note we are overriding the read-only view of the prev map here, via
2702   // the cast.
2703   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2704 }
2705 
2706 void ConcurrentMark::clear(oop p) {
2707   assert(p != NULL && p->is_oop(), "expected an oop");
2708   HeapWord* addr = (HeapWord*)p;
2709   assert(addr >= _nextMarkBitMap->startWord() &&
2710          addr < _nextMarkBitMap->endWord(), "in a region");
2711 
2712   _nextMarkBitMap->clear(addr);
2713 }
2714 
2715 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2716   // Note we are overriding the read-only view of the prev map here, via
2717   // the cast.
2718   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2719   _nextMarkBitMap->clearRange(mr);
2720 }
2721 
2722 HeapRegion*
2723 ConcurrentMark::claim_region(int task_num) {
2724   // "checkpoint" the finger
2725   HeapWord* finger = _finger;
2726 
2727   // _heap_end will not change underneath our feet; it only changes at
2728   // yield points.
2729   while (finger < _heap_end) {
2730     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2731 
2732     // Note on how this code handles humongous regions. In the
2733     // normal case the finger will reach the start of a "starts
2734     // humongous" (SH) region. Its end will either be the end of the
2735     // last "continues humongous" (CH) region in the sequence, or the
2736     // standard end of the SH region (if the SH is the only region in
2737     // the sequence). That way claim_region() will skip over the CH
2738     // regions. However, there is a subtle race between a CM thread
2739     // executing this method and a mutator thread doing a humongous
2740     // object allocation. The two are not mutually exclusive as the CM
2741     // thread does not need to hold the Heap_lock when it gets
2742     // here. So there is a chance that claim_region() will come across
2743     // a free region that's in the process of becoming a SH or a CH
2744     // region. In the former case, it will either
2745     //   a) Miss the update to the region's end, in which case it will
2746     //      visit every subsequent CH region, will find their bitmaps
2747     //      empty, and do nothing, or
2748     //   b) Will observe the update of the region's end (in which case
2749     //      it will skip the subsequent CH regions).
2750     // If it comes across a region that suddenly becomes CH, the
2751     // scenario will be similar to b). So, the race between
2752     // claim_region() and a humongous object allocation might force us
2753     // to do a bit of unnecessary work (due to some unnecessary bitmap
2754     // iterations) but it should not introduce any correctness issues.
2755     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
2756     HeapWord*   bottom        = curr_region->bottom();
2757     HeapWord*   end           = curr_region->end();
2758     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2759 
2760     if (verbose_low()) {
2761       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2762                              "["PTR_FORMAT", "PTR_FORMAT"), "
2763                              "limit = "PTR_FORMAT,
2764                              task_num, curr_region, bottom, end, limit);
2765     }
2766 
2767     // Is the gap between reading the finger and doing the CAS too long?
2768     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2769     if (res == finger) {
2770       // we succeeded
2771 
2772       // notice that _finger == end cannot be guaranteed here since
2773       // someone else might have moved the finger even further.
2774       assert(_finger >= end, "the finger should have moved forward");
2775 
2776       if (verbose_low()) {
2777         gclog_or_tty->print_cr("[%d] we were successful with region = "
2778                                PTR_FORMAT, task_num, curr_region);
2779       }
2780 
2781       if (limit > bottom) {
2782         if (verbose_low()) {
2783           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2784                                  "returning it ", task_num, curr_region);
2785         }
2786         return curr_region;
2787       } else {
2788         assert(limit == bottom,
2789                "the region limit should be at bottom");
2790         if (verbose_low()) {
2791           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2792                                  "returning NULL", task_num, curr_region);
2793         }
2794         // we return NULL and the caller should try calling
2795         // claim_region() again.
2796         return NULL;
2797       }
2798     } else {
2799       assert(_finger > finger, "the finger should have moved forward");
2800       if (verbose_low()) {
2801         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2802                                "global finger = "PTR_FORMAT", "
2803                                "our finger = "PTR_FORMAT,
2804                                task_num, _finger, finger);
2805       }
2806 
2807       // read it again
2808       finger = _finger;
2809     }
2810   }
2811 
2812   return NULL;
2813 }
2814 
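     // Clears the aborted-region interval recorded by any CM task if that
     // interval points into the collection set. Returns true if at least one
     // such interval was cleared.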
2815 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
2816   bool result = false;
2817   for (int i = 0; i < (int)_max_task_num; ++i) {
2818     CMTask* the_task = _tasks[i];
2819     MemRegion mr = the_task->aborted_region();
2820     if (mr.start() != NULL) {
2821       assert(mr.end() != NULL, "invariant");
2822       assert(mr.word_size() > 0, "invariant");
2823       HeapRegion* hr = _g1h->heap_region_containing(mr.start());
2824       assert(hr != NULL, "invariant");
2825       if (hr->in_collection_set()) {
2826         // The region points into the collection set
2827         the_task->set_aborted_region(MemRegion());
2828         result = true;
2829       }
2830     }
2831   }
2832   return result;
2833 }
2834 
2835 bool ConcurrentMark::has_aborted_regions() {
2836   for (int i = 0; i < (int)_max_task_num; ++i) {
2837     CMTask* the_task = _tasks[i];
2838     MemRegion mr = the_task->aborted_region();
2839     if (mr.start() != NULL) {
2840       assert(mr.end() != NULL, "invariant");
2841       assert(mr.word_size() > 0, "invariant");
2842       return true;
2843     }
2844   }
2845   return false;
2846 }
2847 
2848 void ConcurrentMark::oops_do(OopClosure* cl) {
2849   if (_markStack.size() > 0 && verbose_low()) {
2850     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2851                            "size = %d", _markStack.size());
2852   }
2853   // we first iterate over the contents of the mark stack...
2854   _markStack.oops_do(cl);
2855 
2856   for (int i = 0; i < (int)_max_task_num; ++i) {
2857     OopTaskQueue* queue = _task_queues->queue((int)i);
2858 
2859     if (queue->size() > 0 && verbose_low()) {
2860       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2861                              "size = %d", i, queue->size());
2862     }
2863 
2864     // ...then over the contents of all the task queues.
2865     queue->oops_do(cl);
2866   }
2867 
2868   // Invalidate any entries in the region stack that point into
2869   // the collection set.
2870   if (_regionStack.invalidate_entries_into_cset()) {
2871     // otherwise, any gray objects copied during the evacuation pause
2872     // might not be visited.
2873     assert(_should_gray_objects, "invariant");
2874   }
2875 
2876   // Invalidate any aborted regions, recorded in the individual CM
2877   // tasks, that point into the collection set.
2878   if (invalidate_aborted_regions_in_cset()) {
2879     // otherwise, any gray objects copied during the evacuation pause
2880     // might not be visited.
2881     assert(_should_gray_objects, "invariant");
2882   }
2883 
2884 }
2885 
2886 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2887   _markStack.setEmpty();
2888   _markStack.clear_overflow();
2889   _regionStack.setEmpty();
2890   _regionStack.clear_overflow();
2891   if (clear_overflow) {
2892     clear_has_overflown();
2893   } else {
2894     assert(has_overflown(), "pre-condition");
2895   }
2896   _finger = _heap_start;
2897 
2898   for (int i = 0; i < (int)_max_task_num; ++i) {
2899     OopTaskQueue* queue = _task_queues->queue(i);
2900     queue->set_empty();
2901     // Clear any partial regions from the CMTasks
2902     _tasks[i]->clear_aborted_region();
2903   }
2904 }
2905 
2906 void ConcurrentMark::print_stats() {
2907   if (verbose_stats()) {
2908     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2909     for (size_t i = 0; i < _active_tasks; ++i) {
2910       _tasks[i]->print_stats();
2911       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2912     }
2913   }
2914 }
2915 
2916 class CSMarkOopClosure: public OopClosure {
2917   friend class CSMarkBitMapClosure;
2918 
2919   G1CollectedHeap* _g1h;
2920   CMBitMap*        _bm;
2921   ConcurrentMark*  _cm;
2922   oop*             _ms;
2923   jint*            _array_ind_stack;
2924   int              _ms_size;
2925   int              _ms_ind;
2926   int              _array_increment;
2927 
2928   bool push(oop obj, int arr_ind = 0) {
2929     if (_ms_ind == _ms_size) {
2930       gclog_or_tty->print_cr("Mark stack is full.");
2931       return false;
2932     }
2933     _ms[_ms_ind] = obj;
2934     if (obj->is_objArray()) {
2935       _array_ind_stack[_ms_ind] = arr_ind;
2936     }
2937     _ms_ind++;
2938     return true;
2939   }
2940 
2941   oop pop() {
2942     if (_ms_ind == 0) {
2943       return NULL;
2944     } else {
2945       _ms_ind--;
2946       return _ms[_ms_ind];
2947     }
2948   }
2949 
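       // Illustrative note: drain() below empties the local mark stack. Object
       // arrays are walked in chunks of _array_increment elements; a chunk that
       // does not finish the array pushes the array back with the index of the
       // next chunk, so no single large array can monopolise a drain pass.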
2950   template <class T> bool drain() {
2951     while (_ms_ind > 0) {
2952       oop obj = pop();
2953       assert(obj != NULL, "Since index was non-zero.");
2954       if (obj->is_objArray()) {
2955         jint arr_ind = _array_ind_stack[_ms_ind];
2956         objArrayOop aobj = objArrayOop(obj);
2957         jint len = aobj->length();
2958         jint next_arr_ind = arr_ind + _array_increment;
2959         if (next_arr_ind < len) {
2960           push(obj, next_arr_ind);
2961         }
2962         // Now process this chunk of the array.
2963         int lim = MIN2(next_arr_ind, len);
2964         for (int j = arr_ind; j < lim; j++) {
2965           do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2966         }
2967 
2968       } else {
2969         obj->oop_iterate(this);
2970       }
2971       if (abort()) return false;
2972     }
2973     return true;
2974   }
2975 
2976 public:
2977   CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2978     _g1h(G1CollectedHeap::heap()),
2979     _cm(cm),
2980     _bm(cm->nextMarkBitMap()),
2981     _ms_size(ms_size), _ms_ind(0),
2982     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2983     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2984     _array_increment(MAX2(ms_size/8, 16))
2985   {}
2986 
2987   ~CSMarkOopClosure() {
2988     FREE_C_HEAP_ARRAY(oop, _ms);
2989     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2990   }
2991 
2992   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2993   virtual void do_oop(      oop* p) { do_oop_work(p); }
2994 
2995   template <class T> void do_oop_work(T* p) {
2996     T heap_oop = oopDesc::load_heap_oop(p);
2997     if (oopDesc::is_null(heap_oop)) return;
2998     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2999     if (obj->is_forwarded()) {
3000       // If the object has already been forwarded, we have to make sure
3001       // that it's marked.  So follow the forwarding pointer.  Note that
3002       // this does the right thing for self-forwarding pointers in the
3003       // evacuation failure case.
3004       obj = obj->forwardee();
3005     }
3006     HeapRegion* hr = _g1h->heap_region_containing(obj);
3007     if (hr != NULL) {
3008       if (hr->in_collection_set()) {
3009         if (_g1h->is_obj_ill(obj)) {
3010           _bm->mark((HeapWord*)obj);
3011           if (!push(obj)) {
3012             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
3013             set_abort();
3014           }
3015         }
3016       } else {
3017         // Outside the collection set; we need to gray it
3018         _cm->deal_with_reference(obj);
3019       }
3020     }
3021   }
3022 };
3023 
3024 class CSMarkBitMapClosure: public BitMapClosure {
3025   G1CollectedHeap* _g1h;
3026   CMBitMap*        _bitMap;
3027   ConcurrentMark*  _cm;
3028   CSMarkOopClosure _oop_cl;
3029 public:
3030   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
3031     _g1h(G1CollectedHeap::heap()),
3032     _bitMap(cm->nextMarkBitMap()),
3033     _oop_cl(cm, ms_size)
3034   {}
3035 
3036   ~CSMarkBitMapClosure() {}
3037 
3038   bool do_bit(size_t offset) {
3039     // convert offset into a HeapWord*
3040     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
3041     assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
3042            "address out of range");
3043     assert(_bitMap->isMarked(addr), "tautology");
3044     oop obj = oop(addr);
3045     if (!obj->is_forwarded()) {
3046       if (!_oop_cl.push(obj)) return false;
3047       if (UseCompressedOops) {
3048         if (!_oop_cl.drain<narrowOop>()) return false;
3049       } else {
3050         if (!_oop_cl.drain<oop>()) return false;
3051       }
3052     }
3053     // Otherwise...
3054     return true;
3055   }
3056 };
3057 
3058 
3059 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
3060   CMBitMap* _bm;
3061   CSMarkBitMapClosure _bit_cl;
3062   enum SomePrivateConstants {
3063     MSSize = 1000
3064   };
3065   bool _completed;
3066 public:
3067   CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
3068     _bm(cm->nextMarkBitMap()),
3069     _bit_cl(cm, MSSize),
3070     _completed(true)
3071   {}
3072 
3073   ~CompleteMarkingInCSHRClosure() {}
3074 
3075   bool doHeapRegion(HeapRegion* r) {
3076     if (!r->evacuation_failed()) {
3077       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
3078       if (!mr.is_empty()) {
3079         if (!_bm->iterate(&_bit_cl, mr)) {
3080           _completed = false;
3081           return true;
3082         }
3083       }
3084     }
3085     return false;
3086   }
3087 
3088   bool completed() { return _completed; }
3089 };
3090 
3091 class ClearMarksInHRClosure: public HeapRegionClosure {
3092   CMBitMap* _bm;
3093 public:
3094   ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
3095 
3096   bool doHeapRegion(HeapRegion* r) {
3097     if (!r->used_region().is_empty() && !r->evacuation_failed()) {
3098       MemRegion usedMR = r->used_region();
3099       _bm->clearRange(usedMR);
3100     }
3101     return false;
3102   }
3103 };
3104 
3105 void ConcurrentMark::complete_marking_in_collection_set() {
3106   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
3107 
3108   if (!g1h->mark_in_progress()) {
3109     g1h->g1_policy()->record_mark_closure_time(0.0);
3110     return;
3111   }
3112 
3113   int i = 1;
3114   double start = os::elapsedTime();
3115   while (true) {
3116     i++;
3117     CompleteMarkingInCSHRClosure cmplt(this);
3118     g1h->collection_set_iterate(&cmplt);
3119     if (cmplt.completed()) break;
3120   }
3121   double end_time = os::elapsedTime();
3122   double elapsed_time_ms = (end_time - start) * 1000.0;
3123   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
3124 
3125   ClearMarksInHRClosure clr(nextMarkBitMap());
3126   g1h->collection_set_iterate(&clr);
3127 }
3128 
3129 // The next two methods deal with the following optimisation. Some
3130 // objects are gray by being marked and located above the finger. If
3131 // they are copied, during an evacuation pause, below the finger then
3132 // they need to be pushed on the stack. The observation is that, if
3133 // there are no regions in the collection set located above the
3134 // finger, then the above cannot happen, hence we do not need to
3135 // explicitly gray any objects when copying them to below the
3136 // finger. The global stack will be scanned to ensure that, if it
3137 // points to objects being copied, it will update their
3138 // location. There is a tricky situation with the gray objects in
3139 // the region stack that are being copied, however. See the comment
3140 // in newCSet().
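     //
     // For example (illustrative addresses only): if the global finger is at
     // 0x2000 and every collection set region lies entirely below 0x2000, then
     // any object being evacuated was already below the finger, so no marked
     // object can move from "will be visited later" (above the finger) to a
     // spot where it would be skipped, and no explicit graying is needed.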
3141 
3142 void ConcurrentMark::newCSet() {
3143   if (!concurrent_marking_in_progress()) {
3144     // nothing to do if marking is not in progress
3145     return;
3146   }
3147 
3148   // find what the lowest finger is among the global and local fingers
3149   _min_finger = _finger;
3150   for (int i = 0; i < (int)_max_task_num; ++i) {
3151     CMTask* task = _tasks[i];
3152     HeapWord* task_finger = task->finger();
3153     if (task_finger != NULL && task_finger < _min_finger) {
3154       _min_finger = task_finger;
3155     }
3156   }
3157 
3158   _should_gray_objects = false;
3159 
3160   // This fixes a very subtle and frustrating bug. It might be the case
3161   // that, during an evacuation pause, heap regions that contain
3162   // objects that are gray (by being in regions contained in the
3163   // region stack) are included in the collection set. Since such gray
3164   // objects will be moved, and because it's not easy to redirect
3165   // region stack entries to point to a new location (because objects
3166   // in one region might be scattered to multiple regions after they
3167   // are copied), one option is to ensure that all marked objects
3168   // copied during a pause are pushed on the stack. Notice, however,
3169   // that this problem can only happen when the region stack is not
3170   // empty during an evacuation pause. So, we make the fix a bit less
3171   // conservative and ensure that regions are pushed on the stack,
3172   // irrespective of whether all collection set regions are below the
3173   // finger, if the region stack is not empty. This is expected to be
3174   // a rare case, so I don't think it's necessary to be smarter about it.
3175   if (!region_stack_empty() || has_aborted_regions()) {
3176     _should_gray_objects = true;
3177   }
3178 }
3179 
3180 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
3181   if (!concurrent_marking_in_progress()) return;
3182 
3183   HeapWord* region_end = hr->end();
3184   if (region_end > _min_finger) {
3185     _should_gray_objects = true;
3186   }
3187 }
3188 
3189 // Resets the region fields of active CMTasks whose values point
3190 // into the collection set.
3191 void ConcurrentMark::reset_active_task_region_fields_in_cset() {
3192   assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
3193   assert(parallel_marking_threads() <= _max_task_num, "sanity");
3194 
3195   for (int i = 0; i < (int)parallel_marking_threads(); i += 1) {
3196     CMTask* task = _tasks[i];
3197     HeapWord* task_finger = task->finger();
3198     if (task_finger != NULL) {
3199       assert(_g1h->is_in_g1_reserved(task_finger), "not in heap");
3200       HeapRegion* finger_region = _g1h->heap_region_containing(task_finger);
3201       if (finger_region->in_collection_set()) {
3202         // The task's current region is in the collection set.
3203         // This region will be evacuated in the current GC and
3204         // the region fields in the task will be stale.
3205         task->giveup_current_region();
3206       }
3207     }
3208   }
3209 }
3210 
3211 // abandon current marking iteration due to a Full GC
3212 void ConcurrentMark::abort() {
3213   // Clear all marks to force marking thread to do nothing
3214   _nextMarkBitMap->clearAll();
3215   // Empty mark stack
3216   clear_marking_state();
3217   for (int i = 0; i < (int)_max_task_num; ++i) {
3218     _tasks[i]->clear_region_fields();
3219   }
3220   _has_aborted = true;
3221 
3222   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3223   satb_mq_set.abandon_partial_marking();
3224   // This can be called either during or outside marking; we'll read
3225   // the expected_active value from the SATB queue set.
3226   satb_mq_set.set_active_all_threads(
3227                                  false, /* new active value */
3228                                  satb_mq_set.is_active() /* expected_active */);
3229 }
3230 
3231 static void print_ms_time_info(const char* prefix, const char* name,
3232                                NumberSeq& ns) {
3233   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3234                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3235   if (ns.num() > 0) {
3236     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3237                            prefix, ns.sd(), ns.maximum());
3238   }
3239 }
3240 
3241 void ConcurrentMark::print_summary_info() {
3242   gclog_or_tty->print_cr(" Concurrent marking:");
3243   print_ms_time_info("  ", "init marks", _init_times);
3244   print_ms_time_info("  ", "remarks", _remark_times);
3245   {
3246     print_ms_time_info("     ", "final marks", _remark_mark_times);
3247     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3248 
3249   }
3250   print_ms_time_info("  ", "cleanups", _cleanup_times);
3251   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3252                          _total_counting_time,
3253                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3254                           (double)_cleanup_times.num()
3255                          : 0.0));
3256   if (G1ScrubRemSets) {
3257     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3258                            _total_rs_scrub_time,
3259                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3260                             (double)_cleanup_times.num()
3261                            : 0.0));
3262   }
3263   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3264                          (_init_times.sum() + _remark_times.sum() +
3265                           _cleanup_times.sum())/1000.0);
3266   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3267                 "(%8.2f s marking, %8.2f s counting).",
3268                 cmThread()->vtime_accum(),
3269                 cmThread()->vtime_mark_accum(),
3270                 cmThread()->vtime_count_accum());
3271 }
3272 
3273 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3274   _parallel_workers->print_worker_threads_on(st);
3275 }
3276 
3277 // Closures
3278 // XXX: there seems to be a lot of code duplication here;
3279 // should refactor and consolidate the shared code.
3280 
3284 // We take a break if someone is trying to stop the world.
3285 bool ConcurrentMark::do_yield_check(int worker_i) {
3286   if (should_yield()) {
3287     if (worker_i == 0) {
3288       _g1h->g1_policy()->record_concurrent_pause();
3289     }
3290     cmThread()->yield();
3291     if (worker_i == 0) {
3292       _g1h->g1_policy()->record_concurrent_pause_end();
3293     }
3294     return true;
3295   } else {
3296     return false;
3297   }
3298 }
3299 
3300 bool ConcurrentMark::should_yield() {
3301   return cmThread()->should_yield();
3302 }
3303 
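     // A hedged, illustrative example of the card arithmetic below (assuming
     // the common 512-byte cards, i.e. CardTableModRefBS::card_shift == 9): a
     // pointer 0x1200 bytes past the start of the reserved region yields
     // offset 0x1200, and 0x1200 >> 9 == 9, so bit 9 of _card_bm is tested.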
3304 bool ConcurrentMark::containing_card_is_marked(void* p) {
3305   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3306   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3307 }
3308 
3309 bool ConcurrentMark::containing_cards_are_marked(void* start,
3310                                                  void* last) {
3311   return containing_card_is_marked(start) &&
3312          containing_card_is_marked(last);
3313 }
3314 
3315 #ifndef PRODUCT
3316 // for debugging purposes
3317 void ConcurrentMark::print_finger() {
3318   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3319                          _heap_start, _heap_end, _finger);
3320   for (int i = 0; i < (int) _max_task_num; ++i) {
3321     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
3322   }
3323   gclog_or_tty->print_cr("");
3324 }
3325 #endif
3326 
3327 void CMTask::scan_object(oop obj) {
3328   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3329 
3330   if (_cm->verbose_high()) {
3331     gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
3332                            _task_id, (void*) obj);
3333   }
3334 
3335   size_t obj_size = obj->size();
3336   _words_scanned += obj_size;
3337 
3338   obj->oop_iterate(_cm_oop_closure);
3339   statsOnly( ++_objs_scanned );
3340   check_limits();
3341 }
3342 
3343 // Closure for iteration over bitmaps
3344 class CMBitMapClosure : public BitMapClosure {
3345 private:
3346   // the bitmap that is being iterated over
3347   CMBitMap*                   _nextMarkBitMap;
3348   ConcurrentMark*             _cm;
3349   CMTask*                     _task;
3350   // true if we're scanning a heap region claimed by the task (so that
3351   // we move the finger along), false if we're not, i.e. currently when
3352   // scanning a heap region popped from the region stack (so that we
3353   // do not move the task finger along; it'd be a mistake if we did so).
3354   bool                        _scanning_heap_region;
3355 
3356 public:
3357   CMBitMapClosure(CMTask *task,
3358                   ConcurrentMark* cm,
3359                   CMBitMap* nextMarkBitMap)
3360     :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3361 
3362   void set_scanning_heap_region(bool scanning_heap_region) {
3363     _scanning_heap_region = scanning_heap_region;
3364   }
3365 
3366   bool do_bit(size_t offset) {
3367     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3368     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3369     assert( addr < _cm->finger(), "invariant");
3370 
3371     if (_scanning_heap_region) {
3372       statsOnly( _task->increase_objs_found_on_bitmap() );
3373       assert(addr >= _task->finger(), "invariant");
3374       // We move the task's local finger along.
3375       _task->move_finger_to(addr);
3376     } else {
3377       // We move the task's region finger along.
3378       _task->move_region_finger_to(addr);
3379     }
3380 
3381     _task->scan_object(oop(addr));
3382     // we only partially drain the local queue and global stack
3383     _task->drain_local_queue(true);
3384     _task->drain_global_stack(true);
3385 
3386     // if the has_aborted flag has been raised, we need to bail out of
3387     // the iteration
3388     return !_task->has_aborted();
3389   }
3390 };
3391 
3392 // Closure for iterating over objects, currently only used for
3393 // processing SATB buffers.
3394 class CMObjectClosure : public ObjectClosure {
3395 private:
3396   CMTask* _task;
3397 
3398 public:
3399   void do_object(oop obj) {
3400     _task->deal_with_reference(obj);
3401   }
3402 
3403   CMObjectClosure(CMTask* task) : _task(task) { }
3404 };
3405 
3406 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3407                                ConcurrentMark* cm,
3408                                CMTask* task)
3409   : _g1h(g1h), _cm(cm), _task(task) {
3410   assert(_ref_processor == NULL, "should be initialized to NULL");
3411 
3412   if (G1UseConcMarkReferenceProcessing) {
3413     _ref_processor = g1h->ref_processor_cm();
3414     assert(_ref_processor != NULL, "should not be NULL");
3415   }
3416 }
3417 
3418 void CMTask::setup_for_region(HeapRegion* hr) {
3419   // Separated the asserts so that we know which one fires.
3420   assert(hr != NULL,
3421         "claim_region() should have filtered out continues humongous regions");
3422   assert(!hr->continuesHumongous(),
3423         "claim_region() should have filtered out continues humongous regions");
3424 
3425   if (_cm->verbose_low()) {
3426     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3427                            _task_id, hr);
3428   }
3429 
3430   _curr_region  = hr;
3431   _finger       = hr->bottom();
3432   update_region_limit();
3433 }
3434 
3435 void CMTask::update_region_limit() {
3436   HeapRegion* hr            = _curr_region;
3437   HeapWord* bottom          = hr->bottom();
3438   HeapWord* limit           = hr->next_top_at_mark_start();
3439 
3440   if (limit == bottom) {
3441     if (_cm->verbose_low()) {
3442       gclog_or_tty->print_cr("[%d] found an empty region "
3443                              "["PTR_FORMAT", "PTR_FORMAT")",
3444                              _task_id, bottom, limit);
3445     }
3446     // The region was collected underneath our feet.
3447     // We set the finger to bottom to ensure that the bitmap
3448     // iteration that will follow this will not do anything.
3449     // (this is not a condition that holds when we set the region up,
3450     // as the region is not supposed to be empty in the first place)
3451     _finger = bottom;
3452   } else if (limit >= _region_limit) {
3453     assert(limit >= _finger, "peace of mind");
3454   } else {
3455     assert(limit < _region_limit, "only way to get here");
3456     // This can happen under some pretty unusual circumstances.  An
3457     // evacuation pause empties the region underneath our feet (NTAMS
3458     // at bottom). We then do some allocation in the region (NTAMS
3459     // stays at bottom), followed by the region being used as a GC
3460     // alloc region (NTAMS will move to top() and the objects
3461     // originally below it will be grayed). All objects now marked in
3462     // the region are explicitly grayed, if below the global finger,
3463     // and we do not need in fact to scan anything else. So, we simply
3464     // set _finger to be limit to ensure that the bitmap iteration
3465     // doesn't do anything.
3466     _finger = limit;
3467   }
3468 
3469   _region_limit = limit;
3470 }
3471 
3472 void CMTask::giveup_current_region() {
3473   assert(_curr_region != NULL, "invariant");
3474   if (_cm->verbose_low()) {
3475     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3476                            _task_id, _curr_region);
3477   }
3478   clear_region_fields();
3479 }
3480 
3481 void CMTask::clear_region_fields() {
3482   // Values for these three fields that indicate that we're not
3483   // holding on to a region.
3484   _curr_region   = NULL;
3485   _finger        = NULL;
3486   _region_limit  = NULL;
3487 
3488   _region_finger = NULL;
3489 }
3490 
3491 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3492   if (cm_oop_closure == NULL) {
3493     assert(_cm_oop_closure != NULL, "invariant");
3494   } else {
3495     assert(_cm_oop_closure == NULL, "invariant");
3496   }
3497   _cm_oop_closure = cm_oop_closure;
3498 }
3499 
3500 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3501   guarantee(nextMarkBitMap != NULL, "invariant");
3502 
3503   if (_cm->verbose_low()) {
3504     gclog_or_tty->print_cr("[%d] resetting", _task_id);
3505   }
3506 
3507   _nextMarkBitMap                = nextMarkBitMap;
3508   clear_region_fields();
3509   assert(_aborted_region.is_empty(), "should have been cleared");
3510 
3511   _calls                         = 0;
3512   _elapsed_time_ms               = 0.0;
3513   _termination_time_ms           = 0.0;
3514   _termination_start_time_ms     = 0.0;
3515 
3516 #if _MARKING_STATS_
3517   _local_pushes                  = 0;
3518   _local_pops                    = 0;
3519   _local_max_size                = 0;
3520   _objs_scanned                  = 0;
3521   _global_pushes                 = 0;
3522   _global_pops                   = 0;
3523   _global_max_size               = 0;
3524   _global_transfers_to           = 0;
3525   _global_transfers_from         = 0;
3526   _region_stack_pops             = 0;
3527   _regions_claimed               = 0;
3528   _objs_found_on_bitmap          = 0;
3529   _satb_buffers_processed        = 0;
3530   _steal_attempts                = 0;
3531   _steals                        = 0;
3532   _aborted                       = 0;
3533   _aborted_overflow              = 0;
3534   _aborted_cm_aborted            = 0;
3535   _aborted_yield                 = 0;
3536   _aborted_timed_out             = 0;
3537   _aborted_satb                  = 0;
3538   _aborted_termination           = 0;
3539 #endif // _MARKING_STATS_
3540 }
3541 
3542 bool CMTask::should_exit_termination() {
3543   regular_clock_call();
3544   // This is called when we are in the termination protocol. We should
3545   // quit if, for some reason, this task wants to abort or the global
3546   // stack is not empty (this means that we can get work from it).
3547   return !_cm->mark_stack_empty() || has_aborted();
3548 }
3549 
3550 void CMTask::reached_limit() {
3551   assert(_words_scanned >= _words_scanned_limit ||
3552          _refs_reached >= _refs_reached_limit,
3553          "shouldn't have been called otherwise");
3554   regular_clock_call();
3555 }
3556 
3557 void CMTask::regular_clock_call() {
3558   if (has_aborted()) return;
3559 
3560   // First, we need to recalculate the words scanned and refs reached
3561   // limits for the next clock call.
3562   recalculate_limits();
3563 
3564   // During the regular clock call we do the following
3565 
3566   // (1) If an overflow has been flagged, then we abort.
3567   if (_cm->has_overflown()) {
3568     set_has_aborted();
3569     return;
3570   }
3571 
3572   // If we are not concurrent (i.e. we're doing remark) we don't need
3573   // to check anything else. The other steps are only needed during
3574   // the concurrent marking phase.
3575   if (!concurrent()) return;
3576 
3577   // (2) If marking has been aborted for Full GC, then we also abort.
3578   if (_cm->has_aborted()) {
3579     set_has_aborted();
3580     statsOnly( ++_aborted_cm_aborted );
3581     return;
3582   }
3583 
3584   double curr_time_ms = os::elapsedVTime() * 1000.0;
3585 
3586   // (3) If marking stats are enabled, then we update the step history.
3587 #if _MARKING_STATS_
3588   if (_words_scanned >= _words_scanned_limit) {
3589     ++_clock_due_to_scanning;
3590   }
3591   if (_refs_reached >= _refs_reached_limit) {
3592     ++_clock_due_to_marking;
3593   }
3594 
3595   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3596   _interval_start_time_ms = curr_time_ms;
3597   _all_clock_intervals_ms.add(last_interval_ms);
3598 
3599   if (_cm->verbose_medium()) {
3600       gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3601                         "scanned = %d%s, refs reached = %d%s",
3602                         _task_id, last_interval_ms,
3603                         _words_scanned,
3604                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3605                         _refs_reached,
3606                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3607   }
3608 #endif // _MARKING_STATS_
3609 
3610   // (4) We check whether we should yield. If we have to, then we abort.
3611   if (_cm->should_yield()) {
3612     // We should yield. To do this we abort the task. The caller is
3613     // responsible for yielding.
3614     set_has_aborted();
3615     statsOnly( ++_aborted_yield );
3616     return;
3617   }
3618 
3619   // (5) We check whether we've reached our time quota. If we have,
3620   // then we abort.
3621   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3622   if (elapsed_time_ms > _time_target_ms) {
3623     set_has_aborted();
3624     _has_timed_out = true;
3625     statsOnly( ++_aborted_timed_out );
3626     return;
3627   }
3628 
3629   // (6) Finally, we check whether there are enough completed SATB
3630   // buffers available for processing. If there are, we abort.
3631   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3632   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3633     if (_cm->verbose_low()) {
3634       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3635                              _task_id);
3636     }
3637     // we do need to process SATB buffers, we'll abort and restart
3638     // the marking task to do so
3639     set_has_aborted();
3640     statsOnly( ++_aborted_satb );
3641     return;
3642   }
3643 }
3644 
3645 void CMTask::recalculate_limits() {
3646   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3647   _words_scanned_limit      = _real_words_scanned_limit;
3648 
3649   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3650   _refs_reached_limit       = _real_refs_reached_limit;
3651 }
3652 
3653 void CMTask::decrease_limits() {
3654   // This is called when we believe that we're going to do an infrequent
3655   // operation which will increase the per byte scanned cost (i.e. move
3656   // entries to/from the global stack). It basically tries to decrease the
3657   // scanning limit so that the clock is called earlier.
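       //
       // Illustrative arithmetic (assumed values, not quoted defaults): since
       // recalculate_limits() sets _real_words_scanned_limit to roughly
       // _words_scanned + words_scanned_period, subtracting 3/4 of the period
       // below leaves about a quarter of the usual scanning budget before the
       // next regular_clock_call().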
3658 
3659   if (_cm->verbose_medium()) {
3660     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3661   }
3662 
3663   _words_scanned_limit = _real_words_scanned_limit -
3664     3 * words_scanned_period / 4;
3665   _refs_reached_limit  = _real_refs_reached_limit -
3666     3 * refs_reached_period / 4;
3667 }
3668 
3669 void CMTask::move_entries_to_global_stack() {
3670   // local array where we'll store the entries that will be popped
3671   // from the local queue
3672   oop buffer[global_stack_transfer_size];
3673 
3674   int n = 0;
3675   oop obj;
3676   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3677     buffer[n] = obj;
3678     ++n;
3679   }
3680 
3681   if (n > 0) {
3682     // we popped at least one entry from the local queue
3683 
3684     statsOnly( ++_global_transfers_to; _local_pops += n );
3685 
3686     if (!_cm->mark_stack_push(buffer, n)) {
3687       if (_cm->verbose_low()) {
3688         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
3689                                _task_id);
3690       }
3691       set_has_aborted();
3692     } else {
3693       // the transfer was successful
3694 
3695       if (_cm->verbose_medium()) {
3696         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3697                                _task_id, n);
3698       }
3699       statsOnly( int tmp_size = _cm->mark_stack_size();
3700                  if (tmp_size > _global_max_size) {
3701                    _global_max_size = tmp_size;
3702                  }
3703                  _global_pushes += n );
3704     }
3705   }
3706 
3707   // this operation was quite expensive, so decrease the limits
3708   decrease_limits();
3709 }
3710 
3711 void CMTask::get_entries_from_global_stack() {
3712   // local array where we'll store the entries that will be popped
3713   // from the global stack.
3714   oop buffer[global_stack_transfer_size];
3715   int n;
3716   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3717   assert(n <= global_stack_transfer_size,
3718          "we should not pop more than the given limit");
3719   if (n > 0) {
3720     // yes, we did actually pop at least one entry
3721 
3722     statsOnly( ++_global_transfers_from; _global_pops += n );
3723     if (_cm->verbose_medium()) {
3724       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3725                              _task_id, n);
3726     }
3727     for (int i = 0; i < n; ++i) {
3728       bool success = _task_queue->push(buffer[i]);
3729       // We only call this when the local queue is empty or under a
3730       // given target limit. So, we do not expect this push to fail.
3731       assert(success, "invariant");
3732     }
3733 
3734     statsOnly( int tmp_size = _task_queue->size();
3735                if (tmp_size > _local_max_size) {
3736                  _local_max_size = tmp_size;
3737                }
3738                _local_pushes += n );
3739   }
3740 
3741   // this operation was quite expensive, so decrease the limits
3742   decrease_limits();
3743 }
3744 
3745 void CMTask::drain_local_queue(bool partially) {
3746   if (has_aborted()) return;
3747 
3748   // Decide what the target size is, depending whether we're going to
3749   // drain it partially (so that other tasks can steal if they run out
3750   // of things to do) or totally (at the very end).
3751   size_t target_size;
3752   if (partially) {
3753     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3754   } else {
3755     target_size = 0;
3756   }
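       // Illustrative example of the target_size just computed (assumed
       // values, not quoted defaults): with a queue capacity of 16K entries
       // and GCDrainStackTargetSize == 64, the partial target is
       // MIN2(16K / 3, 64) == 64, i.e. we deliberately leave a few dozen
       // entries behind for other tasks to steal.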
3757 
3758   if (_task_queue->size() > target_size) {
3759     if (_cm->verbose_high()) {
3760       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3761                              _task_id, target_size);
3762     }
3763 
3764     oop obj;
3765     bool ret = _task_queue->pop_local(obj);
3766     while (ret) {
3767       statsOnly( ++_local_pops );
3768 
3769       if (_cm->verbose_high()) {
3770         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3771                                (void*) obj);
3772       }
3773 
3774       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3775       assert(!_g1h->is_on_master_free_list(
3776                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3777 
3778       scan_object(obj);
3779 
3780       if (_task_queue->size() <= target_size || has_aborted()) {
3781         ret = false;
3782       } else {
3783         ret = _task_queue->pop_local(obj);
3784       }
3785     }
3786 
3787     if (_cm->verbose_high()) {
3788       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3789                              _task_id, _task_queue->size());
3790     }
3791   }
3792 }
3793 
3794 void CMTask::drain_global_stack(bool partially) {
3795   if (has_aborted()) return;
3796 
3797   // We have a policy to drain the local queue before we attempt to
3798   // drain the global stack.
3799   assert(partially || _task_queue->size() == 0, "invariant");
3800 
3801   // Decide what the target size is, depending whether we're going to
3802   // drain it partially (so that other tasks can steal if they run out
3803   // of things to do) or totally (at the very end).  Notice that,
3804   // because we move entries from the global stack in chunks or
3805   // because another task might be doing the same, we might in fact
3806   // drop below the target. But, this is not a problem.
3807   size_t target_size;
3808   if (partially) {
3809     target_size = _cm->partial_mark_stack_size_target();
3810   } else {
3811     target_size = 0;
3812   }
3813 
3814   if (_cm->mark_stack_size() > target_size) {
3815     if (_cm->verbose_low()) {
3816       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3817                              _task_id, target_size);
3818     }
3819 
3820     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3821       get_entries_from_global_stack();
3822       drain_local_queue(partially);
3823     }
3824 
3825     if (_cm->verbose_low()) {
3826       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3827                              _task_id, _cm->mark_stack_size());
3828     }
3829   }
3830 }
3831 
3832 // SATB Queue has several assumptions on whether to call the par or
3833 // non-par versions of the methods. This is why some of the code is
3834 // replicated. We should really get rid of the single-threaded version
3835 // of the code to simplify things.
3836 void CMTask::drain_satb_buffers() {
3837   if (has_aborted()) return;
3838 
3839   // We set this so that the regular clock knows that we're in the
3840   // middle of draining buffers and doesn't set the abort flag when it
3841   // notices that SATB buffers are available for draining. It'd be
3842   // very counterproductive if it did that. :-)
3843   _draining_satb_buffers = true;
3844 
3845   CMObjectClosure oc(this);
3846   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3847   if (G1CollectedHeap::use_parallel_gc_threads()) {
3848     satb_mq_set.set_par_closure(_task_id, &oc);
3849   } else {
3850     satb_mq_set.set_closure(&oc);
3851   }
3852 
3853   // This keeps claiming and applying the closure to completed buffers
3854   // until we run out of buffers or we need to abort.
3855   if (G1CollectedHeap::use_parallel_gc_threads()) {
3856     while (!has_aborted() &&
3857            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3858       if (_cm->verbose_medium()) {
3859         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3860       }
3861       statsOnly( ++_satb_buffers_processed );
3862       regular_clock_call();
3863     }
3864   } else {
3865     while (!has_aborted() &&
3866            satb_mq_set.apply_closure_to_completed_buffer()) {
3867       if (_cm->verbose_medium()) {
3868         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3869       }
3870       statsOnly( ++_satb_buffers_processed );
3871       regular_clock_call();
3872     }
3873   }
3874 
3875   if (!concurrent() && !has_aborted()) {
3876     // We should only do this during remark.
3877     if (G1CollectedHeap::use_parallel_gc_threads()) {
3878       satb_mq_set.par_iterate_closure_all_threads(_task_id);
3879     } else {
3880       satb_mq_set.iterate_closure_all_threads();
3881     }
3882   }
3883 
3884   _draining_satb_buffers = false;
3885 
3886   assert(has_aborted() ||
3887          concurrent() ||
3888          satb_mq_set.completed_buffers_num() == 0, "invariant");
3889 
3890   if (G1CollectedHeap::use_parallel_gc_threads()) {
3891     satb_mq_set.set_par_closure(_task_id, NULL);
3892   } else {
3893     satb_mq_set.set_closure(NULL);
3894   }
3895 
3896   // again, this was a potentially expensive operation, decrease the
3897   // limits to get the regular clock call early
3898   decrease_limits();
3899 }
3900 
3901 void CMTask::drain_region_stack(BitMapClosure* bc) {
3902   if (has_aborted()) return;
3903 
3904   assert(_region_finger == NULL,
3905          "it should be NULL when we're not scanning a region");
3906 
3907   if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
3908     if (_cm->verbose_low()) {
3909       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3910                              _task_id, _cm->region_stack_size());
3911     }
3912 
3913     MemRegion mr;
3914 
3915     if (!_aborted_region.is_empty()) {
3916       mr = _aborted_region;
3917       _aborted_region = MemRegion();
3918 
3919       if (_cm->verbose_low()) {
3920         gclog_or_tty->print_cr("[%d] scanning aborted region "
3921                                "[ " PTR_FORMAT ", " PTR_FORMAT " )",
3922                                _task_id, mr.start(), mr.end());
3923       }
3924     } else {
3925       mr = _cm->region_stack_pop_lock_free();
3926       // it returns MemRegion() if the pop fails
3927       statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3928     }
3929 
3930     while (mr.start() != NULL) {
3931       if (_cm->verbose_medium()) {
3932         gclog_or_tty->print_cr("[%d] we are scanning region "
3933                                "["PTR_FORMAT", "PTR_FORMAT")",
3934                                _task_id, mr.start(), mr.end());
3935       }
3936 
3937       assert(mr.end() <= _cm->finger(),
3938              "otherwise the region shouldn't be on the stack");
3939       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3940       if (_nextMarkBitMap->iterate(bc, mr)) {
3941         assert(!has_aborted(),
3942                "cannot abort the task without aborting the bitmap iteration");
3943 
3944         // We finished iterating over the region without aborting.
3945         regular_clock_call();
3946         if (has_aborted()) {
3947           mr = MemRegion();
3948         } else {
3949           mr = _cm->region_stack_pop_lock_free();
3950           // it returns MemRegion() if the pop fails
3951           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3952         }
3953       } else {
3954         assert(has_aborted(), "currently the only way to do so");
3955 
3956         // The only way to abort the bitmap iteration is to return
3957         // false from the do_bit() method. However, inside the
3958         // do_bit() method we move the _region_finger to point to the
3959         // object currently being looked at. So, if we bail out, we
3960         // have definitely set _region_finger to something non-null.
3961         assert(_region_finger != NULL, "invariant");
3962 
3963         // Make sure that any previously aborted region has been
3964         // cleared.
3965         assert(_aborted_region.is_empty(), "aborted region not cleared");
3966 
3967         // The iteration was actually aborted. So now _region_finger
3968         // points to the address of the object we last scanned. If we
3969         // leave it there, when we restart this task, we will rescan
3970         // the object. It is easy to avoid this. We move the finger by
3971         // enough to point to the next possible object header (the
3972         // bitmap knows by how much we need to move it as it knows its
3973         // granularity).
3974         MemRegion newRegion =
3975           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3976 
3977         if (!newRegion.is_empty()) {
3978           if (_cm->verbose_low()) {
3979             gclog_or_tty->print_cr("[%d] recording unscanned region"
3980                                    "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
3981                                    _task_id,
3982                                    newRegion.start(), newRegion.end());
3983           }
3984           // Now record the part of the region we didn't scan to
3985           // make sure this task scans it later.
3986           _aborted_region = newRegion;
3987         }
3988         // break from while
3989         mr = MemRegion();
3990       }
3991       _region_finger = NULL;
3992     }
3993 
3994     if (_cm->verbose_low()) {
3995       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3996                              _task_id, _cm->region_stack_size());
3997     }
3998   }
3999 }
4000 
4001 void CMTask::print_stats() {
4002   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
4003                          _task_id, _calls);
4004   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
4005                          _elapsed_time_ms, _termination_time_ms);
4006   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4007                          _step_times_ms.num(), _step_times_ms.avg(),
4008                          _step_times_ms.sd());
4009   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
4010                          _step_times_ms.maximum(), _step_times_ms.sum());
4011 
4012 #if _MARKING_STATS_
4013   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4014                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
4015                          _all_clock_intervals_ms.sd());
4016   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
4017                          _all_clock_intervals_ms.maximum(),
4018                          _all_clock_intervals_ms.sum());
4019   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
4020                          _clock_due_to_scanning, _clock_due_to_marking);
4021   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
4022                          _objs_scanned, _objs_found_on_bitmap);
4023   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
4024                          _local_pushes, _local_pops, _local_max_size);
4025   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
4026                          _global_pushes, _global_pops, _global_max_size);
4027   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
4028                          _global_transfers_to,_global_transfers_from);
4029   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
4030                          _regions_claimed, _region_stack_pops);
4031   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
4032   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
4033                          _steal_attempts, _steals);
4034   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
4035   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
4036                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
4037   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
4038                          _aborted_timed_out, _aborted_satb, _aborted_termination);
4039 #endif // _MARKING_STATS_
4040 }
4041 
4042 /*****************************************************************************
4043 
4044     The do_marking_step(time_target_ms) method is the building block
4045     of the parallel marking framework. It can be called in parallel
4046     with other invocations of do_marking_step() on different tasks
4047     (but only one per task, obviously) and concurrently with the
4048     mutator threads, or during remark, hence it eliminates the need
4049     for two versions of the code. When called during remark, it will
4050     pick up from where the task left off during the concurrent marking
4051     phase. Interestingly, tasks are also claimable during evacuation
4052     pauses, since do_marking_step() ensures that it aborts before
4053     it needs to yield.
4054 
4055     The data structures that it uses to do marking work are the
4056     following:
4057 
4058       (1) Marking Bitmap. If there are gray objects that appear only
4059       on the bitmap (this happens either when dealing with an overflow
4060       or when the initial marking phase has simply marked the roots
4061       and didn't push them on the stack), then tasks claim heap
4062       regions whose bitmap they then scan to find gray objects. A
4063       global finger indicates where the end of the last claimed region
4064       is. A local finger indicates how far into the region a task has
4065       scanned. The two fingers are used to determine how to gray an
4066       object (i.e. whether simply marking it is OK, as it will be
4067       visited by a task in the future, or whether it needs to be also
4068       pushed on a stack).
4069 
4070       (2) Local Queue. The local queue of the task which is accessed
4071       reasonably efficiently by the task. Other tasks can steal from
4072       it when they run out of work. Throughout the marking phase, a
4073       task attempts to keep its local queue short but not totally
4074       empty, so that entries are available for stealing by other
4075       tasks. Only when there is no more work will a task totally
4076       drain its local queue.
4077 
4078       (3) Global Mark Stack. This handles local queue overflow. During
4079       marking only sets of entries are moved between it and the local
4080       queues, as access to it requires a mutex and more fine-grain
4081       interaction with it which might cause contention. If it
4082       overflows, then the marking phase should restart and iterate
4083       over the bitmap to identify gray objects. Throughout the marking
4084       phase, tasks attempt to keep the global mark stack at a small
4085       length but not totally empty, so that entries are available for
4086       popping by other tasks. Only when there is no more work will
4087       tasks totally drain the global mark stack.
4088 
4089       (4) Global Region Stack. Entries on it correspond to areas of
4090       the bitmap that need to be scanned since they contain gray
4091       objects. Pushes on the region stack only happen during
4092       evacuation pauses and typically correspond to areas covered by
4093       GC LABs. If it overflows, then the marking phase should restart
4094       and iterate over the bitmap to identify gray objects. Tasks will
4095       try to totally drain the region stack as soon as possible.
4096 
4097       (5) SATB Buffer Queue. This is where completed SATB buffers are
4098       made available. Buffers are regularly removed from this queue
4099       and scanned for roots, so that the queue doesn't get too
4100       long. During remark, all completed buffers are processed, as
4101       well as the filled in parts of any uncompleted buffers.
4102 
4103     The do_marking_step() method tries to abort when the time target
4104     has been reached. There are a few other cases when the
4105     do_marking_step() method also aborts:
4106 
4107       (1) When the marking phase has been aborted (after a Full GC).
4108 
4109       (2) When a global overflow (either on the global stack or the
4110       region stack) has been triggered. Before the task aborts, it
4111       will actually sync up with the other tasks to ensure that all
4112       the marking data structures (local queues, stacks, fingers etc.)
4113       are re-initialised so that when do_marking_step() completes,
4114       the marking phase can immediately restart.
4115 
4116       (3) When enough completed SATB buffers are available. The
4117       do_marking_step() method only tries to drain SATB buffers right
4118       at the beginning. So, if enough buffers are available, the
4119       marking step aborts and the SATB buffers are processed at
4120       the beginning of the next invocation.
4121 
4122       (4) To yield. When we have to yield, we abort and yield
4123       right at the end of do_marking_step(). This saves us from a lot
4124       of hassle as, by yielding we might allow a Full GC. If this
4125       happens then objects will be compacted underneath our feet, the
4126       heap might shrink, etc. We save checking for this by just
4127       aborting and doing the yield right at the end.
4128 
4129     From the above it follows that the do_marking_step() method should
4130     be called in a loop (or, otherwise, regularly) until it completes.
4131 
4132     If a marking step completes without its has_aborted() flag being
4133     true, it means it has completed the current marking phase (and
4134     also all other marking tasks have done so and have all synced up).
4135 
4136     A method called regular_clock_call() is invoked "regularly" (in
4137     sub ms intervals) throughout marking. It is this clock method that
4138     checks all the abort conditions which were mentioned above and
4139     decides when the task should abort. A work-based scheme is used to
4140     trigger this clock method: when the number of object words the
4141     marking phase has scanned or the number of references the marking
4142     phase has visited reach a given limit. Additional invocations of
4143     the clock method have been planted in a few other strategic places
4144     too. The initial reason for the clock method was to avoid calling
4145     vtime too regularly, as it is quite expensive. So, once it was in
4146     place, it was natural to piggy-back all the other conditions on it
4147     too and not constantly check them throughout the code.
4148 
4149  *****************************************************************************/
4150 
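     // A minimal, illustrative sketch (not part of this file) of the calling
     // pattern described above; the actual driver code elsewhere in the G1
     // sources is authoritative. `task` and `target_ms` are placeholders.
     //
     //   do {
     //     task->do_marking_step(target_ms,
     //                           true  /* do_stealing */,
     //                           true  /* do_termination */);
     //     // yield and/or handle overflow between invocations as needed
     //   } while (task->has_aborted());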
4151 void CMTask::do_marking_step(double time_target_ms,
4152                              bool do_stealing,
4153                              bool do_termination) {
4154   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4155   assert(concurrent() == _cm->concurrent(), "they should be the same");
4156 
4157   assert(concurrent() || _cm->region_stack_empty(),
4158          "the region stack should have been cleared before remark");
4159   assert(concurrent() || !_cm->has_aborted_regions(),
4160          "aborted regions should have been cleared before remark");
4161   assert(_region_finger == NULL,
4162          "this should be non-null only when a region is being scanned");
4163 
4164   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4165   assert(_task_queues != NULL, "invariant");
4166   assert(_task_queue != NULL, "invariant");
4167   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
4168 
4169   assert(!_claimed,
4170          "only one thread should claim this task at any one time");
4171 
4172   // OK, this doesn't safeguard against all possible scenarios, as it is
4173   // possible for two threads to set the _claimed flag at the same
4174   // time. But it is only for debugging purposes anyway and it will
4175   // catch most problems.
4176   _claimed = true;
4177 
4178   _start_time_ms = os::elapsedVTime() * 1000.0;
4179   statsOnly( _interval_start_time_ms = _start_time_ms );
4180 
4181   double diff_prediction_ms =
4182     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4183   _time_target_ms = time_target_ms - diff_prediction_ms;
4184 
4185   // set up the variables that are used in the work-based scheme to
4186   // call the regular clock method
4187   _words_scanned = 0;
4188   _refs_reached  = 0;
4189   recalculate_limits();
4190 
4191   // clear all flags
4192   clear_has_aborted();
4193   _has_timed_out = false;
4194   _draining_satb_buffers = false;
4195 
4196   ++_calls;
4197 
4198   if (_cm->verbose_low()) {
4199     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
4200                            "target = %1.2lfms >>>>>>>>>>",
4201                            _task_id, _calls, _time_target_ms);
4202   }
4203 
4204   // Set up the bitmap and oop closures. Anything that uses them is
4205   // eventually called from this method, so it is OK to allocate these
4206   // here on the stack.
4207   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4208   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
4209   set_cm_oop_closure(&cm_oop_closure);
4210 
4211   if (_cm->has_overflown()) {
4212     // This can happen if the region stack or the mark stack overflows
4213     // during a GC pause and this task, after a yield point,
4214     // restarts. We have to abort as we need to get into the overflow
4215     // protocol which happens right at the end of this task.
4216     set_has_aborted();
4217   }
4218 
4219   // First drain any available SATB buffers. After this, we will not
4220   // look at SATB buffers before the next invocation of this method.
4221   // If enough completed SATB buffers are queued up, the regular clock
4222   // will abort this task so that it restarts.
4223   drain_satb_buffers();
4224   // ...then partially drain the local queue and the global stack
4225   drain_local_queue(true);
4226   drain_global_stack(true);
4227 
4228   // Then totally drain the region stack.  We will not look at
4229   // it again before the next invocation of this method. Entries on
4230   // the region stack are only added during evacuation pauses, for
4231   // which we have to yield. When we do, we abort the task anyway so
4232   // it will look at the region stack again when it restarts.
4233   bitmap_closure.set_scanning_heap_region(false);
4234   drain_region_stack(&bitmap_closure);
4235   // ...then partially drain the local queue and the global stack
4236   drain_local_queue(true);
4237   drain_global_stack(true);
4238 
4239   do {
4240     if (!has_aborted() && _curr_region != NULL) {
4241       // This means that we're already holding on to a region.
4242       assert(_finger != NULL, "if region is not NULL, then the finger "
4243              "should not be NULL either");
4244 
4245       // We might have restarted this task after an evacuation pause
4246       // which might have evacuated the region we're holding on to
4247       // underneath our feet. Let's read its limit again to make sure
4248       // that we do not iterate over a region of the heap that
4249       // contains garbage (update_region_limit() will also move
4250       // _finger to the start of the region if it is found empty).
4251       update_region_limit();
4252       // We will start from _finger not from the start of the region,
4253       // as we might be restarting this task after aborting half-way
4254       // through scanning this region. In this case, _finger points to
4255       // the address where we last found a marked object. If this is a
4256       // fresh region, _finger points to start().
4257       MemRegion mr = MemRegion(_finger, _region_limit);
4258 
4259       if (_cm->verbose_low()) {
4260         gclog_or_tty->print_cr("[%d] we're scanning part "
4261                                "["PTR_FORMAT", "PTR_FORMAT") "
4262                                "of region "PTR_FORMAT,
4263                                _task_id, _finger, _region_limit, _curr_region);
4264       }
4265 
4266       // Let's iterate over the bitmap of the part of the
4267       // region that is left.
4268       bitmap_closure.set_scanning_heap_region(true);
4269       if (mr.is_empty() ||
4270           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4271         // We successfully completed iterating over the region. Now,
4272         // let's give up the region.
4273         giveup_current_region();
4274         regular_clock_call();
4275       } else {
4276         assert(has_aborted(), "currently the only way to do so");
4277         // The only way to abort the bitmap iteration is to return
4278         // false from the do_bit() method. However, inside the
4279         // do_bit() method we move the _finger to point to the
4280         // object currently being looked at. So, if we bail out, we
4281         // have definitely set _finger to something non-null.
4282         assert(_finger != NULL, "invariant");
4283 
4284         // Region iteration was actually aborted. So now _finger
4285         // points to the address of the object we last scanned. If we
4286         // leave it there, when we restart this task, we will rescan
4287         // the object. It is easy to avoid this. We move the finger by
4288         // enough to point to the next possible object header (the
4289         // bitmap knows by how much we need to move it as it knows its
4290         // granularity).
4291         assert(_finger < _region_limit, "invariant");
4292         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
4293         // Check if bitmap iteration was aborted while scanning the last object
4294         if (new_finger >= _region_limit) {
4295             giveup_current_region();
4296         } else {
4297             move_finger_to(new_finger);
4298         }
4299       }
4300     }
4301     // At this point we have either completed iterating over the
4302     // region we were holding on to, or we have aborted.
4303 
4304     // We then partially drain the local queue and the global stack.
4305     // (Do we really need this?)
4306     drain_local_queue(true);
4307     drain_global_stack(true);
4308 
4309     // Read the note on the claim_region() method on why it might
4310     // return NULL with potentially more regions available for
4311     // claiming and why we have to check out_of_regions() to determine
4312     // whether we're done or not.
4313     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4314       // We are going to try to claim a new region. We should have
4315       // given up on the previous one.
4316       // Separated the asserts so that we know which one fires.
4317       assert(_curr_region  == NULL, "invariant");
4318       assert(_finger       == NULL, "invariant");
4319       assert(_region_limit == NULL, "invariant");
4320       if (_cm->verbose_low()) {
4321         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
4322       }
4323       HeapRegion* claimed_region = _cm->claim_region(_task_id);
4324       if (claimed_region != NULL) {
4325         // Yes, we managed to claim one
4326         statsOnly( ++_regions_claimed );
4327 
4328         if (_cm->verbose_low()) {
4329           gclog_or_tty->print_cr("[%d] we successfully claimed "
4330                                  "region "PTR_FORMAT,
4331                                  _task_id, claimed_region);
4332         }
4333 
4334         setup_for_region(claimed_region);
4335         assert(_curr_region == claimed_region, "invariant");
4336       }
4337       // It is important to call the regular clock here. It might take
4338       // a while to claim a region if, for example, we hit a large
4339       // block of empty regions. So we need to call the regular clock
4340       // method once round the loop to make sure it's called
4341       // frequently enough.
4342       regular_clock_call();
4343     }
4344 
4345     if (!has_aborted() && _curr_region == NULL) {
4346       assert(_cm->out_of_regions(),
4347              "at this point we should be out of regions");
4348     }
4349   } while ( _curr_region != NULL && !has_aborted());
4350 
4351   if (!has_aborted()) {
4352     // We cannot check whether the global stack is empty, since other
4353     // tasks might be pushing objects to it concurrently. We also cannot
4354     // check if the region stack is empty because if a thread is aborting
4355     // it can push a partially done region back.
4356     assert(_cm->out_of_regions(),
4357            "at this point we should be out of regions");
4358 
4359     if (_cm->verbose_low()) {
4360       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4361     }
4362 
4363     // Try to reduce the number of available SATB buffers so that
4364     // remark has less work to do.
4365     drain_satb_buffers();
4366   }
4367 
4368   // Since we've done everything else, we can now totally drain the
4369   // local queue and global stack.
4370   drain_local_queue(false);
4371   drain_global_stack(false);
4372 
4373   // Attempt to steal work from other tasks' queues.
4374   if (do_stealing && !has_aborted()) {
4375     // We have not aborted. This means that we have finished all that
4376     // we could. Let's try to do some stealing...
4377 
4378     // We cannot check whether the global stack is empty, since other
4379     // tasks might be pushing objects to it concurrently. We also cannot
4380     // check if the region stack is empty because if a thread is aborting
4381     // it can push a partially done region back.
4382     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4383            "only way to reach here");
4384 
4385     if (_cm->verbose_low()) {
4386       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4387     }
4388 
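         // Repeatedly try to steal an entry from some other task's queue (the
         // victim queue is chosen pseudo-randomly, seeded by _hash_seed) and
         // stop as soon as an attempt fails or we are asked to abort.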
4389     while (!has_aborted()) {
4390       oop obj;
4391       statsOnly( ++_steal_attempts );
4392 
4393       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4394         if (_cm->verbose_medium()) {
4395           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4396                                  _task_id, (void*) obj);
4397         }
4398 
4399         statsOnly( ++_steals );
4400 
4401         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4402                "any stolen object should be marked");
4403         scan_object(obj);
4404 
4405         // And since we're towards the end, let's totally drain the
4406         // local queue and global stack.
4407         drain_local_queue(false);
4408         drain_global_stack(false);
4409       } else {
4410         break;
4411       }
4412     }
4413   }
4414 
4415   // If we are about to wrap up and go into termination, check if we
4416   // should raise the overflow flag.
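       // (force_overflow() is a stress-testing hook that lets us exercise the
       // overflow / restart protocol even when no real overflow has occurred.)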
4417   if (do_termination && !has_aborted()) {
4418     if (_cm->force_overflow()->should_force()) {
4419       _cm->set_has_overflown();
4420       regular_clock_call();
4421     }
4422   }
4423 
4424   // We still haven't aborted. Now, let's try to get into the
4425   // termination protocol.
4426   if (do_termination && !has_aborted()) {
4427     // We cannot check whether the global stack is empty, since other
4428     // tasks might be concurrently pushing objects on it. We also cannot
4429     // check if the region stack is empty because if a thread is aborting
4430     // it can push a partially done region back.
4431     // Separated the asserts so that we know which one fires.
4432     assert(_cm->out_of_regions(), "only way to reach here");
4433     assert(_task_queue->size() == 0, "only way to reach here");
4434 
4435     if (_cm->verbose_low()) {
4436       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4437     }
4438 
4439     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4440     // The CMTask class also extends the TerminatorTerminator class,
4441     // hence its should_exit_termination() method will also decide
4442     // whether to exit the termination protocol or not.
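         // offer_termination() only returns true once every task has offered
         // termination; it returns false early if should_exit_termination()
         // tells this task to leave the protocol, e.g. because the global
         // mark stack is no longer empty or this task has been asked to abort.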
4443     bool finished = _cm->terminator()->offer_termination(this);
4444     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4445     _termination_time_ms +=
4446       termination_end_time_ms - _termination_start_time_ms;
4447 
4448     if (finished) {
4449       // We're all done.
4450 
4451       if (_task_id == 0) {
4452         // let's allow task 0 to do this
4453         if (concurrent()) {
4454           assert(_cm->concurrent_marking_in_progress(), "invariant");
4455           // we need to set this to false before the next
4456           // safepoint. This way we ensure that the marking phase
4457           // doesn't observe any more heap expansions.
4458           _cm->clear_concurrent_marking_in_progress();
4459         }
4460       }
4461 
4462       // We can now guarantee that the global stack is empty, since
4463       // all other tasks have finished. We separated the guarantees so
4464       // that, if a condition is false, we can immediately find out
4465       // which one.
4466       guarantee(_cm->out_of_regions(), "only way to reach here");
4467       guarantee(_aborted_region.is_empty(), "only way to reach here");
4468       guarantee(_cm->region_stack_empty(), "only way to reach here");
4469       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4470       guarantee(_task_queue->size() == 0, "only way to reach here");
4471       guarantee(!_cm->has_overflown(), "only way to reach here");
4472       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4473       guarantee(!_cm->region_stack_overflow(), "only way to reach here");
4474 
4475       if (_cm->verbose_low()) {
4476         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4477       }
4478     } else {
4479       // Apparently there's more work to do. Let's abort this task; it
4480       // will be restarted and we can hopefully find more things to do.
4481 
4482       if (_cm->verbose_low()) {
4483         gclog_or_tty->print_cr("[%d] apparently there is more work to do",
4484                                _task_id);
4485       }
4486 
4487       set_has_aborted();
4488       statsOnly( ++_aborted_termination );
4489     }
4490   }
4491 
4492   // Mainly for debugging purposes to make sure that a pointer to the
4493   // closure which was stack-allocated in this frame doesn't
4494   // escape it by accident.
4495   set_cm_oop_closure(NULL);
4496   double end_time_ms = os::elapsedVTime() * 1000.0;
4497   double elapsed_time_ms = end_time_ms - _start_time_ms;
4498   // Update the step history.
4499   _step_times_ms.add(elapsed_time_ms);
4500 
4501   if (has_aborted()) {
4502     // The task was aborted for some reason.
4503 
4504     statsOnly( ++_aborted );
4505 
4506     if (_has_timed_out) {
4507       double diff_ms = elapsed_time_ms - _time_target_ms;
4508       // Keep statistics of how well we did with respect to hitting
4509       // our target only if we actually timed out (if we aborted for
4510       // other reasons, then the results might get skewed).
4511       _marking_step_diffs_ms.add(diff_ms);
4512     }
4513 
4514     if (_cm->has_overflown()) {
4515       // This is the interesting one. We aborted because a global
4516       // overflow was raised. This means we have to restart the
4517       // marking phase and start iterating over regions. However, in
4518       // order to do this we have to make sure that all tasks stop
4519       // what they are doing and re-initialise in a safe manner. We
4520       // will achieve this with the use of two barrier sync points.
4521 
4522       if (_cm->verbose_low()) {
4523         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4524       }
4525 
4526       _cm->enter_first_sync_barrier(_task_id);
4527       // When we exit this sync barrier we know that all tasks have
4528       // stopped doing marking work. So, it's now safe to
4529       // re-initialise our data structures. At the end of this method,
4530       // task 0 will clear the global data structures.
4531 
4532       statsOnly( ++_aborted_overflow );
4533 
4534       // We clear the local state of this task...
4535       clear_region_fields();
4536 
4537       // ...and enter the second barrier.
4538       _cm->enter_second_sync_barrier(_task_id);
4539       // At this point everything has been re-initialised and we're
4540       // ready to restart.
4541     }
4542 
4543     if (_cm->verbose_low()) {
4544       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4545                              "elapsed = %1.2lfms <<<<<<<<<<",
4546                              _task_id, _time_target_ms, elapsed_time_ms);
4547       if (_cm->has_aborted()) {
4548         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4549                                _task_id);
4550       }
4551     }
4552   } else {
4553     if (_cm->verbose_low()) {
4554       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4555                              "elapsed = %1.2lfms <<<<<<<<<<",
4556                              _task_id, _time_target_ms, elapsed_time_ms);
4557     }
4558   }
4559 
4560   _claimed = false;
4561 }
4562 
4563 CMTask::CMTask(int task_id,
4564                ConcurrentMark* cm,
4565                CMTaskQueue* task_queue,
4566                CMTaskQueueSet* task_queues)
4567   : _g1h(G1CollectedHeap::heap()),
4568     _task_id(task_id), _cm(cm),
4569     _claimed(false),
4570     _nextMarkBitMap(NULL), _hash_seed(17),
4571     _task_queue(task_queue),
4572     _task_queues(task_queues),
4573     _cm_oop_closure(NULL),
4574     _aborted_region(MemRegion()) {
4575   guarantee(task_queue != NULL, "invariant");
4576   guarantee(task_queues != NULL, "invariant");
4577 
4578   statsOnly( _clock_due_to_scanning = 0;
4579              _clock_due_to_marking  = 0 );
4580 
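       // Seed the sequence of step-time differences with a small value so
       // that the first prediction of how much a step tends to overshoot its
       // time target is non-zero.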
4581   _marking_step_diffs_ms.add(0.5);
4582 }
4583 
4584 // These are formatting macros that are used below to ensure
4585 // consistent formatting. The *_H_* versions are used to format the
4586 // header for a particular value and they should be kept consistent
4587 // with the corresponding macro. Also note that most of the macros add
4588 // the necessary white space (as a prefix) which makes them a bit
4589 // easier to compose.
4590 
4591 // All the output lines are prefixed with this string to be able to
4592 // identify them easily in a large log file.
4593 #define G1PPRL_LINE_PREFIX            "###"
4594 
4595 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
4596 #ifdef _LP64
4597 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4598 #else // _LP64
4599 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4600 #endif // _LP64
4601 
4602 // For per-region info
4603 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4604 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4605 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
4606 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4607 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4608 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4609 
4610 // For summary info
4611 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
4612 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
4613 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
4614 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
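     // As a rough illustration (the addresses and sizes below are made up),
     // a per-region line composed from the macros above looks like:
     //   ###    OLD 0x00000000f4000000-0x00000000f4100000    1048576     524288     524288            2.5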
4615 
4616 G1PrintRegionLivenessInfoClosure::
4617 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4618   : _out(out),
4619     _total_used_bytes(0), _total_capacity_bytes(0),
4620     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4621     _hum_used_bytes(0), _hum_capacity_bytes(0),
4622     _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4623   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4624   MemRegion g1_committed = g1h->g1_committed();
4625   MemRegion g1_reserved = g1h->g1_reserved();
4626   double now = os::elapsedTime();
4627 
4628   // Print the header of the output.
4629   _out->cr();
4630   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4631   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4632                  G1PPRL_SUM_ADDR_FORMAT("committed")
4633                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4634                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4635                  g1_committed.start(), g1_committed.end(),
4636                  g1_reserved.start(), g1_reserved.end(),
4637                  HeapRegion::GrainBytes);
4638   _out->print_cr(G1PPRL_LINE_PREFIX);
4639   _out->print_cr(G1PPRL_LINE_PREFIX
4640                  G1PPRL_TYPE_H_FORMAT
4641                  G1PPRL_ADDR_BASE_H_FORMAT
4642                  G1PPRL_BYTE_H_FORMAT
4643                  G1PPRL_BYTE_H_FORMAT
4644                  G1PPRL_BYTE_H_FORMAT
4645                  G1PPRL_DOUBLE_H_FORMAT,
4646                  "type", "address-range",
4647                  "used", "prev-live", "next-live", "gc-eff");
4648 }
4649 
4650 // It takes as a parameter a reference to one of the _hum_* fields,
4651 // deduces the corresponding value for a region in a humongous region
4652 // series (either the region size, or what's left if the _hum_* field
4653 // is < the region size), and updates the _hum_* field accordingly.
4654 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4655   size_t bytes = 0;
4656   // The > 0 check is to deal with the prev and next live bytes which
4657   // could be 0.
4658   if (*hum_bytes > 0) {
4659     bytes = MIN2((size_t) HeapRegion::GrainBytes, *hum_bytes);
4660     *hum_bytes -= bytes;
4661   }
4662   return bytes;
4663 }
4664 
4665 // It deduces the values for a region in a humongous region series
4666 // from the _hum_* fields and updates those accordingly. It assumes
4667 // that the _hum_* fields have already been set up from the "starts
4668 // humongous" region and we visit the regions in address order.
4669 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4670                                                      size_t* capacity_bytes,
4671                                                      size_t* prev_live_bytes,
4672                                                      size_t* next_live_bytes) {
4673   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4674   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4675   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4676   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4677   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4678 }
4679 
4680 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4681   const char* type = "";
4682   HeapWord* bottom       = r->bottom();
4683   HeapWord* end          = r->end();
4684   size_t capacity_bytes  = r->capacity();
4685   size_t used_bytes      = r->used();
4686   size_t prev_live_bytes = r->live_bytes();
4687   size_t next_live_bytes = r->next_live_bytes();
4688   double gc_eff          = r->gc_efficiency();
4689   if (r->used() == 0) {
4690     type = "FREE";
4691   } else if (r->is_survivor()) {
4692     type = "SURV";
4693   } else if (r->is_young()) {
4694     type = "EDEN";
4695   } else if (r->startsHumongous()) {
4696     type = "HUMS";
4697 
4698     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4699            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4700            "they should have been zeroed after the last time we used them");
4701     // Set up the _hum_* fields.
4702     _hum_capacity_bytes  = capacity_bytes;
4703     _hum_used_bytes      = used_bytes;
4704     _hum_prev_live_bytes = prev_live_bytes;
4705     _hum_next_live_bytes = next_live_bytes;
4706     get_hum_bytes(&used_bytes, &capacity_bytes,
4707                   &prev_live_bytes, &next_live_bytes);
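         // A "starts humongous" region reports used/capacity/live and end()
         // for the entire humongous series; the _hum_* machinery spreads the
         // totals over the individual regions, and here we clip the printed
         // address range back to a single region.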
4708     end = bottom + HeapRegion::GrainWords;
4709   } else if (r->continuesHumongous()) {
4710     type = "HUMC";
4711     get_hum_bytes(&used_bytes, &capacity_bytes,
4712                   &prev_live_bytes, &next_live_bytes);
4713     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4714   } else {
4715     type = "OLD";
4716   }
4717 
4718   _total_used_bytes      += used_bytes;
4719   _total_capacity_bytes  += capacity_bytes;
4720   _total_prev_live_bytes += prev_live_bytes;
4721   _total_next_live_bytes += next_live_bytes;
4722 
4723   // Print a line for this particular region.
4724   _out->print_cr(G1PPRL_LINE_PREFIX
4725                  G1PPRL_TYPE_FORMAT
4726                  G1PPRL_ADDR_BASE_FORMAT
4727                  G1PPRL_BYTE_FORMAT
4728                  G1PPRL_BYTE_FORMAT
4729                  G1PPRL_BYTE_FORMAT
4730                  G1PPRL_DOUBLE_FORMAT,
4731                  type, bottom, end,
4732                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4733 
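       // Returning false keeps the heap region iteration going over the
       // remaining regions.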
4734   return false;
4735 }
4736 
4737 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4738   // Print the footer of the output.
4739   _out->print_cr(G1PPRL_LINE_PREFIX);
4740   _out->print_cr(G1PPRL_LINE_PREFIX
4741                  " SUMMARY"
4742                  G1PPRL_SUM_MB_FORMAT("capacity")
4743                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4744                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4745                  G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4746                  bytes_to_mb(_total_capacity_bytes),
4747                  bytes_to_mb(_total_used_bytes),
4748                  perc(_total_used_bytes, _total_capacity_bytes),
4749                  bytes_to_mb(_total_prev_live_bytes),
4750                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4751                  bytes_to_mb(_total_next_live_bytes),
4752                  perc(_total_next_live_bytes, _total_capacity_bytes));
4753   _out->cr();
4754 }