1 /*
   2  * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/symbolTable.hpp"
  27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
  28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
  32 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
  33 #include "gc_implementation/g1/g1RemSet.hpp"
  34 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  35 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
  36 #include "gc_implementation/shared/vmGCOperations.hpp"
  37 #include "memory/genOopClosures.inline.hpp"
  38 #include "memory/referencePolicy.hpp"
  39 #include "memory/resourceArea.hpp"
  40 #include "oops/oop.inline.hpp"
  41 #include "runtime/handles.inline.hpp"
  42 #include "runtime/java.hpp"
  43 
  44 //
  45 // CMS Bit Map Wrapper
  46 
  47 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  48   _bm((uintptr_t*)NULL, 0),
  49   _shifter(shifter) {
  50   _bmStartWord = (HeapWord*)(rs.base());
  51   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  52   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
  53                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  54 
  55   guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  56   // For now we'll just commit all of the bit map up front.
  57   // Later on we'll try to be more parsimonious with swap.
  58   guarantee(_virtual_space.initialize(brs, brs.size()),
  59             "couldn't reserve backing store for CMS bit map");
  60   assert(_virtual_space.committed_size() == brs.size(),
  61          "didn't reserve backing store for all of CMS bit map?");
  62   _bm.set_map((uintptr_t*)_virtual_space.low());
  63   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
  64          _bmWordSize, "inconsistency in bit map sizing");
  65   _bm.set_size(_bmWordSize >> _shifter);
  66 }
  67 
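     // Return the address of the first marked word at or after addr and below
     // limit (the end of the bitmap if limit is NULL); returns limit if no
     // marked word is found in that range.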
  68 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  69                                                HeapWord* limit) const {
  70   // First we must round addr *up* to a possible object boundary.
  71   addr = (HeapWord*)align_size_up((intptr_t)addr,
  72                                   HeapWordSize << _shifter);
  73   size_t addrOffset = heapWordToOffset(addr);
  74   if (limit == NULL) {
  75     limit = _bmStartWord + _bmWordSize;
  76   }
  77   size_t limitOffset = heapWordToOffset(limit);
  78   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  79   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  80   assert(nextAddr >= addr, "get_next_one postcondition");
  81   assert(nextAddr == limit || isMarked(nextAddr),
  82          "get_next_one postcondition");
  83   return nextAddr;
  84 }
  85 
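     // Return the address of the first unmarked word at or after addr and below
     // limit (the end of the bitmap if limit is NULL); returns limit if every
     // word in that range is marked.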
  86 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  87                                                  HeapWord* limit) const {
  88   size_t addrOffset = heapWordToOffset(addr);
  89   if (limit == NULL) {
  90     limit = _bmStartWord + _bmWordSize;
  91   }
  92   size_t limitOffset = heapWordToOffset(limit);
  93   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  94   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  95   assert(nextAddr >= addr, "get_next_one postcondition");
  96   assert(nextAddr == limit || !isMarked(nextAddr),
  97          "get_next_one postcondition");
  98   return nextAddr;
  99 }
 100 
 101 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 102   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 103   return (int) (diff >> _shifter);
 104 }
 105 
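     // Apply cl to each set bit in the part of mr that this bitmap covers.
     // Returns false if the closure aborted the iteration, true otherwise.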
 106 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
 107   HeapWord* left  = MAX2(_bmStartWord, mr.start());
 108   HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
 109   if (right > left) {
 110     // Right-open interval [leftOffset, rightOffset).
 111     return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
 112   } else {
 113     return true;
 114   }
 115 }
 116 
 117 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
 118                                              size_t    from_start_index,
 119                                              HeapWord* to_start_word,
 120                                              size_t    word_num) {
 121   _bm.mostly_disjoint_range_union(from_bitmap,
 122                                   from_start_index,
 123                                   heapWordToOffset(to_start_word),
 124                                   word_num);
 125 }
 126 
 127 #ifndef PRODUCT
 128 bool CMBitMapRO::covers(ReservedSpace rs) const {
 129   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 130   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
 131          "size inconsistency");
 132   return _bmStartWord == (HeapWord*)(rs.base()) &&
 133          _bmWordSize  == rs.size()>>LogHeapWordSize;
 134 }
 135 #endif
 136 
 137 void CMBitMap::clearAll() {
 138   _bm.clear();
 139   return;
 140 }
 141 
 142 void CMBitMap::markRange(MemRegion mr) {
 143   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 144   assert(!mr.is_empty(), "unexpected empty region");
 145   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 146           ((HeapWord *) mr.end())),
 147          "markRange memory region end is not card aligned");
 148   // convert address range into offset range
 149   _bm.at_put_range(heapWordToOffset(mr.start()),
 150                    heapWordToOffset(mr.end()), true);
 151 }
 152 
 153 void CMBitMap::clearRange(MemRegion mr) {
 154   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 155   assert(!mr.is_empty(), "unexpected empty region");
 156   // convert address range into offset range
 157   _bm.at_put_range(heapWordToOffset(mr.start()),
 158                    heapWordToOffset(mr.end()), false);
 159 }
 160 
 161 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 162                                             HeapWord* end_addr) {
 163   HeapWord* start = getNextMarkedWordAddress(addr);
 164   start = MIN2(start, end_addr);
 165   HeapWord* end   = getNextUnmarkedWordAddress(start);
 166   end = MIN2(end, end_addr);
 167   assert(start <= end, "Consistency check");
 168   MemRegion mr(start, end);
 169   if (!mr.is_empty()) {
 170     clearRange(mr);
 171   }
 172   return mr;
 173 }
 174 
 175 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 176   _base(NULL), _cm(cm)
 177 #ifdef ASSERT
 178   , _drain_in_progress(false)
 179   , _drain_in_progress_yields(false)
 180 #endif
 181 {}
 182 
 183 void CMMarkStack::allocate(size_t size) {
 184   _base = NEW_C_HEAP_ARRAY(oop, size);
 185   if (_base == NULL) {
 186     vm_exit_during_initialization("Failed to allocate "
 187                                   "CM region mark stack");
 188   }
 189   _index = 0;
 190   _capacity = (jint) size;
 191   _oops_do_bound = -1;
 192   NOT_PRODUCT(_max_depth = 0);
 193 }
 194 
 195 CMMarkStack::~CMMarkStack() {
 196   if (_base != NULL) {
 197     FREE_C_HEAP_ARRAY(oop, _base);
 198   }
 199 }
 200 
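     // Lock-free push of ptr using a CAS on _index. If the stack is full the
     // entry is dropped and the _overflow flag is set instead.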
 201 void CMMarkStack::par_push(oop ptr) {
 202   while (true) {
 203     if (isFull()) {
 204       _overflow = true;
 205       return;
 206     }
 207     // Otherwise...
 208     jint index = _index;
 209     jint next_index = index+1;
 210     jint res = Atomic::cmpxchg(next_index, &_index, index);
 211     if (res == index) {
 212       _base[index] = ptr;
 213       // Note that we don't maintain this atomically.  We could, but it
 214       // doesn't seem necessary.
 215       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 216       return;
 217     }
 218     // Otherwise, we need to try again.
 219   }
 220 }
 221 
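     // Lock-free push of the n entries in ptr_arr, claiming all n slots with a
     // single CAS on _index; sets _overflow and gives up if there is not
     // enough room left on the stack.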
 222 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 223   while (true) {
 224     if (isFull()) {
 225       _overflow = true;
 226       return;
 227     }
 228     // Otherwise...
 229     jint index = _index;
 230     jint next_index = index + n;
 231     if (next_index > _capacity) {
 232       _overflow = true;
 233       return;
 234     }
 235     jint res = Atomic::cmpxchg(next_index, &_index, index);
 236     if (res == index) {
 237       for (int i = 0; i < n; i++) {
 238         int ind = index + i;
 239         assert(ind < _capacity, "By overflow test above.");
 240         _base[ind] = ptr_arr[i];
 241       }
 242       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 243       return;
 244     }
 245     // Otherwise, we need to try again.
 246   }
 247 }
 248 
 249 
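     // Push the n entries in ptr_arr while holding ParGCRareEvent_lock; sets
     // _overflow if there is not enough room left on the stack.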
 250 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 251   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 252   jint start = _index;
 253   jint next_index = start + n;
 254   if (next_index > _capacity) {
 255     _overflow = true;
 256     return;
 257   }
 258   // Otherwise.
 259   _index = next_index;
 260   for (int i = 0; i < n; i++) {
 261     int ind = start + i;
 262     assert(ind < _capacity, "By overflow test above.");
 263     _base[ind] = ptr_arr[i];
 264   }
 265 }
 266 
 267 
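     // Pop up to max entries into ptr_arr while holding ParGCRareEvent_lock.
     // Sets *n to the number of entries popped and returns false only if the
     // stack was empty.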
 268 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 269   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 270   jint index = _index;
 271   if (index == 0) {
 272     *n = 0;
 273     return false;
 274   } else {
 275     int k = MIN2(max, index);
 276     jint new_ind = index - k;
 277     for (int j = 0; j < k; j++) {
 278       ptr_arr[j] = _base[new_ind + j];
 279     }
 280     _index = new_ind;
 281     *n = k;
 282     return true;
 283   }
 284 }
 285 
 286 
 287 CMRegionStack::CMRegionStack() : _base(NULL) {}
 288 
 289 void CMRegionStack::allocate(size_t size) {
 290   _base = NEW_C_HEAP_ARRAY(MemRegion, size);
 291   if (_base == NULL) {
 292     vm_exit_during_initialization("Failed to allocate CM region mark stack");
 293   }
 294   _index = 0;
 295   _capacity = (jint) size;
 296 }
 297 
 298 CMRegionStack::~CMRegionStack() {
 299   if (_base != NULL) {
 300     FREE_C_HEAP_ARRAY(MemRegion, _base);
 301   }
 302 }
 303 
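     // Lock-free push of mr onto the region stack using a CAS on _index; sets
     // _overflow instead of pushing if the stack is full.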
 304 void CMRegionStack::push_lock_free(MemRegion mr) {
 305   assert(mr.word_size() > 0, "Precondition");
 306   while (true) {
 307     jint index = _index;
 308 
 309     if (index >= _capacity) {
 310       _overflow = true;
 311       return;
 312     }
 313     // Otherwise...
 314     jint next_index = index+1;
 315     jint res = Atomic::cmpxchg(next_index, &_index, index);
 316     if (res == index) {
 317       _base[index] = mr;
 318       return;
 319     }
 320     // Otherwise, we need to try again.
 321   }
 322 }
 323 
 324 // Lock-free pop of the region stack. Called during the concurrent
 325 // marking / remark phases. Should only be called in tandem with
 326 // other lock-free pops.
 327 MemRegion CMRegionStack::pop_lock_free() {
 328   while (true) {
 329     jint index = _index;
 330 
 331     if (index == 0) {
 332       return MemRegion();
 333     }
 334     // Otherwise...
 335     jint next_index = index-1;
 336     jint res = Atomic::cmpxchg(next_index, &_index, index);
 337     if (res == index) {
 338       MemRegion mr = _base[next_index];
 339       if (mr.start() != NULL) {
 340         assert(mr.end() != NULL, "invariant");
 341         assert(mr.word_size() > 0, "invariant");
 342         return mr;
 343       } else {
 344         // that entry was invalidated... let's skip it
 345         assert(mr.end() == NULL, "invariant");
 346       }
 347     }
 348     // Otherwise, we need to try again.
 349   }
 350 }
 351 
 352 #if 0
 353 // The routines that manipulate the region stack with a lock are
 354 // not currently used. They should be retained, however, as a
 355 // diagnostic aid.
 356 
 357 void CMRegionStack::push_with_lock(MemRegion mr) {
 358   assert(mr.word_size() > 0, "Precondition");
 359   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 360 
 361   if (isFull()) {
 362     _overflow = true;
 363     return;
 364   }
 365 
 366   _base[_index] = mr;
 367   _index += 1;
 368 }
 369 
 370 MemRegion CMRegionStack::pop_with_lock() {
 371   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 372 
 373   while (true) {
 374     if (_index == 0) {
 375       return MemRegion();
 376     }
 377     _index -= 1;
 378 
 379     MemRegion mr = _base[_index];
 380     if (mr.start() != NULL) {
 381       assert(mr.end() != NULL, "invariant");
 382       assert(mr.word_size() > 0, "invariant");
 383       return mr;
 384     } else {
 385       // that entry was invalidated... let's skip it
 386       assert(mr.end() == NULL, "invariant");
 387     }
 388   }
 389 }
 390 #endif
 391 
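     // Scan the first _oops_do_bound entries and replace any region that points
     // into the collection set with an empty MemRegion (i.e., invalidate it).
     // Returns true if at least one entry was invalidated.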
 392 bool CMRegionStack::invalidate_entries_into_cset() {
 393   bool result = false;
 394   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 395   for (int i = 0; i < _oops_do_bound; ++i) {
 396     MemRegion mr = _base[i];
 397     if (mr.start() != NULL) {
 398       assert(mr.end() != NULL, "invariant");
 399       assert(mr.word_size() > 0, "invariant");
 400       HeapRegion* hr = g1h->heap_region_containing(mr.start());
 401       assert(hr != NULL, "invariant");
 402       if (hr->in_collection_set()) {
 403         // The region points into the collection set
 404         _base[i] = MemRegion();
 405         result = true;
 406       }
 407     } else {
 408       // that entry was invalidated... let's skip it
 409       assert(mr.end() == NULL, "invariant");
 410     }
 411   }
 412   return result;
 413 }
 414 
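     // Drain the stack by popping entries and applying cl to each object
     // popped; if yield_after is true, offer to yield after each object and
     // return false if the drain stopped early because of a yield.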
 415 template<class OopClosureClass>
 416 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 417   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 418          || SafepointSynchronize::is_at_safepoint(),
 419          "Drain recursion must be yield-safe.");
 420   bool res = true;
 421   debug_only(_drain_in_progress = true);
 422   debug_only(_drain_in_progress_yields = yield_after);
 423   while (!isEmpty()) {
 424     oop newOop = pop();
 425     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 426     assert(newOop->is_oop(), "Expected an oop");
 427     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 428            "only grey objects on this stack");
 429     // iterate over the oops in this oop, marking and pushing
 430     // the ones in CMS generation.
 431     newOop->oop_iterate(cl);
 432     if (yield_after && _cm->do_yield_check()) {
 433       res = false;
 434       break;
 435     }
 436   }
 437   debug_only(_drain_in_progress = false);
 438   return res;
 439 }
 440 
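     // Apply f to the entries currently on the stack, up to the bound recorded
     // in _oops_do_bound, and then reset the bound.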
 441 void CMMarkStack::oops_do(OopClosure* f) {
 442   if (_index == 0) return;
 443   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
 444          "Bound must be set.");
 445   for (int i = 0; i < _oops_do_bound; i++) {
 446     f->do_oop(&_base[i]);
 447   }
 448   _oops_do_bound = -1;
 449 }
 450 
 451 bool ConcurrentMark::not_yet_marked(oop obj) const {
 452   return (_g1h->is_obj_ill(obj)
 453           || (_g1h->is_in_permanent(obj)
 454               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
 455 }
 456 
 457 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 458 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 459 #endif // _MSC_VER
 460 
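     // Use roughly one marking thread per four parallel GC threads, but always
     // at least one.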
 461 size_t ConcurrentMark::scale_parallel_threads(size_t n_par_threads) {
 462   return MAX2((n_par_threads + 2) / 4, (size_t)1);
 463 }
 464 
 465 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
 466                                int max_regions) :
 467   _markBitMap1(rs, MinObjAlignment - 1),
 468   _markBitMap2(rs, MinObjAlignment - 1),
 469 
 470   _parallel_marking_threads(0),
 471   _max_parallel_marking_threads(0),
 472   _sleep_factor(0.0),
 473   _marking_task_overhead(1.0),
 474   _cleanup_sleep_factor(0.0),
 475   _cleanup_task_overhead(1.0),
 476   _cleanup_list("Cleanup List"),
 477   _region_bm(max_regions, false /* in_resource_area*/),
 478   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
 479            CardTableModRefBS::card_shift,
 480            false /* in_resource_area*/),
 481 
 482   _prevMarkBitMap(&_markBitMap1),
 483   _nextMarkBitMap(&_markBitMap2),
 484   _at_least_one_mark_complete(false),
 485 
 486   _markStack(this),
 487   _regionStack(),
 488   // _finger set in set_non_marking_state
 489 
 490   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
 491   // _active_tasks set in set_non_marking_state
 492   // _tasks set inside the constructor
 493   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
 494   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
 495 
 496   _has_overflown(false),
 497   _concurrent(false),
 498   _has_aborted(false),
 499   _restart_for_overflow(false),
 500   _concurrent_marking_in_progress(false),
 501   _should_gray_objects(false),
 502 
 503   // _verbose_level set below
 504 
 505   _init_times(),
 506   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 507   _cleanup_times(),
 508   _total_counting_time(0.0),
 509   _total_rs_scrub_time(0.0),
 510 
 511   _parallel_workers(NULL),
 512 
 513   _count_card_bitmaps(NULL),
 514   _count_marked_bytes(NULL)
 515 {
 516   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 517   if (verbose_level < no_verbose) {
 518     verbose_level = no_verbose;
 519   }
 520   if (verbose_level > high_verbose) {
 521     verbose_level = high_verbose;
 522   }
 523   _verbose_level = verbose_level;
 524 
 525   if (verbose_low()) {
 526     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 527                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
 528   }
 529 
 530   _markStack.allocate(MarkStackSize);
 531   _regionStack.allocate(G1MarkRegionStackSize);
 532 
 533   // Create & start a ConcurrentMark thread.
 534   _cmThread = new ConcurrentMarkThread(this);
 535   assert(cmThread() != NULL, "CM Thread should have been created");
 536   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 537 
 538   _g1h = G1CollectedHeap::heap();
 539   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 540   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
 541   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
 542 
 543   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 544   satb_qs.set_buffer_size(G1SATBBufferSize);
 545 
 546   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
 547   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
 548 
 549   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_task_num);
 550   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num);
 551 
 552   BitMap::idx_t card_bm_size = _card_bm.size();
 553 
 554   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 555   _active_tasks = _max_task_num;
 556   for (int i = 0; i < (int) _max_task_num; ++i) {
 557     CMTaskQueue* task_queue = new CMTaskQueue();
 558     task_queue->initialize();
 559     _task_queues->register_queue(i, task_queue);
 560 
 561     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 562     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions);
 563 
 564     _tasks[i] = new CMTask(i, this, 
 565                            _count_marked_bytes[i],
 566                            &_count_card_bitmaps[i],
 567                            task_queue, _task_queues);
 568 
 569     _accum_task_vtime[i] = 0.0;
 570   }
 571 
 572   // Calculate the card number for the bottom of the heap. Used
 573   // in biasing indexes into the accounting card bitmaps.
 574   _heap_bottom_card_num =
 575     intptr_t(uintptr_t(_g1h->reserved_region().start()) >> 
 576                                 CardTableModRefBS::card_shift);
 577 
 578 
 579   if (ConcGCThreads > ParallelGCThreads) {
 580     vm_exit_during_initialization("Can't have more ConcGCThreads "
 581                                   "than ParallelGCThreads.");
 582   }
 583   if (ParallelGCThreads == 0) {
 584     // if we are not running with any parallel GC threads we will not
 585     // spawn any marking threads either
 586     _parallel_marking_threads =       0;
 587     _max_parallel_marking_threads =   0;
 588     _sleep_factor             =     0.0;
 589     _marking_task_overhead    =     1.0;
 590   } else {
 591     if (ConcGCThreads > 0) {
 592       // notice that ConcGCThreads overrides G1MarkingOverheadPercent
 593       // if both are set
 594 
 595       _parallel_marking_threads = ConcGCThreads;
 596       _max_parallel_marking_threads = _parallel_marking_threads;
 597       _sleep_factor             = 0.0;
 598       _marking_task_overhead    = 1.0;
 599     } else if (G1MarkingOverheadPercent > 0) {
 600       // we will calculate the number of parallel marking threads
 601       // based on a target overhead with respect to the soft real-time
 602       // goal
 603 
 604       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 605       double overall_cm_overhead =
 606         (double) MaxGCPauseMillis * marking_overhead /
 607         (double) GCPauseIntervalMillis;
 608       double cpu_ratio = 1.0 / (double) os::processor_count();
 609       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 610       double marking_task_overhead =
 611         overall_cm_overhead / marking_thread_num *
 612                                                 (double) os::processor_count();
 613       double sleep_factor =
 614                          (1.0 - marking_task_overhead) / marking_task_overhead;
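           // For example, with G1MarkingOverheadPercent=10, MaxGCPauseMillis=200,
           // GCPauseIntervalMillis=1000 and 8 processors: overall_cm_overhead is
           // 0.02, cpu_ratio is 0.125, so we use 1 marking thread with a task
           // overhead of 0.16 and a sleep factor of 5.25.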
 615 
 616       _parallel_marking_threads = (size_t) marking_thread_num;
 617       _max_parallel_marking_threads = _parallel_marking_threads;
 618       _sleep_factor             = sleep_factor;
 619       _marking_task_overhead    = marking_task_overhead;
 620     } else {
 621       _parallel_marking_threads = scale_parallel_threads(ParallelGCThreads);
 622       _max_parallel_marking_threads = _parallel_marking_threads;
 623       _sleep_factor             = 0.0;
 624       _marking_task_overhead    = 1.0;
 625     }
 626 
 627     if (parallel_marking_threads() > 1) {
 628       _cleanup_task_overhead = 1.0;
 629     } else {
 630       _cleanup_task_overhead = marking_task_overhead();
 631     }
 632     _cleanup_sleep_factor =
 633                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 634 
 635 #if 0
 636     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 637     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 638     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 639     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 640     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 641 #endif
 642 
 643     guarantee(parallel_marking_threads() > 0, "peace of mind");
 644     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
 645          (int) _max_parallel_marking_threads, false, true);
 646     if (_parallel_workers == NULL) {
 647       vm_exit_during_initialization("Failed necessary allocation.");
 648     } else {
 649       _parallel_workers->initialize_workers();
 650     }
 651   }
 652 
 653   // so that the call below can read a sensible value
 654   _heap_start = (HeapWord*) rs.base();
 655   set_non_marking_state();
 656 }
 657 
 658 void ConcurrentMark::update_g1_committed(bool force) {
 659   // If concurrent marking is not in progress, then we do not need to
 660   // update _heap_end. This has a subtle and important
 661   // side-effect. Imagine that two evacuation pauses happen between
 662   // marking completion and remark. The first one can grow the
 663   // heap (hence now the finger is below the heap end). Then, the
 664   // second one could unnecessarily push regions on the region
 665   // stack. This causes the invariant that the region stack is empty
 666   // at the beginning of remark to be false. By ensuring that we do
 667   // not observe heap expansions after marking is complete, we do
 668   // not have this problem.
 669   if (!concurrent_marking_in_progress() && !force) return;
 670 
 671   MemRegion committed = _g1h->g1_committed();
 672   assert(committed.start() == _heap_start, "start shouldn't change");
 673   HeapWord* new_end = committed.end();
 674   if (new_end > _heap_end) {
 675     // The heap has been expanded.
 676 
 677     _heap_end = new_end;
 678   }
 679   // Notice that the heap can also shrink. However, this only happens
 680   // during a Full GC (at least currently) and the entire marking
 681   // phase will bail out and the task will not be restarted. So, let's
 682   // do nothing.
 683 }
 684 
 685 void ConcurrentMark::reset() {
 686   // Starting values for these two. This should be called in a STW
 687   // phase. CM will be notified of any future g1_committed expansions;
 688   // these will happen at the end of evacuation pauses, when tasks are
 689   // inactive.
 690   MemRegion committed = _g1h->g1_committed();
 691   _heap_start = committed.start();
 692   _heap_end   = committed.end();
 693 
 694   // Separated the asserts so that we know which one fires.
 695   assert(_heap_start != NULL, "heap bounds should look ok");
 696   assert(_heap_end != NULL, "heap bounds should look ok");
 697   assert(_heap_start < _heap_end, "heap bounds should look ok");
 698 
 699   // reset all the marking data structures and any necessary flags
 700   clear_marking_state();
 701 
 702   clear_all_count_data();
 703 
 704   if (verbose_low()) {
 705     gclog_or_tty->print_cr("[global] resetting");
 706   }
 707 
 708   // We do reset all of them, since different phases will use
 709   // different numbers of active threads. So, it's easiest to have all
 710   // of them ready.
 711   for (int i = 0; i < (int) _max_task_num; ++i) {
 712     _tasks[i]->reset(_nextMarkBitMap);
 713   }
 714 
 715   // we need this to make sure that the flag is on during the evac
 716   // pause with initial mark piggy-backed
 717   set_concurrent_marking_in_progress();
 718 }
 719 
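     // Prepare for a phase with the given number of active tasks: size the
     // terminator and the two overflow barriers accordingly, propagate the
     // concurrent flag to every task and, for the non-concurrent (remark)
     // case, refresh the recorded heap end.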
 720 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
 721   assert(active_tasks <= _max_task_num, "we should not have more");
 722 
 723   _active_tasks = active_tasks;
 724   // Need to update the three data structures below according to the
 725   // number of active threads for this phase.
 726   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 727   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 728   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 729 
 730   _concurrent = concurrent;
 731   // We propagate this to all tasks, not just the active ones.
 732   for (int i = 0; i < (int) _max_task_num; ++i)
 733     _tasks[i]->set_concurrent(concurrent);
 734 
 735   if (concurrent) {
 736     set_concurrent_marking_in_progress();
 737   } else {
 738     // We currently assume that the concurrent flag has been set to
 739     // false before we start remark. At this point we should also be
 740     // in a STW phase.
 741     assert(!concurrent_marking_in_progress(), "invariant");
 742     assert(_finger == _heap_end, "only way to get here");
 743     update_g1_committed(true);
 744   }
 745 }
 746 
 747 void ConcurrentMark::set_non_marking_state() {
 748   // We set the global marking state to some default values when we're
 749   // not doing marking.
 750   clear_marking_state();
 751   _active_tasks = 0;
 752   clear_concurrent_marking_in_progress();
 753 }
 754 
 755 // This closure is used to mark refs into the g1 generation
 756 // from external roots in the CMS bit map.
 757 // Called at the first checkpoint.
 758 //
 759 
 760 void ConcurrentMark::clearNextBitmap() {
 761   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 762   G1CollectorPolicy* g1p = g1h->g1_policy();
 763 
 764   // Make sure that the concurrent mark thread still appears to be in
 765   // the current cycle.
 766   guarantee(cmThread()->during_cycle(), "invariant");
 767 
 768   // We are finishing up the current cycle by clearing the next
 769   // marking bitmap and getting it ready for the next cycle. During
 770   // this time no other cycle can start. So, let's make sure that this
 771   // is the case.
 772   guarantee(!g1h->mark_in_progress(), "invariant");
 773 
 774   // clear the mark bitmap (no grey objects to start with).
 775   // We need to do this in chunks and offer to yield in between
 776   // each chunk.
 777   HeapWord* start  = _nextMarkBitMap->startWord();
 778   HeapWord* end    = _nextMarkBitMap->endWord();
 779   HeapWord* cur    = start;
 780   size_t chunkSize = M;
 781   while (cur < end) {
 782     HeapWord* next = cur + chunkSize;
 783     if (next > end) {
 784       next = end;
 785     }
 786     MemRegion mr(cur,next);
 787     _nextMarkBitMap->clearRange(mr);
 788     cur = next;
 789     do_yield_check();
 790 
 791     // Repeat the asserts from above. We'll do them as asserts here to
 792     // minimize their overhead on the product. However, we'll have
 793     // them as guarantees at the beginning / end of the bitmap
 794     // clearing to get some checking in the product.
 795     assert(cmThread()->during_cycle(), "invariant");
 796     assert(!g1h->mark_in_progress(), "invariant");
 797   }
 798 
 799   // Repeat the asserts from above.
 800   guarantee(cmThread()->during_cycle(), "invariant");
 801   guarantee(!g1h->mark_in_progress(), "invariant");
 802 }
 803 
 804 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 805 public:
 806   bool doHeapRegion(HeapRegion* r) {
 807     if (!r->continuesHumongous()) {
 808       r->note_start_of_marking(true);
 809     }
 810     return false;
 811   }
 812 };
 813 
 814 void ConcurrentMark::checkpointRootsInitialPre() {
 815   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 816   G1CollectorPolicy* g1p = g1h->g1_policy();
 817 
 818   _has_aborted = false;
 819 
 820 #ifndef PRODUCT
 821   if (G1PrintReachableAtInitialMark) {
 822     print_reachable("at-cycle-start",
 823                     VerifyOption_G1UsePrevMarking, true /* all */);
 824   }
 825 #endif
 826 
 827   // Initialise marking structures. This has to be done in a STW phase.
 828   reset();
 829 }
 830 
 831 
 832 void ConcurrentMark::checkpointRootsInitialPost() {
 833   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 834 
 835   // If we force an overflow during remark, the remark operation will
 836   // actually abort and we'll restart concurrent marking. If we always
 837   // force an overflow during remark we'll never actually complete the
 838   // marking phase. So, we initialize this here, at the start of the
 839   // cycle, so that the remaining overflow count will decrease at
 840   // every remark and we'll eventually not need to cause one.
 841   force_overflow_stw()->init();
 842 
 843   // For each region note start of marking.
 844   NoteStartOfMarkHRClosure startcl;
 845   g1h->heap_region_iterate(&startcl);
 846 
 847   // Start Concurrent Marking weak-reference discovery.
 848   ReferenceProcessor* rp = g1h->ref_processor_cm();
 849   // enable ("weak") refs discovery
 850   rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
 851   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 852 
 853   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 854   // This is the start of the marking cycle; we expect all
 855   // threads to have SATB queues with active set to false.
 856   satb_mq_set.set_active_all_threads(true, /* new active value */
 857                                      false /* expected_active */);
 858 
 859   // update_g1_committed() will be called at the end of an evac pause
 860   // when marking is on. So, it's also called at the end of the
 861   // initial-mark pause to update the heap end, if the heap expands
 862   // during it. No need to call it here.
 863 }
 864 
 865 /*
 866  * Notice that in the next two methods, we actually leave the STS
 867  * during the barrier sync and join it immediately afterwards. If we
 868  * do not do this, the following deadlock can occur: one thread could
 869  * be in the barrier sync code, waiting for the other thread to also
 870  * sync up, whereas another one could be trying to yield, while also
 871  * waiting for the other threads to sync up too.
 872  *
 873  * Note, however, that this code is also used during remark and in
 874  * this case we should not attempt to leave / enter the STS, otherwise
 875  * we'll either hit an assert (debug / fastdebug) or deadlock
 876  * (product). So we should only leave / enter the STS if we are
 877  * operating concurrently.
 878  *
 879  * Because the thread that does the sync barrier has left the STS, it
 880  * is possible for a Full GC or an evacuation pause to occur while it
 881  * is suspended. This is actually safe, since entering the sync
 882  * barrier is one of the last things do_marking_step() does, and it
 883  * doesn't manipulate any data structures afterwards.
 884  */
 885 
 886 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
 887   if (verbose_low()) {
 888     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
 889   }
 890 
 891   if (concurrent()) {
 892     ConcurrentGCThread::stsLeave();
 893   }
 894   _first_overflow_barrier_sync.enter();
 895   if (concurrent()) {
 896     ConcurrentGCThread::stsJoin();
 897   }
 898   // at this point everyone should have synced up and not be doing any
 899   // more work
 900 
 901   if (verbose_low()) {
 902     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
 903   }
 904 
 905   // let task 0 do this
 906   if (task_num == 0) {
 907     // task 0 is responsible for clearing the global data structures
 908     // We should be here because of an overflow. During STW we should
 909     // not clear the overflow flag since we rely on it being true when
 910     // we exit this method to abort the pause and restart concurrent
 911     // marking.
 912     clear_marking_state(concurrent() /* clear_overflow */);
 913     force_overflow()->update();
 914 
 915     if (PrintGC) {
 916       gclog_or_tty->date_stamp(PrintGCDateStamps);
 917       gclog_or_tty->stamp(PrintGCTimeStamps);
 918       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 919     }
 920   }
 921 
 922   // after this, each task should reset its own data structures and
 923   // then go into the second barrier
 924 }
 925 
 926 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
 927   if (verbose_low()) {
 928     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
 929   }
 930 
 931   if (concurrent()) {
 932     ConcurrentGCThread::stsLeave();
 933   }
 934   _second_overflow_barrier_sync.enter();
 935   if (concurrent()) {
 936     ConcurrentGCThread::stsJoin();
 937   }
 938   // at this point everything should be re-initialised and ready to go
 939 
 940   if (verbose_low()) {
 941     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
 942   }
 943 }
 944 
 945 #ifndef PRODUCT
 946 void ForceOverflowSettings::init() {
 947   _num_remaining = G1ConcMarkForceOverflow;
 948   _force = false;
 949   update();
 950 }
 951 
 952 void ForceOverflowSettings::update() {
 953   if (_num_remaining > 0) {
 954     _num_remaining -= 1;
 955     _force = true;
 956   } else {
 957     _force = false;
 958   }
 959 }
 960 
 961 bool ForceOverflowSettings::should_force() {
 962   if (_force) {
 963     _force = false;
 964     return true;
 965   } else {
 966     return false;
 967   }
 968 }
 969 #endif // !PRODUCT
 970 
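     // Mark the given object on the next marking bitmap (and update the
     // counting data) if it is not already marked.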
 971 void ConcurrentMark::grayRoot(oop p, int worker_i) {
 972   HeapWord* addr = (HeapWord*) p;
 973   // We can't really check against _heap_start and _heap_end, since it
 974   // is possible during an evacuation pause with piggy-backed
 975   // initial-mark that the committed space is expanded during the
 976   // pause without CM observing this change. So the assertion below
 977   // is a bit conservative, but better than nothing.
 978   assert(_g1h->g1_committed().contains(addr),
 979          "address should be within the heap bounds");
 980 
 981   if (!_nextMarkBitMap->isMarked(addr)) {
 982     par_mark_and_count(p, worker_i);
 983   }
 984 }
 985 
 986 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
 987   // The objects on the region have already been marked "in bulk" by
 988   // the caller. We only need to decide whether to push the region on
 989   // the region stack or not.
 990 
 991   if (!concurrent_marking_in_progress() || !_should_gray_objects) {
 992     // We're done with marking and waiting for remark. We do not need to
 993     // push anything else on the region stack.
 994     return;
 995   }
 996 
 997   HeapWord* finger = _finger;
 998 
 999   if (verbose_low()) {
1000     gclog_or_tty->print_cr("[global] attempting to push "
1001                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
1002                            PTR_FORMAT, mr.start(), mr.end(), finger);
1003   }
1004 
1005   if (mr.start() < finger) {
1006     // The finger is always heap region aligned and it is not possible
1007     // for mr to span heap regions.
1008     assert(mr.end() <= finger, "invariant");
1009 
1010     // Separated the asserts so that we know which one fires.
1011     assert(mr.start() <= mr.end(),
1012            "region boundaries should fall within the committed space");
1013     assert(_heap_start <= mr.start(),
1014            "region boundaries should fall within the committed space");
1015     assert(mr.end() <= _heap_end,
1016            "region boundaries should fall within the committed space");
1017     if (verbose_low()) {
1018       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
1019                              "below the finger, pushing it",
1020                              mr.start(), mr.end());
1021     }
1022 
1023     if (!region_stack_push_lock_free(mr)) {
1024       if (verbose_low()) {
1025         gclog_or_tty->print_cr("[global] region stack has overflown.");
1026       }
1027     }
1028   }
1029 }
1030 
1031 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p, int worker_i) {
1032   // The object is not marked by the caller. We need to at least mark
1033   // it and maybe push it on the stack.
1034 
1035   HeapWord* addr = (HeapWord*)p;
1036   if (!_nextMarkBitMap->isMarked(addr)) {
1037     // We definitely need to mark it, irrespective whether we bail out
1038     // because we're done with marking.
1039 
1040     if (par_mark_and_count(p, worker_i)) {
1041       if (!concurrent_marking_in_progress() || !_should_gray_objects) {
1042         // If we're done with concurrent marking and we're waiting for
1043         // remark, then we're not pushing anything on the stack.
1044         return;
1045       }
1046 
1047       // No OrderAccess::store_load() is needed. It is implicit in the
1048       // CAS done in parMark(addr) above
1049       HeapWord* finger = _finger;
1050 
1051       if (addr < finger) {
1052         if (!mark_stack_push(oop(addr))) {
1053           if (verbose_low()) {
1054             gclog_or_tty->print_cr("[global] global stack overflow "
1055                                    "during parMark");
1056           }
1057         }
1058       }
1059     }
1060   }
1061 }
1062 
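     // The gang task that drives the concurrent marking phase. Each worker
     // joins the suspendible thread set and repeatedly calls do_marking_step()
     // on its CMTask, sleeping between steps according to the sleep factor,
     // until the task completes or marking is aborted.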
1063 class CMConcurrentMarkingTask: public AbstractGangTask {
1064 private:
1065   ConcurrentMark*       _cm;
1066   ConcurrentMarkThread* _cmt;
1067 
1068 public:
1069   void work(int worker_i) {
1070     assert(Thread::current()->is_ConcurrentGC_thread(),
1071            "this should only be done by a conc GC thread");
1072     ResourceMark rm;
1073 
1074     double start_vtime = os::elapsedVTime();
1075 
1076     ConcurrentGCThread::stsJoin();
1077 
1078     assert((size_t) worker_i < _cm->active_tasks(), "invariant");
1079     CMTask* the_task = _cm->task(worker_i);
1080     the_task->record_start_time();
1081     if (!_cm->has_aborted()) {
1082       do {
1083         double start_vtime_sec = os::elapsedVTime();
1084         double start_time_sec = os::elapsedTime();
1085         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1086 
1087         the_task->do_marking_step(mark_step_duration_ms,
1088                                   true /* do_stealing    */,
1089                                   true /* do_termination */);
1090 
1091         double end_time_sec = os::elapsedTime();
1092         double end_vtime_sec = os::elapsedVTime();
1093         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1094         double elapsed_time_sec = end_time_sec - start_time_sec;
1095         _cm->clear_has_overflown();
1096 
1097         bool ret = _cm->do_yield_check(worker_i);
1098 
1099         jlong sleep_time_ms;
1100         if (!_cm->has_aborted() && the_task->has_aborted()) {
1101           sleep_time_ms =
1102             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1103           ConcurrentGCThread::stsLeave();
1104           os::sleep(Thread::current(), sleep_time_ms, false);
1105           ConcurrentGCThread::stsJoin();
1106         }
1107         double end_time2_sec = os::elapsedTime();
1108         double elapsed_time2_sec = end_time2_sec - start_time_sec;
1109 
1110 #if 0
1111           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1112                                  "overhead %1.4lf",
1113                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1114                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
1115           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1116                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1117 #endif
1118       } while (!_cm->has_aborted() && the_task->has_aborted());
1119     }
1120     the_task->record_end_time();
1121     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1122 
1123     ConcurrentGCThread::stsLeave();
1124 
1125     double end_vtime = os::elapsedVTime();
1126     _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1127   }
1128 
1129   CMConcurrentMarkingTask(ConcurrentMark* cm,
1130                           ConcurrentMarkThread* cmt) :
1131       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1132 
1133   ~CMConcurrentMarkingTask() { }
1134 };
1135 
1136 // Calculates the number of active workers for a concurrent
1137 // phase.
1138 size_t ConcurrentMark::calc_parallel_marking_threads() {
1139   if (G1CollectedHeap::use_parallel_gc_threads()) {
1140     size_t n_conc_workers = 0;
1141     if (!UseDynamicNumberOfGCThreads ||
1142         (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1143          !ForceDynamicNumberOfGCThreads)) {
1144       n_conc_workers = max_parallel_marking_threads();
1145     } else {
1146       n_conc_workers =
1147         AdaptiveSizePolicy::calc_default_active_workers(
1148                                      max_parallel_marking_threads(),
1149                                      1, /* Minimum workers */
1150                                      parallel_marking_threads(),
1151                                      Threads::number_of_non_daemon_threads());
1152       // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1153       // that scaling has already gone into "_max_parallel_marking_threads".
1154     }
1155     assert(n_conc_workers > 0, "Always need at least 1");
1156     return n_conc_workers;
1157   }
1158   // If we are not running with any parallel GC threads we will not
1159   // have spawned any marking threads either. Hence the number of
1160   // concurrent workers should be 0.
1161   return 0;
1162 }
1163 
1164 void ConcurrentMark::markFromRoots() {
1165   // we might be tempted to assert that:
1166   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1167   //        "inconsistent argument?");
1168   // However that wouldn't be right, because it's possible that
1169   // a safepoint is indeed in progress as a younger generation
1170   // stop-the-world GC happens even as we mark in this generation.
1171 
1172   _restart_for_overflow = false;
1173   force_overflow_conc()->init();
1174 
1175   // _g1h has _n_par_threads
1176   _parallel_marking_threads = calc_parallel_marking_threads();
1177   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1178     "Maximum number of marking threads exceeded");
1179 
1180   size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
1181 
1182   // Parallel task terminator is set in "set_phase()"
1183   set_phase(active_workers, true /* concurrent */);
1184 
1185   CMConcurrentMarkingTask markingTask(this, cmThread());
1186   if (parallel_marking_threads() > 0) {
1187     _parallel_workers->set_active_workers((int)active_workers);
1188     // Don't set _n_par_threads because it affects MT in process_strong_roots()
1189     // and the decisions on that MT processing are made elsewhere.
1190     assert(_parallel_workers->active_workers() > 0, "Should have been set");
1191     _parallel_workers->run_task(&markingTask);
1192   } else {
1193     markingTask.work(0);
1194   }
1195   print_stats();
1196 }
1197 
1198 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1199   // world is stopped at this checkpoint
1200   assert(SafepointSynchronize::is_at_safepoint(),
1201          "world should be stopped");
1202 
1203   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1204 
1205   // If a full collection has happened, we shouldn't do this.
1206   if (has_aborted()) {
1207     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1208     return;
1209   }
1210 
1211   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1212 
1213   if (VerifyDuringGC) {
1214     HandleMark hm;  // handle scope
1215     gclog_or_tty->print(" VerifyDuringGC:(before)");
1216     Universe::heap()->prepare_for_verify();
1217     Universe::verify(/* allow dirty */ true,
1218                      /* silent      */ false,
1219                      /* option      */ VerifyOption_G1UsePrevMarking);
1220   }
1221 
1222   G1CollectorPolicy* g1p = g1h->g1_policy();
1223   g1p->record_concurrent_mark_remark_start();
1224 
1225   double start = os::elapsedTime();
1226 
1227   checkpointRootsFinalWork();
1228 
1229   double mark_work_end = os::elapsedTime();
1230 
1231   weakRefsWork(clear_all_soft_refs);
1232 
1233   if (has_overflown()) {
1234     // Oops.  We overflowed.  Restart concurrent marking.
1235     _restart_for_overflow = true;
1236     // Clear the flag. We do not need it any more.
1237     clear_has_overflown();
1238     if (G1TraceMarkStackOverflow) {
1239       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1240     }
1241   } else {
1242     // Aggregate the per-task counting data that we have accumulated
1243     // while marking.
1244     aggregate_and_clear_count_data();
1245 
1246     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1247     // We're done with marking.
1248     // This is the end of the marking cycle; we expect all
1249     // threads to have SATB queues with active set to true.
1250     satb_mq_set.set_active_all_threads(false, /* new active value */
1251                                        true /* expected_active */);
1252 
1253     if (VerifyDuringGC) {
1254 
1255       HandleMark hm;  // handle scope
1256       gclog_or_tty->print(" VerifyDuringGC:(after)");
1257       Universe::heap()->prepare_for_verify();
1258       Universe::verify(/* allow dirty */ true,
1259                        /* silent      */ false,
1260                        /* option      */ VerifyOption_G1UseNextMarking);
1261     }
1262     assert(!restart_for_overflow(), "sanity");
1263   }
1264 
1265   // Reset the marking state if marking completed
1266   if (!restart_for_overflow()) {
1267     set_non_marking_state();
1268   }
1269 
1270 #if VERIFY_OBJS_PROCESSED
1271   _scan_obj_cl.objs_processed = 0;
1272   ThreadLocalObjQueue::objs_enqueued = 0;
1273 #endif
1274 
1275   // Statistics
1276   double now = os::elapsedTime();
1277   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1278   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1279   _remark_times.add((now - start) * 1000.0);
1280 
1281   g1p->record_concurrent_mark_remark_end();
1282 }
1283 
1284 #define CARD_BM_TEST_MODE 0
1285 
1286 // Used to calculate the # live objects per region
1287 // for verification purposes
1288 class CalcLiveObjectsClosure: public HeapRegionClosure {
1289 
1290   CMBitMapRO* _bm;
1291   ConcurrentMark* _cm;
1292   BitMap* _region_bm;
1293   BitMap* _card_bm;
1294 
1295   size_t _tot_words_done;
1296   size_t _tot_live;
1297   size_t _tot_used;
1298 
1299   size_t _region_marked_bytes;
1300 
1301   intptr_t _bottom_card_num;
1302 
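       // Set the bits in the card bitmap for the closed range of cards
       // [start_card_num, last_card_num], using indices biased by the card
       // number of the heap bottom (in CARD_BM_TEST_MODE the bits are only
       // verified, not set).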
1303   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1304     BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
1305     BitMap::idx_t last_idx = last_card_num - _bottom_card_num;
1306     
1307     for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
1308 #if CARD_BM_TEST_MODE
1309       guarantee(_card_bm->at(i), "Should already be set.");
1310 #else
1311       _card_bm->par_at_put(i, 1);
1312 #endif
1313     }
1314   }
1315 
1316 public:
1317   CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,
1318                          BitMap* region_bm, BitMap* card_bm) :
1319     _bm(bm), _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
1320     _region_marked_bytes(0), _tot_words_done(0),
1321     _tot_live(0), _tot_used(0)
1322   {
1323     _bottom_card_num = cm->heap_bottom_card_num();
1324   }
1325 
1326   // It takes a region that's not empty (i.e., it has at least one
1327   // live object in it) and sets its corresponding bit on the region
1328   // bitmap to 1. If the region is "starts humongous" it will also set
1329   // to 1 the bits on the region bitmap that correspond to its
1330   // associated "continues humongous" regions.
1331   void set_bit_for_region(HeapRegion* hr) {
1332     assert(!hr->continuesHumongous(), "should have filtered those out");
1333 
1334     size_t index = hr->hrs_index();
1335     if (!hr->startsHumongous()) {
1336       // Normal (non-humongous) case: just set the bit.
1337       _region_bm->par_at_put((BitMap::idx_t) index, true);
1338     } else {
1339       // Starts humongous case: calculate how many regions are part of
1340       // this humongous region and then set the bit range.
1341       G1CollectedHeap* g1h = G1CollectedHeap::heap();
1342       HeapRegion *last_hr = g1h->heap_region_containing_raw(hr->end() - 1);
1343       size_t end_index = last_hr->hrs_index() + 1;
1344       _region_bm->par_at_put_range((BitMap::idx_t) index,
1345                                    (BitMap::idx_t) end_index, true);
1346     }
1347   }
1348 
1349   bool doHeapRegion(HeapRegion* hr) {
1350 
1351     if (hr->continuesHumongous()) {
1352       // We will ignore these here and process them when their
1353       // associated "starts humongous" region is processed (see
1354       // set_bit_for_heap_region()). Note that we cannot rely on their
1355       // associated "starts humongous" region to have their bit set to
1356       // 1 since, due to the region chunking in the parallel region
1357       // iteration, a "continues humongous" region might be visited
1358       // before its associated "starts humongous".
1359       return false;
1360     }
1361 
1362     HeapWord* nextTop = hr->next_top_at_mark_start();
1363     HeapWord* start   = hr->bottom();
1364 
1365     assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(),
1366                 "Preconditions.");
1367 
1368     // Record the number of words we'll examine.
1369     size_t words_done = (nextTop - start);
1370 
1371     // Find the first marked object at or after "start".
1372     start = _bm->getNextMarkedWordAddress(start, nextTop);
1373 
1374     size_t marked_bytes = 0;
1375     _region_marked_bytes = 0;
1376 
1377     // Below, the term "card num" means the result of shifting an address
1378     // by the card shift -- address 0 corresponds to card number 0.  One
1379     // must subtract the card num of the bottom of the heap to obtain a
1380     // card table index.
1381 
1382     // The first card num of the sequence of live cards currently being
1383     // constructed.  -1 ==> no sequence.
1384     intptr_t start_card_num = -1;
1385 
1386     // The last card num of the sequence of live cards currently being
1387     // constructed.  -1 ==> no sequence.
1388     intptr_t last_card_num = -1;
1389 
1390     while (start < nextTop) {
1391       oop obj = oop(start);
1392       int obj_sz = obj->size();
1393 
1394       // The card num of the start of the current object.
1395       intptr_t obj_card_num =
1396         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1397       HeapWord* obj_last = start + obj_sz - 1;
1398       intptr_t obj_last_card_num =
1399         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1400 
1401       if (obj_card_num != last_card_num) {
1402         if (start_card_num == -1) {
1403           assert(last_card_num == -1, "Both or neither.");
1404           start_card_num = obj_card_num;
1405         } else {
1406           assert(last_card_num != -1, "Both or neither.");
1407           assert(obj_card_num >= last_card_num, "Inv");
1408           if ((obj_card_num - last_card_num) > 1) {
1409             // Mark the last run, and start a new one.
1410             mark_card_num_range(start_card_num, last_card_num);
1411             start_card_num = obj_card_num;
1412           }
1413         }
1414 #if CARD_BM_TEST_MODE
1415         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1416           _card_bm->par_at_put(j - _bottom_card_num, 1);
1417         }
1418 #endif // CARD_BM_TEST_MODE
1419       }
1420       // In any case, we set the last card num.
1421       last_card_num = obj_last_card_num;
1422 
1423       marked_bytes += (size_t)obj_sz * HeapWordSize;
1424 
1425       // Find the next marked object after this one.
1426       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1427     }
1428 
1429     // Handle the last range, if any.
1430     if (start_card_num != -1) {
1431       mark_card_num_range(start_card_num, last_card_num);
1432     }
1433 
1434     // Mark the allocated-since-marking portion...
1435     HeapWord* top = hr->top();
1436     if (nextTop < top) {
1437       start_card_num = intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1438       last_card_num = intptr_t(uintptr_t(top) >> CardTableModRefBS::card_shift);
1439 
1440       mark_card_num_range(start_card_num, last_card_num);
1441       
1442       // This definitely means the region has live objects.
1443       set_bit_for_region(hr);
1444     }
1445 
1446     // Update the live region bitmap.
1447     if (marked_bytes > 0) {
1448       set_bit_for_region(hr);
1449     }
1450 
1451     // Set the marked bytes for the current region so that
1452     // it can be queried by a calling verification routine
1453     _region_marked_bytes = marked_bytes;
1454 
1455     _tot_live += hr->next_live_bytes();
1456     _tot_used += hr->used();
1457     _tot_words_done = words_done;
1458 
1459     return false;
1460   }
1461 
1462   size_t region_marked_bytes() const { return _region_marked_bytes; }
1463   size_t tot_words_done() const      { return _tot_words_done; }
1464   size_t tot_live() const            { return _tot_live; }
1465   size_t tot_used() const            { return _tot_used; }
1466 };
1467 
1468 // Heap region closure used for verifying the counting data
1469 // that was accumulated concurrently and aggregated during
1470 // the remark pause. This closure is applied to the heap
1471 // regions during the STW cleanup pause.
1472 
1473 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1474   ConcurrentMark* _cm;
1475   CalcLiveObjectsClosure _calc_cl;
1476   BitMap* _region_bm;   // Region BM to be verified
1477   BitMap* _card_bm;     // Card BM to be verified
1478   bool _verbose;        // verbose output?
1479 
1480   BitMap* _exp_region_bm; // Expected Region BM values
1481   BitMap* _exp_card_bm;   // Expected card BM values
1482 
1483   intptr_t _bottom_card_num; // Used for calculating bitmap indices
1484 
1485   int _failures;
1486 
1487 public:
1488   VerifyLiveObjectDataHRClosure(ConcurrentMark* cm,
1489                                 BitMap* region_bm,
1490                                 BitMap* card_bm,
1491                                 BitMap* exp_region_bm,
1492                                 BitMap* exp_card_bm,
1493                                 bool verbose) :
1494     _cm(cm),
1495     _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm),
1496     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1497     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1498     _failures(0)
1499   { 
1500     _bottom_card_num = cm->heap_bottom_card_num();
1501   }
1502 
1503   int failures() const { return _failures; }
1504 
1505   bool doHeapRegion(HeapRegion* hr) {
1506     if (hr->continuesHumongous()) {
1507       // We will ignore these here and process them when their
1508       // associated "starts humongous" region is processed (see
1509       // set_bit_for_heap_region()). Note that we cannot rely on their
1510       // associated "starts humongous" region to have their bit set to
1511       // 1 since, due to the region chunking in the parallel region
1512       // iteration, a "continues humongous" region might be visited
1513       // before its associated "starts humongous".
1514       return false;
1515     }
1516 
1517     int failures = 0;
1518 
1519     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1520     // this region and set the corresponding bits in the expected region
1521     // and card bitmaps.
1522     bool res = _calc_cl.doHeapRegion(hr);
1523     assert(res == false, "should be continuing");
1524 
1525     // Note that the calculated count data could be a subset of the
1526     // count data that was accumulated during marking. See the comment
1527     // in G1ParCopyHelper::copy_to_survivor_space for an explanation
1528     // why.
1529 
1530     // Verify that _top_at_conc_mark_count == ntams
1531     if (hr->top_at_conc_mark_count() != hr->next_top_at_mark_start()) {
1532       if (_verbose) {
1533         gclog_or_tty->print_cr("Region "SIZE_FORMAT": top at conc count incorrect: expected "
1534                                PTR_FORMAT", actual: "PTR_FORMAT,
1535                                hr->hrs_index(), hr->next_top_at_mark_start(),
1536                                hr->top_at_conc_mark_count());
1537       }
1538       failures += 1;
1539     }
1540 
1541     // Verify the marked bytes for this region. 
1542     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1543     size_t act_marked_bytes = hr->next_marked_bytes();
1544     
1545     // We're not OK if expected marked bytes > actual marked bytes. It means
1546     // we have missed accounting for some objects during the actual marking.
1547     if (exp_marked_bytes > act_marked_bytes) {
1548       if (_verbose) {
1549         gclog_or_tty->print_cr("Region "SIZE_FORMAT": marked bytes mismatch: expected: "
1550                                SIZE_FORMAT", actual: "SIZE_FORMAT,
1551                                hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1552       }
1553       failures += 1;
1554     }
1555 
1556     // Verify the bit, for this region, in the actual and expected
1557     // (which was just calculated) region bit maps.
1558     // We're not OK if the expected bit is set and the actual is not set.
1559     BitMap::idx_t index = (BitMap::idx_t)hr->hrs_index();
1560     
1561     bool expected = _exp_region_bm->at(index);
1562     bool actual = _region_bm->at(index);
1563     if (expected && !actual) {
1564       if (_verbose) {
1565         gclog_or_tty->print_cr("Region "SIZE_FORMAT": region bitmap mismatch: expected: %d, actual: %d",
1566                                hr->hrs_index(), expected, actual);
1567       }
1568       failures += 1;
1569     }
1570 
1571     // Verify that the card bit maps for the cards spanned by the current
1572     // region match. The set of offsets that have set bits in the expected
1573     // bitmap should be a subset of the offsets with set bits from the actual
1574     // calculated card bitmap.
1575     // Again it's more important that if the expected bit is set then the
1576     // actual bit be set.
1577     intptr_t start_card_num =
1578         intptr_t(uintptr_t(hr->bottom()) >> CardTableModRefBS::card_shift);
1579     intptr_t top_card_num =
1580         intptr_t(uintptr_t(hr->top()) >> CardTableModRefBS::card_shift);
1581 
1582     BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
1583     BitMap::idx_t end_idx = top_card_num - _bottom_card_num;
1584 
1585     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1586       expected = _exp_card_bm->at(i);
1587       actual = _card_bm->at(i);
1588       
1589       if (expected && !actual) {
1590         if (_verbose) {
1591           gclog_or_tty->print_cr("Region "SIZE_FORMAT": card bitmap mismatch at idx "SIZE_FORMAT": expected: %d, actual: %d",
1592                                  hr->hrs_index(), i, expected, actual);
1593         }
1594         failures += 1;
1595       }
1596     }
1597 
1598     if (failures > 0 && _verbose)  {
1599       gclog_or_tty->print("Region "SIZE_FORMAT": bottom: "PTR_FORMAT", ntams: "
1600                           PTR_FORMAT", top: "PTR_FORMAT", end: "PTR_FORMAT,
1601                           hr->hrs_index(), hr->bottom(), hr->next_top_at_mark_start(),
1602                           hr->top(), hr->end());
1603       gclog_or_tty->print_cr(", marked_bytes: calc/actual "SIZE_FORMAT"/"SIZE_FORMAT,
1604                              _calc_cl.region_marked_bytes(),
1605                              hr->next_marked_bytes());
1606     }
1607 
1608     _failures += failures;
1609 
1610     // We could stop iteration over the heap when we
1611     // find the first violating region by returning true.
1612     return false;
1613   }
1614 };
1615 
1616 
1617 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1618 protected:
1619   G1CollectedHeap* _g1h;
1620   ConcurrentMark* _cm;
1621   BitMap* _actual_region_bm;
1622   BitMap* _actual_card_bm;
1623 
1624   size_t _n_workers;
1625 
1626   BitMap* _expected_region_bm;
1627   BitMap* _expected_card_bm;
1628 
1629   int  _failures;
1630   bool _verbose;
1631 
1632 public:
1633   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1634                             BitMap* region_bm, BitMap* card_bm,
1635                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1636     : AbstractGangTask("G1 verify final counting"),
1637       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1638       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1639       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1640       _failures(0), _verbose(false),
1641       _n_workers(0)
1642   {
1643     assert(VerifyDuringGC, "don't call this otherwise");
1644 
1645     // Use the value already set as the number of active threads
1646     // in the call to run_task().
1647     if (G1CollectedHeap::use_parallel_gc_threads()) {
1648       assert( _g1h->workers()->active_workers() > 0,
1649         "Should have been previously set");
1650       _n_workers = _g1h->workers()->active_workers();
1651     } else {
1652       _n_workers = 1;
1653     }
1654 
1655     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1656     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1657 
1658     _verbose = _cm->verbose_medium();
1659   }
1660 
1661   void work(int worker_i) {
1662     assert((size_t) worker_i < _n_workers, "invariant");
1663 
1664     VerifyLiveObjectDataHRClosure verify_cl(_cm,
1665                                             _actual_region_bm, _actual_card_bm,
1666                                             _expected_region_bm,
1667                                             _expected_card_bm,
1668                                             _verbose);
1669 
1670     if (G1CollectedHeap::use_parallel_gc_threads()) {
1671       _g1h->heap_region_par_iterate_chunked(&verify_cl,
1672                                             worker_i,
1673                                             (int) _n_workers,
1674                                             HeapRegion::VerifyCountClaimValue);
1675     } else {
1676       _g1h->heap_region_iterate(&verify_cl);
1677     }
1678 
1679     Atomic::add(verify_cl.failures(), &_failures);
1680   }
1681 
1682   int failures() const { return _failures; }
1683 };
1684 
1685 // Final update of count data (during cleanup).
1686 // Adds [top_at_count, NTAMS) to the marked bytes for each
1687 // region. Sets the bits in the card bitmap corresponding
1688 // to the interval [top_at_count, top], and sets the
1689 // liveness bit for each region containing live data
1690 // in the region bitmap.
1691 
1692 class FinalCountDataUpdateClosure: public HeapRegionClosure {
1693   ConcurrentMark* _cm;
1694   BitMap* _region_bm;
1695   BitMap* _card_bm;
1696   intptr_t _bottom_card_num;
1697 
1698   size_t _total_live_bytes;
1699   size_t _total_used_bytes;
1700   size_t _total_words_done;
1701 
1702   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1703     BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
1704     BitMap::idx_t last_idx = last_card_num - _bottom_card_num;
1705     
1706     // Inclusive bit range [start_idx, last_idx]. par_at_put_range
1707     // is exclusive so we have to also set the bit for last_idx.
1708     // Passing last_idx+1 to par_at_put_range would work in
1709     // most cases but could trip an OOB assertion.
1710 
1711     if ((last_idx - start_idx) > 0) {
1712       _card_bm->par_at_put_range(start_idx, last_idx, true);
1713     }
1714     _card_bm->par_set_bit(last_idx);
1715   }
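  //
  // Illustrative example (not part of the original code): with a bottom card
  // num of 40, mark_card_num_range(100, 103) computes start_idx 60 and
  // last_idx 63, sets bits [60, 63) via par_at_put_range() and then sets
  // bit 63 separately with par_set_bit(), covering the inclusive range
  // [60, 63].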
1716 
1717   // It takes a region that's not empty (i.e., it has at least one
1718   // live object in it) and sets its corresponding bit on the region
1719   // bitmap to 1. If the region is "starts humongous" it will also set
1720   // to 1 the bits on the region bitmap that correspond to its
1721   // associated "continues humongous" regions.
1722   void set_bit_for_region(HeapRegion* hr) {
1723     assert(!hr->continuesHumongous(), "should have filtered those out");
1724 
1725     size_t index = hr->hrs_index();
1726     if (!hr->startsHumongous()) {
1727       // Normal (non-humongous) case: just set the bit.
1728       _region_bm->par_set_bit((BitMap::idx_t) index);
1729     } else {
1730       // Starts humongous case: calculate how many regions are part of
1731       // this humongous region and then set the bit range.
1732       G1CollectedHeap* g1h = G1CollectedHeap::heap();
1733       HeapRegion *last_hr = g1h->heap_region_containing_raw(hr->end() - 1);
1734       size_t end_index = last_hr->hrs_index() + 1;
1735       _region_bm->par_at_put_range((BitMap::idx_t) index,
1736                                    (BitMap::idx_t) end_index, true);
1737     }
1738   }
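  //
  // Illustrative example (not part of the original code): for a "starts
  // humongous" region with hrs_index() == 5 whose object spills into two
  // "continues humongous" regions, last_hr->hrs_index() is 7, so end_index
  // is 8 and par_at_put_range(5, 8, true) sets bits 5, 6 and 7 in the
  // region bitmap.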
1739 
1740  public:
1741   FinalCountDataUpdateClosure(ConcurrentMark* cm,
1742                               BitMap* region_bm,
1743                               BitMap* card_bm) :
1744     _cm(cm), _region_bm(region_bm), _card_bm(card_bm),
1745     _total_words_done(0), _total_live_bytes(0), _total_used_bytes(0)
1746   {
1747     _bottom_card_num = cm->heap_bottom_card_num();
1748   }
1749 
1750   bool doHeapRegion(HeapRegion* hr) {
1751 
1752     if (hr->continuesHumongous()) {
1753       // We will ignore these here and process them when their
1754       // associated "starts humongous" region is processed (see
1755       // set_bit_for_heap_region()). Note that we cannot rely on their
1756       // associated "starts humongous" region to have their bit set to
1757       // 1 since, due to the region chunking in the parallel region
1758       // iteration, a "continues humongous" region might be visited
1759       // before its associated "starts humongous".
1760       return false;
1761     }
1762 
1763     HeapWord* start = hr->top_at_conc_mark_count();
1764     HeapWord* ntams = hr->next_top_at_mark_start();
1765     HeapWord* top   = hr->top();
1766     
1767     assert(hr->bottom() <= start && start <= hr->end() &&
1768            hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1769     
1770     size_t words_done = ntams - hr->bottom();
1771 
1772     intptr_t start_card_num = intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1773     intptr_t last_card_num = intptr_t(uintptr_t(top) >> CardTableModRefBS::card_shift);
1774 
1775 
1776     if (start < ntams) {
1777       // Region was changed between remark and cleanup pauses
1778       // We need to add (ntams - start) to the marked bytes
1779       // for this region, and set bits for the range
1780       // [ card_num(start), card_num(ntams) ) in the
1781       // card bitmap.
1782       size_t live_bytes = (ntams - start) * HeapWordSize;
1783       hr->add_to_marked_bytes(live_bytes);
1784       
1785       // Record the new top at conc count
1786       hr->set_top_at_conc_mark_count(ntams);
1787 
1788       // The setting of the bits in the card bitmap takes place below
1789     }
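    //
    // For example (illustrative numbers only): if ntams - start is 64 words
    // on a 64-bit VM (HeapWordSize == 8), 512 bytes are added to the region's
    // marked bytes here, and the cards spanning [start, top] are set below.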
1790 
1791     // Mark the allocated-since-marking portion...
1792     if (ntams < top) {
1793       // This definitely means the region has live objects.
1794       set_bit_for_region(hr);
1795     }
1796 
1797     // Now set the bits for [start, top]
1798     mark_card_num_range(start_card_num, last_card_num);
1799 
1800     // Set the bit for the region if it contains live data
1801     if (hr->next_marked_bytes() > 0) {
1802       set_bit_for_region(hr);
1803     }
1804 
1805     _total_words_done += words_done;
1806     _total_used_bytes += hr->used();
1807     _total_live_bytes += hr->next_marked_bytes();
1808 
1809     return false;
1810   }
1811 
1812   size_t total_words_done() const { return _total_words_done; }
1813   size_t total_live_bytes() const { return _total_live_bytes; }
1814   size_t total_used_bytes() const { return _total_used_bytes; }
1815 };
1816 
1817 class G1ParFinalCountTask: public AbstractGangTask {
1818 protected:
1819   G1CollectedHeap* _g1h;
1820   ConcurrentMark* _cm;
1821   BitMap* _actual_region_bm;
1822   BitMap* _actual_card_bm;
1823   
1824   size_t _n_workers;
1825 
1826   size_t *_live_bytes;
1827   size_t *_used_bytes;
1828 
1829 public:
1830   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1831     : AbstractGangTask("G1 final counting"),
1832       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1833       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1834       _n_workers(0)
1835   {
1836     // Use the value already set as the number of active threads
1837     // in the call to run_task().  Needed for the allocation of
1838     // _live_bytes and _used_bytes.
1839     if (G1CollectedHeap::use_parallel_gc_threads()) {
1840       assert( _g1h->workers()->active_workers() > 0,
1841         "Should have been previously set");
1842       _n_workers = _g1h->workers()->active_workers();
1843     } else {
1844       _n_workers = 1;
1845     }
1846 
1847     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1848     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1849   }
1850 
1851   ~G1ParFinalCountTask() {
1852     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1853     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1854   }
1855 
1856   void work(int worker_i) {
1857     assert((size_t) worker_i < _n_workers, "invariant");
1858 
1859     FinalCountDataUpdateClosure final_update_cl(_cm,
1860                                                 _actual_region_bm,
1861                                                 _actual_card_bm);
1862 
1863     if (G1CollectedHeap::use_parallel_gc_threads()) {
1864       _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1865                                             worker_i,
1866                                             (int) _n_workers,
1867                                             HeapRegion::FinalCountClaimValue);
1868     } else {
1869       _g1h->heap_region_iterate(&final_update_cl);
1870     }
1871 
1872     _live_bytes[worker_i] = final_update_cl.total_live_bytes();
1873     _used_bytes[worker_i] = final_update_cl.total_used_bytes();
1874   }
1875 
1876   size_t live_bytes()  {
1877     size_t live_bytes = 0;
1878     for (size_t i = 0; i < _n_workers; ++i)
1879       live_bytes += _live_bytes[i];
1880     return live_bytes;
1881   }
1882 
1883   size_t used_bytes()  {
1884     size_t used_bytes = 0;
1885     for (size_t i = 0; i < _n_workers; ++i)
1886       used_bytes += _used_bytes[i];
1887     return used_bytes;
1888   }
1889 };
1890 
1891 class G1ParNoteEndTask;
1892 
1893 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1894   G1CollectedHeap* _g1;
1895   int _worker_num;
1896   size_t _max_live_bytes;
1897   size_t _regions_claimed;
1898   size_t _freed_bytes;
1899   FreeRegionList* _local_cleanup_list;
1900   OldRegionSet* _old_proxy_set;
1901   HumongousRegionSet* _humongous_proxy_set;
1902   HRRSCleanupTask* _hrrs_cleanup_task;
1903   double _claimed_region_time;
1904   double _max_region_time;
1905 
1906 public:
1907   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1908                              int worker_num,
1909                              FreeRegionList* local_cleanup_list,
1910                              OldRegionSet* old_proxy_set,
1911                              HumongousRegionSet* humongous_proxy_set,
1912                              HRRSCleanupTask* hrrs_cleanup_task) :
1913     _g1(g1), _worker_num(worker_num),
1914     _max_live_bytes(0), _regions_claimed(0),
1915     _freed_bytes(0),
1916     _claimed_region_time(0.0), _max_region_time(0.0),
1917     _local_cleanup_list(local_cleanup_list),
1918     _old_proxy_set(old_proxy_set),
1919     _humongous_proxy_set(humongous_proxy_set),
1920     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1921 
1922   size_t freed_bytes() { return _freed_bytes; }
1923 
1924   bool doHeapRegion(HeapRegion *hr) {
1925     // We use a claim value of zero here because all regions
1926     // were claimed with value 1 in the FinalCount task.
1927     hr->reset_gc_time_stamp();
1928     if (!hr->continuesHumongous()) {
1929       double start = os::elapsedTime();
1930       _regions_claimed++;
1931       hr->note_end_of_marking();
1932       _max_live_bytes += hr->max_live_bytes();
1933       _g1->free_region_if_empty(hr,
1934                                 &_freed_bytes,
1935                                 _local_cleanup_list,
1936                                 _old_proxy_set,
1937                                 _humongous_proxy_set,
1938                                 _hrrs_cleanup_task,
1939                                 true /* par */);
1940       double region_time = (os::elapsedTime() - start);
1941       _claimed_region_time += region_time;
1942       if (region_time > _max_region_time) {
1943         _max_region_time = region_time;
1944       }
1945     }
1946     return false;
1947   }
1948 
1949   size_t max_live_bytes() { return _max_live_bytes; }
1950   size_t regions_claimed() { return _regions_claimed; }
1951   double claimed_region_time_sec() { return _claimed_region_time; }
1952   double max_region_time_sec() { return _max_region_time; }
1953 };
1954 
1955 class G1ParNoteEndTask: public AbstractGangTask {
1956   friend class G1NoteEndOfConcMarkClosure;
1957 
1958 protected:
1959   G1CollectedHeap* _g1h;
1960   size_t _max_live_bytes;
1961   size_t _freed_bytes;
1962   FreeRegionList* _cleanup_list;
1963 
1964 public:
1965   G1ParNoteEndTask(G1CollectedHeap* g1h,
1966                    FreeRegionList* cleanup_list) :
1967     AbstractGangTask("G1 note end"), _g1h(g1h),
1968     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1969 
1970   void work(int i) {
1971     double start = os::elapsedTime();
1972     FreeRegionList local_cleanup_list("Local Cleanup List");
1973     OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
1974     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1975     HRRSCleanupTask hrrs_cleanup_task;
1976     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
1977                                            &old_proxy_set,
1978                                            &humongous_proxy_set,
1979                                            &hrrs_cleanup_task);
1980     if (G1CollectedHeap::use_parallel_gc_threads()) {
1981       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1982                                             _g1h->workers()->active_workers(),
1983                                             HeapRegion::NoteEndClaimValue);
1984     } else {
1985       _g1h->heap_region_iterate(&g1_note_end);
1986     }
1987     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1988 
1989     // Now update the lists
1990     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1991                                             NULL /* free_list */,
1992                                             &old_proxy_set,
1993                                             &humongous_proxy_set,
1994                                             true /* par */);
1995     {
1996       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1997       _max_live_bytes += g1_note_end.max_live_bytes();
1998       _freed_bytes += g1_note_end.freed_bytes();
1999 
2000       // If we iterate over the global cleanup list at the end of
2001       // cleanup to do this printing we will not guarantee to only
2002       // generate output for the newly-reclaimed regions (the list
2003       // might not be empty at the beginning of cleanup; we might
2004       // still be working on its previous contents). So we do the
2005       // printing here, before we append the new regions to the global
2006       // cleanup list.
2007 
2008       G1HRPrinter* hr_printer = _g1h->hr_printer();
2009       if (hr_printer->is_active()) {
2010         HeapRegionLinkedListIterator iter(&local_cleanup_list);
2011         while (iter.more_available()) {
2012           HeapRegion* hr = iter.get_next();
2013           hr_printer->cleanup(hr);
2014         }
2015       }
2016 
2017       _cleanup_list->add_as_tail(&local_cleanup_list);
2018       assert(local_cleanup_list.is_empty(), "post-condition");
2019 
2020       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
2021     }
2022     double end = os::elapsedTime();
2023     if (G1PrintParCleanupStats) {
2024       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
2025                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
2026                           i, start, end, (end-start)*1000.0,
2027                           g1_note_end.regions_claimed(),
2028                           g1_note_end.claimed_region_time_sec()*1000.0,
2029                           g1_note_end.max_region_time_sec()*1000.0);
2030     }
2031   }
2032   size_t max_live_bytes() { return _max_live_bytes; }
2033   size_t freed_bytes() { return _freed_bytes; }
2034 };
2035 
2036 class G1ParScrubRemSetTask: public AbstractGangTask {
2037 protected:
2038   G1RemSet* _g1rs;
2039   BitMap* _region_bm;
2040   BitMap* _card_bm;
2041 public:
2042   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
2043                        BitMap* region_bm, BitMap* card_bm) :
2044     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
2045     _region_bm(region_bm), _card_bm(card_bm)
2046   {}
2047 
2048   void work(int i) {
2049     if (G1CollectedHeap::use_parallel_gc_threads()) {
2050       _g1rs->scrub_par(_region_bm, _card_bm, i,
2051                        HeapRegion::ScrubRemSetClaimValue);
2052     } else {
2053       _g1rs->scrub(_region_bm, _card_bm);
2054     }
2055   }
2056 
2057 };
2058 
2059 void ConcurrentMark::cleanup() {
2060   // world is stopped at this checkpoint
2061   assert(SafepointSynchronize::is_at_safepoint(),
2062          "world should be stopped");
2063   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2064 
2065   // If a full collection has happened, we shouldn't do this.
2066   if (has_aborted()) {
2067     g1h->set_marking_complete(); // So bitmap clearing isn't confused
2068     return;
2069   }
2070 
2071   HRSPhaseSetter x(HRSPhaseCleanup);
2072   g1h->verify_region_sets_optional();
2073 
2074   if (VerifyDuringGC) {
2075     HandleMark hm;  // handle scope
2076     gclog_or_tty->print(" VerifyDuringGC:(before)");
2077     Universe::heap()->prepare_for_verify();
2078     Universe::verify(/* allow dirty */ true,
2079                      /* silent      */ false,
2080                      /* option      */ VerifyOption_G1UsePrevMarking);
2081   }
2082 
2083   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
2084   g1p->record_concurrent_mark_cleanup_start();
2085 
2086   double start = os::elapsedTime();
2087 
2088   HeapRegionRemSet::reset_for_cleanup_tasks();
2089 
2090   // Clear the global region bitmap - it will be filled as part
2091   // of the final counting task.
2092   _region_bm.clear();
2093 
2094   size_t n_workers;
2095 
2096   // Do counting once more with the world stopped for good measure.
2097   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
2098 
2099   if (G1CollectedHeap::use_parallel_gc_threads()) {
2100     assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
2101            "sanity check");
2102 
2103     g1h->set_par_threads();
2104     n_workers = g1h->n_par_threads();
2105     assert(g1h->n_par_threads() == (int) n_workers,
2106            "Should not have been reset");
2107     g1h->workers()->run_task(&g1_par_count_task);
2108     // Done with the parallel phase so reset to 0.
2109     g1h->set_par_threads(0);
2110 
2111     assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
2112            "sanity check");
2113   } else {
2114     n_workers = 1;
2115     g1_par_count_task.work(0);
2116   }
2117 
2118   if (VerifyDuringGC) {
2119     // Verify that the counting data accumulated during marking matches
2120     // that calculated by walking the marking bitmap.
2121     
2122     // Bitmaps to hold expected values
2123     BitMap expected_region_bm(_region_bm.size(), false);
2124     BitMap expected_card_bm(_card_bm.size(), false);
2125 
2126     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2127                                                  &_region_bm,
2128                                                  &_card_bm,
2129                                                  &expected_region_bm,
2130                                                  &expected_card_bm);
2131     
2132     if (G1CollectedHeap::use_parallel_gc_threads()) {
2133       g1h->set_par_threads((int)n_workers);
2134       g1h->workers()->run_task(&g1_par_verify_task);
2135       // Done with the parallel phase so reset to 0.
2136       g1h->set_par_threads(0);
2137       
2138       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
2139              "sanity check");
2140     } else {
2141       g1_par_verify_task.work(0);
2142     }
2143 
2144     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2145   }
2146 
2147   size_t known_garbage_bytes =
2148     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
2149   g1p->set_known_garbage_bytes(known_garbage_bytes);
2150 
2151   size_t start_used_bytes = g1h->used();
2152   _at_least_one_mark_complete = true;
2153   g1h->set_marking_complete();
2154 
2155   ergo_verbose4(ErgoConcCycles,
2156            "finish cleanup",
2157            ergo_format_byte("occupancy")
2158            ergo_format_byte("capacity")
2159            ergo_format_byte_perc("known garbage"),
2160            start_used_bytes, g1h->capacity(),
2161            known_garbage_bytes,
2162            ((double) known_garbage_bytes / (double) g1h->capacity()) * 100.0);
2163 
2164   double count_end = os::elapsedTime();
2165   double this_final_counting_time = (count_end - start);
2166   if (G1PrintParCleanupStats) {
2167     gclog_or_tty->print_cr("Cleanup:");
2168     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
2169                            this_final_counting_time*1000.0);
2170   }
2171   _total_counting_time += this_final_counting_time;
2172 
2173   if (G1PrintRegionLivenessInfo) {
2174     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2175     _g1h->heap_region_iterate(&cl);
2176   }
2177 
2178   // Install newly created mark bitMap as "prev".
2179   swapMarkBitMaps();
2180 
2181   g1h->reset_gc_time_stamp();
2182 
2183   // Note end of marking in all heap regions.
2184   double note_end_start = os::elapsedTime();
2185   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
2186   if (G1CollectedHeap::use_parallel_gc_threads()) {
2187     g1h->set_par_threads((int)n_workers);
2188     g1h->workers()->run_task(&g1_par_note_end_task);
2189     g1h->set_par_threads(0);
2190 
2191     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
2192            "sanity check");
2193   } else {
2194     g1_par_note_end_task.work(0);
2195   }
2196 
2197   if (!cleanup_list_is_empty()) {
2198     // The cleanup list is not empty, so we'll have to process it
2199     // concurrently. Notify anyone else that might be wanting free
2200     // regions that there will be more free regions coming soon.
2201     g1h->set_free_regions_coming();
2202   }
2203   double note_end_end = os::elapsedTime();
2204   if (G1PrintParCleanupStats) {
2205     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
2206                            (note_end_end - note_end_start)*1000.0);
2207   }
2208 
2209   // Do the RS scrubbing before the record_concurrent_mark_cleanup_end() call
2210   // below, since it affects the metric by which we sort the heap regions.
2211   if (G1ScrubRemSets) {
2212     double rs_scrub_start = os::elapsedTime();
2213     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
2214     if (G1CollectedHeap::use_parallel_gc_threads()) {
2215       g1h->set_par_threads((int)n_workers);
2216       g1h->workers()->run_task(&g1_par_scrub_rs_task);
2217       g1h->set_par_threads(0);
2218 
2219       assert(g1h->check_heap_region_claim_values(
2220                                             HeapRegion::ScrubRemSetClaimValue),
2221              "sanity check");
2222     } else {
2223       g1_par_scrub_rs_task.work(0);
2224     }
2225 
2226     double rs_scrub_end = os::elapsedTime();
2227     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2228     _total_rs_scrub_time += this_rs_scrub_time;
2229   }
2230 
2231   // this will also free any regions totally full of garbage objects,
2232   // and sort the regions.
2233   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2234 
2235   // Statistics.
2236   double end = os::elapsedTime();
2237   _cleanup_times.add((end - start) * 1000.0);
2238 
2239   // G1CollectedHeap::heap()->print();
2240   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
2241   // G1CollectedHeap::heap()->get_gc_time_stamp());
2242 
2243   if (PrintGC || PrintGCDetails) {
2244     g1h->print_size_transition(gclog_or_tty,
2245                                start_used_bytes,
2246                                g1h->used(),
2247                                g1h->capacity());
2248   }
2249 
2250   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
2251   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
2252 
2253   // Clean up will have freed any regions completely full of garbage.
2254   // Update the soft reference policy with the new heap occupancy.
2255   Universe::update_heap_info_at_gc();
2256 
2257   // We need to make this be a "collection" so any collection pause that
2258   // races with it goes around and waits for completeCleanup to finish.
2259   g1h->increment_total_collections();
2260 
2261   if (VerifyDuringGC) {
2262     HandleMark hm;  // handle scope
2263     gclog_or_tty->print(" VerifyDuringGC:(after)");
2264     Universe::heap()->prepare_for_verify();
2265     Universe::verify(/* allow dirty */ true,
2266                      /* silent      */ false,
2267                      /* option      */ VerifyOption_G1UsePrevMarking);
2268   }
2269 
2270   g1h->verify_region_sets_optional();
2271 }
2272 
2273 void ConcurrentMark::completeCleanup() {
2274   if (has_aborted()) return;
2275 
2276   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2277 
2278   _cleanup_list.verify_optional();
2279   FreeRegionList tmp_free_list("Tmp Free List");
2280 
2281   if (G1ConcRegionFreeingVerbose) {
2282     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2283                            "cleanup list has "SIZE_FORMAT" entries",
2284                            _cleanup_list.length());
2285   }
2286 
2287   // No one else should be accessing the _cleanup_list at this point,
2288   // so it's not necessary to take any locks.
2289   while (!_cleanup_list.is_empty()) {
2290     HeapRegion* hr = _cleanup_list.remove_head();
2291     assert(hr != NULL, "the list was not empty");
2292     hr->par_clear();
2293     tmp_free_list.add_as_tail(hr);
2294 
2295     // Instead of adding one region at a time to the secondary_free_list,
2296     // we accumulate them in the local list and move them a few at a
2297     // time. This also cuts down on the number of notify_all() calls
2298     // we do during this process. We'll also append the local list when
2299     // _cleanup_list is empty (which means we just removed the last
2300     // region from the _cleanup_list).
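    //
    // Illustrative example (not from the original code), assuming the default
    // G1SecondaryFreeListAppendLength of 5: a cleanup list of 12 regions is
    // transferred in batches of 5, 5 and 2, the final partial batch being
    // flushed because _cleanup_list has just become empty.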
2301     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2302         _cleanup_list.is_empty()) {
2303       if (G1ConcRegionFreeingVerbose) {
2304         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2305                                "appending "SIZE_FORMAT" entries to the "
2306                                "secondary_free_list, clean list still has "
2307                                SIZE_FORMAT" entries",
2308                                tmp_free_list.length(),
2309                                _cleanup_list.length());
2310       }
2311 
2312       {
2313         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2314         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
2315         SecondaryFreeList_lock->notify_all();
2316       }
2317 
2318       if (G1StressConcRegionFreeing) {
2319         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2320           os::sleep(Thread::current(), (jlong) 1, false);
2321         }
2322       }
2323     }
2324   }
2325   assert(tmp_free_list.is_empty(), "post-condition");
2326 }
2327 
2328 // Support closures for reference processing in G1
2329 
2330 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2331   HeapWord* addr = (HeapWord*)obj;
2332   return addr != NULL &&
2333          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2334 }
2335 
2336 class G1CMKeepAliveClosure: public OopClosure {
2337   G1CollectedHeap* _g1;
2338   ConcurrentMark*  _cm;
2339  public:
2340   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
2341     _g1(g1), _cm(cm)
2342   {
2343     assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
2344   }
2345 
2346   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2347   virtual void do_oop(      oop* p) { do_oop_work(p); }
2348 
2349   template <class T> void do_oop_work(T* p) {
2350     oop obj = oopDesc::load_decode_heap_oop(p);
2351     HeapWord* addr = (HeapWord*)obj;
2352 
2353     if (_cm->verbose_high()) {
2354       gclog_or_tty->print_cr("\t[0] we're looking at location "
2355                              "*"PTR_FORMAT" = "PTR_FORMAT,
2356                              p, (void*) obj);
2357     }
2358 
2359     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2360       _cm->mark_and_count(obj);
2361       _cm->mark_stack_push(obj);
2362     }
2363   }
2364 };
2365 
2366 class G1CMDrainMarkingStackClosure: public VoidClosure {
2367   ConcurrentMark*               _cm;
2368   CMMarkStack*                  _markStack;
2369   G1CMKeepAliveClosure*         _oopClosure;
2370  public:
2371   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
2372                                G1CMKeepAliveClosure* oopClosure) :
2373     _cm(cm),
2374     _markStack(markStack),
2375     _oopClosure(oopClosure)
2376   {}
2377 
2378   void do_void() {
2379     _markStack->drain((OopClosure*)_oopClosure, _cm->nextMarkBitMap(), false);
2380   }
2381 };
2382 
2383 // 'Keep Alive' closure used by parallel reference processing.
2384 // An instance of this closure is used in the parallel reference processing
2385 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2386 // the G1CMKeepAliveClosure as it is MT-safe. Also, reference objects are
2387 // only placed on a discovered ref list once, so we can mark and push them
2388 // without needing to check whether the object has already been marked. Using
2389 // the G1CMKeepAliveClosure would mean, however, having all the worker threads
2390 // operating on the global mark stack. This means that an individual
2391 // worker would be doing lock-free pushes while it processes its own
2392 // discovered ref list followed by a drain call. If the discovered ref lists
2393 // are unbalanced then this could cause interference with the other
2394 // workers. Using a CMTask (and its embedded local data structures)
2395 // avoids that potential interference.
2396 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2397   ConcurrentMark*  _cm;
2398   CMTask*          _task;
2399   int              _ref_counter_limit;
2400   int              _ref_counter;
2401  public:
2402   G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
2403     _cm(cm), _task(task),
2404     _ref_counter_limit(G1RefProcDrainInterval) {
2405     assert(_ref_counter_limit > 0, "sanity");
2406     _ref_counter = _ref_counter_limit;
2407   }
2408 
2409   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2410   virtual void do_oop(      oop* p) { do_oop_work(p); }
2411 
2412   template <class T> void do_oop_work(T* p) {
2413     if (!_cm->has_overflown()) {
2414       oop obj = oopDesc::load_decode_heap_oop(p);
2415       if (_cm->verbose_high()) {
2416         gclog_or_tty->print_cr("\t[%d] we're looking at location "
2417                                "*"PTR_FORMAT" = "PTR_FORMAT,
2418                                _task->task_id(), p, (void*) obj);
2419       }
2420 
2421       _task->deal_with_reference(obj);
2422       _ref_counter--;
2423 
2424       if (_ref_counter == 0) {
2425         // We have dealt with _ref_counter_limit references, pushing them and objects
2426         // reachable from them on to the local stack (and possibly the global stack).
2427         // Call do_marking_step() to process these entries. We call the routine in a
2428         // loop, which we'll exit if there's nothing more to do (i.e. we're done
2429         // with the entries that we've pushed as a result of the deal_with_reference
2430         // calls above) or we overflow.
2431         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2432         // while there may still be some work to do. (See the comment at the
2433         // beginning of CMTask::do_marking_step() for those conditions - one of which
2434         // is reaching the specified time target.) It is only when
2435         // CMTask::do_marking_step() returns without setting the has_aborted() flag
2436         // that the marking has completed.
2437         do {
2438           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2439           _task->do_marking_step(mark_step_duration_ms,
2440                                  false /* do_stealing    */,
2441                                  false /* do_termination */);
2442         } while (_task->has_aborted() && !_cm->has_overflown());
2443         _ref_counter = _ref_counter_limit;
2444       }
2445     } else {
2446       if (_cm->verbose_high()) {
2447          gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2448       }
2449     }
2450   }
2451 };
2452 
2453 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2454   ConcurrentMark* _cm;
2455   CMTask* _task;
2456  public:
2457   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2458     _cm(cm), _task(task)
2459   {}
2460 
2461   void do_void() {
2462     do {
2463       if (_cm->verbose_high()) {
2464         gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2465                                _task->task_id());
2466       }
2467 
2468       // We call CMTask::do_marking_step() to completely drain the local and
2469       // global marking stacks. The routine is called in a loop, which we'll
2470       // exit if there's nothing more to do (i.e. we've completely drained the
2471       // entries that were pushed as a result of applying the
2472       // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2473       // lists above) or we overflow the global marking stack.
2474       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2475       // while there may still be some work to do. (See the comment at the
2476       // beginning of CMTask::do_marking_step() for those conditions - one of which
2477       // is reaching the specified time target.) It is only when
2478       // CMTask::do_marking_step() returns without setting the has_aborted() flag
2479       // that the marking has completed.
2480 
2481       _task->do_marking_step(1000000000.0 /* something very large */,
2482                              true /* do_stealing    */,
2483                              true /* do_termination */);
2484     } while (_task->has_aborted() && !_cm->has_overflown());
2485   }
2486 };
2487 
2488 // Implementation of AbstractRefProcTaskExecutor for parallel
2489 // reference processing at the end of G1 concurrent marking
2490 
2491 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2492 private:
2493   G1CollectedHeap* _g1h;
2494   ConcurrentMark*  _cm;
2495   WorkGang*        _workers;
2496   int              _active_workers;
2497 
2498 public:
2499   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2500                         ConcurrentMark* cm,
2501                         WorkGang* workers,
2502                         int n_workers) :
2503     _g1h(g1h), _cm(cm),
2504     _workers(workers), _active_workers(n_workers) { }
2505 
2506   // Executes the given task using concurrent marking worker threads.
2507   virtual void execute(ProcessTask& task);
2508   virtual void execute(EnqueueTask& task);
2509 };
2510 
2511 class G1CMRefProcTaskProxy: public AbstractGangTask {
2512   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2513   ProcessTask&     _proc_task;
2514   G1CollectedHeap* _g1h;
2515   ConcurrentMark*  _cm;
2516 
2517 public:
2518   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2519                      G1CollectedHeap* g1h,
2520                      ConcurrentMark* cm) :
2521     AbstractGangTask("Process reference objects in parallel"),
2522     _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
2523 
2524   virtual void work(int i) {
2525     CMTask* marking_task = _cm->task(i);
2526     G1CMIsAliveClosure g1_is_alive(_g1h);
2527     G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
2528     G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2529 
2530     _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2531   }
2532 };
2533 
2534 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2535   assert(_workers != NULL, "Need parallel worker threads.");
2536 
2537   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2538 
2539   // We need to reset the phase for each task execution so that
2540   // the termination protocol of CMTask::do_marking_step works.
2541   _cm->set_phase(_active_workers, false /* concurrent */);
2542   _g1h->set_par_threads(_active_workers);
2543   _workers->run_task(&proc_task_proxy);
2544   _g1h->set_par_threads(0);
2545 }
2546 
2547 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2548   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2549   EnqueueTask& _enq_task;
2550 
2551 public:
2552   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2553     AbstractGangTask("Enqueue reference objects in parallel"),
2554     _enq_task(enq_task) { }
2555 
2556   virtual void work(int i) {
2557     _enq_task.work(i);
2558   }
2559 };
2560 
2561 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2562   assert(_workers != NULL, "Need parallel worker threads.");
2563 
2564   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2565 
2566   _g1h->set_par_threads(_active_workers);
2567   _workers->run_task(&enq_task_proxy);
2568   _g1h->set_par_threads(0);
2569 }
2570 
2571 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2572   ResourceMark rm;
2573   HandleMark   hm;
2574 
2575   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2576 
2577   // Is alive closure.
2578   G1CMIsAliveClosure g1_is_alive(g1h);
2579 
2580   // Inner scope to exclude the cleaning of the string and symbol
2581   // tables from the displayed time.
2582   {
2583     bool verbose = PrintGC && PrintGCDetails;
2584     if (verbose) {
2585       gclog_or_tty->put(' ');
2586     }
2587     TraceTime t("GC ref-proc", verbose, false, gclog_or_tty);
2588 
2589     ReferenceProcessor* rp = g1h->ref_processor_cm();
2590 
2591     // See the comment in G1CollectedHeap::ref_processing_init()
2592     // about how reference processing currently works in G1.
2593 
2594     // Process weak references.
2595     rp->setup_policy(clear_all_soft_refs);
2596     assert(_markStack.isEmpty(), "mark stack should be empty");
2597 
2598     G1CMKeepAliveClosure g1_keep_alive(g1h, this);
2599     G1CMDrainMarkingStackClosure
2600       g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
2601 
2602     // We use the work gang from the G1CollectedHeap and we utilize all
2603     // the worker threads.
2604     int active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1;
2605     active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
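    //
    // Illustrative example (hypothetical numbers): with 13 active workers in
    // the gang and a _max_task_num of 8, the clamp above yields 8 reference
    // processing workers; the MAX2 guarantees at least one worker even if the
    // gang reports zero active workers.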
2606 
2607     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2608                                               g1h->workers(), active_workers);
2609 
2610     if (rp->processing_is_mt()) {
2611       // Set the degree of MT here.  If the discovery is done MT, there
2612       // may have been a different number of threads doing the discovery
2613       // and a different number of discovered lists may have Ref objects.
2614       // That is OK as long as the Reference lists are balanced (see
2615       // balance_all_queues() and balance_queues()).
2616       rp->set_active_mt_degree(active_workers);
2617 
2618       rp->process_discovered_references(&g1_is_alive,
2619                                       &g1_keep_alive,
2620                                       &g1_drain_mark_stack,
2621                                       &par_task_executor);
2622 
2623       // The work routines of the parallel keep_alive and drain_marking_stack
2624       // will set the has_overflown flag if we overflow the global marking
2625       // stack.
2626     } else {
2627       rp->process_discovered_references(&g1_is_alive,
2628                                         &g1_keep_alive,
2629                                         &g1_drain_mark_stack,
2630                                         NULL);
2631     }
2632 
2633     assert(_markStack.overflow() || _markStack.isEmpty(),
2634             "mark stack should be empty (unless it overflowed)");
2635     if (_markStack.overflow()) {
2636       // Should have been done already when we tried to push an
2637       // entry on to the global mark stack. But let's do it again.
2638       set_has_overflown();
2639     }
2640 
2641     if (rp->processing_is_mt()) {
2642       assert(rp->num_q() == active_workers, "why not");
2643       rp->enqueue_discovered_references(&par_task_executor);
2644     } else {
2645       rp->enqueue_discovered_references();
2646     }
2647 
2648     rp->verify_no_references_recorded();
2649     assert(!rp->discovery_enabled(), "Post condition");
2650   }
2651 
2652   // Now clean up stale oops in StringTable
2653   StringTable::unlink(&g1_is_alive);
2654   // Clean up unreferenced symbols in symbol table.
2655   SymbolTable::unlink();
2656 }
2657 
2658 void ConcurrentMark::swapMarkBitMaps() {
2659   CMBitMapRO* temp = _prevMarkBitMap;
2660   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2661   _nextMarkBitMap  = (CMBitMap*)  temp;
2662 }
2663 
2664 class CMRemarkTask: public AbstractGangTask {
2665 private:
2666   ConcurrentMark *_cm;
2667 
2668 public:
2669   void work(int worker_i) {
2670     // Since all available tasks are actually started, we should
2671     // only proceed if we're supposed to be active.
2672     if ((size_t)worker_i < _cm->active_tasks()) {
2673       CMTask* task = _cm->task(worker_i);
2674       task->record_start_time();
2675       do {
2676         task->do_marking_step(1000000000.0 /* something very large */,
2677                               true /* do_stealing    */,
2678                               true /* do_termination */);
2679       } while (task->has_aborted() && !_cm->has_overflown());
2680       // If we overflow, then we do not want to restart. We instead
2681       // want to abort remark and do concurrent marking again.
2682       task->record_end_time();
2683     }
2684   }
2685 
2686   CMRemarkTask(ConcurrentMark* cm, int active_workers) :
2687     AbstractGangTask("Par Remark"), _cm(cm) {
2688     _cm->terminator()->reset_for_reuse(active_workers);
2689   }
2690 };
2691 
2692 void ConcurrentMark::checkpointRootsFinalWork() {
2693   ResourceMark rm;
2694   HandleMark   hm;
2695   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2696 
2697   g1h->ensure_parsability(false);
2698 
2699   if (G1CollectedHeap::use_parallel_gc_threads()) {
2700     G1CollectedHeap::StrongRootsScope srs(g1h);
2701     // this is remark, so we'll use up all active threads
2702     int active_workers = g1h->workers()->active_workers();
2703     if (active_workers == 0) {
2704       assert(active_workers > 0, "Should have been set earlier");
2705       active_workers = ParallelGCThreads;
2706       g1h->workers()->set_active_workers(active_workers);
2707     }
2708     set_phase(active_workers, false /* concurrent */);
2709     // Leave _parallel_marking_threads at its
2710     // value originally calculated in the ConcurrentMark
2711     // constructor and pass values of the active workers
2712     // through the gang in the task.
2713 
2714     CMRemarkTask remarkTask(this, active_workers);
2715     g1h->set_par_threads(active_workers);
2716     g1h->workers()->run_task(&remarkTask);
2717     g1h->set_par_threads(0);
2718   } else {
2719     G1CollectedHeap::StrongRootsScope srs(g1h);
2720     // this is remark, so we'll use up all available threads
2721     int active_workers = 1;
2722     set_phase(active_workers, false /* concurrent */);
2723 
2724     CMRemarkTask remarkTask(this, active_workers);
2725     // We will start all available threads, even if we decide that the
2726     // active_workers will be fewer. The extra ones will just bail out
2727     // immediately.
2728     remarkTask.work(0);
2729   }
2730   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2731   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2732 
2733   print_stats();
2734 
2735 #if VERIFY_OBJS_PROCESSED
2736   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2737     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2738                            _scan_obj_cl.objs_processed,
2739                            ThreadLocalObjQueue::objs_enqueued);
2740     guarantee(_scan_obj_cl.objs_processed ==
2741               ThreadLocalObjQueue::objs_enqueued,
2742               "Different number of objs processed and enqueued.");
2743   }
2744 #endif
2745 }
2746 
2747 #ifndef PRODUCT
2748 
2749 class PrintReachableOopClosure: public OopClosure {
2750 private:
2751   G1CollectedHeap* _g1h;
2752   outputStream*    _out;
2753   VerifyOption     _vo;
2754   bool             _all;
2755 
2756 public:
2757   PrintReachableOopClosure(outputStream* out,
2758                            VerifyOption  vo,
2759                            bool          all) :
2760     _g1h(G1CollectedHeap::heap()),
2761     _out(out), _vo(vo), _all(all) { }
2762 
2763   void do_oop(narrowOop* p) { do_oop_work(p); }
2764   void do_oop(      oop* p) { do_oop_work(p); }
2765 
2766   template <class T> void do_oop_work(T* p) {
2767     oop         obj = oopDesc::load_decode_heap_oop(p);
2768     const char* str = NULL;
2769     const char* str2 = "";
2770 
2771     if (obj == NULL) {
2772       str = "";
2773     } else if (!_g1h->is_in_g1_reserved(obj)) {
2774       str = " O";
2775     } else {
2776       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2777       guarantee(hr != NULL, "invariant");
2778       bool over_tams = false;
2779       bool marked = false;
2780 
2781       switch (_vo) {
2782         case VerifyOption_G1UsePrevMarking:
2783           over_tams = hr->obj_allocated_since_prev_marking(obj);
2784           marked = _g1h->isMarkedPrev(obj);
2785           break;
2786         case VerifyOption_G1UseNextMarking:
2787           over_tams = hr->obj_allocated_since_next_marking(obj);
2788           marked = _g1h->isMarkedNext(obj);
2789           break;
2790         case VerifyOption_G1UseMarkWord:
2791           marked = obj->is_gc_marked();
2792           break;
2793         default:
2794           ShouldNotReachHere();
2795       }
2796 
2797       if (over_tams) {
2798         str = " >";
2799         if (marked) {
2800           str2 = " AND MARKED";
2801         }
2802       } else if (marked) {
2803         str = " M";
2804       } else {
2805         str = " NOT";
2806       }
2807     }
2808 
2809     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2810                    p, (void*) obj, str, str2);
2811   }
2812 };
2813 
2814 class PrintReachableObjectClosure : public ObjectClosure {
2815 private:
2816   G1CollectedHeap* _g1h;
2817   outputStream*    _out;
2818   VerifyOption     _vo;
2819   bool             _all;
2820   HeapRegion*      _hr;
2821 
2822 public:
2823   PrintReachableObjectClosure(outputStream* out,
2824                               VerifyOption  vo,
2825                               bool          all,
2826                               HeapRegion*   hr) :
2827     _g1h(G1CollectedHeap::heap()),
2828     _out(out), _vo(vo), _all(all), _hr(hr) { }
2829 
2830   void do_object(oop o) {
2831     bool over_tams = false;
2832     bool marked = false;
2833 
2834     switch (_vo) {
2835       case VerifyOption_G1UsePrevMarking:
2836         over_tams = _hr->obj_allocated_since_prev_marking(o);
2837         marked = _g1h->isMarkedPrev(o);
2838         break;
2839       case VerifyOption_G1UseNextMarking:
2840         over_tams = _hr->obj_allocated_since_next_marking(o);
2841         marked = _g1h->isMarkedNext(o);
2842         break;
2843       case VerifyOption_G1UseMarkWord:
2844         marked = o->is_gc_marked();
2845         break;
2846       default:
2847         ShouldNotReachHere();
2848     }
2849     bool print_it = _all || over_tams || marked;
2850 
2851     if (print_it) {
2852       _out->print_cr(" "PTR_FORMAT"%s",
2853                      o, (over_tams) ? " >" : (marked) ? " M" : "");
2854       PrintReachableOopClosure oopCl(_out, _vo, _all);
2855       o->oop_iterate(&oopCl);
2856     }
2857   }
2858 };
2859 
2860 class PrintReachableRegionClosure : public HeapRegionClosure {
2861 private:
2862   outputStream* _out;
2863   VerifyOption  _vo;
2864   bool          _all;
2865 
2866 public:
2867   bool doHeapRegion(HeapRegion* hr) {
2868     HeapWord* b = hr->bottom();
2869     HeapWord* e = hr->end();
2870     HeapWord* t = hr->top();
2871     HeapWord* p = NULL;
2872 
2873     switch (_vo) {
2874       case VerifyOption_G1UsePrevMarking:
2875         p = hr->prev_top_at_mark_start();
2876         break;
2877       case VerifyOption_G1UseNextMarking:
2878         p = hr->next_top_at_mark_start();
2879         break;
2880       case VerifyOption_G1UseMarkWord:
2881         // When we are verifying marking using the mark word
2882         // TAMS has no relevance.
2883         assert(p == NULL, "post-condition");
2884         break;
2885       default:
2886         ShouldNotReachHere();
2887     }
2888     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2889                    "TAMS: "PTR_FORMAT, b, e, t, p);
2890     _out->cr();
2891 
2892     HeapWord* from = b;
2893     HeapWord* to   = t;
2894 
2895     if (to > from) {
2896       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2897       _out->cr();
2898       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2899       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2900       _out->cr();
2901     }
2902 
2903     return false;
2904   }
2905 
2906   PrintReachableRegionClosure(outputStream* out,
2907                               VerifyOption  vo,
2908                               bool          all) :
2909     _out(out), _vo(vo), _all(all) { }
2910 };
2911 
2912 static const char* verify_option_to_tams(VerifyOption vo) {
2913   switch (vo) {
2914     case VerifyOption_G1UsePrevMarking:
2915       return "PTAMS";
2916     case VerifyOption_G1UseNextMarking:
2917       return "NTAMS";
2918     default:
2919       return "NONE";
2920   }
2921 }
2922 
2923 void ConcurrentMark::print_reachable(const char* str,
2924                                      VerifyOption vo,
2925                                      bool all) {
2926   gclog_or_tty->cr();
2927   gclog_or_tty->print_cr("== Doing heap dump... ");
2928 
2929   if (G1PrintReachableBaseFile == NULL) {
2930     gclog_or_tty->print_cr("  #### error: no base file defined");
2931     return;
2932   }
2933 
2934   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2935       (JVM_MAXPATHLEN - 1)) {
2936     gclog_or_tty->print_cr("  #### error: file name too long");
2937     return;
2938   }
2939 
2940   char file_name[JVM_MAXPATHLEN];
2941   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2942   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2943 
2944   fileStream fout(file_name);
2945   if (!fout.is_open()) {
2946     gclog_or_tty->print_cr("  #### error: could not open file");
2947     return;
2948   }
2949 
2950   outputStream* out = &fout;
2951   out->print_cr("-- USING %s", verify_option_to_tams(vo));
2952   out->cr();
2953 
2954   out->print_cr("--- ITERATING OVER REGIONS");
2955   out->cr();
2956   PrintReachableRegionClosure rcl(out, vo, all);
2957   _g1h->heap_region_iterate(&rcl);
2958   out->cr();
2959 
2960   gclog_or_tty->print_cr("  done");
2961   gclog_or_tty->flush();
2962 }
2963 
2964 #endif // PRODUCT
2965 
2966 // This note is for drainAllSATBBuffers and the code in between.
2967 // In the future we could reuse a task to do this work during an
2968 // evacuation pause (since now tasks are not active and can be claimed
2969 // during an evacuation pause). This was a late change to the code and
2970 // is currently not being taken advantage of.
2971 
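     // Applies ConcurrentMark::deal_with_reference() (with worker id 0)
     // to each object recorded in a SATB buffer; used by
     // drainAllSATBBuffers() below.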
2972 class CMGlobalObjectClosure : public ObjectClosure {
2973 private:
2974   ConcurrentMark* _cm;
2975 
2976 public:
2977   void do_object(oop obj) {
2978     _cm->deal_with_reference(obj, 0);
2979   }
2980 
2981   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2982 };
2983 
2984 void ConcurrentMark::deal_with_reference(oop obj, int worker_i) {
2985   if (verbose_high()) {
2986     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2987                            (void*) obj);
2988   }
2989 
2990   HeapWord* objAddr = (HeapWord*) obj;
2991   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2992   if (_g1h->is_in_g1_reserved(objAddr)) {
2993     assert(obj != NULL, "null check is implicit");
2994     if (!_nextMarkBitMap->isMarked(objAddr)) {
2995       // Only get the containing region if the object is not marked on the
2996       // bitmap (otherwise, it's a waste of time since we won't do
2997       // anything with it).
2998       HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
2999       if (!hr->obj_allocated_since_next_marking(obj)) {
3000         if (verbose_high()) {
3001           gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
3002                                  "marked", (void*) obj);
3003         }
3004 
3005         // we need to mark it first
3006         if (par_mark_and_count(obj, hr, worker_i)) {
3007           // No OrderAccess::store_load() is needed. It is implicit in the
3008           // CAS done in the call to CMBitMap::parMark() in the above
3009           // routine.
3010           HeapWord* finger = _finger;
3011           if (objAddr < finger) {
3012             if (verbose_high()) {
3013               gclog_or_tty->print_cr("[global] below the global finger "
3014                                      "("PTR_FORMAT"), pushing it", finger);
3015             }
3016             if (!mark_stack_push(obj)) {
3017               if (verbose_low()) {
3018                 gclog_or_tty->print_cr("[global] global stack overflow during "
3019                                        "deal_with_reference");
3020               }
3021             }
3022           }
3023         }
3024       }
3025     }
3026   }
3027 }
3028 
3029 void ConcurrentMark::drainAllSATBBuffers() {
3030   CMGlobalObjectClosure oc(this);
3031   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3032   satb_mq_set.set_closure(&oc);
3033 
3034   while (satb_mq_set.apply_closure_to_completed_buffer()) {
3035     if (verbose_medium()) {
3036       gclog_or_tty->print_cr("[global] processed an SATB buffer");
3037     }
3038   }
3039 
3040   // no need to check whether we should do this, as this is only
3041   // called during an evacuation pause
3042   satb_mq_set.iterate_closure_all_threads();
3043 
3044   satb_mq_set.set_closure(NULL);
3045   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
3046 }
3047 
3048 void ConcurrentMark::markPrev(oop p) {
3049   // Note we are overriding the read-only view of the prev map here, via
3050   // the cast.
3051   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
3052 }
3053 
3054 void ConcurrentMark::clear_mark(oop p) {
3055   assert(p != NULL && p->is_oop(), "expected an oop");
3056   HeapWord* addr = (HeapWord*)p;
3057   assert(addr >= _nextMarkBitMap->startWord() &&
3058          addr < _nextMarkBitMap->endWord(), "in a region");
3059 
3060   _nextMarkBitMap->clear(addr);
3061 }
3062 
3063 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
3064   // Note we are overriding the read-only view of the prev map here, via
3065   // the cast.
3066   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
3067   _nextMarkBitMap->clearRange(mr);
3068 }
3069 
3070 HeapRegion*
3071 ConcurrentMark::claim_region(int task_num) {
3072   // "checkpoint" the finger
3073   HeapWord* finger = _finger;
3074 
3075   // _heap_end will not change underneath our feet; it only changes at
3076   // yield points.
3077   while (finger < _heap_end) {
3078     assert(_g1h->is_in_g1_reserved(finger), "invariant");
3079 
3080     // Note on how this code handles humongous regions. In the
3081     // normal case the finger will reach the start of a "starts
3082     // humongous" (SH) region. Its end will either be the end of the
3083     // last "continues humongous" (CH) region in the sequence, or the
3084     // standard end of the SH region (if the SH is the only region in
3085     // the sequence). That way claim_region() will skip over the CH
3086     // regions. However, there is a subtle race between a CM thread
3087     // executing this method and a mutator thread doing a humongous
3088     // object allocation. The two are not mutually exclusive as the CM
3089     // thread does not need to hold the Heap_lock when it gets
3090     // here. So there is a chance that claim_region() will come across
3091     // a free region that's in the process of becoming a SH or a CH
3092     // region. In the former case, it will either
3093     //   a) Miss the update to the region's end, in which case it will
3094     //      visit every subsequent CH region, will find their bitmaps
3095     //      empty, and do nothing, or
3096     //   b) Will observe the update of the region's end (in which case
3097     //      it will skip the subsequent CH regions).
3098     // If it comes across a region that suddenly becomes CH, the
3099     // scenario will be similar to b). So, the race between
3100     // claim_region() and a humongous object allocation might force us
3101     // to do a bit of unnecessary work (due to some unnecessary bitmap
3102     // iterations) but it should not introduce any correctness issues.
3103     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
3104     HeapWord*   bottom        = curr_region->bottom();
3105     HeapWord*   end           = curr_region->end();
3106     HeapWord*   limit         = curr_region->next_top_at_mark_start();
3107 
3108     if (verbose_low()) {
3109       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
3110                              "["PTR_FORMAT", "PTR_FORMAT"), "
3111                              "limit = "PTR_FORMAT,
3112                              task_num, curr_region, bottom, end, limit);
3113     }
3114 
3115     // Is the gap between reading the finger and doing the CAS too long?
3116     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
3117     if (res == finger) {
3118       // we succeeded
3119 
3120       // notice that _finger == end cannot be guaranteed here since
3121       // someone else might have moved the finger even further
3122       assert(_finger >= end, "the finger should have moved forward");
3123 
3124       if (verbose_low()) {
3125         gclog_or_tty->print_cr("[%d] we were successful with region = "
3126                                PTR_FORMAT, task_num, curr_region);
3127       }
3128 
3129       if (limit > bottom) {
3130         if (verbose_low()) {
3131           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
3132                                  "returning it ", task_num, curr_region);
3133         }
3134         return curr_region;
3135       } else {
3136         assert(limit == bottom,
3137                "the region limit should be at bottom");
3138         if (verbose_low()) {
3139           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
3140                                  "returning NULL", task_num, curr_region);
3141         }
3142         // we return NULL and the caller should try calling
3143         // claim_region() again.
3144         return NULL;
3145       }
3146     } else {
3147       assert(_finger > finger, "the finger should have moved forward");
3148       if (verbose_low()) {
3149         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
3150                                "global finger = "PTR_FORMAT", "
3151                                "our finger = "PTR_FORMAT,
3152                                task_num, _finger, finger);
3153       }
3154 
3155       // read it again
3156       finger = _finger;
3157     }
3158   }
3159 
3160   return NULL;
3161 }
3162 
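     // Checks the aborted region saved by each marking task and drops it
     // if it points into the collection set. Returns true if at least one
     // aborted region was dropped.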
3163 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
3164   bool result = false;
3165   for (int i = 0; i < (int)_max_task_num; ++i) {
3166     CMTask* the_task = _tasks[i];
3167     MemRegion mr = the_task->aborted_region();
3168     if (mr.start() != NULL) {
3169       assert(mr.end() != NULL, "invariant");
3170       assert(mr.word_size() > 0, "invariant");
3171       HeapRegion* hr = _g1h->heap_region_containing(mr.start());
3172       assert(hr != NULL, "invariant");
3173       if (hr->in_collection_set()) {
3174         // The region points into the collection set
3175         the_task->set_aborted_region(MemRegion());
3176         result = true;
3177       }
3178     }
3179   }
3180   return result;
3181 }
3182 
3183 bool ConcurrentMark::has_aborted_regions() {
3184   for (int i = 0; i < (int)_max_task_num; ++i) {
3185     CMTask* the_task = _tasks[i];
3186     MemRegion mr = the_task->aborted_region();
3187     if (mr.start() != NULL) {
3188       assert(mr.end() != NULL, "invariant");
3189       assert(mr.word_size() > 0, "invariant");
3190       return true;
3191     }
3192   }
3193   return false;
3194 }
3195 
3196 void ConcurrentMark::oops_do(OopClosure* cl) {
3197   if (_markStack.size() > 0 && verbose_low()) {
3198     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
3199                            "size = %d", _markStack.size());
3200   }
3201   // we first iterate over the contents of the mark stack...
3202   _markStack.oops_do(cl);
3203 
3204   for (int i = 0; i < (int)_max_task_num; ++i) {
3205     OopTaskQueue* queue = _task_queues->queue((int)i);
3206 
3207     if (queue->size() > 0 && verbose_low()) {
3208       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
3209                              "size = %d", i, queue->size());
3210     }
3211 
3212     // ...then over the contents of all the task queues.
3213     queue->oops_do(cl);
3214   }
3215 
3216   // Invalidate any entries in the region stack that point into
3217   // the collection set.
3218   if (_regionStack.invalidate_entries_into_cset()) {
3219     // otherwise, any gray objects copied during the evacuation pause
3220     // might not be visited.
3221     assert(_should_gray_objects, "invariant");
3222   }
3223 
3224   // Invalidate any aborted regions, recorded in the individual CM
3225   // tasks, that point into the collection set.
3226   if (invalidate_aborted_regions_in_cset()) {
3227     // otherwise, any gray objects copied during the evacuation pause
3228     // might not be visited.
3229     assert(_should_gray_objects, "invariant");
3230   }
3231 
3232 }
3233 
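     // Resets the global marking state: empties the global mark and
     // region stacks (clearing their overflow flags), optionally clears
     // the global overflow flag, resets the global finger to the start of
     // the heap, and empties each task's queue and aborted region.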
3234 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
3235   _markStack.setEmpty();
3236   _markStack.clear_overflow();
3237   _regionStack.setEmpty();
3238   _regionStack.clear_overflow();
3239   if (clear_overflow) {
3240     clear_has_overflown();
3241   } else {
3242     assert(has_overflown(), "pre-condition");
3243   }
3244   _finger = _heap_start;
3245 
3246   for (int i = 0; i < (int)_max_task_num; ++i) {
3247     OopTaskQueue* queue = _task_queues->queue(i);
3248     queue->set_empty();
3249     // Clear any partial regions from the CMTasks
3250     _tasks[i]->clear_aborted_region();
3251   }
3252 }
3253 
3254 // Aggregate the counting data that was constructed concurrently
3255 // with marking.
3256 class AggregateCountDataHRClosure: public HeapRegionClosure {
3257   ConcurrentMark* _cm;
3258   BitMap* _cm_card_bm;
3259   intptr_t _bottom_card_num;
3260   size_t _max_task_num;
3261 
3262  public:
3263   AggregateCountDataHRClosure(ConcurrentMark *cm,
3264                           BitMap* cm_card_bm,
3265                           intptr_t bottom_card_num,
3266                           size_t max_task_num) :
3267     _cm(cm),
3268     _cm_card_bm(cm_card_bm),
3269     _bottom_card_num(bottom_card_num),
3270     _max_task_num(max_task_num)
3271   { }
3272 
3273   bool is_card_aligned(HeapWord* p) {
3274     return ((uintptr_t(p) & (CardTableModRefBS::card_size - 1)) == 0);
3275   }
3276 
3277   bool doHeapRegion(HeapRegion* hr) {
3278     if (hr->continuesHumongous()) {
3279       // We will ignore these here and process them when their
3280       // associated "starts humongous" region is processed.
3281       // Note that we cannot rely on their associated
3282       // "starts humongous" region to have their bit set to 1
3283       // since, due to the region chunking in the parallel region
3284       // iteration, a "continues humongous" region might be visited
3285       // before its associated "starts humongous".
3286       return false;
3287     }
3288 
3289     HeapWord* start = hr->bottom();
3290     HeapWord* limit = hr->next_top_at_mark_start();
3291     HeapWord* end = hr->end();
3292     
3293     assert(start <= limit && limit <= hr->top() && 
3294            hr->top() <= hr->end(), "Preconditions");
3295 
3296     assert(hr->next_marked_bytes() == 0, "Precondition");
3297 
3298     if (start == limit) {
3299       // NTAMS of this region has not been set so nothing to do.
3300       return false;
3301     }
3302 
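         // Convert the region boundaries into card numbers. These are
         // absolute card numbers; they are turned into indices into the
         // card bitmaps below by subtracting the card number of the heap
         // bottom (_bottom_card_num).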
3303     intptr_t start_card_num = intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
3304     intptr_t limit_card_num = intptr_t(uintptr_t(limit) >> CardTableModRefBS::card_shift);
3305     intptr_t end_card_num   = intptr_t(uintptr_t(end) >> CardTableModRefBS::card_shift);
3306 
3307     assert(is_card_aligned(start), "sanity");
3308     assert(is_card_aligned(end), "sanity");
3309    
3310     // If ntams is not card aligned then we bump the index for
3311     // limit so that we get the card spanning ntams.
3312     if (!is_card_aligned(limit)) {
3313       limit_card_num += 1;
3314     }
3315 
3316     assert(limit_card_num <= end_card_num, "or else use atomics");
3317 
3318     BitMap::idx_t start_idx = start_card_num - _bottom_card_num;
3319     BitMap::idx_t limit_idx = limit_card_num - _bottom_card_num;
3320 
3321     // Aggregate the "stripe" in the count data associated with hr.
3322     size_t hrs_index = hr->hrs_index();
3323     size_t marked_bytes = 0;
3324 
3325     for (int i = 0; (size_t)i < _max_task_num; i += 1) {
3326       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3327       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3328   
3329       // Fetch the marked_bytes in this region for task i and
3330       // add it to the running total for this region.
3331       marked_bytes += marked_bytes_array[hrs_index];
3332 
3333       // Now clear the value in the task's marked bytes array
3334       // for this region.
3335       marked_bytes_array[hrs_index] = 0;
3336   
3337       // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
3338       // into the global card bitmap.
3339       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3340 
3341       while (scan_idx < limit_idx) {
3342         assert(task_card_bm->at(scan_idx) == true, "should be");
3343         _cm_card_bm->set_bit(scan_idx);
3344         task_card_bm->clear_bit(scan_idx);
3345         assert(_cm_card_bm->at(scan_idx) == true, "should be");
3346         scan_idx = task_card_bm->get_next_one_offset(scan_idx + 1, limit_idx);
3347       }
3348     }
3349 
3350     // Update the marked bytes for this region.
3351     hr->add_to_marked_bytes(marked_bytes);
3352   
3353     // Now set the top at count to NTAMS.
3354     hr->set_top_at_conc_mark_count(limit);
3355 
3356     // Next heap region
3357     return false;
3358   }
3359 };
3360 
3361 class G1AggregateCountDataTask: public AbstractGangTask {
3362 protected:
3363   G1CollectedHeap* _g1h;
3364   ConcurrentMark* _cm;
3365   BitMap* _cm_card_bm;
3366   intptr_t _heap_bottom_card_num;
3367   size_t _max_task_num;
3368   int _active_workers;
3369 
3370 public:
3371   G1AggregateCountDataTask(G1CollectedHeap* g1h,
3372                            ConcurrentMark* cm,
3373                            BitMap* cm_card_bm,
3374                            intptr_t bottom_card_num,
3375                            size_t max_task_num,
3376                            int n_workers) :
3377     AbstractGangTask("Count Aggregation"),
3378     _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3379     _heap_bottom_card_num(bottom_card_num),
3380     _max_task_num(max_task_num),
3381     _active_workers(n_workers)
3382   { }
3383 
3384   void work(int worker_i) {
3385     AggregateCountDataHRClosure cl(_cm, _cm_card_bm,
3386                                  _heap_bottom_card_num, _max_task_num);
3387 
3388     if (G1CollectedHeap::use_parallel_gc_threads()) {
3389       _g1h->heap_region_par_iterate_chunked(&cl, worker_i,
3390                                             _active_workers,
3391                                             HeapRegion::AggregateCountClaimValue);
3392     } else {
3393       _g1h->heap_region_iterate(&cl);
3394     }
3395   }
3396 };
3397 
3398 
3399 void ConcurrentMark::aggregate_and_clear_count_data() {
3400   // Clear the global card bitmap
3401   _card_bm.clear();
3402 
3403   int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3404                         _g1h->workers()->active_workers() :
3405                         1);
3406 
3407   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3408                                            _heap_bottom_card_num, _max_task_num,
3409                                            n_workers);
3410 
3411   if (G1CollectedHeap::use_parallel_gc_threads()) {
3412     assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3413            "sanity check");
3414     _g1h->set_par_threads(n_workers);
3415     _g1h->workers()->run_task(&g1_par_agg_task);
3416     _g1h->set_par_threads(0);
3417 
3418     assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3419            "sanity check");
3420     _g1h->reset_heap_region_claim_values();
3421   } else {
3422     g1_par_agg_task.work(0);
3423   }
3424 }
3425 
3426 // Clear the per-worker arrays used to store the per-region counting data
3427 void ConcurrentMark::clear_all_count_data() {
3428   assert(SafepointSynchronize::is_at_safepoint() ||
3429          !Universe::is_fully_initialized(), "must be");
3430 
3431   size_t max_regions = _g1h->max_regions();
3432   
3433   assert(_max_task_num != 0, "uninitialized");
3434   assert(_count_card_bitmaps != NULL, "uninitialized");
3435   assert(_count_marked_bytes != NULL, "uninitialized");
3436 
3437   for (int i = 0; (size_t) i < _max_task_num; i += 1) {
3438     BitMap* task_card_bm = count_card_bitmap_for(i);
3439     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3440 
3441     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3442     assert(marked_bytes_array != NULL, "uninitialized");
3443 
3444     for (int j = 0; (size_t) j < max_regions; j++) {
3445       marked_bytes_array[j] = 0;
3446     }
3447     task_card_bm->clear();
3448   }
3449 }
3450 
3451 void ConcurrentMark::clear_count_data_for_heap_region(HeapRegion* hr) {
3452   // Clears the count data for the given region from _all_ of
3453   // the per-task counting data structures.
3454 
3455   MemRegion used_region = hr->used_region();
3456   HeapWord* start = used_region.start();
3457   HeapWord* last = used_region.last();
3458   size_t hr_index = hr->hrs_index();
3459 
3460   intptr_t start_card_num =
3461     intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
3462   intptr_t last_card_num =
3463     intptr_t(uintptr_t(last) >> CardTableModRefBS::card_shift);
3464   
3465   BitMap::idx_t start_idx = start_card_num - heap_bottom_card_num();
3466   BitMap::idx_t last_idx = last_card_num - heap_bottom_card_num();
3467 
3468   size_t used_region_bytes = used_region.byte_size();
3469   size_t marked_bytes = 0;
3470 
3471   for (int i=0; (size_t)i < _max_task_num; i += 1) {
3472     BitMap* task_card_bm = count_card_bitmap_for(i);
3473     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3474 
3475     marked_bytes += marked_bytes_array[hr_index];
3476     // clear the amount of marked bytes in the task array for this
3477     // region
3478     marked_bytes_array[hr_index] = 0;
3479     
3480     // Clear the inclusive range [start_idx, last_idx] from the
3481     // card bitmap. The clear_range routine is exclusive so we
3482     // need to also explicitly clear the bit at last_idx.
3483     // Passing last_idx+1 to the clear_range would work in
3484     // most cases but could trip an OOB assertion.
3485 
3486     if ((last_idx - start_idx) > 0) {
3487       task_card_bm->clear_range(start_idx, last_idx);
3488     }
3489     task_card_bm->clear_bit(last_idx);
3490   }
3491 }
3492 
3493 void ConcurrentMark::print_stats() {
3494   if (verbose_stats()) {
3495     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3496     for (size_t i = 0; i < _active_tasks; ++i) {
3497       _tasks[i]->print_stats();
3498       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3499     }
3500   }
3501 }
3502 
3503 // Closures used by ConcurrentMark::complete_marking_in_collection_set().
3504 
3505 class CSetMarkOopClosure: public OopClosure {
3506   friend class CSetMarkBitMapClosure;
3507 
3508   G1CollectedHeap* _g1h;
3509   ConcurrentMark*  _cm;
3510   oop*             _ms;
3511   jint*            _array_ind_stack;
3512   int              _ms_size;
3513   int              _ms_ind;
3514   int              _array_increment;
3515   int              _worker_i;
3516 
3517   bool push(oop obj, int arr_ind = 0) {
3518     if (_ms_ind == _ms_size) {
3519       gclog_or_tty->print_cr("Mark stack is full.");
3520       return false;
3521     }
3522     _ms[_ms_ind] = obj;
3523     if (obj->is_objArray()) {
3524       _array_ind_stack[_ms_ind] = arr_ind;
3525     }
3526     _ms_ind++;
3527     return true;
3528   }
3529 
3530   oop pop() {
3531     if (_ms_ind == 0) {
3532       return NULL;
3533     } else {
3534       _ms_ind--;
3535       return _ms[_ms_ind];
3536     }
3537   }
3538 
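       // Drains the local mark stack. Object arrays are scanned in chunks
       // of _array_increment elements: if more elements remain, the array
       // is re-pushed with the index of the next chunk before the current
       // chunk's elements are visited, so a large array is not scanned in
       // one go.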
3539   template <class T> bool drain() {
3540     while (_ms_ind > 0) {
3541       oop obj = pop();
3542       assert(obj != NULL, "Since index was non-zero.");
3543       if (obj->is_objArray()) {
3544         jint arr_ind = _array_ind_stack[_ms_ind];
3545         objArrayOop aobj = objArrayOop(obj);
3546         jint len = aobj->length();
3547         jint next_arr_ind = arr_ind + _array_increment;
3548         if (next_arr_ind < len) {
3549           push(obj, next_arr_ind);
3550         }
3551         // Now process this portion of this one.
3552         int lim = MIN2(next_arr_ind, len);
3553         for (int j = arr_ind; j < lim; j++) {
3554           do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
3555         }
3556       } else {
3557         obj->oop_iterate(this);
3558       }
3559       if (abort()) return false;
3560     }
3561     return true;
3562   }
3563 
3564 public:
3565   CSetMarkOopClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
3566     _g1h(G1CollectedHeap::heap()),
3567     _cm(cm),
3568     _ms_size(ms_size), _ms_ind(0),
3569     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
3570     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
3571     _array_increment(MAX2(ms_size/8, 16)),
3572     _worker_i(worker_i) { }
3573 
3574   ~CSetMarkOopClosure() {
3575     FREE_C_HEAP_ARRAY(oop, _ms);
3576     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
3577   }
3578 
3579   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
3580   virtual void do_oop(      oop* p) { do_oop_work(p); }
3581 
3582   template <class T> void do_oop_work(T* p) {
3583     T heap_oop = oopDesc::load_heap_oop(p);
3584     if (oopDesc::is_null(heap_oop)) return;
3585     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
3586     if (obj->is_forwarded()) {
3587       // If the object has already been forwarded, we have to make sure
3588       // that it's marked.  So follow the forwarding pointer.  Note that
3589       // this does the right thing for self-forwarding pointers in the
3590       // evacuation failure case.
3591       obj = obj->forwardee();
3592     }
3593     HeapRegion* hr = _g1h->heap_region_containing(obj);
3594     if (hr != NULL) {
3595       if (hr->in_collection_set()) {
3596         if (_g1h->is_obj_ill(obj)) {
3597           if (_cm->par_mark_and_count(obj, hr, _worker_i)) {
3598             if (!push(obj)) {
3599               gclog_or_tty->print_cr("Setting abort in CSetMarkOopClosure because push failed.");
3600               set_abort();
3601             }
3602           }
3603         }
3604       } else {
3605         // Outside the collection set; we need to gray it
3606         _cm->deal_with_reference(obj, _worker_i);
3607       }
3608     }
3609   }
3610 };
3611 
3612 class CSetMarkBitMapClosure: public BitMapClosure {
3613   G1CollectedHeap*   _g1h;
3614   CMBitMap*          _bitMap;
3615   ConcurrentMark*    _cm;
3616   CSetMarkOopClosure _oop_cl;
3617   int                _worker_i;
3618 
3619 public:
3620   CSetMarkBitMapClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
3621     _g1h(G1CollectedHeap::heap()),
3622     _bitMap(cm->nextMarkBitMap()),
3623     _oop_cl(cm, ms_size, worker_i),
3624     _worker_i(worker_i) { }
3625 
3626   bool do_bit(size_t offset) {
3627     // convert offset into a HeapWord*
3628     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
3629     assert(_bitMap->startWord() <= addr && addr < _bitMap->endWord(),
3630            "address out of range");
3631     assert(_bitMap->isMarked(addr), "tautology");
3632     oop obj = oop(addr);
3633     if (!obj->is_forwarded()) {
3634       if (!_oop_cl.push(obj)) return false;
3635       if (UseCompressedOops) {
3636         if (!_oop_cl.drain<narrowOop>()) return false;
3637       } else {
3638         if (!_oop_cl.drain<oop>()) return false;
3639       }
3640     }
3641     // Otherwise...
3642     return true;
3643   }
3644 };
3645 
3646 class CompleteMarkingInCSetHRClosure: public HeapRegionClosure {
3647   CMBitMap*             _bm;
3648   CSetMarkBitMapClosure _bit_cl;
3649   int                   _worker_i;
3650 
3651   enum SomePrivateConstants {
3652     MSSize = 1000
3653   };
3654 
3655 public:
3656   CompleteMarkingInCSetHRClosure(ConcurrentMark* cm, int worker_i) :
3657     _bm(cm->nextMarkBitMap()),
3658     _bit_cl(cm, MSSize, worker_i),
3659     _worker_i(worker_i) { }
3660 
3661   bool doHeapRegion(HeapRegion* hr) {
3662     if (hr->claimHeapRegion(HeapRegion::CompleteMarkCSetClaimValue)) {
3663       // The current worker has successfully claimed the region.
3664       if (!hr->evacuation_failed()) {
3665         MemRegion mr = MemRegion(hr->bottom(), hr->next_top_at_mark_start());
3666         if (!mr.is_empty()) {
3667           bool done = false;
3668           while (!done) {
3669             done = _bm->iterate(&_bit_cl, mr);
3670           }
3671         }
3672       }
3673     }
3674     return false;
3675   }
3676 };
3677 
3678 class SetClaimValuesInCSetHRClosure: public HeapRegionClosure {
3679   jint _claim_value;
3680 
3681 public:
3682   SetClaimValuesInCSetHRClosure(jint claim_value) :
3683     _claim_value(claim_value) { }
3684 
3685   bool doHeapRegion(HeapRegion* hr) {
3686     hr->set_claim_value(_claim_value);
3687     return false;
3688   }
3689 };
3690 
3691 class G1ParCompleteMarkInCSetTask: public AbstractGangTask {
3692 protected:
3693   G1CollectedHeap* _g1h;
3694   ConcurrentMark*  _cm;
3695 
3696 public:
3697   G1ParCompleteMarkInCSetTask(G1CollectedHeap* g1h,
3698                               ConcurrentMark* cm) :
3699     AbstractGangTask("Complete Mark in CSet"),
3700     _g1h(g1h), _cm(cm) { }
3701 
3702   void work(int worker_i) {
3703     CompleteMarkingInCSetHRClosure cmplt(_cm, worker_i);
3704     HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_i);
3705     _g1h->collection_set_iterate_from(hr, &cmplt);
3706   }
3707 };
3708 
3709 void ConcurrentMark::complete_marking_in_collection_set() {
3710   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
3711 
3712   if (!g1h->mark_in_progress()) {
3713     g1h->g1_policy()->record_mark_closure_time(0.0);
3714     return;
3715   }
3716 
3717   double start = os::elapsedTime();
3718   G1ParCompleteMarkInCSetTask complete_mark_task(g1h, this);
3719 
3720   assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
3721 
3722   if (G1CollectedHeap::use_parallel_gc_threads()) {
3723     int n_workers = g1h->workers()->active_workers();
3724     g1h->set_par_threads(n_workers);
3725     g1h->workers()->run_task(&complete_mark_task);
3726     g1h->set_par_threads(0);
3727   } else {
3728     complete_mark_task.work(0);
3729   }
3730 
3731   assert(g1h->check_cset_heap_region_claim_values(HeapRegion::CompleteMarkCSetClaimValue), "sanity");
3732 
3733   // Now reset the claim values in the regions in the collection set.
3734   SetClaimValuesInCSetHRClosure set_cv_cl(HeapRegion::InitialClaimValue);
3735   g1h->collection_set_iterate(&set_cv_cl);
3736 
3737   assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
3738 
3739   double end_time = os::elapsedTime();
3740   double elapsed_time_ms = (end_time - start) * 1000.0;
3741   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
3742 }
3743 
3744 // The next two methods deal with the following optimisation. Some
3745 // objects are gray by being marked and located above the finger. If
3746 // they are copied, during an evacuation pause, below the finger then
3747 // they need to be pushed on the stack. The observation is that, if
3748 // there are no regions in the collection set located above the
3749 // finger, then the above cannot happen, hence we do not need to
3750 // explicitly gray any objects when copying them to below the
3751 // finger. The global stack will be scanned to ensure that, if it
3752 // points to objects being copied, it will update their
3753 // location. There is a tricky situation with the gray objects in
3754 // the region stack that are being copied, however. See the comment in
3755 // newCSet().
3756 
3757 void ConcurrentMark::newCSet() {
3758   if (!concurrent_marking_in_progress()) {
3759     // nothing to do if marking is not in progress
3760     return;
3761   }
3762 
3763   // find what the lowest finger is among the global and local fingers
3764   _min_finger = _finger;
3765   for (int i = 0; i < (int)_max_task_num; ++i) {
3766     CMTask* task = _tasks[i];
3767     HeapWord* task_finger = task->finger();
3768     if (task_finger != NULL && task_finger < _min_finger) {
3769       _min_finger = task_finger;
3770     }
3771   }
3772 
3773   _should_gray_objects = false;
3774 
3775   // This fixes a very subtle and frustrating bug. It might be the case
3776   // that, during an evacuation pause, heap regions that contain
3777   // objects that are gray (by being in regions contained in the
3778   // region stack) are included in the collection set. Since such gray
3779   // objects will be moved, and because it's not easy to redirect
3780   // region stack entries to point to a new location (because objects
3781   // in one region might be scattered to multiple regions after they
3782   // are copied), one option is to ensure that all marked objects
3783   // copied during a pause are pushed on the stack. Notice, however,
3784   // that this problem can only happen when the region stack is not
3785   // empty during an evacuation pause. So, we make the fix a bit less
3786   // conservative and ensure that regions are pushed on the stack,
3787   // irrespective of whether all collection set regions are below the
3788   // finger, if the region stack is not empty. This is expected to be
3789   // a rare case, so I don't think it's necessary to be smarter about it.
3790   if (!region_stack_empty() || has_aborted_regions()) {
3791     _should_gray_objects = true;
3792   }
3793 }
3794 
3795 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
3796   if (!concurrent_marking_in_progress()) return;
3797 
3798   HeapWord* region_end = hr->end();
3799   if (region_end > _min_finger) {
3800     _should_gray_objects = true;
3801   }
3802 }
3803 
3804 // Resets the region fields of active CMTasks whose values point
3805 // into the collection set.
3806 void ConcurrentMark::reset_active_task_region_fields_in_cset() {
3807   assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
3808   assert(parallel_marking_threads() <= _max_task_num, "sanity");
3809 
3810   for (int i = 0; i < (int)parallel_marking_threads(); i += 1) {
3811     CMTask* task = _tasks[i];
3812     HeapWord* task_finger = task->finger();
3813     if (task_finger != NULL) {
3814       assert(_g1h->is_in_g1_reserved(task_finger), "not in heap");
3815       HeapRegion* finger_region = _g1h->heap_region_containing(task_finger);
3816       if (finger_region->in_collection_set()) {
3817         // The task's current region is in the collection set.
3818         // This region will be evacuated in the current GC and
3819         // the region fields in the task will be stale.
3820         task->giveup_current_region();
3821       }
3822     }
3823   }
3824 }
3825 
3826 // abandon current marking iteration due to a Full GC
3827 void ConcurrentMark::abort() {
3828   // Clear all marks to force marking thread to do nothing
3829   _nextMarkBitMap->clearAll();
3830   // Empty mark stack
3831   clear_marking_state();
3832   for (int i = 0; i < (int)_max_task_num; ++i) {
3833     _tasks[i]->clear_region_fields();
3834   }
3835   _has_aborted = true;
3836 
3837   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3838   satb_mq_set.abandon_partial_marking();
3839   // This can be called either during or outside marking, we'll read
3840   // the expected_active value from the SATB queue set.
3841   satb_mq_set.set_active_all_threads(
3842                                  false, /* new active value */
3843                                  satb_mq_set.is_active() /* expected_active */);
3844 }
3845 
3846 static void print_ms_time_info(const char* prefix, const char* name,
3847                                NumberSeq& ns) {
3848   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3849                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3850   if (ns.num() > 0) {
3851     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3852                            prefix, ns.sd(), ns.maximum());
3853   }
3854 }
3855 
3856 void ConcurrentMark::print_summary_info() {
3857   gclog_or_tty->print_cr(" Concurrent marking:");
3858   print_ms_time_info("  ", "init marks", _init_times);
3859   print_ms_time_info("  ", "remarks", _remark_times);
3860   {
3861     print_ms_time_info("     ", "final marks", _remark_mark_times);
3862     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3863 
3864   }
3865   print_ms_time_info("  ", "cleanups", _cleanup_times);
3866   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3867                          _total_counting_time,
3868                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3869                           (double)_cleanup_times.num()
3870                          : 0.0));
3871   if (G1ScrubRemSets) {
3872     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3873                            _total_rs_scrub_time,
3874                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3875                             (double)_cleanup_times.num()
3876                            : 0.0));
3877   }
3878   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3879                          (_init_times.sum() + _remark_times.sum() +
3880                           _cleanup_times.sum())/1000.0);
3881   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3882                 "(%8.2f s marking).",
3883                 cmThread()->vtime_accum(),
3884                 cmThread()->vtime_mark_accum());
3885 }
3886 
3887 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3888   _parallel_workers->print_worker_threads_on(st);
3889 }
3890 
3891 // Closures
3892 // XXX: there seems to be a lot of code duplication here;
3893 // should refactor and consolidate the shared code.
3894 
3898 // We take a break if someone is trying to stop the world.
3899 bool ConcurrentMark::do_yield_check(int worker_i) {
3900   if (should_yield()) {
3901     if (worker_i == 0) {
3902       _g1h->g1_policy()->record_concurrent_pause();
3903     }
3904     cmThread()->yield();
3905     if (worker_i == 0) {
3906       _g1h->g1_policy()->record_concurrent_pause_end();
3907     }
3908     return true;
3909   } else {
3910     return false;
3911   }
3912 }
3913 
3914 bool ConcurrentMark::should_yield() {
3915   return cmThread()->should_yield();
3916 }
3917 
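     // Returns whether the card spanning p is set in the global card
     // bitmap; the bitmap is indexed by card offset from the start of the
     // reserved region.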
3918 bool ConcurrentMark::containing_card_is_marked(void* p) {
3919   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3920   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3921 }
3922 
3923 bool ConcurrentMark::containing_cards_are_marked(void* start,
3924                                                  void* last) {
3925   return containing_card_is_marked(start) &&
3926          containing_card_is_marked(last);
3927 }
3928 
3929 #ifndef PRODUCT
3930 // for debugging purposes
3931 void ConcurrentMark::print_finger() {
3932   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3933                          _heap_start, _heap_end, _finger);
3934   for (int i = 0; i < (int) _max_task_num; ++i) {
3935     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
3936   }
3937   gclog_or_tty->print_cr("");
3938 }
3939 #endif
3940 
3941 void CMTask::scan_object(oop obj) {
3942   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3943 
3944   if (_cm->verbose_high()) {
3945     gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
3946                            _task_id, (void*) obj);
3947   }
3948 
3949   size_t obj_size = obj->size();
3950   _words_scanned += obj_size;
3951 
3952   obj->oop_iterate(_cm_oop_closure);
3953   statsOnly( ++_objs_scanned );
3954   check_limits();
3955 }
3956 
3957 // Closure for iteration over bitmaps
3958 class CMBitMapClosure : public BitMapClosure {
3959 private:
3960   // the bitmap that is being iterated over
3961   CMBitMap*                   _nextMarkBitMap;
3962   ConcurrentMark*             _cm;
3963   CMTask*                     _task;
3964   // true if we're scanning a heap region claimed by the task (so that
3965   // we move the finger along), false if we're not, i.e. currently when
3966   // scanning a heap region popped from the region stack (so that we
3967   // do not move the task finger along; it'd be a mistake if we did so).
3968   bool                        _scanning_heap_region;
3969 
3970 public:
3971   CMBitMapClosure(CMTask *task,
3972                   ConcurrentMark* cm,
3973                   CMBitMap* nextMarkBitMap)
3974     :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3975 
3976   void set_scanning_heap_region(bool scanning_heap_region) {
3977     _scanning_heap_region = scanning_heap_region;
3978   }
3979 
3980   bool do_bit(size_t offset) {
3981     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3982     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3983     assert( addr < _cm->finger(), "invariant");
3984 
3985     if (_scanning_heap_region) {
3986       statsOnly( _task->increase_objs_found_on_bitmap() );
3987       assert(addr >= _task->finger(), "invariant");
3988       // We move that task's local finger along.
3989       _task->move_finger_to(addr);
3990     } else {
3991       // We move the task's region finger along.
3992       _task->move_region_finger_to(addr);
3993     }
3994 
3995     _task->scan_object(oop(addr));
3996     // we only partially drain the local queue and global stack
3997     _task->drain_local_queue(true);
3998     _task->drain_global_stack(true);
3999 
4000     // if the has_aborted flag has been raised, we need to bail out of
4001     // the iteration
4002     return !_task->has_aborted();
4003   }
4004 };
4005 
4006 // Closure for iterating over objects, currently only used for
4007 // processing SATB buffers.
4008 class CMObjectClosure : public ObjectClosure {
4009 private:
4010   CMTask* _task;
4011 
4012 public:
4013   void do_object(oop obj) {
4014     _task->deal_with_reference(obj);
4015   }
4016 
4017   CMObjectClosure(CMTask* task) : _task(task) { }
4018 };
4019 
4020 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
4021                                ConcurrentMark* cm,
4022                                CMTask* task)
4023   : _g1h(g1h), _cm(cm), _task(task) {
4024   assert(_ref_processor == NULL, "should be initialized to NULL");
4025 
4026   if (G1UseConcMarkReferenceProcessing) {
4027     _ref_processor = g1h->ref_processor_cm();
4028     assert(_ref_processor != NULL, "should not be NULL");
4029   }
4030 }
4031 
4032 void CMTask::setup_for_region(HeapRegion* hr) {
4033   // Separated the asserts so that we know which one fires.
4034   assert(hr != NULL,
4035         "claim_region() should have filtered out continues humongous regions");
4036   assert(!hr->continuesHumongous(),
4037         "claim_region() should have filtered out continues humongous regions");
4038 
4039   if (_cm->verbose_low()) {
4040     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
4041                            _task_id, hr);
4042   }
4043 
4044   _curr_region  = hr;
4045   _finger       = hr->bottom();
4046   update_region_limit();
4047 }
4048 
4049 void CMTask::update_region_limit() {
4050   HeapRegion* hr            = _curr_region;
4051   HeapWord* bottom          = hr->bottom();
4052   HeapWord* limit           = hr->next_top_at_mark_start();
4053 
4054   if (limit == bottom) {
4055     if (_cm->verbose_low()) {
4056       gclog_or_tty->print_cr("[%d] found an empty region "
4057                              "["PTR_FORMAT", "PTR_FORMAT")",
4058                              _task_id, bottom, limit);
4059     }
4060     // The region was collected underneath our feet.
4061     // We set the finger to bottom to ensure that the bitmap
4062     // iteration that will follow this will not do anything.
4063     // (this is not a condition that holds when we set the region up,
4064     // as the region is not supposed to be empty in the first place)
4065     _finger = bottom;
4066   } else if (limit >= _region_limit) {
4067     assert(limit >= _finger, "peace of mind");
4068   } else {
4069     assert(limit < _region_limit, "only way to get here");
4070     // This can happen under some pretty unusual circumstances.  An
4071     // evacuation pause empties the region underneath our feet (NTAMS
4072     // at bottom). We then do some allocation in the region (NTAMS
4073     // stays at bottom), followed by the region being used as a GC
4074     // alloc region (NTAMS will move to top() and the objects
4075     // originally below it will be grayed). All objects now marked in
4076     // the region are explicitly grayed, if below the global finger,
4077     // and we do not need in fact to scan anything else. So, we simply
4078     // set _finger to be limit to ensure that the bitmap iteration
4079     // doesn't do anything.
4080     _finger = limit;
4081   }
4082 
4083   _region_limit = limit;
4084 }
4085 
4086 void CMTask::giveup_current_region() {
4087   assert(_curr_region != NULL, "invariant");
4088   if (_cm->verbose_low()) {
4089     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
4090                            _task_id, _curr_region);
4091   }
4092   clear_region_fields();
4093 }
4094 
4095 void CMTask::clear_region_fields() {
4096   // Values for these three fields that indicate that we're not
4097   // holding on to a region.
4098   _curr_region   = NULL;
4099   _finger        = NULL;
4100   _region_limit  = NULL;
4101 
4102   _region_finger = NULL;
4103 }
4104 
4105 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
4106   if (cm_oop_closure == NULL) {
4107     assert(_cm_oop_closure != NULL, "invariant");
4108   } else {
4109     assert(_cm_oop_closure == NULL, "invariant");
4110   }
4111   _cm_oop_closure = cm_oop_closure;
4112 }
4113 
4114 void CMTask::reset(CMBitMap* nextMarkBitMap) {
4115   guarantee(nextMarkBitMap != NULL, "invariant");
4116 
4117   if (_cm->verbose_low()) {
4118     gclog_or_tty->print_cr("[%d] resetting", _task_id);
4119   }
4120 
4121   _nextMarkBitMap                = nextMarkBitMap;
4122   clear_region_fields();
4123   assert(_aborted_region.is_empty(), "should have been cleared");
4124 
4125   _calls                         = 0;
4126   _elapsed_time_ms               = 0.0;
4127   _termination_time_ms           = 0.0;
4128   _termination_start_time_ms     = 0.0;
4129 
4130 #if _MARKING_STATS_
4131   _local_pushes                  = 0;
4132   _local_pops                    = 0;
4133   _local_max_size                = 0;
4134   _objs_scanned                  = 0;
4135   _global_pushes                 = 0;
4136   _global_pops                   = 0;
4137   _global_max_size               = 0;
4138   _global_transfers_to           = 0;
4139   _global_transfers_from         = 0;
4140   _region_stack_pops             = 0;
4141   _regions_claimed               = 0;
4142   _objs_found_on_bitmap          = 0;
4143   _satb_buffers_processed        = 0;
4144   _steal_attempts                = 0;
4145   _steals                        = 0;
4146   _aborted                       = 0;
4147   _aborted_overflow              = 0;
4148   _aborted_cm_aborted            = 0;
4149   _aborted_yield                 = 0;
4150   _aborted_timed_out             = 0;
4151   _aborted_satb                  = 0;
4152   _aborted_termination           = 0;
4153 #endif // _MARKING_STATS_
4154 }
4155 
4156 bool CMTask::should_exit_termination() {
4157   regular_clock_call();
4158   // This is called when we are in the termination protocol. We should
4159   // quit if, for some reason, this task wants to abort or the global
4160   // stack is not empty (this means that we can get work from it).
4161   return !_cm->mark_stack_empty() || has_aborted();
4162 }
4163 
4164 void CMTask::reached_limit() {
4165   assert(_words_scanned >= _words_scanned_limit ||
4166          _refs_reached >= _refs_reached_limit ,
4167          "shouldn't have been called otherwise");
4168   regular_clock_call();
4169 }
4170 
4171 void CMTask::regular_clock_call() {
4172   if (has_aborted()) return;
4173 
4174   // First, we need to recalculate the words scanned and refs reached
4175   // limits for the next clock call.
4176   recalculate_limits();
4177 
4178   // During the regular clock call we do the following
4179 
4180   // (1) If an overflow has been flagged, then we abort.
4181   if (_cm->has_overflown()) {
4182     set_has_aborted();
4183     return;
4184   }
4185 
4186   // If we are not concurrent (i.e. we're doing remark) we don't need
4187   // to check anything else. The other steps are only needed during
4188   // the concurrent marking phase.
4189   if (!concurrent()) return;
4190 
4191   // (2) If marking has been aborted for Full GC, then we also abort.
4192   if (_cm->has_aborted()) {
4193     set_has_aborted();
4194     statsOnly( ++_aborted_cm_aborted );
4195     return;
4196   }
4197 
4198   double curr_time_ms = os::elapsedVTime() * 1000.0;
4199 
4200   // (3) If marking stats are enabled, then we update the step history.
4201 #if _MARKING_STATS_
4202   if (_words_scanned >= _words_scanned_limit) {
4203     ++_clock_due_to_scanning;
4204   }
4205   if (_refs_reached >= _refs_reached_limit) {
4206     ++_clock_due_to_marking;
4207   }
4208 
4209   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
4210   _interval_start_time_ms = curr_time_ms;
4211   _all_clock_intervals_ms.add(last_interval_ms);
4212 
4213   if (_cm->verbose_medium()) {
4214       gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
4215                         "scanned = %d%s, refs reached = %d%s",
4216                         _task_id, last_interval_ms,
4217                         _words_scanned,
4218                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
4219                         _refs_reached,
4220                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
4221   }
4222 #endif // _MARKING_STATS_
4223 
4224   // (4) We check whether we should yield. If we have to, then we abort.
4225   if (_cm->should_yield()) {
4226     // We should yield. To do this we abort the task. The caller is
4227     // responsible for yielding.
4228     set_has_aborted();
4229     statsOnly( ++_aborted_yield );
4230     return;
4231   }
4232 
4233   // (5) We check whether we've reached our time quota. If we have,
4234   // then we abort.
4235   double elapsed_time_ms = curr_time_ms - _start_time_ms;
4236   if (elapsed_time_ms > _time_target_ms) {
4237     set_has_aborted();
4238     _has_timed_out = true;
4239     statsOnly( ++_aborted_timed_out );
4240     return;
4241   }
4242 
4243   // (6) Finally, we check whether there are enough completed SATB
4244   // buffers available for processing. If there are, we abort.
4245   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
4246   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
4247     if (_cm->verbose_low()) {
4248       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
4249                              _task_id);
4250     }
4251     // we do need to process SATB buffers, we'll abort and restart
4252     // the marking task to do so
4253     set_has_aborted();
4254     statsOnly( ++_aborted_satb );
4255     return;
4256   }
4257 }
4258 
4259 void CMTask::recalculate_limits() {
4260   _real_words_scanned_limit = _words_scanned + words_scanned_period;
4261   _words_scanned_limit      = _real_words_scanned_limit;
4262 
4263   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
4264   _refs_reached_limit       = _real_refs_reached_limit;
4265 }
4266 
4267 void CMTask::decrease_limits() {
4268   // This is called when we believe that we're going to do an infrequent
4269   // operation which will increase the per byte scanned cost (i.e. move
4270   // entries to/from the global stack). It basically tries to decrease the
4271   // scanning limit so that the clock is called earlier.
4272 
4273   if (_cm->verbose_medium()) {
4274     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
4275   }
4276 
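       // Leave roughly a quarter of the normal period before the next
       // clock call (the limits were set a full period ahead by
       // recalculate_limits()).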
4277   _words_scanned_limit = _real_words_scanned_limit -
4278     3 * words_scanned_period / 4;
4279   _refs_reached_limit  = _real_refs_reached_limit -
4280     3 * refs_reached_period / 4;
4281 }
4282 
4283 void CMTask::move_entries_to_global_stack() {
4284   // local array where we'll store the entries that will be popped
4285   // from the local queue
4286   oop buffer[global_stack_transfer_size];
4287 
4288   int n = 0;
4289   oop obj;
4290   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
4291     buffer[n] = obj;
4292     ++n;
4293   }
4294 
4295   if (n > 0) {
4296     // we popped at least one entry from the local queue
4297 
4298     statsOnly( ++_global_transfers_to; _local_pops += n );
4299 
4300     if (!_cm->mark_stack_push(buffer, n)) {
4301       if (_cm->verbose_low()) {
4302         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
4303                                _task_id);
4304       }
4305       set_has_aborted();
4306     } else {
4307       // the transfer was successful
4308 
4309       if (_cm->verbose_medium()) {
4310         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
4311                                _task_id, n);
4312       }
4313       statsOnly( int tmp_size = _cm->mark_stack_size();
4314                  if (tmp_size > _global_max_size) {
4315                    _global_max_size = tmp_size;
4316                  }
4317                  _global_pushes += n );
4318     }
4319   }
4320 
4321   // this operation was quite expensive, so decrease the limits
4322   decrease_limits();
4323 }
4324 
4325 void CMTask::get_entries_from_global_stack() {
4326   // local array where we'll store the entries that will be popped
4327   // from the global stack.
4328   oop buffer[global_stack_transfer_size];
4329   int n;
4330   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
4331   assert(n <= global_stack_transfer_size,
4332          "we should not pop more than the given limit");
4333   if (n > 0) {
4334     // yes, we did actually pop at least one entry
4335 
4336     statsOnly( ++_global_transfers_from; _global_pops += n );
4337     if (_cm->verbose_medium()) {
4338       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
4339                              _task_id, n);
4340     }
4341     for (int i = 0; i < n; ++i) {
4342       bool success = _task_queue->push(buffer[i]);
4343       // We only call this when the local queue is empty or under a
4344       // given target limit. So, we do not expect this push to fail.
4345       assert(success, "invariant");
4346     }
4347 
4348     statsOnly( int tmp_size = _task_queue->size();
4349                if (tmp_size > _local_max_size) {
4350                  _local_max_size = tmp_size;
4351                }
4352                _local_pushes += n );
4353   }
4354 
4355   // this operation was quite expensive, so decrease the limits
4356   decrease_limits();
4357 }
4358 
4359 void CMTask::drain_local_queue(bool partially) {
4360   if (has_aborted()) return;
4361 
4362   // Decide what the target size is, depending whether we're going to
4363   // drain it partially (so that other tasks can steal if they run out
4364   // of things to do) or totally (at the very end).
4365   size_t target_size;
4366   if (partially) {
4367     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
4368   } else {
4369     target_size = 0;
4370   }
4371 
4372   if (_task_queue->size() > target_size) {
4373     if (_cm->verbose_high()) {
4374       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
4375                              _task_id, target_size);
4376     }
4377 
4378     oop obj;
4379     bool ret = _task_queue->pop_local(obj);
4380     while (ret) {
4381       statsOnly( ++_local_pops );
4382 
4383       if (_cm->verbose_high()) {
4384         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
4385                                (void*) obj);
4386       }
4387 
4388       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
4389       assert(!_g1h->is_on_master_free_list(
4390                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
4391 
4392       scan_object(obj);
4393 
4394       if (_task_queue->size() <= target_size || has_aborted()) {
4395         ret = false;
4396       } else {
4397         ret = _task_queue->pop_local(obj);
4398       }
4399     }
4400 
4401     if (_cm->verbose_high()) {
4402       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
4403                              _task_id, _task_queue->size());
4404     }
4405   }
4406 }
4407 
4408 void CMTask::drain_global_stack(bool partially) {
4409   if (has_aborted()) return;
4410 
4411   // We have a policy to drain the local queue before we attempt to
4412   // drain the global stack.
4413   assert(partially || _task_queue->size() == 0, "invariant");
4414 
4415   // Decide what the target size is, depending whether we're going to
4416   // drain it partially (so that other tasks can steal if they run out
4417   // of things to do) or totally (at the very end).  Notice that,
4418   // because we move entries from the global stack in chunks or
4419   // because another task might be doing the same, we might in fact
4420   // drop below the target. But, this is not a problem.
4421   size_t target_size;
4422   if (partially) {
4423     target_size = _cm->partial_mark_stack_size_target();
4424   } else {
4425     target_size = 0;
4426   }
4427 
4428   if (_cm->mark_stack_size() > target_size) {
4429     if (_cm->verbose_low()) {
4430       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
4431                              _task_id, target_size);
4432     }
4433 
4434     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
4435       get_entries_from_global_stack();
4436       drain_local_queue(partially);
4437     }
4438 
4439     if (_cm->verbose_low()) {
4440       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
4441                              _task_id, _cm->mark_stack_size());
4442     }
4443   }
4444 }
4445 
4446 // The SATB queue set has several assumptions about whether to call the
4447 // par or non-par versions of its methods, which is why some of the code
4448 // is replicated. We should really get rid of the single-threaded version
4449 // of the code to simplify things.
4450 void CMTask::drain_satb_buffers() {
4451   if (has_aborted()) return;
4452 
4453   // We set this so that the regular clock knows that we're in the
4454   // middle of draining buffers and doesn't set the abort flag when it
4455   // notices that SATB buffers are available for draining. It'd be
4456   // very counter-productive if it did that. :-)
4457   _draining_satb_buffers = true;
4458 
4459   CMObjectClosure oc(this);
4460   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
4461   if (G1CollectedHeap::use_parallel_gc_threads()) {
4462     satb_mq_set.set_par_closure(_task_id, &oc);
4463   } else {
4464     satb_mq_set.set_closure(&oc);
4465   }
4466 
4467   // This keeps claiming and applying the closure to completed buffers
4468   // until we run out of buffers or we need to abort.
4469   if (G1CollectedHeap::use_parallel_gc_threads()) {
4470     while (!has_aborted() &&
4471            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
4472       if (_cm->verbose_medium()) {
4473         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
4474       }
4475       statsOnly( ++_satb_buffers_processed );
4476       regular_clock_call();
4477     }
4478   } else {
4479     while (!has_aborted() &&
4480            satb_mq_set.apply_closure_to_completed_buffer()) {
4481       if (_cm->verbose_medium()) {
4482         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
4483       }
4484       statsOnly( ++_satb_buffers_processed );
4485       regular_clock_call();
4486     }
4487   }
4488 
4489   if (!concurrent() && !has_aborted()) {
4490     // We should only do this during remark.
4491     if (G1CollectedHeap::use_parallel_gc_threads()) {
4492       satb_mq_set.par_iterate_closure_all_threads(_task_id);
4493     } else {
4494       satb_mq_set.iterate_closure_all_threads();
4495     }
4496   }
4497 
4498   _draining_satb_buffers = false;
4499 
4500   assert(has_aborted() ||
4501          concurrent() ||
4502          satb_mq_set.completed_buffers_num() == 0, "invariant");
4503 
4504   if (G1CollectedHeap::use_parallel_gc_threads()) {
4505     satb_mq_set.set_par_closure(_task_id, NULL);
4506   } else {
4507     satb_mq_set.set_closure(NULL);
4508   }
4509 
4510   // again, this was a potentially expensive operation, decrease the
4511   // limits to get the regular clock call early
4512   decrease_limits();
4513 }
4514 
4515 void CMTask::drain_region_stack(BitMapClosure* bc) {
4516   if (has_aborted()) return;
4517 
4518   assert(_region_finger == NULL,
4519          "it should be NULL when we're not scanning a region");
4520 
4521   if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
4522     if (_cm->verbose_low()) {
4523       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
4524                              _task_id, _cm->region_stack_size());
4525     }
4526 
4527     MemRegion mr;
4528 
4529     if (!_aborted_region.is_empty()) {
4530       mr = _aborted_region;
4531       _aborted_region = MemRegion();
4532 
4533       if (_cm->verbose_low()) {
4534         gclog_or_tty->print_cr("[%d] scanning aborted region "
4535                                "[ " PTR_FORMAT ", " PTR_FORMAT " )",
4536                                _task_id, mr.start(), mr.end());
4537       }
4538     } else {
4539       mr = _cm->region_stack_pop_lock_free();
4540       // it returns MemRegion() if the pop fails
4541       statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
4542     }
4543 
4544     while (mr.start() != NULL) {
4545       if (_cm->verbose_medium()) {
4546         gclog_or_tty->print_cr("[%d] we are scanning region "
4547                                "["PTR_FORMAT", "PTR_FORMAT")",
4548                                _task_id, mr.start(), mr.end());
4549       }
4550 
4551       assert(mr.end() <= _cm->finger(),
4552              "otherwise the region shouldn't be on the stack");
4553       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
4554       if (_nextMarkBitMap->iterate(bc, mr)) {
4555         assert(!has_aborted(),
4556                "cannot abort the task without aborting the bitmap iteration");
4557 
4558         // We finished iterating over the region without aborting.
4559         regular_clock_call();
4560         if (has_aborted()) {
4561           mr = MemRegion();
4562         } else {
4563           mr = _cm->region_stack_pop_lock_free();
4564           // it returns MemRegion() if the pop fails
4565           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
4566         }
4567       } else {
4568         assert(has_aborted(), "currently the only way to do so");
4569 
4570         // The only way to abort the bitmap iteration is to return
4571         // false from the do_bit() method. However, inside the
4572         // do_bit() method we move the _region_finger to point to the
4573         // object currently being looked at. So, if we bail out, we
4574         // have definitely set _region_finger to something non-null.
4575         assert(_region_finger != NULL, "invariant");
4576 
4577         // Make sure that any previously aborted region has been
4578         // cleared.
4579         assert(_aborted_region.is_empty(), "aborted region not cleared");
4580 
4581         // The iteration was actually aborted. So now _region_finger
4582         // points to the address of the object we last scanned. If we
4583         // leave it there, when we restart this task, we will rescan
4584         // the object. It is easy to avoid this. We move the finger by
4585         // enough to point to the next possible object header (the
4586         // bitmap knows by how much we need to move it as it knows its
4587         // granularity).
4588         MemRegion newRegion =
4589           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
4590 
4591         if (!newRegion.is_empty()) {
4592           if (_cm->verbose_low()) {
4593             gclog_or_tty->print_cr("[%d] recording unscanned region "
4594                                    "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
4595                                    _task_id,
4596                                    newRegion.start(), newRegion.end());
4597           }
4598           // Now record the part of the region we didn't scan to
4599           // make sure this task scans it later.
4600           _aborted_region = newRegion;
4601         }
4602         // break from while
4603         mr = MemRegion();
4604       }
4605       _region_finger = NULL;
4606     }
4607 
4608     if (_cm->verbose_low()) {
4609       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
4610                              _task_id, _cm->region_stack_size());
4611     }
4612   }
4613 }
4614 
4615 void CMTask::print_stats() {
4616   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
4617                          _task_id, _calls);
4618   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
4619                          _elapsed_time_ms, _termination_time_ms);
4620   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4621                          _step_times_ms.num(), _step_times_ms.avg(),
4622                          _step_times_ms.sd());
4623   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
4624                          _step_times_ms.maximum(), _step_times_ms.sum());
4625 
4626 #if _MARKING_STATS_
4627   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4628                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
4629                          _all_clock_intervals_ms.sd());
4630   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
4631                          _all_clock_intervals_ms.maximum(),
4632                          _all_clock_intervals_ms.sum());
4633   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
4634                          _clock_due_to_scanning, _clock_due_to_marking);
4635   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
4636                          _objs_scanned, _objs_found_on_bitmap);
4637   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
4638                          _local_pushes, _local_pops, _local_max_size);
4639   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
4640                          _global_pushes, _global_pops, _global_max_size);
4641   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
4642                          _global_transfers_to,_global_transfers_from);
4643   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
4644                          _regions_claimed, _region_stack_pops);
4645   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
4646   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
4647                          _steal_attempts, _steals);
4648   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
4649   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
4650                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
4651   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
4652                          _aborted_timed_out, _aborted_satb, _aborted_termination);
4653 #endif // _MARKING_STATS_
4654 }
4655 
4656 /*****************************************************************************
4657 
4658     The do_marking_step(time_target_ms) method is the building block
4659     of the parallel marking framework. It can be called in parallel
4660     with other invocations of do_marking_step() on different tasks
4661     (but only one per task, obviously) and concurrently with the
4662     mutator threads, or during remark, hence it eliminates the need
4663     for two versions of the code. When called during remark, it will
4664     pick up from where the task left off during the concurrent marking
4665     phase. Interestingly, tasks are also claimable during evacuation
4666     pauses, since do_marking_step() ensures that it aborts before
4667     it needs to yield.
4668 
4669     The data structures that it uses to do marking work are the
4670     following:
4671 
4672       (1) Marking Bitmap. If there are gray objects that appear only
4673       on the bitmap (this happens either when dealing with an overflow
4674       or when the initial marking phase has simply marked the roots
4675       and didn't push them on the stack), then tasks claim heap
4676       regions whose bitmap they then scan to find gray objects. A
4677       global finger indicates where the end of the last claimed region
4678       is. A local finger indicates how far into the region a task has
4679       scanned. The two fingers are used to determine how to gray an
4680       object (i.e. whether simply marking it is OK, as it will be
4681       visited by a task in the future, or whether it needs to be also
4682       pushed on a stack).
4683 
4684       (2) Local Queue. The local queue of the task which is accessed
4685       reasonably efficiently by the task. Other tasks can steal from
4686       it when they run out of work. Throughout the marking phase, a
4687       task attempts to keep its local queue short but not totally
4688       empty, so that entries are available for stealing by other
4689       tasks. Only when there is no more work will a task totally
4690       drain its local queue.
4691 
4692       (3) Global Mark Stack. This handles local queue overflow. During
4693       marking, only batches of entries are moved between it and the local
4694       queues, as access to it requires a mutex and more fine-grained
4695       interaction with it might cause contention. If it
4696       overflows, then the marking phase should restart and iterate
4697       over the bitmap to identify gray objects. Throughout the marking
4698       phase, tasks attempt to keep the global mark stack at a small
4699       length but not totally empty, so that entries are available for
4700       popping by other tasks. Only when there is no more work will tasks
4701       totally drain the global mark stack.
4702 
4703       (4) Global Region Stack. Entries on it correspond to areas of
4704       the bitmap that need to be scanned since they contain gray
4705       objects. Pushes on the region stack only happen during
4706       evacuation pauses and typically correspond to areas covered by
4707       GC LABs. If it overflows, then the marking phase should restart
4708       and iterate over the bitmap to identify gray objects. Tasks will
4709       try to totally drain the region stack as soon as possible.
4710 
4711       (5) SATB Buffer Queue. This is where completed SATB buffers are
4712       made available. Buffers are regularly removed from this queue
4713       and scanned for roots, so that the queue doesn't get too
4714       long. During remark, all completed buffers are processed, as
4715       well as the filled in parts of any uncompleted buffers.
4716 
4717     The do_marking_step() method tries to abort when the time target
4718     has been reached. There are a few other cases when the
4719     do_marking_step() method also aborts:
4720 
4721       (1) When the marking phase has been aborted (after a Full GC).
4722 
4723       (2) When a global overflow (either on the global stack or the
4724       region stack) has been triggered. Before the task aborts, it
4725       will actually sync up with the other tasks to ensure that all
4726       the marking data structures (local queues, stacks, fingers etc.)
4727       are re-initialised so that when do_marking_step() completes,
4728       the marking phase can immediately restart.
4729 
4730       (3) When enough completed SATB buffers are available. The
4731       do_marking_step() method only tries to drain SATB buffers right
4732       at the beginning. So, if enough buffers are available, the
4733       marking step aborts and the SATB buffers are processed at
4734       the beginning of the next invocation.
4735 
4736       (4) To yield. When we have to yield, we abort and do the yield
4737       right at the end of do_marking_step(). This saves us a lot of
4738       hassle as, by yielding, we might allow a Full GC. If this
4739       happens, then objects will be compacted underneath our feet, the
4740       heap might shrink, etc. We avoid having to check for all this by
4741       just aborting and doing the yield right at the end.
4742 
4743     From the above it follows that the do_marking_step() method should
4744     be called in a loop (or, otherwise, regularly) until it completes.
4745 
4746     If a marking step completes without its has_aborted() flag being
4747     true, it means it has completed the current marking phase (and
4748     also all other marking tasks have done so and have all synced up).
4749 
4750     A method called regular_clock_call() is invoked "regularly" (in
4751     sub ms intervals) throughout marking. It is this clock method that
4752     checks all the abort conditions which were mentioned above and
4753     decides when the task should abort. A work-based scheme is used to
4754     trigger this clock method: when the number of object words the
4755     marking phase has scanned or the number of references the marking
4756     phase has visited reaches a given limit. Additional invocations of
4757     the clock method have been planted in a few other strategic places
4758     too. The initial reason for the clock method was to avoid calling
4759     vtime too regularly, as it is quite expensive. So, once it was in
4760     place, it was natural to piggy-back all the other conditions on it
4761     too and not constantly check them throughout the code.
4762 
4763  *****************************************************************************/
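
     // Illustrative caller sketch only (hypothetical, not an actual call
     // site): per the note above, do_marking_step() is meant to be invoked
     // repeatedly until it completes without aborting, e.g.
     //
     //   do {
     //     task->do_marking_step(mark_step_duration_ms,
     //                           true /* do_stealing */,
     //                           true /* do_termination */);
     //     // ...yield to a pending safepoint / handle any overflow here...
     //   } while (task->has_aborted());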
4764 
4765 void CMTask::do_marking_step(double time_target_ms,
4766                              bool do_stealing,
4767                              bool do_termination) {
4768   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4769   assert(concurrent() == _cm->concurrent(), "they should be the same");
4770 
4771   assert(concurrent() || _cm->region_stack_empty(),
4772          "the region stack should have been cleared before remark");
4773   assert(concurrent() || !_cm->has_aborted_regions(),
4774          "aborted regions should have been cleared before remark");
4775   assert(_region_finger == NULL,
4776          "this should be non-null only when a region is being scanned");
4777 
4778   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4779   assert(_task_queues != NULL, "invariant");
4780   assert(_task_queue != NULL, "invariant");
4781   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
4782 
4783   assert(!_claimed,
4784          "only one thread should claim this task at any one time");
4785 
4786   // OK, this doesn't safeguard against all possible scenarios, as it is
4787   // possible for two threads to set the _claimed flag at the same
4788   // time. But it is only for debugging purposes anyway and it will
4789   // catch most problems.
4790   _claimed = true;
4791 
4792   _start_time_ms = os::elapsedVTime() * 1000.0;
4793   statsOnly( _interval_start_time_ms = _start_time_ms );
4794 
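       // Reduce the time target by the predicted difference between past
       // targets and actual step durations (recorded in
       // _marking_step_diffs_ms when a step times out), so that on average
       // we finish close to the requested target.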
4795   double diff_prediction_ms =
4796     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4797   _time_target_ms = time_target_ms - diff_prediction_ms;
4798 
4799   // set up the variables that are used in the work-based scheme to
4800   // call the regular clock method
4801   _words_scanned = 0;
4802   _refs_reached  = 0;
4803   recalculate_limits();
4804 
4805   // clear all flags
4806   clear_has_aborted();
4807   _has_timed_out = false;
4808   _draining_satb_buffers = false;
4809 
4810   ++_calls;
4811 
4812   if (_cm->verbose_low()) {
4813     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
4814                            "target = %1.2lfms >>>>>>>>>>",
4815                            _task_id, _calls, _time_target_ms);
4816   }
4817 
4818   // Set up the bitmap and oop closures. Anything that uses them is
4819   // eventually called from this method, so it is OK to allocate these
4820   // statically.
4821   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4822   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
4823   set_cm_oop_closure(&cm_oop_closure);
4824 
4825   if (_cm->has_overflown()) {
4826     // This can happen if the region stack or the mark stack overflows
4827     // during a GC pause and this task, after a yield point,
4828     // restarts. We have to abort as we need to get into the overflow
4829     // protocol which happens right at the end of this task.
4830     set_has_aborted();
4831   }
4832 
4833   // First drain any available SATB buffers. After this, we will not
4834   // look at SATB buffers before the next invocation of this method.
4835   // If enough completed SATB buffers are queued up, the regular clock
4836   // will abort this task so that it restarts.
4837   drain_satb_buffers();
4838   // ...then partially drain the local queue and the global stack
4839   drain_local_queue(true);
4840   drain_global_stack(true);
4841 
4842   // Then totally drain the region stack.  We will not look at
4843   // it again before the next invocation of this method. Entries on
4844   // the region stack are only added during evacuation pauses, for
4845   // which we have to yield. When we do, we abort the task anyway so
4846   // it will look at the region stack again when it restarts.
4847   bitmap_closure.set_scanning_heap_region(false);
4848   drain_region_stack(&bitmap_closure);
4849   // ...then partially drain the local queue and the global stack
4850   drain_local_queue(true);
4851   drain_global_stack(true);
4852 
4853   do {
4854     if (!has_aborted() && _curr_region != NULL) {
4855       // This means that we're already holding on to a region.
4856       assert(_finger != NULL, "if region is not NULL, then the finger "
4857              "should not be NULL either");
4858 
4859       // We might have restarted this task after an evacuation pause
4860       // which might have evacuated the region we're holding on to
4861       // underneath our feet. Let's read its limit again to make sure
4862       // that we do not iterate over a region of the heap that
4863       // contains garbage (update_region_limit() will also move
4864       // _finger to the start of the region if it is found empty).
4865       update_region_limit();
4866       // We will start from _finger not from the start of the region,
4867       // as we might be restarting this task after aborting half-way
4868       // through scanning this region. In this case, _finger points to
4869       // the address where we last found a marked object. If this is a
4870       // fresh region, _finger points to start().
4871       MemRegion mr = MemRegion(_finger, _region_limit);
4872 
4873       if (_cm->verbose_low()) {
4874         gclog_or_tty->print_cr("[%d] we're scanning part "
4875                                "["PTR_FORMAT", "PTR_FORMAT") "
4876                                "of region "PTR_FORMAT,
4877                                _task_id, _finger, _region_limit, _curr_region);
4878       }
4879 
4880       // Let's iterate over the bitmap of the part of the
4881       // region that is left.
4882       bitmap_closure.set_scanning_heap_region(true);
4883       if (mr.is_empty() ||
4884           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4885         // We successfully completed iterating over the region. Now,
4886         // let's give up the region.
4887         giveup_current_region();
4888         regular_clock_call();
4889       } else {
4890         assert(has_aborted(), "currently the only way to do so");
4891         // The only way to abort the bitmap iteration is to return
4892         // false from the do_bit() method. However, inside the
4893         // do_bit() method we move the _finger to point to the
4894         // object currently being looked at. So, if we bail out, we
4895         // have definitely set _finger to something non-null.
4896         assert(_finger != NULL, "invariant");
4897 
4898         // Region iteration was actually aborted. So now _finger
4899         // points to the address of the object we last scanned. If we
4900         // leave it there, when we restart this task, we will rescan
4901         // the object. It is easy to avoid this. We move the finger by
4902         // enough to point to the next possible object header (the
4903         // bitmap knows by how much we need to move it as it knows its
4904         // granularity).
4905         assert(_finger < _region_limit, "invariant");
4906         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
4907         // Check if bitmap iteration was aborted while scanning the last object
4908         if (new_finger >= _region_limit) {
4909             giveup_current_region();
4910         } else {
4911             move_finger_to(new_finger);
4912         }
4913       }
4914     }
4915     // At this point we have either completed iterating over the
4916     // region we were holding on to, or we have aborted.
4917 
4918     // We then partially drain the local queue and the global stack.
4919     // (Do we really need this?)
4920     drain_local_queue(true);
4921     drain_global_stack(true);
4922 
4923     // Read the note on the claim_region() method on why it might
4924     // return NULL with potentially more regions available for
4925     // claiming and why we have to check out_of_regions() to determine
4926     // whether we're done or not.
4927     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4928       // We are going to try to claim a new region. We should have
4929       // given up on the previous one.
4930       // Separated the asserts so that we know which one fires.
4931       assert(_curr_region  == NULL, "invariant");
4932       assert(_finger       == NULL, "invariant");
4933       assert(_region_limit == NULL, "invariant");
4934       if (_cm->verbose_low()) {
4935         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
4936       }
4937       HeapRegion* claimed_region = _cm->claim_region(_task_id);
4938       if (claimed_region != NULL) {
4939         // Yes, we managed to claim one
4940         statsOnly( ++_regions_claimed );
4941 
4942         if (_cm->verbose_low()) {
4943           gclog_or_tty->print_cr("[%d] we successfully claimed "
4944                                  "region "PTR_FORMAT,
4945                                  _task_id, claimed_region);
4946         }
4947 
4948         setup_for_region(claimed_region);
4949         assert(_curr_region == claimed_region, "invariant");
4950       }
4951       // It is important to call the regular clock here. It might take
4952       // a while to claim a region if, for example, we hit a large
4953       // block of empty regions. So we need to call the regular clock
4954       // method once round the loop to make sure it's called
4955       // frequently enough.
4956       regular_clock_call();
4957     }
4958 
4959     if (!has_aborted() && _curr_region == NULL) {
4960       assert(_cm->out_of_regions(),
4961              "at this point we should be out of regions");
4962     }
4963   } while ( _curr_region != NULL && !has_aborted());
4964 
4965   if (!has_aborted()) {
4966     // We cannot check whether the global stack is empty, since other
4967     // tasks might be pushing objects to it concurrently. We also cannot
4968     // check if the region stack is empty because if a thread is aborting
4969     // it can push a partially done region back.
4970     assert(_cm->out_of_regions(),
4971            "at this point we should be out of regions");
4972 
4973     if (_cm->verbose_low()) {
4974       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4975     }
4976 
4977     // Try to reduce the number of available SATB buffers so that
4978     // remark has less work to do.
4979     drain_satb_buffers();
4980   }
4981 
4982   // Since we've done everything else, we can now totally drain the
4983   // local queue and global stack.
4984   drain_local_queue(false);
4985   drain_global_stack(false);
4986 
4987   // Attempt at work stealing from other task's queues.
4988   if (do_stealing && !has_aborted()) {
4989     // We have not aborted. This means that we have finished all that
4990     // we could. Let's try to do some stealing...
4991 
4992     // We cannot check whether the global stack is empty, since other
4993     // tasks might be pushing objects to it concurrently. We also cannot
4994     // check if the region stack is empty because if a thread is aborting
4995     // it can push a partially done region back.
4996     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4997            "only way to reach here");
4998 
4999     if (_cm->verbose_low()) {
5000       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
5001     }
5002 
5003     while (!has_aborted()) {
5004       oop obj;
5005       statsOnly( ++_steal_attempts );
5006 
5007       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
5008         if (_cm->verbose_medium()) {
5009           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
5010                                  _task_id, (void*) obj);
5011         }
5012 
5013         statsOnly( ++_steals );
5014 
5015         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
5016                "any stolen object should be marked");
5017         scan_object(obj);
5018 
5019         // And since we're towards the end, let's totally drain the
5020         // local queue and global stack.
5021         drain_local_queue(false);
5022         drain_global_stack(false);
5023       } else {
5024         break;
5025       }
5026     }
5027   }
5028 
5029   // If we are about to wrap up and go into termination, check if we
5030   // should raise the overflow flag.
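       // (force_overflow() is a testing / stress hook: it lets us exercise
       // the overflow-and-restart protocol even when no real overflow has
       // occurred.)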
5031   if (do_termination && !has_aborted()) {
5032     if (_cm->force_overflow()->should_force()) {
5033       _cm->set_has_overflown();
5034       regular_clock_call();
5035     }
5036   }
5037 
5038   // We still haven't aborted. Now, let's try to get into the
5039   // termination protocol.
5040   if (do_termination && !has_aborted()) {
5041     // We cannot check whether the global stack is empty, since other
5042     // tasks might be concurrently pushing objects on it. We also cannot
5043     // check if the region stack is empty because if a thread is aborting
5044     // it can push a partially done region back.
5045     // Separated the asserts so that we know which one fires.
5046     assert(_cm->out_of_regions(), "only way to reach here");
5047     assert(_task_queue->size() == 0, "only way to reach here");
5048 
5049     if (_cm->verbose_low()) {
5050       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
5051     }
5052 
5053     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
5054     // The CMTask class also extends the TerminatorTerminator class,
5055     // hence its should_exit_termination() method will also decide
5056     // whether to exit the termination protocol or not.
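         // offer_termination() returns true only when every task has
         // offered termination, i.e. marking is globally finished; a false
         // return means should_exit_termination() asked us to leave the
         // protocol because more work may be available.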
5057     bool finished = _cm->terminator()->offer_termination(this);
5058     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
5059     _termination_time_ms +=
5060       termination_end_time_ms - _termination_start_time_ms;
5061 
5062     if (finished) {
5063       // We're all done.
5064 
5065       if (_task_id == 0) {
5066         // let's allow task 0 to do this
5067         if (concurrent()) {
5068           assert(_cm->concurrent_marking_in_progress(), "invariant");
5069           // we need to set this to false before the next
5070           // safepoint. This way we ensure that the marking phase
5071           // doesn't observe any more heap expansions.
5072           _cm->clear_concurrent_marking_in_progress();
5073         }
5074       }
5075 
5076       // We can now guarantee that the global stack is empty, since
5077       // all other tasks have finished. We separated the guarantees so
5078       // that, if a condition is false, we can immediately find out
5079       // which one.
5080       guarantee(_cm->out_of_regions(), "only way to reach here");
5081       guarantee(_aborted_region.is_empty(), "only way to reach here");
5082       guarantee(_cm->region_stack_empty(), "only way to reach here");
5083       guarantee(_cm->mark_stack_empty(), "only way to reach here");
5084       guarantee(_task_queue->size() == 0, "only way to reach here");
5085       guarantee(!_cm->has_overflown(), "only way to reach here");
5086       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
5087       guarantee(!_cm->region_stack_overflow(), "only way to reach here");
5088 
5089       if (_cm->verbose_low()) {
5090         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
5091       }
5092     } else {
5093       // Apparently there's more work to do. Let's abort this task. It
5094       // will restart it and we can hopefully find more things to do.
5095 
5096       if (_cm->verbose_low()) {
5097         gclog_or_tty->print_cr("[%d] apparently there is more work to do",
5098                                _task_id);
5099       }
5100 
5101       set_has_aborted();
5102       statsOnly( ++_aborted_termination );
5103     }
5104   }
5105 
5106   // Mainly for debugging purposes to make sure that a pointer to the
5107   // closure which was statically allocated in this frame doesn't
5108   // escape it by accident.
5109   set_cm_oop_closure(NULL);
5110   double end_time_ms = os::elapsedVTime() * 1000.0;
5111   double elapsed_time_ms = end_time_ms - _start_time_ms;
5112   // Update the step history.
5113   _step_times_ms.add(elapsed_time_ms);
5114 
5115   if (has_aborted()) {
5116     // The task was aborted for some reason.
5117 
5118     statsOnly( ++_aborted );
5119 
5120     if (_has_timed_out) {
5121       double diff_ms = elapsed_time_ms - _time_target_ms;
5122       // Keep statistics of how well we did with respect to hitting
5123       // our target only if we actually timed out (if we aborted for
5124       // other reasons, then the results might get skewed).
5125       _marking_step_diffs_ms.add(diff_ms);
5126     }
5127 
5128     if (_cm->has_overflown()) {
5129       // This is the interesting one. We aborted because a global
5130       // overflow was raised. This means we have to restart the
5131       // marking phase and start iterating over regions. However, in
5132       // order to do this we have to make sure that all tasks stop
5133       // what they are doing and re-initialise in a safe manner. We
5134       // will achieve this with the use of two barrier sync points.
5135 
5136       if (_cm->verbose_low()) {
5137         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
5138       }
5139 
5140       _cm->enter_first_sync_barrier(_task_id);
5141       // When we exit this sync barrier we know that all tasks have
5142       // stopped doing marking work. So, it's now safe to
5143       // re-initialise our data structures. At the end of this method,
5144       // task 0 will clear the global data structures.
5145 
5146       statsOnly( ++_aborted_overflow );
5147 
5148       // We clear the local state of this task...
5149       clear_region_fields();
5150 
5151       // ...and enter the second barrier.
5152       _cm->enter_second_sync_barrier(_task_id);
5153       // At this point everything has been re-initialised and we're
5154       // ready to restart.
5155     }
5156 
5157     if (_cm->verbose_low()) {
5158       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
5159                              "elapsed = %1.2lfms <<<<<<<<<<",
5160                              _task_id, _time_target_ms, elapsed_time_ms);
5161       if (_cm->has_aborted()) {
5162         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
5163                                _task_id);
5164       }
5165     }
5166   } else {
5167     if (_cm->verbose_low()) {
5168       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
5169                              "elapsed = %1.2lfms <<<<<<<<<<",
5170                              _task_id, _time_target_ms, elapsed_time_ms);
5171     }
5172   }
5173 
5174   _claimed = false;
5175 }
5176 
5177 CMTask::CMTask(int task_id,
5178                ConcurrentMark* cm,
5179                size_t* marked_bytes,
5180                BitMap* card_bm,
5181                CMTaskQueue* task_queue,
5182                CMTaskQueueSet* task_queues)
5183   : _g1h(G1CollectedHeap::heap()),
5184     _task_id(task_id), _cm(cm),
5185     _claimed(false),
5186     _nextMarkBitMap(NULL), _hash_seed(17),
5187     _task_queue(task_queue),
5188     _task_queues(task_queues),
5189     _cm_oop_closure(NULL),
5190     _aborted_region(MemRegion()),
5191     _marked_bytes_array(marked_bytes),
5192     _card_bm(card_bm) {
5193   guarantee(task_queue != NULL, "invariant");
5194   guarantee(task_queues != NULL, "invariant");
5195 
5196   statsOnly( _clock_due_to_scanning = 0;
5197              _clock_due_to_marking  = 0 );
5198 
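       // Seed the step-time difference sequence with a small initial value
       // so that do_marking_step() has a prediction to work with before any
       // real samples exist (the 0.5ms figure appears to be just an initial
       // guess).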
5199   _marking_step_diffs_ms.add(0.5);
5200 }
5201 
5202 // These are formatting macros that are used below to ensure
5203 // consistent formatting. The *_H_* versions are used to format the
5204 // header for a particular value and they should be kept consistent
5205 // with the corresponding macro. Also note that most of the macros add
5206 // the necessary white space (as a prefix) which makes them a bit
5207 // easier to compose.
5208 
5209 // All the output lines are prefixed with this string to be able to
5210 // identify them easily in a large log file.
5211 #define G1PPRL_LINE_PREFIX            "###"
5212 
5213 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
5214 #ifdef _LP64
5215 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
5216 #else // _LP64
5217 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
5218 #endif // _LP64
5219 
5220 // For per-region info
5221 #define G1PPRL_TYPE_FORMAT            "   %-4s"
5222 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
5223 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
5224 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
5225 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
5226 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
5227 
5228 // For summary info
5229 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
5230 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
5231 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
5232 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
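
     // These macros rely on C/C++ adjacent string-literal concatenation, so
     // e.g. G1PPRL_SUM_MB_FORMAT("used") expands to a single format string
     // that is then passed to print_cr() below.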
5233 
5234 G1PrintRegionLivenessInfoClosure::
5235 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
5236   : _out(out),
5237     _total_used_bytes(0), _total_capacity_bytes(0),
5238     _total_prev_live_bytes(0), _total_next_live_bytes(0),
5239     _hum_used_bytes(0), _hum_capacity_bytes(0),
5240     _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
5241   G1CollectedHeap* g1h = G1CollectedHeap::heap();
5242   MemRegion g1_committed = g1h->g1_committed();
5243   MemRegion g1_reserved = g1h->g1_reserved();
5244   double now = os::elapsedTime();
5245 
5246   // Print the header of the output.
5247   _out->cr();
5248   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
5249   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
5250                  G1PPRL_SUM_ADDR_FORMAT("committed")
5251                  G1PPRL_SUM_ADDR_FORMAT("reserved")
5252                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
5253                  g1_committed.start(), g1_committed.end(),
5254                  g1_reserved.start(), g1_reserved.end(),
5255                  HeapRegion::GrainBytes);
5256   _out->print_cr(G1PPRL_LINE_PREFIX);
5257   _out->print_cr(G1PPRL_LINE_PREFIX
5258                  G1PPRL_TYPE_H_FORMAT
5259                  G1PPRL_ADDR_BASE_H_FORMAT
5260                  G1PPRL_BYTE_H_FORMAT
5261                  G1PPRL_BYTE_H_FORMAT
5262                  G1PPRL_BYTE_H_FORMAT
5263                  G1PPRL_DOUBLE_H_FORMAT,
5264                  "type", "address-range",
5265                  "used", "prev-live", "next-live", "gc-eff");
5266   _out->print_cr(G1PPRL_LINE_PREFIX
5267                  G1PPRL_TYPE_H_FORMAT
5268                  G1PPRL_ADDR_BASE_H_FORMAT
5269                  G1PPRL_BYTE_H_FORMAT
5270                  G1PPRL_BYTE_H_FORMAT
5271                  G1PPRL_BYTE_H_FORMAT
5272                  G1PPRL_DOUBLE_H_FORMAT,
5273                  "", "",
5274                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
5275 }
5276 
5277 // It takes as a parameter a pointer to one of the _hum_* fields,
5278 // deduces the corresponding value for a region in a humongous region
5279 // series (either the region size, or what's left if the _hum_* field
5280 // is < the region size), and updates the _hum_* field accordingly.
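     // For example, successive calls with *hum_bytes initially equal to
     // 2.5 * HeapRegion::GrainBytes return GrainBytes, GrainBytes and
     // 0.5 * GrainBytes, leaving *hum_bytes at zero.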
5281 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
5282   size_t bytes = 0;
5283   // The > 0 check is to deal with the prev and next live bytes which
5284   // could be 0.
5285   if (*hum_bytes > 0) {
5286     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
5287     *hum_bytes -= bytes;
5288   }
5289   return bytes;
5290 }
5291 
5292 // It deduces the values for a region in a humongous region series
5293 // from the _hum_* fields and updates those accordingly. It assumes
5294 // that the _hum_* fields have already been set up from the "starts
5295 // humongous" region and that we visit the regions in address order.
5296 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
5297                                                      size_t* capacity_bytes,
5298                                                      size_t* prev_live_bytes,
5299                                                      size_t* next_live_bytes) {
5300   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
5301   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
5302   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
5303   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
5304   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
5305 }
5306 
5307 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
5308   const char* type = "";
5309   HeapWord* bottom       = r->bottom();
5310   HeapWord* end          = r->end();
5311   size_t capacity_bytes  = r->capacity();
5312   size_t used_bytes      = r->used();
5313   size_t prev_live_bytes = r->live_bytes();
5314   size_t next_live_bytes = r->next_live_bytes();
5315   double gc_eff          = r->gc_efficiency();
5316   if (r->used() == 0) {
5317     type = "FREE";
5318   } else if (r->is_survivor()) {
5319     type = "SURV";
5320   } else if (r->is_young()) {
5321     type = "EDEN";
5322   } else if (r->startsHumongous()) {
5323     type = "HUMS";
5324 
5325     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
5326            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
5327            "they should have been zeroed after the last time we used them");
5328     // Set up the _hum_* fields.
5329     _hum_capacity_bytes  = capacity_bytes;
5330     _hum_used_bytes      = used_bytes;
5331     _hum_prev_live_bytes = prev_live_bytes;
5332     _hum_next_live_bytes = next_live_bytes;
5333     get_hum_bytes(&used_bytes, &capacity_bytes,
5334                   &prev_live_bytes, &next_live_bytes);
5335     end = bottom + HeapRegion::GrainWords;
5336   } else if (r->continuesHumongous()) {
5337     type = "HUMC";
5338     get_hum_bytes(&used_bytes, &capacity_bytes,
5339                   &prev_live_bytes, &next_live_bytes);
5340     assert(end == bottom + HeapRegion::GrainWords, "invariant");
5341   } else {
5342     type = "OLD";
5343   }
5344 
5345   _total_used_bytes      += used_bytes;
5346   _total_capacity_bytes  += capacity_bytes;
5347   _total_prev_live_bytes += prev_live_bytes;
5348   _total_next_live_bytes += next_live_bytes;
5349 
5350   // Print a line for this particular region.
5351   _out->print_cr(G1PPRL_LINE_PREFIX
5352                  G1PPRL_TYPE_FORMAT
5353                  G1PPRL_ADDR_BASE_FORMAT
5354                  G1PPRL_BYTE_FORMAT
5355                  G1PPRL_BYTE_FORMAT
5356                  G1PPRL_BYTE_FORMAT
5357                  G1PPRL_DOUBLE_FORMAT,
5358                  type, bottom, end,
5359                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
5360 
5361   return false;
5362 }
5363 
5364 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
5365   // Print the footer of the output.
5366   _out->print_cr(G1PPRL_LINE_PREFIX);
5367   _out->print_cr(G1PPRL_LINE_PREFIX
5368                  " SUMMARY"
5369                  G1PPRL_SUM_MB_FORMAT("capacity")
5370                  G1PPRL_SUM_MB_PERC_FORMAT("used")
5371                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
5372                  G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
5373                  bytes_to_mb(_total_capacity_bytes),
5374                  bytes_to_mb(_total_used_bytes),
5375                  perc(_total_used_bytes, _total_capacity_bytes),
5376                  bytes_to_mb(_total_prev_live_bytes),
5377                  perc(_total_prev_live_bytes, _total_capacity_bytes),
5378                  bytes_to_mb(_total_next_live_bytes),
5379                  perc(_total_next_live_bytes, _total_capacity_bytes));
5380   _out->cr();
5381 }