1 /*
   2  * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/symbolTable.hpp"
  27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
  28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
  32 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
  33 #include "gc_implementation/g1/g1RemSet.hpp"
  34 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  35 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
  36 #include "gc_implementation/shared/vmGCOperations.hpp"
  37 #include "memory/genOopClosures.inline.hpp"
  38 #include "memory/referencePolicy.hpp"
  39 #include "memory/resourceArea.hpp"
  40 #include "oops/oop.inline.hpp"
  41 #include "runtime/handles.inline.hpp"
  42 #include "runtime/java.hpp"
  43 
  44 //
  45 // CMS Bit Map Wrapper
  46 
  47 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  48   _bm((uintptr_t*)NULL,0),
  49   _shifter(shifter) {
  50   _bmStartWord = (HeapWord*)(rs.base());
  51   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  52   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
  53                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  54 
  55   guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  56   // For now we'll just commit all of the bit map up fromt.
  57   // Later on we'll try to be more parsimonious with swap.
  58   guarantee(_virtual_space.initialize(brs, brs.size()),
  59             "couldn't reseve backing store for CMS bit map");
  60   assert(_virtual_space.committed_size() == brs.size(),
  61          "didn't reserve backing store for all of CMS bit map?");
  62   _bm.set_map((uintptr_t*)_virtual_space.low());
  63   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
  64          _bmWordSize, "inconsistency in bit map sizing");
  65   _bm.set_size(_bmWordSize >> _shifter);
  66 }
  67 
  68 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  69                                                HeapWord* limit) const {
  70   // First we must round addr *up* to a possible object boundary.
  71   addr = (HeapWord*)align_size_up((intptr_t)addr,
  72                                   HeapWordSize << _shifter);
  73   size_t addrOffset = heapWordToOffset(addr);
  74   if (limit == NULL) {
  75     limit = _bmStartWord + _bmWordSize;
  76   }
  77   size_t limitOffset = heapWordToOffset(limit);
  78   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  79   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  80   assert(nextAddr >= addr, "get_next_one postcondition");
  81   assert(nextAddr == limit || isMarked(nextAddr),
  82          "get_next_one postcondition");
  83   return nextAddr;
  84 }
  85 
  86 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  87                                                  HeapWord* limit) const {
  88   size_t addrOffset = heapWordToOffset(addr);
  89   if (limit == NULL) {
  90     limit = _bmStartWord + _bmWordSize;
  91   }
  92   size_t limitOffset = heapWordToOffset(limit);
  93   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  94   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  95   assert(nextAddr >= addr, "get_next_one postcondition");
  96   assert(nextAddr == limit || !isMarked(nextAddr),
  97          "get_next_one postcondition");
  98   return nextAddr;
  99 }
 100 
 101 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 102   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 103   return (int) (diff >> _shifter);
 104 }
 105 
 106 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
 107   HeapWord* left  = MAX2(_bmStartWord, mr.start());
 108   HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
 109   if (right > left) {
 110     // Right-open interval [leftOffset, rightOffset).
 111     return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
 112   } else {
 113     return true;
 114   }
 115 }
 116 
 117 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
 118                                              size_t    from_start_index,
 119                                              HeapWord* to_start_word,
 120                                              size_t    word_num) {
 121   _bm.mostly_disjoint_range_union(from_bitmap,
 122                                   from_start_index,
 123                                   heapWordToOffset(to_start_word),
 124                                   word_num);
 125 }
 126 
 127 #ifndef PRODUCT
 128 bool CMBitMapRO::covers(ReservedSpace rs) const {
 129   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 130   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
 131          "size inconsistency");
 132   return _bmStartWord == (HeapWord*)(rs.base()) &&
 133          _bmWordSize  == rs.size()>>LogHeapWordSize;
 134 }
 135 #endif
 136 
 137 void CMBitMap::clearAll() {
 138   _bm.clear();
 139   return;
 140 }
 141 
 142 void CMBitMap::markRange(MemRegion mr) {
 143   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 144   assert(!mr.is_empty(), "unexpected empty region");
 145   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 146           ((HeapWord *) mr.end())),
 147          "markRange memory region end is not card aligned");
 148   // convert address range into offset range
 149   _bm.at_put_range(heapWordToOffset(mr.start()),
 150                    heapWordToOffset(mr.end()), true);
 151 }
 152 
 153 void CMBitMap::clearRange(MemRegion mr) {
 154   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 155   assert(!mr.is_empty(), "unexpected empty region");
 156   // convert address range into offset range
 157   _bm.at_put_range(heapWordToOffset(mr.start()),
 158                    heapWordToOffset(mr.end()), false);
 159 }
 160 
 161 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 162                                             HeapWord* end_addr) {
 163   HeapWord* start = getNextMarkedWordAddress(addr);
 164   start = MIN2(start, end_addr);
 165   HeapWord* end   = getNextUnmarkedWordAddress(start);
 166   end = MIN2(end, end_addr);
 167   assert(start <= end, "Consistency check");
 168   MemRegion mr(start, end);
 169   if (!mr.is_empty()) {
 170     clearRange(mr);
 171   }
 172   return mr;
 173 }
 174 
 175 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 176   _base(NULL), _cm(cm)
 177 #ifdef ASSERT
 178   , _drain_in_progress(false)
 179   , _drain_in_progress_yields(false)
 180 #endif
 181 {}
 182 
 183 void CMMarkStack::allocate(size_t size) {
 184   _base = NEW_C_HEAP_ARRAY(oop, size);
 185   if (_base == NULL) {
 186     vm_exit_during_initialization("Failed to allocate "
 187                                   "CM region mark stack");
 188   }
 189   _index = 0;
 190   _capacity = (jint) size;
 191   _oops_do_bound = -1;
 192   NOT_PRODUCT(_max_depth = 0);
 193 }
 194 
 195 CMMarkStack::~CMMarkStack() {
 196   if (_base != NULL) {
 197     FREE_C_HEAP_ARRAY(oop, _base);
 198   }
 199 }
 200 
 201 void CMMarkStack::par_push(oop ptr) {
 202   while (true) {
 203     if (isFull()) {
 204       _overflow = true;
 205       return;
 206     }
 207     // Otherwise...
 208     jint index = _index;
 209     jint next_index = index+1;
 210     jint res = Atomic::cmpxchg(next_index, &_index, index);
 211     if (res == index) {
 212       _base[index] = ptr;
 213       // Note that we don't maintain this atomically.  We could, but it
 214       // doesn't seem necessary.
 215       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 216       return;
 217     }
 218     // Otherwise, we need to try again.
 219   }
 220 }
 221 
 222 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 223   while (true) {
 224     if (isFull()) {
 225       _overflow = true;
 226       return;
 227     }
 228     // Otherwise...
 229     jint index = _index;
 230     jint next_index = index + n;
 231     if (next_index > _capacity) {
 232       _overflow = true;
 233       return;
 234     }
 235     jint res = Atomic::cmpxchg(next_index, &_index, index);
 236     if (res == index) {
 237       for (int i = 0; i < n; i++) {
 238         int ind = index + i;
 239         assert(ind < _capacity, "By overflow test above.");
 240         _base[ind] = ptr_arr[i];
 241       }
 242       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 243       return;
 244     }
 245     // Otherwise, we need to try again.
 246   }
 247 }
 248 
 249 
 250 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 251   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 252   jint start = _index;
 253   jint next_index = start + n;
 254   if (next_index > _capacity) {
 255     _overflow = true;
 256     return;
 257   }
 258   // Otherwise.
 259   _index = next_index;
 260   for (int i = 0; i < n; i++) {
 261     int ind = start + i;
 262     assert(ind < _capacity, "By overflow test above.");
 263     _base[ind] = ptr_arr[i];
 264   }
 265 }
 266 
 267 
 268 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 269   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 270   jint index = _index;
 271   if (index == 0) {
 272     *n = 0;
 273     return false;
 274   } else {
 275     int k = MIN2(max, index);
 276     jint new_ind = index - k;
 277     for (int j = 0; j < k; j++) {
 278       ptr_arr[j] = _base[new_ind + j];
 279     }
 280     _index = new_ind;
 281     *n = k;
 282     return true;
 283   }
 284 }
 285 
 286 
 287 CMRegionStack::CMRegionStack() : _base(NULL) {}
 288 
 289 void CMRegionStack::allocate(size_t size) {
 290   _base = NEW_C_HEAP_ARRAY(MemRegion, size);
 291   if (_base == NULL) {
 292     vm_exit_during_initialization("Failed to allocate CM region mark stack");
 293   }
 294   _index = 0;
 295   _capacity = (jint) size;
 296 }
 297 
 298 CMRegionStack::~CMRegionStack() {
 299   if (_base != NULL) {
 300     FREE_C_HEAP_ARRAY(oop, _base);
 301   }
 302 }
 303 
 304 void CMRegionStack::push_lock_free(MemRegion mr) {
 305   assert(mr.word_size() > 0, "Precondition");
 306   while (true) {
 307     jint index = _index;
 308 
 309     if (index >= _capacity) {
 310       _overflow = true;
 311       return;
 312     }
 313     // Otherwise...
 314     jint next_index = index+1;
 315     jint res = Atomic::cmpxchg(next_index, &_index, index);
 316     if (res == index) {
 317       _base[index] = mr;
 318       return;
 319     }
 320     // Otherwise, we need to try again.
 321   }
 322 }
 323 
 324 // Lock-free pop of the region stack. Called during the concurrent
 325 // marking / remark phases. Should only be called in tandem with
 326 // other lock-free pops.
 327 MemRegion CMRegionStack::pop_lock_free() {
 328   while (true) {
 329     jint index = _index;
 330 
 331     if (index == 0) {
 332       return MemRegion();
 333     }
 334     // Otherwise...
 335     jint next_index = index-1;
 336     jint res = Atomic::cmpxchg(next_index, &_index, index);
 337     if (res == index) {
 338       MemRegion mr = _base[next_index];
 339       if (mr.start() != NULL) {
 340         assert(mr.end() != NULL, "invariant");
 341         assert(mr.word_size() > 0, "invariant");
 342         return mr;
 343       } else {
 344         // that entry was invalidated... let's skip it
 345         assert(mr.end() == NULL, "invariant");
 346       }
 347     }
 348     // Otherwise, we need to try again.
 349   }
 350 }
 351 
 352 #if 0
 353 // The routines that manipulate the region stack with a lock are
 354 // not currently used. They should be retained, however, as a
 355 // diagnostic aid.
 356 
 357 void CMRegionStack::push_with_lock(MemRegion mr) {
 358   assert(mr.word_size() > 0, "Precondition");
 359   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 360 
 361   if (isFull()) {
 362     _overflow = true;
 363     return;
 364   }
 365 
 366   _base[_index] = mr;
 367   _index += 1;
 368 }
 369 
 370 MemRegion CMRegionStack::pop_with_lock() {
 371   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 372 
 373   while (true) {
 374     if (_index == 0) {
 375       return MemRegion();
 376     }
 377     _index -= 1;
 378 
 379     MemRegion mr = _base[_index];
 380     if (mr.start() != NULL) {
 381       assert(mr.end() != NULL, "invariant");
 382       assert(mr.word_size() > 0, "invariant");
 383       return mr;
 384     } else {
 385       // that entry was invalidated... let's skip it
 386       assert(mr.end() == NULL, "invariant");
 387     }
 388   }
 389 }
 390 #endif
 391 
 392 bool CMRegionStack::invalidate_entries_into_cset() {
 393   bool result = false;
 394   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 395   for (int i = 0; i < _oops_do_bound; ++i) {
 396     MemRegion mr = _base[i];
 397     if (mr.start() != NULL) {
 398       assert(mr.end() != NULL, "invariant");
 399       assert(mr.word_size() > 0, "invariant");
 400       HeapRegion* hr = g1h->heap_region_containing(mr.start());
 401       assert(hr != NULL, "invariant");
 402       if (hr->in_collection_set()) {
 403         // The region points into the collection set
 404         _base[i] = MemRegion();
 405         result = true;
 406       }
 407     } else {
 408       // that entry was invalidated... let's skip it
 409       assert(mr.end() == NULL, "invariant");
 410     }
 411   }
 412   return result;
 413 }
 414 
 415 template<class OopClosureClass>
 416 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 417   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 418          || SafepointSynchronize::is_at_safepoint(),
 419          "Drain recursion must be yield-safe.");
 420   bool res = true;
 421   debug_only(_drain_in_progress = true);
 422   debug_only(_drain_in_progress_yields = yield_after);
 423   while (!isEmpty()) {
 424     oop newOop = pop();
 425     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 426     assert(newOop->is_oop(), "Expected an oop");
 427     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 428            "only grey objects on this stack");
 429     // iterate over the oops in this oop, marking and pushing
 430     // the ones in CMS generation.
 431     newOop->oop_iterate(cl);
 432     if (yield_after && _cm->do_yield_check()) {
 433       res = false;
 434       break;
 435     }
 436   }
 437   debug_only(_drain_in_progress = false);
 438   return res;
 439 }
 440 
 441 void CMMarkStack::oops_do(OopClosure* f) {
 442   if (_index == 0) return;
 443   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
 444          "Bound must be set.");
 445   for (int i = 0; i < _oops_do_bound; i++) {
 446     f->do_oop(&_base[i]);
 447   }
 448   _oops_do_bound = -1;
 449 }
 450 
 451 bool ConcurrentMark::not_yet_marked(oop obj) const {
 452   return (_g1h->is_obj_ill(obj)
 453           || (_g1h->is_in_permanent(obj)
 454               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
 455 }
 456 
 457 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 458 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 459 #endif // _MSC_VER
 460 
 461 size_t ConcurrentMark::scale_parallel_threads(size_t n_par_threads) {
 462   return MAX2((n_par_threads + 2) / 4, (size_t)1);
 463 }
 464 
 465 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
 466                                int max_regions) :
 467   _markBitMap1(rs, MinObjAlignment - 1),
 468   _markBitMap2(rs, MinObjAlignment - 1),
 469 
 470   _parallel_marking_threads(0),
 471   _max_parallel_marking_threads(0),
 472   _sleep_factor(0.0),
 473   _marking_task_overhead(1.0),
 474   _cleanup_sleep_factor(0.0),
 475   _cleanup_task_overhead(1.0),
 476   _cleanup_list("Cleanup List"),
 477   _region_bm(max_regions, false /* in_resource_area*/),
 478   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
 479            CardTableModRefBS::card_shift,
 480            false /* in_resource_area*/),
 481   _prevMarkBitMap(&_markBitMap1),
 482   _nextMarkBitMap(&_markBitMap2),
 483   _at_least_one_mark_complete(false),
 484 
 485   _markStack(this),
 486   _regionStack(),
 487   // _finger set in set_non_marking_state
 488 
 489   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
 490   // _active_tasks set in set_non_marking_state
 491   // _tasks set inside the constructor
 492   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
 493   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
 494 
 495   _has_overflown(false),
 496   _concurrent(false),
 497   _has_aborted(false),
 498   _restart_for_overflow(false),
 499   _concurrent_marking_in_progress(false),
 500   _should_gray_objects(false),
 501 
 502   // _verbose_level set below
 503 
 504   _init_times(),
 505   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 506   _cleanup_times(),
 507   _total_counting_time(0.0),
 508   _total_rs_scrub_time(0.0),
 509 
 510   _parallel_workers(NULL) {
 511   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 512   if (verbose_level < no_verbose) {
 513     verbose_level = no_verbose;
 514   }
 515   if (verbose_level > high_verbose) {
 516     verbose_level = high_verbose;
 517   }
 518   _verbose_level = verbose_level;
 519 
 520   if (verbose_low()) {
 521     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 522                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
 523   }
 524 
 525   _markStack.allocate(MarkStackSize);
 526   _regionStack.allocate(G1MarkRegionStackSize);
 527 
 528   // Create & start a ConcurrentMark thread.
 529   _cmThread = new ConcurrentMarkThread(this);
 530   assert(cmThread() != NULL, "CM Thread should have been created");
 531   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 532 
 533   _g1h = G1CollectedHeap::heap();
 534   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 535   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
 536   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
 537 
 538   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 539   satb_qs.set_buffer_size(G1SATBBufferSize);
 540 
 541   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
 542   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
 543 
 544   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 545   _active_tasks = _max_task_num;
 546   for (int i = 0; i < (int) _max_task_num; ++i) {
 547     CMTaskQueue* task_queue = new CMTaskQueue();
 548     task_queue->initialize();
 549     _task_queues->register_queue(i, task_queue);
 550 
 551     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
 552     _accum_task_vtime[i] = 0.0;
 553   }
 554 
 555   if (ConcGCThreads > ParallelGCThreads) {
 556     vm_exit_during_initialization("Can't have more ConcGCThreads "
 557                                   "than ParallelGCThreads.");
 558   }
 559   if (ParallelGCThreads == 0) {
 560     // if we are not running with any parallel GC threads we will not
 561     // spawn any marking threads either
 562     _parallel_marking_threads =       0;
 563     _max_parallel_marking_threads =   0;
 564     _sleep_factor             =     0.0;
 565     _marking_task_overhead    =     1.0;
 566   } else {
 567     if (ConcGCThreads > 0) {
 568       // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
 569       // if both are set
 570 
 571       _parallel_marking_threads = ConcGCThreads;
 572       _max_parallel_marking_threads = _parallel_marking_threads;
 573       _sleep_factor             = 0.0;
 574       _marking_task_overhead    = 1.0;
 575     } else if (G1MarkingOverheadPercent > 0) {
 576       // we will calculate the number of parallel marking threads
 577       // based on a target overhead with respect to the soft real-time
 578       // goal
 579 
 580       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 581       double overall_cm_overhead =
 582         (double) MaxGCPauseMillis * marking_overhead /
 583         (double) GCPauseIntervalMillis;
 584       double cpu_ratio = 1.0 / (double) os::processor_count();
 585       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 586       double marking_task_overhead =
 587         overall_cm_overhead / marking_thread_num *
 588                                                 (double) os::processor_count();
 589       double sleep_factor =
 590                          (1.0 - marking_task_overhead) / marking_task_overhead;
 591 
 592       _parallel_marking_threads = (size_t) marking_thread_num;
 593       _max_parallel_marking_threads = _parallel_marking_threads;
 594       _sleep_factor             = sleep_factor;
 595       _marking_task_overhead    = marking_task_overhead;
 596     } else {
 597       _parallel_marking_threads = scale_parallel_threads(ParallelGCThreads);
 598       _max_parallel_marking_threads = _parallel_marking_threads;
 599       _sleep_factor             = 0.0;
 600       _marking_task_overhead    = 1.0;
 601     }
 602 
 603     if (parallel_marking_threads() > 1) {
 604       _cleanup_task_overhead = 1.0;
 605     } else {
 606       _cleanup_task_overhead = marking_task_overhead();
 607     }
 608     _cleanup_sleep_factor =
 609                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 610 
 611 #if 0
 612     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 613     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 614     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 615     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 616     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 617 #endif
 618 
 619     guarantee(parallel_marking_threads() > 0, "peace of mind");
 620     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
 621          (int) _max_parallel_marking_threads, false, true);
 622     if (_parallel_workers == NULL) {
 623       vm_exit_during_initialization("Failed necessary allocation.");
 624     } else {
 625       _parallel_workers->initialize_workers();
 626     }
 627   }
 628 
 629   // so that the call below can read a sensible value
 630   _heap_start = (HeapWord*) rs.base();
 631   set_non_marking_state();
 632 }
 633 
 634 void ConcurrentMark::update_g1_committed(bool force) {
 635   // If concurrent marking is not in progress, then we do not need to
 636   // update _heap_end. This has a subtle and important
 637   // side-effect. Imagine that two evacuation pauses happen between
 638   // marking completion and remark. The first one can grow the
 639   // heap (hence now the finger is below the heap end). Then, the
 640   // second one could unnecessarily push regions on the region
 641   // stack. This causes the invariant that the region stack is empty
 642   // at the beginning of remark to be false. By ensuring that we do
 643   // not observe heap expansions after marking is complete, then we do
 644   // not have this problem.
 645   if (!concurrent_marking_in_progress() && !force) return;
 646 
 647   MemRegion committed = _g1h->g1_committed();
 648   assert(committed.start() == _heap_start, "start shouldn't change");
 649   HeapWord* new_end = committed.end();
 650   if (new_end > _heap_end) {
 651     // The heap has been expanded.
 652 
 653     _heap_end = new_end;
 654   }
 655   // Notice that the heap can also shrink. However, this only happens
 656   // during a Full GC (at least currently) and the entire marking
 657   // phase will bail out and the task will not be restarted. So, let's
 658   // do nothing.
 659 }
 660 
 661 void ConcurrentMark::reset() {
 662   // Starting values for these two. This should be called in a STW
 663   // phase. CM will be notified of any future g1_committed expansions
 664   // will be at the end of evacuation pauses, when tasks are
 665   // inactive.
 666   MemRegion committed = _g1h->g1_committed();
 667   _heap_start = committed.start();
 668   _heap_end   = committed.end();
 669 
 670   // Separated the asserts so that we know which one fires.
 671   assert(_heap_start != NULL, "heap bounds should look ok");
 672   assert(_heap_end != NULL, "heap bounds should look ok");
 673   assert(_heap_start < _heap_end, "heap bounds should look ok");
 674 
 675   // reset all the marking data structures and any necessary flags
 676   clear_marking_state();
 677 
 678   if (verbose_low()) {
 679     gclog_or_tty->print_cr("[global] resetting");
 680   }
 681 
 682   // We do reset all of them, since different phases will use
 683   // different number of active threads. So, it's easiest to have all
 684   // of them ready.
 685   for (int i = 0; i < (int) _max_task_num; ++i) {
 686     _tasks[i]->reset(_nextMarkBitMap);
 687   }
 688 
 689   // we need this to make sure that the flag is on during the evac
 690   // pause with initial mark piggy-backed
 691   set_concurrent_marking_in_progress();
 692 }
 693 
 694 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
 695   assert(active_tasks <= _max_task_num, "we should not have more");
 696 
 697   _active_tasks = active_tasks;
 698   // Need to update the three data structures below according to the
 699   // number of active threads for this phase.
 700   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 701   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 702   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 703 
 704   _concurrent = concurrent;
 705   // We propagate this to all tasks, not just the active ones.
 706   for (int i = 0; i < (int) _max_task_num; ++i)
 707     _tasks[i]->set_concurrent(concurrent);
 708 
 709   if (concurrent) {
 710     set_concurrent_marking_in_progress();
 711   } else {
 712     // We currently assume that the concurrent flag has been set to
 713     // false before we start remark. At this point we should also be
 714     // in a STW phase.
 715     assert(!concurrent_marking_in_progress(), "invariant");
 716     assert(_finger == _heap_end, "only way to get here");
 717     update_g1_committed(true);
 718   }
 719 }
 720 
 721 void ConcurrentMark::set_non_marking_state() {
 722   // We set the global marking state to some default values when we're
 723   // not doing marking.
 724   clear_marking_state();
 725   _active_tasks = 0;
 726   clear_concurrent_marking_in_progress();
 727 }
 728 
 729 ConcurrentMark::~ConcurrentMark() {
 730   for (int i = 0; i < (int) _max_task_num; ++i) {
 731     delete _task_queues->queue(i);
 732     delete _tasks[i];
 733   }
 734   delete _task_queues;
 735   FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
 736 }
 737 
 738 // This closure is used to mark refs into the g1 generation
 739 // from external roots in the CMS bit map.
 740 // Called at the first checkpoint.
 741 //
 742 
 743 void ConcurrentMark::clearNextBitmap() {
 744   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 745   G1CollectorPolicy* g1p = g1h->g1_policy();
 746 
 747   // Make sure that the concurrent mark thread looks to still be in
 748   // the current cycle.
 749   guarantee(cmThread()->during_cycle(), "invariant");
 750 
 751   // We are finishing up the current cycle by clearing the next
 752   // marking bitmap and getting it ready for the next cycle. During
 753   // this time no other cycle can start. So, let's make sure that this
 754   // is the case.
 755   guarantee(!g1h->mark_in_progress(), "invariant");
 756 
 757   // clear the mark bitmap (no grey objects to start with).
 758   // We need to do this in chunks and offer to yield in between
 759   // each chunk.
 760   HeapWord* start  = _nextMarkBitMap->startWord();
 761   HeapWord* end    = _nextMarkBitMap->endWord();
 762   HeapWord* cur    = start;
 763   size_t chunkSize = M;
 764   while (cur < end) {
 765     HeapWord* next = cur + chunkSize;
 766     if (next > end) {
 767       next = end;
 768     }
 769     MemRegion mr(cur,next);
 770     _nextMarkBitMap->clearRange(mr);
 771     cur = next;
 772     do_yield_check();
 773 
 774     // Repeat the asserts from above. We'll do them as asserts here to
 775     // minimize their overhead on the product. However, we'll have
 776     // them as guarantees at the beginning / end of the bitmap
 777     // clearing to get some checking in the product.
 778     assert(cmThread()->during_cycle(), "invariant");
 779     assert(!g1h->mark_in_progress(), "invariant");
 780   }
 781 
 782   // Repeat the asserts from above.
 783   guarantee(cmThread()->during_cycle(), "invariant");
 784   guarantee(!g1h->mark_in_progress(), "invariant");
 785 }
 786 
 787 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 788 public:
 789   bool doHeapRegion(HeapRegion* r) {
 790     if (!r->continuesHumongous()) {
 791       r->note_start_of_marking(true);
 792     }
 793     return false;
 794   }
 795 };
 796 
 797 void ConcurrentMark::checkpointRootsInitialPre() {
 798   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 799   G1CollectorPolicy* g1p = g1h->g1_policy();
 800 
 801   _has_aborted = false;
 802 
 803 #ifndef PRODUCT
 804   if (G1PrintReachableAtInitialMark) {
 805     print_reachable("at-cycle-start",
 806                     VerifyOption_G1UsePrevMarking, true /* all */);
 807   }
 808 #endif
 809 
 810   // Initialise marking structures. This has to be done in a STW phase.
 811   reset();
 812 }
 813 
 814 
 815 void ConcurrentMark::checkpointRootsInitialPost() {
 816   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 817 
 818   // If we force an overflow during remark, the remark operation will
 819   // actually abort and we'll restart concurrent marking. If we always
 820   // force an oveflow during remark we'll never actually complete the
 821   // marking phase. So, we initilize this here, at the start of the
 822   // cycle, so that at the remaining overflow number will decrease at
 823   // every remark and we'll eventually not need to cause one.
 824   force_overflow_stw()->init();
 825 
 826   // For each region note start of marking.
 827   NoteStartOfMarkHRClosure startcl;
 828   g1h->heap_region_iterate(&startcl);
 829 
 830   // Start Concurrent Marking weak-reference discovery.
 831   ReferenceProcessor* rp = g1h->ref_processor_cm();
 832   // enable ("weak") refs discovery
 833   rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
 834   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 835 
 836   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 837   // This is the start of  the marking cycle, we're expected all
 838   // threads to have SATB queues with active set to false.
 839   satb_mq_set.set_active_all_threads(true, /* new active value */
 840                                      false /* expected_active */);
 841 
 842   // update_g1_committed() will be called at the end of an evac pause
 843   // when marking is on. So, it's also called at the end of the
 844   // initial-mark pause to update the heap end, if the heap expands
 845   // during it. No need to call it here.
 846 }
 847 
 848 /*
 849  * Notice that in the next two methods, we actually leave the STS
 850  * during the barrier sync and join it immediately afterwards. If we
 851  * do not do this, the following deadlock can occur: one thread could
 852  * be in the barrier sync code, waiting for the other thread to also
 853  * sync up, whereas another one could be trying to yield, while also
 854  * waiting for the other threads to sync up too.
 855  *
 856  * Note, however, that this code is also used during remark and in
 857  * this case we should not attempt to leave / enter the STS, otherwise
 858  * we'll either hit an asseert (debug / fastdebug) or deadlock
 859  * (product). So we should only leave / enter the STS if we are
 860  * operating concurrently.
 861  *
 862  * Because the thread that does the sync barrier has left the STS, it
 863  * is possible to be suspended for a Full GC or an evacuation pause
 864  * could occur. This is actually safe, since the entering the sync
 865  * barrier is one of the last things do_marking_step() does, and it
 866  * doesn't manipulate any data structures afterwards.
 867  */
 868 
 869 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
 870   if (verbose_low()) {
 871     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
 872   }
 873 
 874   if (concurrent()) {
 875     ConcurrentGCThread::stsLeave();
 876   }
 877   _first_overflow_barrier_sync.enter();
 878   if (concurrent()) {
 879     ConcurrentGCThread::stsJoin();
 880   }
 881   // at this point everyone should have synced up and not be doing any
 882   // more work
 883 
 884   if (verbose_low()) {
 885     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
 886   }
 887 
 888   // let task 0 do this
 889   if (task_num == 0) {
 890     // task 0 is responsible for clearing the global data structures
 891     // We should be here because of an overflow. During STW we should
 892     // not clear the overflow flag since we rely on it being true when
 893     // we exit this method to abort the pause and restart concurent
 894     // marking.
 895     clear_marking_state(concurrent() /* clear_overflow */);
 896     force_overflow()->update();
 897 
 898     if (PrintGC) {
 899       gclog_or_tty->date_stamp(PrintGCDateStamps);
 900       gclog_or_tty->stamp(PrintGCTimeStamps);
 901       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 902     }
 903   }
 904 
 905   // after this, each task should reset its own data structures then
 906   // then go into the second barrier
 907 }
 908 
 909 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
 910   if (verbose_low()) {
 911     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
 912   }
 913 
 914   if (concurrent()) {
 915     ConcurrentGCThread::stsLeave();
 916   }
 917   _second_overflow_barrier_sync.enter();
 918   if (concurrent()) {
 919     ConcurrentGCThread::stsJoin();
 920   }
 921   // at this point everything should be re-initialised and ready to go
 922 
 923   if (verbose_low()) {
 924     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
 925   }
 926 }
 927 
 928 #ifndef PRODUCT
 929 void ForceOverflowSettings::init() {
 930   _num_remaining = G1ConcMarkForceOverflow;
 931   _force = false;
 932   update();
 933 }
 934 
 935 void ForceOverflowSettings::update() {
 936   if (_num_remaining > 0) {
 937     _num_remaining -= 1;
 938     _force = true;
 939   } else {
 940     _force = false;
 941   }
 942 }
 943 
 944 bool ForceOverflowSettings::should_force() {
 945   if (_force) {
 946     _force = false;
 947     return true;
 948   } else {
 949     return false;
 950   }
 951 }
 952 #endif // !PRODUCT
 953 
 954 void ConcurrentMark::grayRoot(oop p) {
 955   HeapWord* addr = (HeapWord*) p;
 956   // We can't really check against _heap_start and _heap_end, since it
 957   // is possible during an evacuation pause with piggy-backed
 958   // initial-mark that the committed space is expanded during the
 959   // pause without CM observing this change. So the assertions below
 960   // is a bit conservative; but better than nothing.
 961   assert(_g1h->g1_committed().contains(addr),
 962          "address should be within the heap bounds");
 963 
 964   if (!_nextMarkBitMap->isMarked(addr)) {
 965     _nextMarkBitMap->parMark(addr);
 966   }
 967 }
 968 
 969 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
 970   // The objects on the region have already been marked "in bulk" by
 971   // the caller. We only need to decide whether to push the region on
 972   // the region stack or not.
 973 
 974   if (!concurrent_marking_in_progress() || !_should_gray_objects) {
 975     // We're done with marking and waiting for remark. We do not need to
 976     // push anything else on the region stack.
 977     return;
 978   }
 979 
 980   HeapWord* finger = _finger;
 981 
 982   if (verbose_low()) {
 983     gclog_or_tty->print_cr("[global] attempting to push "
 984                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
 985                            PTR_FORMAT, mr.start(), mr.end(), finger);
 986   }
 987 
 988   if (mr.start() < finger) {
 989     // The finger is always heap region aligned and it is not possible
 990     // for mr to span heap regions.
 991     assert(mr.end() <= finger, "invariant");
 992 
 993     // Separated the asserts so that we know which one fires.
 994     assert(mr.start() <= mr.end(),
 995            "region boundaries should fall within the committed space");
 996     assert(_heap_start <= mr.start(),
 997            "region boundaries should fall within the committed space");
 998     assert(mr.end() <= _heap_end,
 999            "region boundaries should fall within the committed space");
1000     if (verbose_low()) {
1001       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
1002                              "below the finger, pushing it",
1003                              mr.start(), mr.end());
1004     }
1005 
1006     if (!region_stack_push_lock_free(mr)) {
1007       if (verbose_low()) {
1008         gclog_or_tty->print_cr("[global] region stack has overflown.");
1009       }
1010     }
1011   }
1012 }
1013 
1014 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1015   // The object is not marked by the caller. We need to at least mark
1016   // it and maybe push in on the stack.
1017 
1018   HeapWord* addr = (HeapWord*)p;
1019   if (!_nextMarkBitMap->isMarked(addr)) {
1020     // We definitely need to mark it, irrespective whether we bail out
1021     // because we're done with marking.
1022     if (_nextMarkBitMap->parMark(addr)) {
1023       if (!concurrent_marking_in_progress() || !_should_gray_objects) {
1024         // If we're done with concurrent marking and we're waiting for
1025         // remark, then we're not pushing anything on the stack.
1026         return;
1027       }
1028 
1029       // No OrderAccess:store_load() is needed. It is implicit in the
1030       // CAS done in parMark(addr) above
1031       HeapWord* finger = _finger;
1032 
1033       if (addr < finger) {
1034         if (!mark_stack_push(oop(addr))) {
1035           if (verbose_low()) {
1036             gclog_or_tty->print_cr("[global] global stack overflow "
1037                                    "during parMark");
1038           }
1039         }
1040       }
1041     }
1042   }
1043 }
1044 
1045 class CMConcurrentMarkingTask: public AbstractGangTask {
1046 private:
1047   ConcurrentMark*       _cm;
1048   ConcurrentMarkThread* _cmt;
1049 
1050 public:
1051   void work(int worker_i) {
1052     assert(Thread::current()->is_ConcurrentGC_thread(),
1053            "this should only be done by a conc GC thread");
1054     ResourceMark rm;
1055 
1056     double start_vtime = os::elapsedVTime();
1057 
1058     ConcurrentGCThread::stsJoin();
1059 
1060     assert((size_t) worker_i < _cm->active_tasks(), "invariant");
1061     CMTask* the_task = _cm->task(worker_i);
1062     the_task->record_start_time();
1063     if (!_cm->has_aborted()) {
1064       do {
1065         double start_vtime_sec = os::elapsedVTime();
1066         double start_time_sec = os::elapsedTime();
1067         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1068 
1069         the_task->do_marking_step(mark_step_duration_ms,
1070                                   true /* do_stealing    */,
1071                                   true /* do_termination */);
1072 
1073         double end_time_sec = os::elapsedTime();
1074         double end_vtime_sec = os::elapsedVTime();
1075         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1076         double elapsed_time_sec = end_time_sec - start_time_sec;
1077         _cm->clear_has_overflown();
1078 
1079         bool ret = _cm->do_yield_check(worker_i);
1080 
1081         jlong sleep_time_ms;
1082         if (!_cm->has_aborted() && the_task->has_aborted()) {
1083           sleep_time_ms =
1084             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1085           ConcurrentGCThread::stsLeave();
1086           os::sleep(Thread::current(), sleep_time_ms, false);
1087           ConcurrentGCThread::stsJoin();
1088         }
1089         double end_time2_sec = os::elapsedTime();
1090         double elapsed_time2_sec = end_time2_sec - start_time_sec;
1091 
1092 #if 0
1093           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1094                                  "overhead %1.4lf",
1095                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1096                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
1097           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1098                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1099 #endif
1100       } while (!_cm->has_aborted() && the_task->has_aborted());
1101     }
1102     the_task->record_end_time();
1103     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1104 
1105     ConcurrentGCThread::stsLeave();
1106 
1107     double end_vtime = os::elapsedVTime();
1108     _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1109   }
1110 
1111   CMConcurrentMarkingTask(ConcurrentMark* cm,
1112                           ConcurrentMarkThread* cmt) :
1113       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1114 
1115   ~CMConcurrentMarkingTask() { }
1116 };
1117 
1118 // Calculates the number of active workers for a concurrent
1119 // phase.
1120 int ConcurrentMark::calc_parallel_marking_threads() {
1121 
1122   size_t n_conc_workers;
1123   if (!G1CollectedHeap::use_parallel_gc_threads()) {
1124     n_conc_workers = 1;
1125   } else {
1126     if (!UseDynamicNumberOfGCThreads ||
1127         (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1128          !ForceDynamicNumberOfGCThreads)) {
1129       n_conc_workers = max_parallel_marking_threads();
1130     } else {
1131       n_conc_workers =
1132         AdaptiveSizePolicy::calc_default_active_workers(
1133                                      max_parallel_marking_threads(),
1134                                      1, /* Minimum workers */
1135                                      parallel_marking_threads(),
1136                                      Threads::number_of_non_daemon_threads());
1137       // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1138       // that scaling has already gone into "_max_parallel_marking_threads".
1139     }
1140   }
1141   assert(n_conc_workers > 0, "Always need at least 1");
1142   return (int) MAX2(n_conc_workers, (size_t) 1);
1143 }
1144 
1145 void ConcurrentMark::markFromRoots() {
1146   // we might be tempted to assert that:
1147   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1148   //        "inconsistent argument?");
1149   // However that wouldn't be right, because it's possible that
1150   // a safepoint is indeed in progress as a younger generation
1151   // stop-the-world GC happens even as we mark in this generation.
1152 
1153   _restart_for_overflow = false;
1154 
1155   // Parallel task terminator is set in "set_phase()".
1156   force_overflow_conc()->init();
1157 
1158   // _g1h has _n_par_threads
1159 
1160   _parallel_marking_threads = calc_parallel_marking_threads();
1161   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1162     "Maximum number of marking threads exceeded");
1163   _parallel_workers->set_active_workers((int)_parallel_marking_threads);
1164   // Don't set _n_par_threads because it affects MT in proceess_strong_roots()
1165   // and the decisions on that MT processing is made elsewhere.
1166 
1167   assert( _parallel_workers->active_workers() > 0, "Should have been set");
1168   set_phase(_parallel_workers->active_workers(), true /* concurrent */);
1169 
1170   CMConcurrentMarkingTask markingTask(this, cmThread());
1171   if (parallel_marking_threads() > 0) {
1172     _parallel_workers->run_task(&markingTask);
1173   } else {
1174     markingTask.work(0);
1175   }
1176   print_stats();
1177 }
1178 
1179 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1180   // world is stopped at this checkpoint
1181   assert(SafepointSynchronize::is_at_safepoint(),
1182          "world should be stopped");
1183 
1184   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1185 
1186   // If a full collection has happened, we shouldn't do this.
1187   if (has_aborted()) {
1188     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1189     return;
1190   }
1191 
1192   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1193 
1194   if (VerifyDuringGC) {
1195     HandleMark hm;  // handle scope
1196     gclog_or_tty->print(" VerifyDuringGC:(before)");
1197     Universe::heap()->prepare_for_verify();
1198     Universe::verify(/* allow dirty */ true,
1199                      /* silent      */ false,
1200                      /* option      */ VerifyOption_G1UsePrevMarking);
1201   }
1202 
1203   G1CollectorPolicy* g1p = g1h->g1_policy();
1204   g1p->record_concurrent_mark_remark_start();
1205 
1206   double start = os::elapsedTime();
1207 
1208   checkpointRootsFinalWork();
1209 
1210   double mark_work_end = os::elapsedTime();
1211 
1212   weakRefsWork(clear_all_soft_refs);
1213 
1214   if (has_overflown()) {
1215     // Oops.  We overflowed.  Restart concurrent marking.
1216     _restart_for_overflow = true;
1217     // Clear the flag. We do not need it any more.
1218     clear_has_overflown();
1219     if (G1TraceMarkStackOverflow) {
1220       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1221     }
1222   } else {
1223     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1224     // We're done with marking.
1225     // This is the end of  the marking cycle, we're expected all
1226     // threads to have SATB queues with active set to true.
1227     satb_mq_set.set_active_all_threads(false, /* new active value */
1228                                        true /* expected_active */);
1229 
1230     if (VerifyDuringGC) {
1231 
1232       HandleMark hm;  // handle scope
1233       gclog_or_tty->print(" VerifyDuringGC:(after)");
1234       Universe::heap()->prepare_for_verify();
1235       Universe::verify(/* allow dirty */ true,
1236                        /* silent      */ false,
1237                        /* option      */ VerifyOption_G1UseNextMarking);
1238     }
1239     assert(!restart_for_overflow(), "sanity");
1240   }
1241 
1242   // Reset the marking state if marking completed
1243   if (!restart_for_overflow()) {
1244     set_non_marking_state();
1245   }
1246 
1247 #if VERIFY_OBJS_PROCESSED
1248   _scan_obj_cl.objs_processed = 0;
1249   ThreadLocalObjQueue::objs_enqueued = 0;
1250 #endif
1251 
1252   // Statistics
1253   double now = os::elapsedTime();
1254   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1255   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1256   _remark_times.add((now - start) * 1000.0);
1257 
1258   g1p->record_concurrent_mark_remark_end();
1259 }
1260 
1261 #define CARD_BM_TEST_MODE 0
1262 
1263 class CalcLiveObjectsClosure: public HeapRegionClosure {
1264 
1265   CMBitMapRO* _bm;
1266   ConcurrentMark* _cm;
1267   bool _changed;
1268   bool _yield;
1269   size_t _words_done;
1270   size_t _tot_live;
1271   size_t _tot_used;
1272   size_t _regions_done;
1273   double _start_vtime_sec;
1274 
1275   BitMap* _region_bm;
1276   BitMap* _card_bm;
1277   intptr_t _bottom_card_num;
1278   bool _final;
1279 
1280   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1281     for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1282 #if CARD_BM_TEST_MODE
1283       guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
1284 #else
1285       _card_bm->par_at_put(i - _bottom_card_num, 1);
1286 #endif
1287     }
1288   }
1289 
1290 public:
1291   CalcLiveObjectsClosure(bool final,
1292                          CMBitMapRO *bm, ConcurrentMark *cm,
1293                          BitMap* region_bm, BitMap* card_bm) :
1294     _bm(bm), _cm(cm), _changed(false), _yield(true),
1295     _words_done(0), _tot_live(0), _tot_used(0),
1296     _region_bm(region_bm), _card_bm(card_bm),_final(final),
1297     _regions_done(0), _start_vtime_sec(0.0)
1298   {
1299     _bottom_card_num =
1300       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1301                CardTableModRefBS::card_shift);
1302   }
1303 
1304   // It takes a region that's not empty (i.e., it has at least one
1305   // live object in it and sets its corresponding bit on the region
1306   // bitmap to 1. If the region is "starts humongous" it will also set
1307   // to 1 the bits on the region bitmap that correspond to its
1308   // associated "continues humongous" regions.
1309   void set_bit_for_region(HeapRegion* hr) {
1310     assert(!hr->continuesHumongous(), "should have filtered those out");
1311 
1312     size_t index = hr->hrs_index();
1313     if (!hr->startsHumongous()) {
1314       // Normal (non-humongous) case: just set the bit.
1315       _region_bm->par_at_put((BitMap::idx_t) index, true);
1316     } else {
1317       // Starts humongous case: calculate how many regions are part of
1318       // this humongous region and then set the bit range. It might
1319       // have been a bit more efficient to look at the object that
1320       // spans these humongous regions to calculate their number from
1321       // the object's size. However, it's a good idea to calculate
1322       // this based on the metadata itself, and not the region
1323       // contents, so that this code is not aware of what goes into
1324       // the humongous regions (in case this changes in the future).
1325       G1CollectedHeap* g1h = G1CollectedHeap::heap();
1326       size_t end_index = index + 1;
1327       while (end_index < g1h->n_regions()) {
1328         HeapRegion* chr = g1h->region_at(end_index);
1329         if (!chr->continuesHumongous()) break;
1330         end_index += 1;
1331       }
1332       _region_bm->par_at_put_range((BitMap::idx_t) index,
1333                                    (BitMap::idx_t) end_index, true);
1334     }
1335   }
1336 
1337   bool doHeapRegion(HeapRegion* hr) {
1338     if (!_final && _regions_done == 0) {
1339       _start_vtime_sec = os::elapsedVTime();
1340     }
1341 
1342     if (hr->continuesHumongous()) {
1343       // We will ignore these here and process them when their
1344       // associated "starts humongous" region is processed (see
1345       // set_bit_for_heap_region()). Note that we cannot rely on their
1346       // associated "starts humongous" region to have their bit set to
1347       // 1 since, due to the region chunking in the parallel region
1348       // iteration, a "continues humongous" region might be visited
1349       // before its associated "starts humongous".
1350       return false;
1351     }
1352 
1353     HeapWord* nextTop = hr->next_top_at_mark_start();
1354     HeapWord* start   = hr->top_at_conc_mark_count();
1355     assert(hr->bottom() <= start && start <= hr->end() &&
1356            hr->bottom() <= nextTop && nextTop <= hr->end() &&
1357            start <= nextTop,
1358            "Preconditions.");
1359     // Otherwise, record the number of word's we'll examine.
1360     size_t words_done = (nextTop - start);
1361     // Find the first marked object at or after "start".
1362     start = _bm->getNextMarkedWordAddress(start, nextTop);
1363     size_t marked_bytes = 0;
1364 
1365     // Below, the term "card num" means the result of shifting an address
1366     // by the card shift -- address 0 corresponds to card number 0.  One
1367     // must subtract the card num of the bottom of the heap to obtain a
1368     // card table index.
1369     // The first card num of the sequence of live cards currently being
1370     // constructed.  -1 ==> no sequence.
1371     intptr_t start_card_num = -1;
1372     // The last card num of the sequence of live cards currently being
1373     // constructed.  -1 ==> no sequence.
1374     intptr_t last_card_num = -1;
1375 
1376     while (start < nextTop) {
1377       if (_yield && _cm->do_yield_check()) {
1378         // We yielded.  It might be for a full collection, in which case
1379         // all bets are off; terminate the traversal.
1380         if (_cm->has_aborted()) {
1381           _changed = false;
1382           return true;
1383         } else {
1384           // Otherwise, it might be a collection pause, and the region
1385           // we're looking at might be in the collection set.  We'll
1386           // abandon this region.
1387           return false;
1388         }
1389       }
1390       oop obj = oop(start);
1391       int obj_sz = obj->size();
1392       // The card num of the start of the current object.
1393       intptr_t obj_card_num =
1394         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1395 
1396       HeapWord* obj_last = start + obj_sz - 1;
1397       intptr_t obj_last_card_num =
1398         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1399 
1400       if (obj_card_num != last_card_num) {
1401         if (start_card_num == -1) {
1402           assert(last_card_num == -1, "Both or neither.");
1403           start_card_num = obj_card_num;
1404         } else {
1405           assert(last_card_num != -1, "Both or neither.");
1406           assert(obj_card_num >= last_card_num, "Inv");
1407           if ((obj_card_num - last_card_num) > 1) {
1408             // Mark the last run, and start a new one.
1409             mark_card_num_range(start_card_num, last_card_num);
1410             start_card_num = obj_card_num;
1411           }
1412         }
1413 #if CARD_BM_TEST_MODE
1414         /*
1415         gclog_or_tty->print_cr("Setting bits from %d/%d.",
1416                                obj_card_num - _bottom_card_num,
1417                                obj_last_card_num - _bottom_card_num);
1418         */
1419         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1420           _card_bm->par_at_put(j - _bottom_card_num, 1);
1421         }
1422 #endif
1423       }
1424       // In any case, we set the last card num.
1425       last_card_num = obj_last_card_num;
1426 
1427       marked_bytes += (size_t)obj_sz * HeapWordSize;
1428       // Find the next marked object after this one.
1429       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1430       _changed = true;
1431     }
1432     // Handle the last range, if any.
1433     if (start_card_num != -1) {
1434       mark_card_num_range(start_card_num, last_card_num);
1435     }
1436     if (_final) {
1437       // Mark the allocated-since-marking portion...
1438       HeapWord* tp = hr->top();
1439       if (nextTop < tp) {
1440         start_card_num =
1441           intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1442         last_card_num =
1443           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1444         mark_card_num_range(start_card_num, last_card_num);
1445         // This definitely means the region has live objects.
1446         set_bit_for_region(hr);
1447       }
1448     }
1449 
1450     hr->add_to_marked_bytes(marked_bytes);
1451     // Update the live region bitmap.
1452     if (marked_bytes > 0) {
1453       set_bit_for_region(hr);
1454     }
1455     hr->set_top_at_conc_mark_count(nextTop);
1456     _tot_live += hr->next_live_bytes();
1457     _tot_used += hr->used();
1458     _words_done = words_done;
1459 
1460     if (!_final) {
1461       ++_regions_done;
1462       if (_regions_done % 10 == 0) {
1463         double end_vtime_sec = os::elapsedVTime();
1464         double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1465         if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1466           jlong sleep_time_ms =
1467             (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1468           os::sleep(Thread::current(), sleep_time_ms, false);
1469           _start_vtime_sec = end_vtime_sec;
1470         }
1471       }
1472     }
1473 
1474     return false;
1475   }
1476 
1477   bool changed() { return _changed;  }
1478   void reset()   { _changed = false; _words_done = 0; }
1479   void no_yield() { _yield = false; }
1480   size_t words_done() { return _words_done; }
1481   size_t tot_live() { return _tot_live; }
1482   size_t tot_used() { return _tot_used; }
1483 };
1484 
1485 
1486 void ConcurrentMark::calcDesiredRegions() {
1487   _region_bm.clear();
1488   _card_bm.clear();
1489   CalcLiveObjectsClosure calccl(false /*final*/,
1490                                 nextMarkBitMap(), this,
1491                                 &_region_bm, &_card_bm);
1492   G1CollectedHeap *g1h = G1CollectedHeap::heap();
1493   g1h->heap_region_iterate(&calccl);
1494 
1495   do {
1496     calccl.reset();
1497     g1h->heap_region_iterate(&calccl);
1498   } while (calccl.changed());
1499 }
1500 
1501 class G1ParFinalCountTask: public AbstractGangTask {
1502 protected:
1503   G1CollectedHeap* _g1h;
1504   CMBitMap* _bm;
1505   size_t _n_workers;
1506   size_t *_live_bytes;
1507   size_t *_used_bytes;
1508   BitMap* _region_bm;
1509   BitMap* _card_bm;
1510 public:
1511   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1512                       BitMap* region_bm, BitMap* card_bm)
1513     : AbstractGangTask("G1 final counting"), _g1h(g1h),
1514     _bm(bm), _region_bm(region_bm), _card_bm(card_bm),
1515     _n_workers(0)
1516   {
1517     // Use the value already set as the number of active threads
1518     // in the call to run_task().  Needed for the allocation of
1519     // _live_bytes and _used_bytes.
1520     if (G1CollectedHeap::use_parallel_gc_threads()) {
1521       assert( _g1h->workers()->active_workers() > 0,
1522         "Should have been previously set");
1523       _n_workers = _g1h->workers()->active_workers();
1524     } else {
1525       _n_workers = 1;
1526     }
1527 
1528     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1529     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1530   }
1531 
1532   ~G1ParFinalCountTask() {
1533     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1534     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1535   }
1536 
1537   void work(int i) {
1538     CalcLiveObjectsClosure calccl(true /*final*/,
1539                                   _bm, _g1h->concurrent_mark(),
1540                                   _region_bm, _card_bm);
1541     calccl.no_yield();
1542     if (G1CollectedHeap::use_parallel_gc_threads()) {
1543       _g1h->heap_region_par_iterate_chunked(&calccl, i,
1544                                             (int) _n_workers,
1545                                             HeapRegion::FinalCountClaimValue);
1546     } else {
1547       _g1h->heap_region_iterate(&calccl);
1548     }
1549     assert(calccl.complete(), "Shouldn't have yielded!");
1550 
1551     assert((size_t) i < _n_workers, "invariant");
1552     _live_bytes[i] = calccl.tot_live();
1553     _used_bytes[i] = calccl.tot_used();
1554   }
1555   size_t live_bytes()  {
1556     size_t live_bytes = 0;
1557     for (size_t i = 0; i < _n_workers; ++i)
1558       live_bytes += _live_bytes[i];
1559     return live_bytes;
1560   }
1561   size_t used_bytes()  {
1562     size_t used_bytes = 0;
1563     for (size_t i = 0; i < _n_workers; ++i)
1564       used_bytes += _used_bytes[i];
1565     return used_bytes;
1566   }
1567 };
1568 
1569 class G1ParNoteEndTask;
1570 
1571 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1572   G1CollectedHeap* _g1;
1573   int _worker_num;
1574   size_t _max_live_bytes;
1575   size_t _regions_claimed;
1576   size_t _freed_bytes;
1577   FreeRegionList* _local_cleanup_list;
1578   OldRegionSet* _old_proxy_set;
1579   HumongousRegionSet* _humongous_proxy_set;
1580   HRRSCleanupTask* _hrrs_cleanup_task;
1581   double _claimed_region_time;
1582   double _max_region_time;
1583 
1584 public:
1585   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1586                              int worker_num,
1587                              FreeRegionList* local_cleanup_list,
1588                              OldRegionSet* old_proxy_set,
1589                              HumongousRegionSet* humongous_proxy_set,
1590                              HRRSCleanupTask* hrrs_cleanup_task) :
1591     _g1(g1), _worker_num(worker_num),
1592     _max_live_bytes(0), _regions_claimed(0),
1593     _freed_bytes(0),
1594     _claimed_region_time(0.0), _max_region_time(0.0),
1595     _local_cleanup_list(local_cleanup_list),
1596     _old_proxy_set(old_proxy_set),
1597     _humongous_proxy_set(humongous_proxy_set),
1598     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1599 
1600   size_t freed_bytes() { return _freed_bytes; }
1601 
1602   bool doHeapRegion(HeapRegion *hr) {
1603     // We use a claim value of zero here because all regions
1604     // were claimed with value 1 in the FinalCount task.
1605     hr->reset_gc_time_stamp();
1606     if (!hr->continuesHumongous()) {
1607       double start = os::elapsedTime();
1608       _regions_claimed++;
1609       hr->note_end_of_marking();
1610       _max_live_bytes += hr->max_live_bytes();
1611       _g1->free_region_if_empty(hr,
1612                                 &_freed_bytes,
1613                                 _local_cleanup_list,
1614                                 _old_proxy_set,
1615                                 _humongous_proxy_set,
1616                                 _hrrs_cleanup_task,
1617                                 true /* par */);
1618       double region_time = (os::elapsedTime() - start);
1619       _claimed_region_time += region_time;
1620       if (region_time > _max_region_time) {
1621         _max_region_time = region_time;
1622       }
1623     }
1624     return false;
1625   }
1626 
1627   size_t max_live_bytes() { return _max_live_bytes; }
1628   size_t regions_claimed() { return _regions_claimed; }
1629   double claimed_region_time_sec() { return _claimed_region_time; }
1630   double max_region_time_sec() { return _max_region_time; }
1631 };
1632 
1633 class G1ParNoteEndTask: public AbstractGangTask {
1634   friend class G1NoteEndOfConcMarkClosure;
1635 
1636 protected:
1637   G1CollectedHeap* _g1h;
1638   size_t _max_live_bytes;
1639   size_t _freed_bytes;
1640   FreeRegionList* _cleanup_list;
1641 
1642 public:
1643   G1ParNoteEndTask(G1CollectedHeap* g1h,
1644                    FreeRegionList* cleanup_list) :
1645     AbstractGangTask("G1 note end"), _g1h(g1h),
1646     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1647 
1648   void work(int i) {
1649     double start = os::elapsedTime();
1650     FreeRegionList local_cleanup_list("Local Cleanup List");
1651     OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
1652     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1653     HRRSCleanupTask hrrs_cleanup_task;
1654     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
1655                                            &old_proxy_set,
1656                                            &humongous_proxy_set,
1657                                            &hrrs_cleanup_task);
1658     if (G1CollectedHeap::use_parallel_gc_threads()) {
1659       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1660                                             _g1h->workers()->active_workers(),
1661                                             HeapRegion::NoteEndClaimValue);
1662     } else {
1663       _g1h->heap_region_iterate(&g1_note_end);
1664     }
1665     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1666 
1667     // Now update the lists
1668     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1669                                             NULL /* free_list */,
1670                                             &old_proxy_set,
1671                                             &humongous_proxy_set,
1672                                             true /* par */);
1673     {
1674       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1675       _max_live_bytes += g1_note_end.max_live_bytes();
1676       _freed_bytes += g1_note_end.freed_bytes();
1677 
1678       // If we iterate over the global cleanup list at the end of
1679       // cleanup to do this printing we will not guarantee to only
1680       // generate output for the newly-reclaimed regions (the list
1681       // might not be empty at the beginning of cleanup; we might
1682       // still be working on its previous contents). So we do the
1683       // printing here, before we append the new regions to the global
1684       // cleanup list.
1685 
1686       G1HRPrinter* hr_printer = _g1h->hr_printer();
1687       if (hr_printer->is_active()) {
1688         HeapRegionLinkedListIterator iter(&local_cleanup_list);
1689         while (iter.more_available()) {
1690           HeapRegion* hr = iter.get_next();
1691           hr_printer->cleanup(hr);
1692         }
1693       }
1694 
1695       _cleanup_list->add_as_tail(&local_cleanup_list);
1696       assert(local_cleanup_list.is_empty(), "post-condition");
1697 
1698       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1699     }
1700     double end = os::elapsedTime();
1701     if (G1PrintParCleanupStats) {
1702       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1703                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1704                           i, start, end, (end-start)*1000.0,
1705                           g1_note_end.regions_claimed(),
1706                           g1_note_end.claimed_region_time_sec()*1000.0,
1707                           g1_note_end.max_region_time_sec()*1000.0);
1708     }
1709   }
1710   size_t max_live_bytes() { return _max_live_bytes; }
1711   size_t freed_bytes() { return _freed_bytes; }
1712 };
1713 
1714 class G1ParScrubRemSetTask: public AbstractGangTask {
1715 protected:
1716   G1RemSet* _g1rs;
1717   BitMap* _region_bm;
1718   BitMap* _card_bm;
1719 public:
1720   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1721                        BitMap* region_bm, BitMap* card_bm) :
1722     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1723     _region_bm(region_bm), _card_bm(card_bm)
1724   {}
1725 
1726   void work(int i) {
1727     if (G1CollectedHeap::use_parallel_gc_threads()) {
1728       _g1rs->scrub_par(_region_bm, _card_bm, i,
1729                        HeapRegion::ScrubRemSetClaimValue);
1730     } else {
1731       _g1rs->scrub(_region_bm, _card_bm);
1732     }
1733   }
1734 
1735 };
1736 
1737 void ConcurrentMark::cleanup() {
1738   // world is stopped at this checkpoint
1739   assert(SafepointSynchronize::is_at_safepoint(),
1740          "world should be stopped");
1741   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1742 
1743   // If a full collection has happened, we shouldn't do this.
1744   if (has_aborted()) {
1745     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1746     return;
1747   }
1748 
1749   HRSPhaseSetter x(HRSPhaseCleanup);
1750   g1h->verify_region_sets_optional();
1751 
1752   if (VerifyDuringGC) {
1753     HandleMark hm;  // handle scope
1754     gclog_or_tty->print(" VerifyDuringGC:(before)");
1755     Universe::heap()->prepare_for_verify();
1756     Universe::verify(/* allow dirty */ true,
1757                      /* silent      */ false,
1758                      /* option      */ VerifyOption_G1UsePrevMarking);
1759   }
1760 
1761   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1762   g1p->record_concurrent_mark_cleanup_start();
1763 
1764   double start = os::elapsedTime();
1765 
1766   HeapRegionRemSet::reset_for_cleanup_tasks();
1767 
1768   g1h->set_par_threads();
1769   size_t n_workers = g1h->n_par_threads();
1770 
1771   // Do counting once more with the world stopped for good measure.
1772   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1773                                         &_region_bm, &_card_bm);
1774   if (G1CollectedHeap::use_parallel_gc_threads()) {
1775     assert(g1h->check_heap_region_claim_values(
1776                                                HeapRegion::InitialClaimValue),
1777            "sanity check");
1778 
1779     assert(g1h->n_par_threads() == (int) n_workers,
1780       "Should not have been reset");
1781     g1h->workers()->run_task(&g1_par_count_task);
1782     // Done with the parallel phase so reset to 0.
1783     g1h->set_par_threads(0);
1784 
1785     assert(g1h->check_heap_region_claim_values(
1786                                              HeapRegion::FinalCountClaimValue),
1787            "sanity check");
1788   } else {
1789     g1_par_count_task.work(0);
1790   }
1791 
1792   size_t known_garbage_bytes =
1793     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1794   g1p->set_known_garbage_bytes(known_garbage_bytes);
1795 
1796   size_t start_used_bytes = g1h->used();
1797   _at_least_one_mark_complete = true;
1798   g1h->set_marking_complete();
1799 
1800   ergo_verbose4(ErgoConcCycles,
1801            "finish cleanup",
1802            ergo_format_byte("occupancy")
1803            ergo_format_byte("capacity")
1804            ergo_format_byte_perc("known garbage"),
1805            start_used_bytes, g1h->capacity(),
1806            known_garbage_bytes,
1807            ((double) known_garbage_bytes / (double) g1h->capacity()) * 100.0);
1808 
1809   double count_end = os::elapsedTime();
1810   double this_final_counting_time = (count_end - start);
1811   if (G1PrintParCleanupStats) {
1812     gclog_or_tty->print_cr("Cleanup:");
1813     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
1814                            this_final_counting_time*1000.0);
1815   }
1816   _total_counting_time += this_final_counting_time;
1817 
1818   if (G1PrintRegionLivenessInfo) {
1819     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1820     _g1h->heap_region_iterate(&cl);
1821   }
1822 
1823   // Install newly created mark bitMap as "prev".
1824   swapMarkBitMaps();
1825 
1826   g1h->reset_gc_time_stamp();
1827 
1828   // Note end of marking in all heap regions.
1829   double note_end_start = os::elapsedTime();
1830   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1831   if (G1CollectedHeap::use_parallel_gc_threads()) {
1832     g1h->set_par_threads((int)n_workers);
1833     g1h->workers()->run_task(&g1_par_note_end_task);
1834     g1h->set_par_threads(0);
1835 
1836     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1837            "sanity check");
1838   } else {
1839     g1_par_note_end_task.work(0);
1840   }
1841 
1842   if (!cleanup_list_is_empty()) {
1843     // The cleanup list is not empty, so we'll have to process it
1844     // concurrently. Notify anyone else that might be wanting free
1845     // regions that there will be more free regions coming soon.
1846     g1h->set_free_regions_coming();
1847   }
1848   double note_end_end = os::elapsedTime();
1849   if (G1PrintParCleanupStats) {
1850     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
1851                            (note_end_end - note_end_start)*1000.0);
1852   }
1853 
1854 
1855   // call below, since it affects the metric by which we sort the heap
1856   // regions.
1857   if (G1ScrubRemSets) {
1858     double rs_scrub_start = os::elapsedTime();
1859     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1860     if (G1CollectedHeap::use_parallel_gc_threads()) {
1861       g1h->set_par_threads((int)n_workers);
1862       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1863       g1h->set_par_threads(0);
1864 
1865       assert(g1h->check_heap_region_claim_values(
1866                                             HeapRegion::ScrubRemSetClaimValue),
1867              "sanity check");
1868     } else {
1869       g1_par_scrub_rs_task.work(0);
1870     }
1871 
1872     double rs_scrub_end = os::elapsedTime();
1873     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1874     _total_rs_scrub_time += this_rs_scrub_time;
1875   }
1876 
1877   // this will also free any regions totally full of garbage objects,
1878   // and sort the regions.
1879   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
1880 
1881   // Statistics.
1882   double end = os::elapsedTime();
1883   _cleanup_times.add((end - start) * 1000.0);
1884 
1885   // G1CollectedHeap::heap()->print();
1886   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1887   // G1CollectedHeap::heap()->get_gc_time_stamp());
1888 
1889   if (PrintGC || PrintGCDetails) {
1890     g1h->print_size_transition(gclog_or_tty,
1891                                start_used_bytes,
1892                                g1h->used(),
1893                                g1h->capacity());
1894   }
1895 
1896   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1897   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1898 
1899   // Clean up will have freed any regions completely full of garbage.
1900   // Update the soft reference policy with the new heap occupancy.
1901   Universe::update_heap_info_at_gc();
1902 
1903   // We need to make this be a "collection" so any collection pause that
1904   // races with it goes around and waits for completeCleanup to finish.
1905   g1h->increment_total_collections();
1906 
1907   if (VerifyDuringGC) {
1908     HandleMark hm;  // handle scope
1909     gclog_or_tty->print(" VerifyDuringGC:(after)");
1910     Universe::heap()->prepare_for_verify();
1911     Universe::verify(/* allow dirty */ true,
1912                      /* silent      */ false,
1913                      /* option      */ VerifyOption_G1UsePrevMarking);
1914   }
1915 
1916   g1h->verify_region_sets_optional();
1917 }
1918 
1919 void ConcurrentMark::completeCleanup() {
1920   if (has_aborted()) return;
1921 
1922   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1923 
1924   _cleanup_list.verify_optional();
1925   FreeRegionList tmp_free_list("Tmp Free List");
1926 
1927   if (G1ConcRegionFreeingVerbose) {
1928     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1929                            "cleanup list has "SIZE_FORMAT" entries",
1930                            _cleanup_list.length());
1931   }
1932 
1933   // Noone else should be accessing the _cleanup_list at this point,
1934   // so it's not necessary to take any locks
1935   while (!_cleanup_list.is_empty()) {
1936     HeapRegion* hr = _cleanup_list.remove_head();
1937     assert(hr != NULL, "the list was not empty");
1938     hr->par_clear();
1939     tmp_free_list.add_as_tail(hr);
1940 
1941     // Instead of adding one region at a time to the secondary_free_list,
1942     // we accumulate them in the local list and move them a few at a
1943     // time. This also cuts down on the number of notify_all() calls
1944     // we do during this process. We'll also append the local list when
1945     // _cleanup_list is empty (which means we just removed the last
1946     // region from the _cleanup_list).
1947     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1948         _cleanup_list.is_empty()) {
1949       if (G1ConcRegionFreeingVerbose) {
1950         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1951                                "appending "SIZE_FORMAT" entries to the "
1952                                "secondary_free_list, clean list still has "
1953                                SIZE_FORMAT" entries",
1954                                tmp_free_list.length(),
1955                                _cleanup_list.length());
1956       }
1957 
1958       {
1959         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1960         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
1961         SecondaryFreeList_lock->notify_all();
1962       }
1963 
1964       if (G1StressConcRegionFreeing) {
1965         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1966           os::sleep(Thread::current(), (jlong) 1, false);
1967         }
1968       }
1969     }
1970   }
1971   assert(tmp_free_list.is_empty(), "post-condition");
1972 }
1973 
1974 // Support closures for reference procssing in G1
1975 
1976 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1977   HeapWord* addr = (HeapWord*)obj;
1978   return addr != NULL &&
1979          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1980 }
1981 
1982 class G1CMKeepAliveClosure: public OopClosure {
1983   G1CollectedHeap* _g1;
1984   ConcurrentMark*  _cm;
1985   CMBitMap*        _bitMap;
1986  public:
1987   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1988                        CMBitMap* bitMap) :
1989     _g1(g1), _cm(cm),
1990     _bitMap(bitMap) {}
1991 
1992   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1993   virtual void do_oop(      oop* p) { do_oop_work(p); }
1994 
1995   template <class T> void do_oop_work(T* p) {
1996     oop obj = oopDesc::load_decode_heap_oop(p);
1997     HeapWord* addr = (HeapWord*)obj;
1998 
1999     if (_cm->verbose_high()) {
2000       gclog_or_tty->print_cr("\t[0] we're looking at location "
2001                              "*"PTR_FORMAT" = "PTR_FORMAT,
2002                              p, (void*) obj);
2003     }
2004 
2005     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2006       _bitMap->mark(addr);
2007       _cm->mark_stack_push(obj);
2008     }
2009   }
2010 };
2011 
2012 class G1CMDrainMarkingStackClosure: public VoidClosure {
2013   CMMarkStack*                  _markStack;
2014   CMBitMap*                     _bitMap;
2015   G1CMKeepAliveClosure*         _oopClosure;
2016  public:
2017   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
2018                                G1CMKeepAliveClosure* oopClosure) :
2019     _bitMap(bitMap),
2020     _markStack(markStack),
2021     _oopClosure(oopClosure)
2022   {}
2023 
2024   void do_void() {
2025     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
2026   }
2027 };
2028 
2029 // 'Keep Alive' closure used by parallel reference processing.
2030 // An instance of this closure is used in the parallel reference processing
2031 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2032 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are
2033 // placed on to discovered ref lists once so we can mark and push with no
2034 // need to check whether the object has already been marked. Using the
2035 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2036 // operating on the global mark stack. This means that an individual
2037 // worker would be doing lock-free pushes while it processes its own
2038 // discovered ref list followed by drain call. If the discovered ref lists
2039 // are unbalanced then this could cause interference with the other
2040 // workers. Using a CMTask (and its embedded local data structures)
2041 // avoids that potential interference.
2042 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2043   ConcurrentMark*  _cm;
2044   CMTask*          _task;
2045   int              _ref_counter_limit;
2046   int              _ref_counter;
2047  public:
2048   G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
2049     _cm(cm), _task(task),
2050     _ref_counter_limit(G1RefProcDrainInterval) {
2051     assert(_ref_counter_limit > 0, "sanity");
2052     _ref_counter = _ref_counter_limit;
2053   }
2054 
2055   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2056   virtual void do_oop(      oop* p) { do_oop_work(p); }
2057 
2058   template <class T> void do_oop_work(T* p) {
2059     if (!_cm->has_overflown()) {
2060       oop obj = oopDesc::load_decode_heap_oop(p);
2061       if (_cm->verbose_high()) {
2062         gclog_or_tty->print_cr("\t[%d] we're looking at location "
2063                                "*"PTR_FORMAT" = "PTR_FORMAT,
2064                                _task->task_id(), p, (void*) obj);
2065       }
2066 
2067       _task->deal_with_reference(obj);
2068       _ref_counter--;
2069 
2070       if (_ref_counter == 0) {
2071         // We have dealt with _ref_counter_limit references, pushing them and objects
2072         // reachable from them on to the local stack (and possibly the global stack).
2073         // Call do_marking_step() to process these entries. We call the routine in a
2074         // loop, which we'll exit if there's nothing more to do (i.e. we're done
2075         // with the entries that we've pushed as a result of the deal_with_reference
2076         // calls above) or we overflow.
2077         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2078         // while there may still be some work to do. (See the comment at the
2079         // beginning of CMTask::do_marking_step() for those conditions - one of which
2080         // is reaching the specified time target.) It is only when
2081         // CMTask::do_marking_step() returns without setting the has_aborted() flag
2082         // that the marking has completed.
2083         do {
2084           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2085           _task->do_marking_step(mark_step_duration_ms,
2086                                  false /* do_stealing    */,
2087                                  false /* do_termination */);
2088         } while (_task->has_aborted() && !_cm->has_overflown());
2089         _ref_counter = _ref_counter_limit;
2090       }
2091     } else {
2092       if (_cm->verbose_high()) {
2093          gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2094       }
2095     }
2096   }
2097 };
2098 
2099 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2100   ConcurrentMark* _cm;
2101   CMTask* _task;
2102  public:
2103   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2104     _cm(cm), _task(task)
2105   {}
2106 
2107   void do_void() {
2108     do {
2109       if (_cm->verbose_high()) {
2110         gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2111                                _task->task_id());
2112       }
2113 
2114       // We call CMTask::do_marking_step() to completely drain the local and
2115       // global marking stacks. The routine is called in a loop, which we'll
2116       // exit if there's nothing more to do (i.e. we'completely drained the
2117       // entries that were pushed as a result of applying the
2118       // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2119       // lists above) or we overflow the global marking stack.
2120       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2121       // while there may still be some work to do. (See the comment at the
2122       // beginning of CMTask::do_marking_step() for those conditions - one of which
2123       // is reaching the specified time target.) It is only when
2124       // CMTask::do_marking_step() returns without setting the has_aborted() flag
2125       // that the marking has completed.
2126 
2127       _task->do_marking_step(1000000000.0 /* something very large */,
2128                              true /* do_stealing    */,
2129                              true /* do_termination */);
2130     } while (_task->has_aborted() && !_cm->has_overflown());
2131   }
2132 };
2133 
2134 // Implementation of AbstractRefProcTaskExecutor for parallel
2135 // reference processing at the end of G1 concurrent marking
2136 
2137 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2138 private:
2139   G1CollectedHeap* _g1h;
2140   ConcurrentMark*  _cm;
2141   WorkGang*        _workers;
2142   int              _active_workers;
2143 
2144 public:
2145   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2146                         ConcurrentMark* cm,
2147                         WorkGang* workers,
2148                         int n_workers) :
2149     _g1h(g1h), _cm(cm),
2150     _workers(workers), _active_workers(n_workers) { }
2151 
2152   // Executes the given task using concurrent marking worker threads.
2153   virtual void execute(ProcessTask& task);
2154   virtual void execute(EnqueueTask& task);
2155 };
2156 
2157 class G1CMRefProcTaskProxy: public AbstractGangTask {
2158   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2159   ProcessTask&     _proc_task;
2160   G1CollectedHeap* _g1h;
2161   ConcurrentMark*  _cm;
2162 
2163 public:
2164   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2165                      G1CollectedHeap* g1h,
2166                      ConcurrentMark* cm) :
2167     AbstractGangTask("Process reference objects in parallel"),
2168     _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
2169 
2170   virtual void work(int i) {
2171     CMTask* marking_task = _cm->task(i);
2172     G1CMIsAliveClosure g1_is_alive(_g1h);
2173     G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
2174     G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2175 
2176     _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2177   }
2178 };
2179 
2180 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2181   assert(_workers != NULL, "Need parallel worker threads.");
2182 
2183   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2184 
2185   // We need to reset the phase for each task execution so that
2186   // the termination protocol of CMTask::do_marking_step works.
2187   _cm->set_phase(_active_workers, false /* concurrent */);
2188   _g1h->set_par_threads(_active_workers);
2189   _workers->run_task(&proc_task_proxy);
2190   _g1h->set_par_threads(0);
2191 }
2192 
2193 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2194   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2195   EnqueueTask& _enq_task;
2196 
2197 public:
2198   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2199     AbstractGangTask("Enqueue reference objects in parallel"),
2200     _enq_task(enq_task) { }
2201 
2202   virtual void work(int i) {
2203     _enq_task.work(i);
2204   }
2205 };
2206 
2207 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2208   assert(_workers != NULL, "Need parallel worker threads.");
2209 
2210   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2211 
2212   _g1h->set_par_threads(_active_workers);
2213   _workers->run_task(&enq_task_proxy);
2214   _g1h->set_par_threads(0);
2215 }
2216 
2217 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2218   ResourceMark rm;
2219   HandleMark   hm;
2220 
2221   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2222 
2223   // Is alive closure.
2224   G1CMIsAliveClosure g1_is_alive(g1h);
2225 
2226   // Inner scope to exclude the cleaning of the string and symbol
2227   // tables from the displayed time.
2228   {
2229     bool verbose = PrintGC && PrintGCDetails;
2230     if (verbose) {
2231       gclog_or_tty->put(' ');
2232     }
2233     TraceTime t("GC ref-proc", verbose, false, gclog_or_tty);
2234 
2235     ReferenceProcessor* rp = g1h->ref_processor_cm();
2236 
2237     // See the comment in G1CollectedHeap::ref_processing_init()
2238     // about how reference processing currently works in G1.
2239 
2240     // Process weak references.
2241     rp->setup_policy(clear_all_soft_refs);
2242     assert(_markStack.isEmpty(), "mark stack should be empty");
2243 
2244     G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
2245     G1CMDrainMarkingStackClosure
2246       g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
2247 
2248     // We use the work gang from the G1CollectedHeap and we utilize all
2249     // the worker threads.
2250     int active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1;
2251     active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
2252 
2253     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2254                                               g1h->workers(), active_workers);
2255 
2256     if (rp->processing_is_mt()) {
2257       // Set the degree of MT here.  If the discovery is done MT, there
2258       // may have been a different number of threads doing the discovery
2259       // and a different number of discovered lists may have Ref objects.
2260       // That is OK as long as the Reference lists are balanced (see
2261       // balance_all_queues() and balance_queues()).
2262       rp->set_active_mt_degree(active_workers);
2263 
2264       rp->process_discovered_references(&g1_is_alive,
2265                                       &g1_keep_alive,
2266                                       &g1_drain_mark_stack,
2267                                       &par_task_executor);
2268 
2269       // The work routines of the parallel keep_alive and drain_marking_stack
2270       // will set the has_overflown flag if we overflow the global marking
2271       // stack.
2272     } else {
2273       rp->process_discovered_references(&g1_is_alive,
2274                                         &g1_keep_alive,
2275                                         &g1_drain_mark_stack,
2276                                         NULL);
2277     }
2278 
2279     assert(_markStack.overflow() || _markStack.isEmpty(),
2280             "mark stack should be empty (unless it overflowed)");
2281     if (_markStack.overflow()) {
2282       // Should have been done already when we tried to push an
2283       // entry on to the global mark stack. But let's do it again.
2284       set_has_overflown();
2285     }
2286 
2287     if (rp->processing_is_mt()) {
2288       assert(rp->num_q() == active_workers, "why not");
2289       rp->enqueue_discovered_references(&par_task_executor);
2290     } else {
2291       rp->enqueue_discovered_references();
2292     }
2293 
2294     rp->verify_no_references_recorded();
2295     assert(!rp->discovery_enabled(), "Post condition");
2296   }
2297 
2298   // Now clean up stale oops in StringTable
2299   StringTable::unlink(&g1_is_alive);
2300   // Clean up unreferenced symbols in symbol table.
2301   SymbolTable::unlink();
2302 }
2303 
2304 void ConcurrentMark::swapMarkBitMaps() {
2305   CMBitMapRO* temp = _prevMarkBitMap;
2306   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2307   _nextMarkBitMap  = (CMBitMap*)  temp;
2308 }
2309 
2310 class CMRemarkTask: public AbstractGangTask {
2311 private:
2312   ConcurrentMark *_cm;
2313 
2314 public:
2315   void work(int worker_i) {
2316     // Since all available tasks are actually started, we should
2317     // only proceed if we're supposed to be actived.
2318     if ((size_t)worker_i < _cm->active_tasks()) {
2319       CMTask* task = _cm->task(worker_i);
2320       task->record_start_time();
2321       do {
2322         task->do_marking_step(1000000000.0 /* something very large */,
2323                               true /* do_stealing    */,
2324                               true /* do_termination */);
2325       } while (task->has_aborted() && !_cm->has_overflown());
2326       // If we overflow, then we do not want to restart. We instead
2327       // want to abort remark and do concurrent marking again.
2328       task->record_end_time();
2329     }
2330   }
2331 
2332   CMRemarkTask(ConcurrentMark* cm) :
2333     AbstractGangTask("Par Remark"), _cm(cm) {
2334     _cm->terminator()->reset_for_reuse(cm->_g1h->workers()->active_workers());
2335   }
2336 };
2337 
2338 void ConcurrentMark::checkpointRootsFinalWork() {
2339   ResourceMark rm;
2340   HandleMark   hm;
2341   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2342 
2343   g1h->ensure_parsability(false);
2344 
2345   if (G1CollectedHeap::use_parallel_gc_threads()) {
2346     G1CollectedHeap::StrongRootsScope srs(g1h);
2347     // this is remark, so we'll use up all active threads
2348     int active_workers = g1h->workers()->active_workers();
2349     if (active_workers == 0) {
2350       assert(active_workers > 0, "Should have been set earlier");
2351       active_workers = ParallelGCThreads;
2352       g1h->workers()->set_active_workers(active_workers);
2353     }
2354     set_phase(active_workers, false /* concurrent */);
2355     // Leave _parallel_marking_threads at it's
2356     // value originally calculated in the ConcurrentMark
2357     // constructor and pass values of the active workers
2358     // through the gang in the task.
2359 
2360     CMRemarkTask remarkTask(this);
2361     g1h->set_par_threads(active_workers);
2362     g1h->workers()->run_task(&remarkTask);
2363     g1h->set_par_threads(0);
2364   } else {
2365     G1CollectedHeap::StrongRootsScope srs(g1h);
2366     // this is remark, so we'll use up all available threads
2367     int active_workers = 1;
2368     set_phase(active_workers, false /* concurrent */);
2369 
2370     CMRemarkTask remarkTask(this);
2371     // We will start all available threads, even if we decide that the
2372     // active_workers will be fewer. The extra ones will just bail out
2373     // immediately.
2374     remarkTask.work(0);
2375   }
2376   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2377   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2378 
2379   print_stats();
2380 
2381 #if VERIFY_OBJS_PROCESSED
2382   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2383     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2384                            _scan_obj_cl.objs_processed,
2385                            ThreadLocalObjQueue::objs_enqueued);
2386     guarantee(_scan_obj_cl.objs_processed ==
2387               ThreadLocalObjQueue::objs_enqueued,
2388               "Different number of objs processed and enqueued.");
2389   }
2390 #endif
2391 }
2392 
2393 #ifndef PRODUCT
2394 
2395 class PrintReachableOopClosure: public OopClosure {
2396 private:
2397   G1CollectedHeap* _g1h;
2398   outputStream*    _out;
2399   VerifyOption     _vo;
2400   bool             _all;
2401 
2402 public:
2403   PrintReachableOopClosure(outputStream* out,
2404                            VerifyOption  vo,
2405                            bool          all) :
2406     _g1h(G1CollectedHeap::heap()),
2407     _out(out), _vo(vo), _all(all) { }
2408 
2409   void do_oop(narrowOop* p) { do_oop_work(p); }
2410   void do_oop(      oop* p) { do_oop_work(p); }
2411 
2412   template <class T> void do_oop_work(T* p) {
2413     oop         obj = oopDesc::load_decode_heap_oop(p);
2414     const char* str = NULL;
2415     const char* str2 = "";
2416 
2417     if (obj == NULL) {
2418       str = "";
2419     } else if (!_g1h->is_in_g1_reserved(obj)) {
2420       str = " O";
2421     } else {
2422       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2423       guarantee(hr != NULL, "invariant");
2424       bool over_tams = false;
2425       bool marked = false;
2426 
2427       switch (_vo) {
2428         case VerifyOption_G1UsePrevMarking:
2429           over_tams = hr->obj_allocated_since_prev_marking(obj);
2430           marked = _g1h->isMarkedPrev(obj);
2431           break;
2432         case VerifyOption_G1UseNextMarking:
2433           over_tams = hr->obj_allocated_since_next_marking(obj);
2434           marked = _g1h->isMarkedNext(obj);
2435           break;
2436         case VerifyOption_G1UseMarkWord:
2437           marked = obj->is_gc_marked();
2438           break;
2439         default:
2440           ShouldNotReachHere();
2441       }
2442 
2443       if (over_tams) {
2444         str = " >";
2445         if (marked) {
2446           str2 = " AND MARKED";
2447         }
2448       } else if (marked) {
2449         str = " M";
2450       } else {
2451         str = " NOT";
2452       }
2453     }
2454 
2455     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2456                    p, (void*) obj, str, str2);
2457   }
2458 };
2459 
2460 class PrintReachableObjectClosure : public ObjectClosure {
2461 private:
2462   G1CollectedHeap* _g1h;
2463   outputStream*    _out;
2464   VerifyOption     _vo;
2465   bool             _all;
2466   HeapRegion*      _hr;
2467 
2468 public:
2469   PrintReachableObjectClosure(outputStream* out,
2470                               VerifyOption  vo,
2471                               bool          all,
2472                               HeapRegion*   hr) :
2473     _g1h(G1CollectedHeap::heap()),
2474     _out(out), _vo(vo), _all(all), _hr(hr) { }
2475 
2476   void do_object(oop o) {
2477     bool over_tams = false;
2478     bool marked = false;
2479 
2480     switch (_vo) {
2481       case VerifyOption_G1UsePrevMarking:
2482         over_tams = _hr->obj_allocated_since_prev_marking(o);
2483         marked = _g1h->isMarkedPrev(o);
2484         break;
2485       case VerifyOption_G1UseNextMarking:
2486         over_tams = _hr->obj_allocated_since_next_marking(o);
2487         marked = _g1h->isMarkedNext(o);
2488         break;
2489       case VerifyOption_G1UseMarkWord:
2490         marked = o->is_gc_marked();
2491         break;
2492       default:
2493         ShouldNotReachHere();
2494     }
2495     bool print_it = _all || over_tams || marked;
2496 
2497     if (print_it) {
2498       _out->print_cr(" "PTR_FORMAT"%s",
2499                      o, (over_tams) ? " >" : (marked) ? " M" : "");
2500       PrintReachableOopClosure oopCl(_out, _vo, _all);
2501       o->oop_iterate(&oopCl);
2502     }
2503   }
2504 };
2505 
2506 class PrintReachableRegionClosure : public HeapRegionClosure {
2507 private:
2508   outputStream* _out;
2509   VerifyOption  _vo;
2510   bool          _all;
2511 
2512 public:
2513   bool doHeapRegion(HeapRegion* hr) {
2514     HeapWord* b = hr->bottom();
2515     HeapWord* e = hr->end();
2516     HeapWord* t = hr->top();
2517     HeapWord* p = NULL;
2518 
2519     switch (_vo) {
2520       case VerifyOption_G1UsePrevMarking:
2521         p = hr->prev_top_at_mark_start();
2522         break;
2523       case VerifyOption_G1UseNextMarking:
2524         p = hr->next_top_at_mark_start();
2525         break;
2526       case VerifyOption_G1UseMarkWord:
2527         // When we are verifying marking using the mark word
2528         // TAMS has no relevance.
2529         assert(p == NULL, "post-condition");
2530         break;
2531       default:
2532         ShouldNotReachHere();
2533     }
2534     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2535                    "TAMS: "PTR_FORMAT, b, e, t, p);
2536     _out->cr();
2537 
2538     HeapWord* from = b;
2539     HeapWord* to   = t;
2540 
2541     if (to > from) {
2542       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2543       _out->cr();
2544       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2545       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2546       _out->cr();
2547     }
2548 
2549     return false;
2550   }
2551 
2552   PrintReachableRegionClosure(outputStream* out,
2553                               VerifyOption  vo,
2554                               bool          all) :
2555     _out(out), _vo(vo), _all(all) { }
2556 };
2557 
2558 static const char* verify_option_to_tams(VerifyOption vo) {
2559   switch (vo) {
2560     case VerifyOption_G1UsePrevMarking:
2561       return "PTAMS";
2562     case VerifyOption_G1UseNextMarking:
2563       return "NTAMS";
2564     default:
2565       return "NONE";
2566   }
2567 }
2568 
2569 void ConcurrentMark::print_reachable(const char* str,
2570                                      VerifyOption vo,
2571                                      bool all) {
2572   gclog_or_tty->cr();
2573   gclog_or_tty->print_cr("== Doing heap dump... ");
2574 
2575   if (G1PrintReachableBaseFile == NULL) {
2576     gclog_or_tty->print_cr("  #### error: no base file defined");
2577     return;
2578   }
2579 
2580   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2581       (JVM_MAXPATHLEN - 1)) {
2582     gclog_or_tty->print_cr("  #### error: file name too long");
2583     return;
2584   }
2585 
2586   char file_name[JVM_MAXPATHLEN];
2587   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2588   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2589 
2590   fileStream fout(file_name);
2591   if (!fout.is_open()) {
2592     gclog_or_tty->print_cr("  #### error: could not open file");
2593     return;
2594   }
2595 
2596   outputStream* out = &fout;
2597   out->print_cr("-- USING %s", verify_option_to_tams(vo));
2598   out->cr();
2599 
2600   out->print_cr("--- ITERATING OVER REGIONS");
2601   out->cr();
2602   PrintReachableRegionClosure rcl(out, vo, all);
2603   _g1h->heap_region_iterate(&rcl);
2604   out->cr();
2605 
2606   gclog_or_tty->print_cr("  done");
2607   gclog_or_tty->flush();
2608 }
2609 
2610 #endif // PRODUCT
2611 
2612 // This note is for drainAllSATBBuffers and the code in between.
2613 // In the future we could reuse a task to do this work during an
2614 // evacuation pause (since now tasks are not active and can be claimed
2615 // during an evacuation pause). This was a late change to the code and
2616 // is currently not being taken advantage of.
2617 
2618 class CMGlobalObjectClosure : public ObjectClosure {
2619 private:
2620   ConcurrentMark* _cm;
2621 
2622 public:
2623   void do_object(oop obj) {
2624     _cm->deal_with_reference(obj);
2625   }
2626 
2627   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2628 };
2629 
2630 void ConcurrentMark::deal_with_reference(oop obj) {
2631   if (verbose_high()) {
2632     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2633                            (void*) obj);
2634   }
2635 
2636   HeapWord* objAddr = (HeapWord*) obj;
2637   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2638   if (_g1h->is_in_g1_reserved(objAddr)) {
2639     assert(obj != NULL, "null check is implicit");
2640     if (!_nextMarkBitMap->isMarked(objAddr)) {
2641       // Only get the containing region if the object is not marked on the
2642       // bitmap (otherwise, it's a waste of time since we won't do
2643       // anything with it).
2644       HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
2645       if (!hr->obj_allocated_since_next_marking(obj)) {
2646         if (verbose_high()) {
2647           gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2648                                  "marked", (void*) obj);
2649         }
2650 
2651         // we need to mark it first
2652         if (_nextMarkBitMap->parMark(objAddr)) {
2653           // No OrderAccess:store_load() is needed. It is implicit in the
2654           // CAS done in parMark(objAddr) above
2655           HeapWord* finger = _finger;
2656           if (objAddr < finger) {
2657             if (verbose_high()) {
2658               gclog_or_tty->print_cr("[global] below the global finger "
2659                                      "("PTR_FORMAT"), pushing it", finger);
2660             }
2661             if (!mark_stack_push(obj)) {
2662               if (verbose_low()) {
2663                 gclog_or_tty->print_cr("[global] global stack overflow during "
2664                                        "deal_with_reference");
2665               }
2666             }
2667           }
2668         }
2669       }
2670     }
2671   }
2672 }
2673 
2674 void ConcurrentMark::drainAllSATBBuffers() {
2675   CMGlobalObjectClosure oc(this);
2676   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2677   satb_mq_set.set_closure(&oc);
2678 
2679   while (satb_mq_set.apply_closure_to_completed_buffer()) {
2680     if (verbose_medium()) {
2681       gclog_or_tty->print_cr("[global] processed an SATB buffer");
2682     }
2683   }
2684 
2685   // no need to check whether we should do this, as this is only
2686   // called during an evacuation pause
2687   satb_mq_set.iterate_closure_all_threads();
2688 
2689   satb_mq_set.set_closure(NULL);
2690   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2691 }
2692 
2693 void ConcurrentMark::markPrev(oop p) {
2694   // Note we are overriding the read-only view of the prev map here, via
2695   // the cast.
2696   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2697 }
2698 
2699 void ConcurrentMark::clear(oop p) {
2700   assert(p != NULL && p->is_oop(), "expected an oop");
2701   HeapWord* addr = (HeapWord*)p;
2702   assert(addr >= _nextMarkBitMap->startWord() ||
2703          addr < _nextMarkBitMap->endWord(), "in a region");
2704 
2705   _nextMarkBitMap->clear(addr);
2706 }
2707 
2708 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2709   // Note we are overriding the read-only view of the prev map here, via
2710   // the cast.
2711   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2712   _nextMarkBitMap->clearRange(mr);
2713 }
2714 
2715 HeapRegion*
2716 ConcurrentMark::claim_region(int task_num) {
2717   // "checkpoint" the finger
2718   HeapWord* finger = _finger;
2719 
2720   // _heap_end will not change underneath our feet; it only changes at
2721   // yield points.
2722   while (finger < _heap_end) {
2723     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2724 
2725     // Note on how this code handles humongous regions. In the
2726     // normal case the finger will reach the start of a "starts
2727     // humongous" (SH) region. Its end will either be the end of the
2728     // last "continues humongous" (CH) region in the sequence, or the
2729     // standard end of the SH region (if the SH is the only region in
2730     // the sequence). That way claim_region() will skip over the CH
2731     // regions. However, there is a subtle race between a CM thread
2732     // executing this method and a mutator thread doing a humongous
2733     // object allocation. The two are not mutually exclusive as the CM
2734     // thread does not need to hold the Heap_lock when it gets
2735     // here. So there is a chance that claim_region() will come across
2736     // a free region that's in the progress of becoming a SH or a CH
2737     // region. In the former case, it will either
2738     //   a) Miss the update to the region's end, in which case it will
2739     //      visit every subsequent CH region, will find their bitmaps
2740     //      empty, and do nothing, or
2741     //   b) Will observe the update of the region's end (in which case
2742     //      it will skip the subsequent CH regions).
2743     // If it comes across a region that suddenly becomes CH, the
2744     // scenario will be similar to b). So, the race between
2745     // claim_region() and a humongous object allocation might force us
2746     // to do a bit of unnecessary work (due to some unnecessary bitmap
2747     // iterations) but it should not introduce and correctness issues.
2748     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
2749     HeapWord*   bottom        = curr_region->bottom();
2750     HeapWord*   end           = curr_region->end();
2751     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2752 
2753     if (verbose_low()) {
2754       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2755                              "["PTR_FORMAT", "PTR_FORMAT"), "
2756                              "limit = "PTR_FORMAT,
2757                              task_num, curr_region, bottom, end, limit);
2758     }
2759 
2760     // Is the gap between reading the finger and doing the CAS too long?
2761     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2762     if (res == finger) {
2763       // we succeeded
2764 
2765       // notice that _finger == end cannot be guaranteed here since,
2766       // someone else might have moved the finger even further
2767       assert(_finger >= end, "the finger should have moved forward");
2768 
2769       if (verbose_low()) {
2770         gclog_or_tty->print_cr("[%d] we were successful with region = "
2771                                PTR_FORMAT, task_num, curr_region);
2772       }
2773 
2774       if (limit > bottom) {
2775         if (verbose_low()) {
2776           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2777                                  "returning it ", task_num, curr_region);
2778         }
2779         return curr_region;
2780       } else {
2781         assert(limit == bottom,
2782                "the region limit should be at bottom");
2783         if (verbose_low()) {
2784           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2785                                  "returning NULL", task_num, curr_region);
2786         }
2787         // we return NULL and the caller should try calling
2788         // claim_region() again.
2789         return NULL;
2790       }
2791     } else {
2792       assert(_finger > finger, "the finger should have moved forward");
2793       if (verbose_low()) {
2794         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2795                                "global finger = "PTR_FORMAT", "
2796                                "our finger = "PTR_FORMAT,
2797                                task_num, _finger, finger);
2798       }
2799 
2800       // read it again
2801       finger = _finger;
2802     }
2803   }
2804 
2805   return NULL;
2806 }
2807 
2808 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
2809   bool result = false;
2810   for (int i = 0; i < (int)_max_task_num; ++i) {
2811     CMTask* the_task = _tasks[i];
2812     MemRegion mr = the_task->aborted_region();
2813     if (mr.start() != NULL) {
2814       assert(mr.end() != NULL, "invariant");
2815       assert(mr.word_size() > 0, "invariant");
2816       HeapRegion* hr = _g1h->heap_region_containing(mr.start());
2817       assert(hr != NULL, "invariant");
2818       if (hr->in_collection_set()) {
2819         // The region points into the collection set
2820         the_task->set_aborted_region(MemRegion());
2821         result = true;
2822       }
2823     }
2824   }
2825   return result;
2826 }
2827 
2828 bool ConcurrentMark::has_aborted_regions() {
2829   for (int i = 0; i < (int)_max_task_num; ++i) {
2830     CMTask* the_task = _tasks[i];
2831     MemRegion mr = the_task->aborted_region();
2832     if (mr.start() != NULL) {
2833       assert(mr.end() != NULL, "invariant");
2834       assert(mr.word_size() > 0, "invariant");
2835       return true;
2836     }
2837   }
2838   return false;
2839 }
2840 
2841 void ConcurrentMark::oops_do(OopClosure* cl) {
2842   if (_markStack.size() > 0 && verbose_low()) {
2843     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2844                            "size = %d", _markStack.size());
2845   }
2846   // we first iterate over the contents of the mark stack...
2847   _markStack.oops_do(cl);
2848 
2849   for (int i = 0; i < (int)_max_task_num; ++i) {
2850     OopTaskQueue* queue = _task_queues->queue((int)i);
2851 
2852     if (queue->size() > 0 && verbose_low()) {
2853       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2854                              "size = %d", i, queue->size());
2855     }
2856 
2857     // ...then over the contents of the all the task queues.
2858     queue->oops_do(cl);
2859   }
2860 
2861   // Invalidate any entries, that are in the region stack, that
2862   // point into the collection set
2863   if (_regionStack.invalidate_entries_into_cset()) {
2864     // otherwise, any gray objects copied during the evacuation pause
2865     // might not be visited.
2866     assert(_should_gray_objects, "invariant");
2867   }
2868 
2869   // Invalidate any aborted regions, recorded in the individual CM
2870   // tasks, that point into the collection set.
2871   if (invalidate_aborted_regions_in_cset()) {
2872     // otherwise, any gray objects copied during the evacuation pause
2873     // might not be visited.
2874     assert(_should_gray_objects, "invariant");
2875   }
2876 
2877 }
2878 
2879 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2880   _markStack.setEmpty();
2881   _markStack.clear_overflow();
2882   _regionStack.setEmpty();
2883   _regionStack.clear_overflow();
2884   if (clear_overflow) {
2885     clear_has_overflown();
2886   } else {
2887     assert(has_overflown(), "pre-condition");
2888   }
2889   _finger = _heap_start;
2890 
2891   for (int i = 0; i < (int)_max_task_num; ++i) {
2892     OopTaskQueue* queue = _task_queues->queue(i);
2893     queue->set_empty();
2894     // Clear any partial regions from the CMTasks
2895     _tasks[i]->clear_aborted_region();
2896   }
2897 }
2898 
2899 void ConcurrentMark::print_stats() {
2900   if (verbose_stats()) {
2901     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2902     for (size_t i = 0; i < _active_tasks; ++i) {
2903       _tasks[i]->print_stats();
2904       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2905     }
2906   }
2907 }
2908 
2909 // Closures used by ConcurrentMark::complete_marking_in_collection_set().
2910 
2911 class CSetMarkOopClosure: public OopClosure {
2912   friend class CSetMarkBitMapClosure;
2913 
2914   G1CollectedHeap* _g1h;
2915   CMBitMap*        _bm;
2916   ConcurrentMark*  _cm;
2917   oop*             _ms;
2918   jint*            _array_ind_stack;
2919   int              _ms_size;
2920   int              _ms_ind;
2921   int              _array_increment;
2922   int              _worker_i;
2923 
2924   bool push(oop obj, int arr_ind = 0) {
2925     if (_ms_ind == _ms_size) {
2926       gclog_or_tty->print_cr("Mark stack is full.");
2927       return false;
2928     }
2929     _ms[_ms_ind] = obj;
2930     if (obj->is_objArray()) {
2931       _array_ind_stack[_ms_ind] = arr_ind;
2932     }
2933     _ms_ind++;
2934     return true;
2935   }
2936 
2937   oop pop() {
2938     if (_ms_ind == 0) {
2939       return NULL;
2940     } else {
2941       _ms_ind--;
2942       return _ms[_ms_ind];
2943     }
2944   }
2945 
2946   template <class T> bool drain() {
2947     while (_ms_ind > 0) {
2948       oop obj = pop();
2949       assert(obj != NULL, "Since index was non-zero.");
2950       if (obj->is_objArray()) {
2951         jint arr_ind = _array_ind_stack[_ms_ind];
2952         objArrayOop aobj = objArrayOop(obj);
2953         jint len = aobj->length();
2954         jint next_arr_ind = arr_ind + _array_increment;
2955         if (next_arr_ind < len) {
2956           push(obj, next_arr_ind);
2957         }
2958         // Now process this portion of this one.
2959         int lim = MIN2(next_arr_ind, len);
2960         for (int j = arr_ind; j < lim; j++) {
2961           do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2962         }
2963       } else {
2964         obj->oop_iterate(this);
2965       }
2966       if (abort()) return false;
2967     }
2968     return true;
2969   }
2970 
2971 public:
2972   CSetMarkOopClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
2973     _g1h(G1CollectedHeap::heap()),
2974     _cm(cm),
2975     _bm(cm->nextMarkBitMap()),
2976     _ms_size(ms_size), _ms_ind(0),
2977     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2978     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2979     _array_increment(MAX2(ms_size/8, 16)),
2980     _worker_i(worker_i) { }
2981 
2982   ~CSetMarkOopClosure() {
2983     FREE_C_HEAP_ARRAY(oop, _ms);
2984     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2985   }
2986 
2987   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2988   virtual void do_oop(      oop* p) { do_oop_work(p); }
2989 
2990   template <class T> void do_oop_work(T* p) {
2991     T heap_oop = oopDesc::load_heap_oop(p);
2992     if (oopDesc::is_null(heap_oop)) return;
2993     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2994     if (obj->is_forwarded()) {
2995       // If the object has already been forwarded, we have to make sure
2996       // that it's marked.  So follow the forwarding pointer.  Note that
2997       // this does the right thing for self-forwarding pointers in the
2998       // evacuation failure case.
2999       obj = obj->forwardee();
3000     }
3001     HeapRegion* hr = _g1h->heap_region_containing(obj);
3002     if (hr != NULL) {
3003       if (hr->in_collection_set()) {
3004         if (_g1h->is_obj_ill(obj)) {
3005           if (_bm->parMark((HeapWord*)obj)) {
3006             if (!push(obj)) {
3007               gclog_or_tty->print_cr("Setting abort in CSetMarkOopClosure because push failed.");
3008               set_abort();
3009             }
3010           }
3011         }
3012       } else {
3013         // Outside the collection set; we need to gray it
3014         _cm->deal_with_reference(obj);
3015       }
3016     }
3017   }
3018 };
3019 
3020 class CSetMarkBitMapClosure: public BitMapClosure {
3021   G1CollectedHeap*   _g1h;
3022   CMBitMap*          _bitMap;
3023   ConcurrentMark*    _cm;
3024   CSetMarkOopClosure _oop_cl;
3025   int                _worker_i;
3026 
3027 public:
3028   CSetMarkBitMapClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
3029     _g1h(G1CollectedHeap::heap()),
3030     _bitMap(cm->nextMarkBitMap()),
3031     _oop_cl(cm, ms_size, worker_i),
3032     _worker_i(worker_i) { }
3033 
3034   bool do_bit(size_t offset) {
3035     // convert offset into a HeapWord*
3036     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
3037     assert(_bitMap->endWord() && addr < _bitMap->endWord(),
3038            "address out of range");
3039     assert(_bitMap->isMarked(addr), "tautology");
3040     oop obj = oop(addr);
3041     if (!obj->is_forwarded()) {
3042       if (!_oop_cl.push(obj)) return false;
3043       if (UseCompressedOops) {
3044         if (!_oop_cl.drain<narrowOop>()) return false;
3045       } else {
3046         if (!_oop_cl.drain<oop>()) return false;
3047       }
3048     }
3049     // Otherwise...
3050     return true;
3051   }
3052 };
3053 
3054 class CompleteMarkingInCSetHRClosure: public HeapRegionClosure {
3055   CMBitMap*             _bm;
3056   CSetMarkBitMapClosure _bit_cl;
3057   int                   _worker_i;
3058 
3059   enum SomePrivateConstants {
3060     MSSize = 1000
3061   };
3062 
3063 public:
3064   CompleteMarkingInCSetHRClosure(ConcurrentMark* cm, int worker_i) :
3065     _bm(cm->nextMarkBitMap()),
3066     _bit_cl(cm, MSSize, worker_i),
3067     _worker_i(worker_i) { }
3068 
3069   bool doHeapRegion(HeapRegion* hr) {
3070     if (hr->claimHeapRegion(HeapRegion::CompleteMarkCSetClaimValue)) {
3071       // The current worker has successfully claimed the region.
3072       if (!hr->evacuation_failed()) {
3073         MemRegion mr = MemRegion(hr->bottom(), hr->next_top_at_mark_start());
3074         if (!mr.is_empty()) {
3075           bool done = false;
3076           while (!done) {
3077             done = _bm->iterate(&_bit_cl, mr);
3078           }
3079         }
3080       }
3081     }
3082     return false;
3083   }
3084 };
3085 
3086 class SetClaimValuesInCSetHRClosure: public HeapRegionClosure {
3087   jint _claim_value;
3088 
3089 public:
3090   SetClaimValuesInCSetHRClosure(jint claim_value) :
3091     _claim_value(claim_value) { }
3092 
3093   bool doHeapRegion(HeapRegion* hr) {
3094     hr->set_claim_value(_claim_value);
3095     return false;
3096   }
3097 };
3098 
3099 class G1ParCompleteMarkInCSetTask: public AbstractGangTask {
3100 protected:
3101   G1CollectedHeap* _g1h;
3102   ConcurrentMark*  _cm;
3103 
3104 public:
3105   G1ParCompleteMarkInCSetTask(G1CollectedHeap* g1h,
3106                               ConcurrentMark* cm) :
3107     AbstractGangTask("Complete Mark in CSet"),
3108     _g1h(g1h), _cm(cm) { }
3109 
3110   void work(int worker_i) {
3111     CompleteMarkingInCSetHRClosure cmplt(_cm, worker_i);
3112     HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_i);
3113     _g1h->collection_set_iterate_from(hr, &cmplt);
3114   }
3115 };
3116 
3117 void ConcurrentMark::complete_marking_in_collection_set() {
3118   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
3119 
3120   if (!g1h->mark_in_progress()) {
3121     g1h->g1_policy()->record_mark_closure_time(0.0);
3122     return;
3123   }
3124 
3125   double start = os::elapsedTime();
3126   int n_workers = g1h->workers()->total_workers();
3127 
3128   G1ParCompleteMarkInCSetTask complete_mark_task(g1h, this);
3129 
3130   assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
3131 
3132   if (G1CollectedHeap::use_parallel_gc_threads()) {
3133     g1h->set_par_threads(n_workers);
3134     g1h->workers()->run_task(&complete_mark_task);
3135     g1h->set_par_threads(0);
3136   } else {
3137     complete_mark_task.work(0);
3138   }
3139 
3140   assert(g1h->check_cset_heap_region_claim_values(HeapRegion::CompleteMarkCSetClaimValue), "sanity");
3141 
3142   // Now reset the claim values in the regions in the collection set.
3143   SetClaimValuesInCSetHRClosure set_cv_cl(HeapRegion::InitialClaimValue);
3144   g1h->collection_set_iterate(&set_cv_cl);
3145 
3146   assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
3147 
3148   double end_time = os::elapsedTime();
3149   double elapsed_time_ms = (end_time - start) * 1000.0;
3150   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
3151 }
3152 
3153 // The next two methods deal with the following optimisation. Some
3154 // objects are gray by being marked and located above the finger. If
3155 // they are copied, during an evacuation pause, below the finger then
3156 // the need to be pushed on the stack. The observation is that, if
3157 // there are no regions in the collection set located above the
3158 // finger, then the above cannot happen, hence we do not need to
3159 // explicitly gray any objects when copying them to below the
3160 // finger. The global stack will be scanned to ensure that, if it
3161 // points to objects being copied, it will update their
3162 // location. There is a tricky situation with the gray objects in
3163 // region stack that are being coped, however. See the comment in
3164 // newCSet().
3165 
3166 void ConcurrentMark::newCSet() {
3167   if (!concurrent_marking_in_progress()) {
3168     // nothing to do if marking is not in progress
3169     return;
3170   }
3171 
3172   // find what the lowest finger is among the global and local fingers
3173   _min_finger = _finger;
3174   for (int i = 0; i < (int)_max_task_num; ++i) {
3175     CMTask* task = _tasks[i];
3176     HeapWord* task_finger = task->finger();
3177     if (task_finger != NULL && task_finger < _min_finger) {
3178       _min_finger = task_finger;
3179     }
3180   }
3181 
3182   _should_gray_objects = false;
3183 
3184   // This fixes a very subtle and fustrating bug. It might be the case
3185   // that, during en evacuation pause, heap regions that contain
3186   // objects that are gray (by being in regions contained in the
3187   // region stack) are included in the collection set. Since such gray
3188   // objects will be moved, and because it's not easy to redirect
3189   // region stack entries to point to a new location (because objects
3190   // in one region might be scattered to multiple regions after they
3191   // are copied), one option is to ensure that all marked objects
3192   // copied during a pause are pushed on the stack. Notice, however,
3193   // that this problem can only happen when the region stack is not
3194   // empty during an evacuation pause. So, we make the fix a bit less
3195   // conservative and ensure that regions are pushed on the stack,
3196   // irrespective whether all collection set regions are below the
3197   // finger, if the region stack is not empty. This is expected to be
3198   // a rare case, so I don't think it's necessary to be smarted about it.
3199   if (!region_stack_empty() || has_aborted_regions()) {
3200     _should_gray_objects = true;
3201   }
3202 }
3203 
3204 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
3205   if (!concurrent_marking_in_progress()) return;
3206 
3207   HeapWord* region_end = hr->end();
3208   if (region_end > _min_finger) {
3209     _should_gray_objects = true;
3210   }
3211 }
3212 
3213 // Resets the region fields of active CMTasks whose values point
3214 // into the collection set.
3215 void ConcurrentMark::reset_active_task_region_fields_in_cset() {
3216   assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
3217   assert(parallel_marking_threads() <= _max_task_num, "sanity");
3218 
3219   for (int i = 0; i < (int)parallel_marking_threads(); i += 1) {
3220     CMTask* task = _tasks[i];
3221     HeapWord* task_finger = task->finger();
3222     if (task_finger != NULL) {
3223       assert(_g1h->is_in_g1_reserved(task_finger), "not in heap");
3224       HeapRegion* finger_region = _g1h->heap_region_containing(task_finger);
3225       if (finger_region->in_collection_set()) {
3226         // The task's current region is in the collection set.
3227         // This region will be evacuated in the current GC and
3228         // the region fields in the task will be stale.
3229         task->giveup_current_region();
3230       }
3231     }
3232   }
3233 }
3234 
3235 // abandon current marking iteration due to a Full GC
3236 void ConcurrentMark::abort() {
3237   // Clear all marks to force marking thread to do nothing
3238   _nextMarkBitMap->clearAll();
3239   // Empty mark stack
3240   clear_marking_state();
3241   for (int i = 0; i < (int)_max_task_num; ++i) {
3242     _tasks[i]->clear_region_fields();
3243   }
3244   _has_aborted = true;
3245 
3246   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3247   satb_mq_set.abandon_partial_marking();
3248   // This can be called either during or outside marking, we'll read
3249   // the expected_active value from the SATB queue set.
3250   satb_mq_set.set_active_all_threads(
3251                                  false, /* new active value */
3252                                  satb_mq_set.is_active() /* expected_active */);
3253 }
3254 
3255 static void print_ms_time_info(const char* prefix, const char* name,
3256                                NumberSeq& ns) {
3257   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3258                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3259   if (ns.num() > 0) {
3260     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3261                            prefix, ns.sd(), ns.maximum());
3262   }
3263 }
3264 
3265 void ConcurrentMark::print_summary_info() {
3266   gclog_or_tty->print_cr(" Concurrent marking:");
3267   print_ms_time_info("  ", "init marks", _init_times);
3268   print_ms_time_info("  ", "remarks", _remark_times);
3269   {
3270     print_ms_time_info("     ", "final marks", _remark_mark_times);
3271     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3272 
3273   }
3274   print_ms_time_info("  ", "cleanups", _cleanup_times);
3275   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3276                          _total_counting_time,
3277                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3278                           (double)_cleanup_times.num()
3279                          : 0.0));
3280   if (G1ScrubRemSets) {
3281     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3282                            _total_rs_scrub_time,
3283                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3284                             (double)_cleanup_times.num()
3285                            : 0.0));
3286   }
3287   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3288                          (_init_times.sum() + _remark_times.sum() +
3289                           _cleanup_times.sum())/1000.0);
3290   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3291                 "(%8.2f s marking, %8.2f s counting).",
3292                 cmThread()->vtime_accum(),
3293                 cmThread()->vtime_mark_accum(),
3294                 cmThread()->vtime_count_accum());
3295 }
3296 
3297 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3298   _parallel_workers->print_worker_threads_on(st);
3299 }
3300 
3301 // Closures
3302 // XXX: there seems to be a lot of code  duplication here;
3303 // should refactor and consolidate the shared code.
3304 
3305 // This closure is used to mark refs into the CMS generation in
3306 // the CMS bit map. Called at the first checkpoint.
3307 
3308 // We take a break if someone is trying to stop the world.
3309 bool ConcurrentMark::do_yield_check(int worker_i) {
3310   if (should_yield()) {
3311     if (worker_i == 0) {
3312       _g1h->g1_policy()->record_concurrent_pause();
3313     }
3314     cmThread()->yield();
3315     if (worker_i == 0) {
3316       _g1h->g1_policy()->record_concurrent_pause_end();
3317     }
3318     return true;
3319   } else {
3320     return false;
3321   }
3322 }
3323 
3324 bool ConcurrentMark::should_yield() {
3325   return cmThread()->should_yield();
3326 }
3327 
3328 bool ConcurrentMark::containing_card_is_marked(void* p) {
3329   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3330   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3331 }
3332 
3333 bool ConcurrentMark::containing_cards_are_marked(void* start,
3334                                                  void* last) {
3335   return containing_card_is_marked(start) &&
3336          containing_card_is_marked(last);
3337 }
3338 
3339 #ifndef PRODUCT
3340 // for debugging purposes
3341 void ConcurrentMark::print_finger() {
3342   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3343                          _heap_start, _heap_end, _finger);
3344   for (int i = 0; i < (int) _max_task_num; ++i) {
3345     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
3346   }
3347   gclog_or_tty->print_cr("");
3348 }
3349 #endif
3350 
3351 void CMTask::scan_object(oop obj) {
3352   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3353 
3354   if (_cm->verbose_high()) {
3355     gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
3356                            _task_id, (void*) obj);
3357   }
3358 
3359   size_t obj_size = obj->size();
3360   _words_scanned += obj_size;
3361 
3362   obj->oop_iterate(_cm_oop_closure);
3363   statsOnly( ++_objs_scanned );
3364   check_limits();
3365 }
3366 
3367 // Closure for iteration over bitmaps
3368 class CMBitMapClosure : public BitMapClosure {
3369 private:
3370   // the bitmap that is being iterated over
3371   CMBitMap*                   _nextMarkBitMap;
3372   ConcurrentMark*             _cm;
3373   CMTask*                     _task;
3374   // true if we're scanning a heap region claimed by the task (so that
3375   // we move the finger along), false if we're not, i.e. currently when
3376   // scanning a heap region popped from the region stack (so that we
3377   // do not move the task finger along; it'd be a mistake if we did so).
3378   bool                        _scanning_heap_region;
3379 
3380 public:
3381   CMBitMapClosure(CMTask *task,
3382                   ConcurrentMark* cm,
3383                   CMBitMap* nextMarkBitMap)
3384     :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3385 
3386   void set_scanning_heap_region(bool scanning_heap_region) {
3387     _scanning_heap_region = scanning_heap_region;
3388   }
3389 
3390   bool do_bit(size_t offset) {
3391     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3392     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3393     assert( addr < _cm->finger(), "invariant");
3394 
3395     if (_scanning_heap_region) {
3396       statsOnly( _task->increase_objs_found_on_bitmap() );
3397       assert(addr >= _task->finger(), "invariant");
3398       // We move that task's local finger along.
3399       _task->move_finger_to(addr);
3400     } else {
3401       // We move the task's region finger along.
3402       _task->move_region_finger_to(addr);
3403     }
3404 
3405     _task->scan_object(oop(addr));
3406     // we only partially drain the local queue and global stack
3407     _task->drain_local_queue(true);
3408     _task->drain_global_stack(true);
3409 
3410     // if the has_aborted flag has been raised, we need to bail out of
3411     // the iteration
3412     return !_task->has_aborted();
3413   }
3414 };
3415 
3416 // Closure for iterating over objects, currently only used for
3417 // processing SATB buffers.
3418 class CMObjectClosure : public ObjectClosure {
3419 private:
3420   CMTask* _task;
3421 
3422 public:
3423   void do_object(oop obj) {
3424     _task->deal_with_reference(obj);
3425   }
3426 
3427   CMObjectClosure(CMTask* task) : _task(task) { }
3428 };
3429 
3430 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3431                                ConcurrentMark* cm,
3432                                CMTask* task)
3433   : _g1h(g1h), _cm(cm), _task(task) {
3434   assert(_ref_processor == NULL, "should be initialized to NULL");
3435 
3436   if (G1UseConcMarkReferenceProcessing) {
3437     _ref_processor = g1h->ref_processor_cm();
3438     assert(_ref_processor != NULL, "should not be NULL");
3439   }
3440 }
3441 
3442 void CMTask::setup_for_region(HeapRegion* hr) {
3443   // Separated the asserts so that we know which one fires.
3444   assert(hr != NULL,
3445         "claim_region() should have filtered out continues humongous regions");
3446   assert(!hr->continuesHumongous(),
3447         "claim_region() should have filtered out continues humongous regions");
3448 
3449   if (_cm->verbose_low()) {
3450     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3451                            _task_id, hr);
3452   }
3453 
3454   _curr_region  = hr;
3455   _finger       = hr->bottom();
3456   update_region_limit();
3457 }
3458 
3459 void CMTask::update_region_limit() {
3460   HeapRegion* hr            = _curr_region;
3461   HeapWord* bottom          = hr->bottom();
3462   HeapWord* limit           = hr->next_top_at_mark_start();
3463 
3464   if (limit == bottom) {
3465     if (_cm->verbose_low()) {
3466       gclog_or_tty->print_cr("[%d] found an empty region "
3467                              "["PTR_FORMAT", "PTR_FORMAT")",
3468                              _task_id, bottom, limit);
3469     }
3470     // The region was collected underneath our feet.
3471     // We set the finger to bottom to ensure that the bitmap
3472     // iteration that will follow this will not do anything.
3473     // (this is not a condition that holds when we set the region up,
3474     // as the region is not supposed to be empty in the first place)
3475     _finger = bottom;
3476   } else if (limit >= _region_limit) {
3477     assert(limit >= _finger, "peace of mind");
3478   } else {
3479     assert(limit < _region_limit, "only way to get here");
3480     // This can happen under some pretty unusual circumstances.  An
3481     // evacuation pause empties the region underneath our feet (NTAMS
3482     // at bottom). We then do some allocation in the region (NTAMS
3483     // stays at bottom), followed by the region being used as a GC
3484     // alloc region (NTAMS will move to top() and the objects
3485     // originally below it will be grayed). All objects now marked in
3486     // the region are explicitly grayed, if below the global finger,
3487     // and we do not need in fact to scan anything else. So, we simply
3488     // set _finger to be limit to ensure that the bitmap iteration
3489     // doesn't do anything.
3490     _finger = limit;
3491   }
3492 
3493   _region_limit = limit;
3494 }
3495 
3496 void CMTask::giveup_current_region() {
3497   assert(_curr_region != NULL, "invariant");
3498   if (_cm->verbose_low()) {
3499     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3500                            _task_id, _curr_region);
3501   }
3502   clear_region_fields();
3503 }
3504 
3505 void CMTask::clear_region_fields() {
3506   // Values for these three fields that indicate that we're not
3507   // holding on to a region.
3508   _curr_region   = NULL;
3509   _finger        = NULL;
3510   _region_limit  = NULL;
3511 
3512   _region_finger = NULL;
3513 }
3514 
3515 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3516   if (cm_oop_closure == NULL) {
3517     assert(_cm_oop_closure != NULL, "invariant");
3518   } else {
3519     assert(_cm_oop_closure == NULL, "invariant");
3520   }
3521   _cm_oop_closure = cm_oop_closure;
3522 }
3523 
3524 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3525   guarantee(nextMarkBitMap != NULL, "invariant");
3526 
3527   if (_cm->verbose_low()) {
3528     gclog_or_tty->print_cr("[%d] resetting", _task_id);
3529   }
3530 
3531   _nextMarkBitMap                = nextMarkBitMap;
3532   clear_region_fields();
3533   assert(_aborted_region.is_empty(), "should have been cleared");
3534 
3535   _calls                         = 0;
3536   _elapsed_time_ms               = 0.0;
3537   _termination_time_ms           = 0.0;
3538   _termination_start_time_ms     = 0.0;
3539 
3540 #if _MARKING_STATS_
3541   _local_pushes                  = 0;
3542   _local_pops                    = 0;
3543   _local_max_size                = 0;
3544   _objs_scanned                  = 0;
3545   _global_pushes                 = 0;
3546   _global_pops                   = 0;
3547   _global_max_size               = 0;
3548   _global_transfers_to           = 0;
3549   _global_transfers_from         = 0;
3550   _region_stack_pops             = 0;
3551   _regions_claimed               = 0;
3552   _objs_found_on_bitmap          = 0;
3553   _satb_buffers_processed        = 0;
3554   _steal_attempts                = 0;
3555   _steals                        = 0;
3556   _aborted                       = 0;
3557   _aborted_overflow              = 0;
3558   _aborted_cm_aborted            = 0;
3559   _aborted_yield                 = 0;
3560   _aborted_timed_out             = 0;
3561   _aborted_satb                  = 0;
3562   _aborted_termination           = 0;
3563 #endif // _MARKING_STATS_
3564 }
3565 
3566 bool CMTask::should_exit_termination() {
3567   regular_clock_call();
3568   // This is called when we are in the termination protocol. We should
3569   // quit if, for some reason, this task wants to abort or the global
3570   // stack is not empty (this means that we can get work from it).
3571   return !_cm->mark_stack_empty() || has_aborted();
3572 }
3573 
3574 void CMTask::reached_limit() {
3575   assert(_words_scanned >= _words_scanned_limit ||
3576          _refs_reached >= _refs_reached_limit ,
3577          "shouldn't have been called otherwise");
3578   regular_clock_call();
3579 }
3580 
3581 void CMTask::regular_clock_call() {
3582   if (has_aborted()) return;
3583 
3584   // First, we need to recalculate the words scanned and refs reached
3585   // limits for the next clock call.
3586   recalculate_limits();
3587 
3588   // During the regular clock call we do the following
3589 
3590   // (1) If an overflow has been flagged, then we abort.
3591   if (_cm->has_overflown()) {
3592     set_has_aborted();
3593     return;
3594   }
3595 
3596   // If we are not concurrent (i.e. we're doing remark) we don't need
3597   // to check anything else. The other steps are only needed during
3598   // the concurrent marking phase.
3599   if (!concurrent()) return;
3600 
3601   // (2) If marking has been aborted for Full GC, then we also abort.
3602   if (_cm->has_aborted()) {
3603     set_has_aborted();
3604     statsOnly( ++_aborted_cm_aborted );
3605     return;
3606   }
3607 
3608   double curr_time_ms = os::elapsedVTime() * 1000.0;
3609 
3610   // (3) If marking stats are enabled, then we update the step history.
3611 #if _MARKING_STATS_
3612   if (_words_scanned >= _words_scanned_limit) {
3613     ++_clock_due_to_scanning;
3614   }
3615   if (_refs_reached >= _refs_reached_limit) {
3616     ++_clock_due_to_marking;
3617   }
3618 
3619   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3620   _interval_start_time_ms = curr_time_ms;
3621   _all_clock_intervals_ms.add(last_interval_ms);
3622 
3623   if (_cm->verbose_medium()) {
3624       gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3625                         "scanned = %d%s, refs reached = %d%s",
3626                         _task_id, last_interval_ms,
3627                         _words_scanned,
3628                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3629                         _refs_reached,
3630                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3631   }
3632 #endif // _MARKING_STATS_
3633 
3634   // (4) We check whether we should yield. If we have to, then we abort.
3635   if (_cm->should_yield()) {
3636     // We should yield. To do this we abort the task. The caller is
3637     // responsible for yielding.
3638     set_has_aborted();
3639     statsOnly( ++_aborted_yield );
3640     return;
3641   }
3642 
3643   // (5) We check whether we've reached our time quota. If we have,
3644   // then we abort.
3645   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3646   if (elapsed_time_ms > _time_target_ms) {
3647     set_has_aborted();
3648     _has_timed_out = true;
3649     statsOnly( ++_aborted_timed_out );
3650     return;
3651   }
3652 
3653   // (6) Finally, we check whether there are enough completed STAB
3654   // buffers available for processing. If there are, we abort.
3655   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3656   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3657     if (_cm->verbose_low()) {
3658       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3659                              _task_id);
3660     }
3661     // we do need to process SATB buffers, we'll abort and restart
3662     // the marking task to do so
3663     set_has_aborted();
3664     statsOnly( ++_aborted_satb );
3665     return;
3666   }
3667 }
3668 
3669 void CMTask::recalculate_limits() {
3670   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3671   _words_scanned_limit      = _real_words_scanned_limit;
3672 
3673   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3674   _refs_reached_limit       = _real_refs_reached_limit;
3675 }
3676 
3677 void CMTask::decrease_limits() {
3678   // This is called when we believe that we're going to do an infrequent
3679   // operation which will increase the per byte scanned cost (i.e. move
3680   // entries to/from the global stack). It basically tries to decrease the
3681   // scanning limit so that the clock is called earlier.
3682 
3683   if (_cm->verbose_medium()) {
3684     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3685   }
3686 
3687   _words_scanned_limit = _real_words_scanned_limit -
3688     3 * words_scanned_period / 4;
3689   _refs_reached_limit  = _real_refs_reached_limit -
3690     3 * refs_reached_period / 4;
3691 }
3692 
3693 void CMTask::move_entries_to_global_stack() {
3694   // local array where we'll store the entries that will be popped
3695   // from the local queue
3696   oop buffer[global_stack_transfer_size];
3697 
3698   int n = 0;
3699   oop obj;
3700   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3701     buffer[n] = obj;
3702     ++n;
3703   }
3704 
3705   if (n > 0) {
3706     // we popped at least one entry from the local queue
3707 
3708     statsOnly( ++_global_transfers_to; _local_pops += n );
3709 
3710     if (!_cm->mark_stack_push(buffer, n)) {
3711       if (_cm->verbose_low()) {
3712         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
3713                                _task_id);
3714       }
3715       set_has_aborted();
3716     } else {
3717       // the transfer was successful
3718 
3719       if (_cm->verbose_medium()) {
3720         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3721                                _task_id, n);
3722       }
3723       statsOnly( int tmp_size = _cm->mark_stack_size();
3724                  if (tmp_size > _global_max_size) {
3725                    _global_max_size = tmp_size;
3726                  }
3727                  _global_pushes += n );
3728     }
3729   }
3730 
3731   // this operation was quite expensive, so decrease the limits
3732   decrease_limits();
3733 }
3734 
3735 void CMTask::get_entries_from_global_stack() {
3736   // local array where we'll store the entries that will be popped
3737   // from the global stack.
3738   oop buffer[global_stack_transfer_size];
3739   int n;
3740   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3741   assert(n <= global_stack_transfer_size,
3742          "we should not pop more than the given limit");
3743   if (n > 0) {
3744     // yes, we did actually pop at least one entry
3745 
3746     statsOnly( ++_global_transfers_from; _global_pops += n );
3747     if (_cm->verbose_medium()) {
3748       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3749                              _task_id, n);
3750     }
3751     for (int i = 0; i < n; ++i) {
3752       bool success = _task_queue->push(buffer[i]);
3753       // We only call this when the local queue is empty or under a
3754       // given target limit. So, we do not expect this push to fail.
3755       assert(success, "invariant");
3756     }
3757 
3758     statsOnly( int tmp_size = _task_queue->size();
3759                if (tmp_size > _local_max_size) {
3760                  _local_max_size = tmp_size;
3761                }
3762                _local_pushes += n );
3763   }
3764 
3765   // this operation was quite expensive, so decrease the limits
3766   decrease_limits();
3767 }
3768 
3769 void CMTask::drain_local_queue(bool partially) {
3770   if (has_aborted()) return;
3771 
3772   // Decide what the target size is, depending whether we're going to
3773   // drain it partially (so that other tasks can steal if they run out
3774   // of things to do) or totally (at the very end).
3775   size_t target_size;
3776   if (partially) {
3777     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3778   } else {
3779     target_size = 0;
3780   }
3781 
3782   if (_task_queue->size() > target_size) {
3783     if (_cm->verbose_high()) {
3784       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3785                              _task_id, target_size);
3786     }
3787 
3788     oop obj;
3789     bool ret = _task_queue->pop_local(obj);
3790     while (ret) {
3791       statsOnly( ++_local_pops );
3792 
3793       if (_cm->verbose_high()) {
3794         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3795                                (void*) obj);
3796       }
3797 
3798       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3799       assert(!_g1h->is_on_master_free_list(
3800                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3801 
3802       scan_object(obj);
3803 
3804       if (_task_queue->size() <= target_size || has_aborted()) {
3805         ret = false;
3806       } else {
3807         ret = _task_queue->pop_local(obj);
3808       }
3809     }
3810 
3811     if (_cm->verbose_high()) {
3812       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3813                              _task_id, _task_queue->size());
3814     }
3815   }
3816 }
3817 
3818 void CMTask::drain_global_stack(bool partially) {
3819   if (has_aborted()) return;
3820 
3821   // We have a policy to drain the local queue before we attempt to
3822   // drain the global stack.
3823   assert(partially || _task_queue->size() == 0, "invariant");
3824 
3825   // Decide what the target size is, depending whether we're going to
3826   // drain it partially (so that other tasks can steal if they run out
3827   // of things to do) or totally (at the very end).  Notice that,
3828   // because we move entries from the global stack in chunks or
3829   // because another task might be doing the same, we might in fact
3830   // drop below the target. But, this is not a problem.
3831   size_t target_size;
3832   if (partially) {
3833     target_size = _cm->partial_mark_stack_size_target();
3834   } else {
3835     target_size = 0;
3836   }
3837 
3838   if (_cm->mark_stack_size() > target_size) {
3839     if (_cm->verbose_low()) {
3840       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3841                              _task_id, target_size);
3842     }
3843 
3844     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3845       get_entries_from_global_stack();
3846       drain_local_queue(partially);
3847     }
3848 
3849     if (_cm->verbose_low()) {
3850       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3851                              _task_id, _cm->mark_stack_size());
3852     }
3853   }
3854 }
3855 
3856 // SATB Queue has several assumptions on whether to call the par or
3857 // non-par versions of the methods. this is why some of the code is
3858 // replicated. We should really get rid of the single-threaded version
3859 // of the code to simplify things.
3860 void CMTask::drain_satb_buffers() {
3861   if (has_aborted()) return;
3862 
3863   // We set this so that the regular clock knows that we're in the
3864   // middle of draining buffers and doesn't set the abort flag when it
3865   // notices that SATB buffers are available for draining. It'd be
3866   // very counter productive if it did that. :-)
3867   _draining_satb_buffers = true;
3868 
3869   CMObjectClosure oc(this);
3870   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3871   if (G1CollectedHeap::use_parallel_gc_threads()) {
3872     satb_mq_set.set_par_closure(_task_id, &oc);
3873   } else {
3874     satb_mq_set.set_closure(&oc);
3875   }
3876 
3877   // This keeps claiming and applying the closure to completed buffers
3878   // until we run out of buffers or we need to abort.
3879   if (G1CollectedHeap::use_parallel_gc_threads()) {
3880     while (!has_aborted() &&
3881            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3882       if (_cm->verbose_medium()) {
3883         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3884       }
3885       statsOnly( ++_satb_buffers_processed );
3886       regular_clock_call();
3887     }
3888   } else {
3889     while (!has_aborted() &&
3890            satb_mq_set.apply_closure_to_completed_buffer()) {
3891       if (_cm->verbose_medium()) {
3892         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3893       }
3894       statsOnly( ++_satb_buffers_processed );
3895       regular_clock_call();
3896     }
3897   }
3898 
3899   if (!concurrent() && !has_aborted()) {
3900     // We should only do this during remark.
3901     if (G1CollectedHeap::use_parallel_gc_threads()) {
3902       satb_mq_set.par_iterate_closure_all_threads(_task_id);
3903     } else {
3904       satb_mq_set.iterate_closure_all_threads();
3905     }
3906   }
3907 
3908   _draining_satb_buffers = false;
3909 
3910   assert(has_aborted() ||
3911          concurrent() ||
3912          satb_mq_set.completed_buffers_num() == 0, "invariant");
3913 
3914   if (G1CollectedHeap::use_parallel_gc_threads()) {
3915     satb_mq_set.set_par_closure(_task_id, NULL);
3916   } else {
3917     satb_mq_set.set_closure(NULL);
3918   }
3919 
3920   // again, this was a potentially expensive operation, decrease the
3921   // limits to get the regular clock call early
3922   decrease_limits();
3923 }
3924 
3925 void CMTask::drain_region_stack(BitMapClosure* bc) {
3926   if (has_aborted()) return;
3927 
3928   assert(_region_finger == NULL,
3929          "it should be NULL when we're not scanning a region");
3930 
3931   if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
3932     if (_cm->verbose_low()) {
3933       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3934                              _task_id, _cm->region_stack_size());
3935     }
3936 
3937     MemRegion mr;
3938 
3939     if (!_aborted_region.is_empty()) {
3940       mr = _aborted_region;
3941       _aborted_region = MemRegion();
3942 
3943       if (_cm->verbose_low()) {
3944         gclog_or_tty->print_cr("[%d] scanning aborted region "
3945                                "[ " PTR_FORMAT ", " PTR_FORMAT " )",
3946                                _task_id, mr.start(), mr.end());
3947       }
3948     } else {
3949       mr = _cm->region_stack_pop_lock_free();
3950       // it returns MemRegion() if the pop fails
3951       statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3952     }
3953 
3954     while (mr.start() != NULL) {
3955       if (_cm->verbose_medium()) {
3956         gclog_or_tty->print_cr("[%d] we are scanning region "
3957                                "["PTR_FORMAT", "PTR_FORMAT")",
3958                                _task_id, mr.start(), mr.end());
3959       }
3960 
3961       assert(mr.end() <= _cm->finger(),
3962              "otherwise the region shouldn't be on the stack");
3963       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3964       if (_nextMarkBitMap->iterate(bc, mr)) {
3965         assert(!has_aborted(),
3966                "cannot abort the task without aborting the bitmap iteration");
3967 
3968         // We finished iterating over the region without aborting.
3969         regular_clock_call();
3970         if (has_aborted()) {
3971           mr = MemRegion();
3972         } else {
3973           mr = _cm->region_stack_pop_lock_free();
3974           // it returns MemRegion() if the pop fails
3975           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3976         }
3977       } else {
3978         assert(has_aborted(), "currently the only way to do so");
3979 
3980         // The only way to abort the bitmap iteration is to return
3981         // false from the do_bit() method. However, inside the
3982         // do_bit() method we move the _region_finger to point to the
3983         // object currently being looked at. So, if we bail out, we
3984         // have definitely set _region_finger to something non-null.
3985         assert(_region_finger != NULL, "invariant");
3986 
3987         // Make sure that any previously aborted region has been
3988         // cleared.
3989         assert(_aborted_region.is_empty(), "aborted region not cleared");
3990 
3991         // The iteration was actually aborted. So now _region_finger
3992         // points to the address of the object we last scanned. If we
3993         // leave it there, when we restart this task, we will rescan
3994         // the object. It is easy to avoid this. We move the finger by
3995         // enough to point to the next possible object header (the
3996         // bitmap knows by how much we need to move it as it knows its
3997         // granularity).
3998         MemRegion newRegion =
3999           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
4000 
4001         if (!newRegion.is_empty()) {
4002           if (_cm->verbose_low()) {
4003             gclog_or_tty->print_cr("[%d] recording unscanned region"
4004                                    "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
4005                                    _task_id,
4006                                    newRegion.start(), newRegion.end());
4007           }
4008           // Now record the part of the region we didn't scan to
4009           // make sure this task scans it later.
4010           _aborted_region = newRegion;
4011         }
4012         // break from while
4013         mr = MemRegion();
4014       }
4015       _region_finger = NULL;
4016     }
4017 
4018     if (_cm->verbose_low()) {
4019       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
4020                              _task_id, _cm->region_stack_size());
4021     }
4022   }
4023 }
4024 
4025 void CMTask::print_stats() {
4026   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
4027                          _task_id, _calls);
4028   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
4029                          _elapsed_time_ms, _termination_time_ms);
4030   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4031                          _step_times_ms.num(), _step_times_ms.avg(),
4032                          _step_times_ms.sd());
4033   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
4034                          _step_times_ms.maximum(), _step_times_ms.sum());
4035 
4036 #if _MARKING_STATS_
4037   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4038                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
4039                          _all_clock_intervals_ms.sd());
4040   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
4041                          _all_clock_intervals_ms.maximum(),
4042                          _all_clock_intervals_ms.sum());
4043   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
4044                          _clock_due_to_scanning, _clock_due_to_marking);
4045   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
4046                          _objs_scanned, _objs_found_on_bitmap);
4047   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
4048                          _local_pushes, _local_pops, _local_max_size);
4049   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
4050                          _global_pushes, _global_pops, _global_max_size);
4051   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
4052                          _global_transfers_to,_global_transfers_from);
4053   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
4054                          _regions_claimed, _region_stack_pops);
4055   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
4056   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
4057                          _steal_attempts, _steals);
4058   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
4059   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
4060                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
4061   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
4062                          _aborted_timed_out, _aborted_satb, _aborted_termination);
4063 #endif // _MARKING_STATS_
4064 }
4065 
4066 /*****************************************************************************
4067 
4068     The do_marking_step(time_target_ms) method is the building block
4069     of the parallel marking framework. It can be called in parallel
4070     with other invocations of do_marking_step() on different tasks
4071     (but only one per task, obviously) and concurrently with the
4072     mutator threads, or during remark, hence it eliminates the need
4073     for two versions of the code. When called during remark, it will
4074     pick up from where the task left off during the concurrent marking
4075     phase. Interestingly, tasks are also claimable during evacuation
4076     pauses too, since do_marking_step() ensures that it aborts before
4077     it needs to yield.
4078 
4079     The data structures that is uses to do marking work are the
4080     following:
4081 
4082       (1) Marking Bitmap. If there are gray objects that appear only
4083       on the bitmap (this happens either when dealing with an overflow
4084       or when the initial marking phase has simply marked the roots
4085       and didn't push them on the stack), then tasks claim heap
4086       regions whose bitmap they then scan to find gray objects. A
4087       global finger indicates where the end of the last claimed region
4088       is. A local finger indicates how far into the region a task has
4089       scanned. The two fingers are used to determine how to gray an
4090       object (i.e. whether simply marking it is OK, as it will be
4091       visited by a task in the future, or whether it needs to be also
4092       pushed on a stack).
4093 
4094       (2) Local Queue. The local queue of the task which is accessed
4095       reasonably efficiently by the task. Other tasks can steal from
4096       it when they run out of work. Throughout the marking phase, a
4097       task attempts to keep its local queue short but not totally
4098       empty, so that entries are available for stealing by other
4099       tasks. Only when there is no more work, a task will totally
4100       drain its local queue.
4101 
4102       (3) Global Mark Stack. This handles local queue overflow. During
4103       marking only sets of entries are moved between it and the local
4104       queues, as access to it requires a mutex and more fine-grain
4105       interaction with it which might cause contention. If it
4106       overflows, then the marking phase should restart and iterate
4107       over the bitmap to identify gray objects. Throughout the marking
4108       phase, tasks attempt to keep the global mark stack at a small
4109       length but not totally empty, so that entries are available for
4110       popping by other tasks. Only when there is no more work, tasks
4111       will totally drain the global mark stack.
4112 
4113       (4) Global Region Stack. Entries on it correspond to areas of
4114       the bitmap that need to be scanned since they contain gray
4115       objects. Pushes on the region stack only happen during
4116       evacuation pauses and typically correspond to areas covered by
4117       GC LABS. If it overflows, then the marking phase should restart
4118       and iterate over the bitmap to identify gray objects. Tasks will
4119       try to totally drain the region stack as soon as possible.
4120 
4121       (5) SATB Buffer Queue. This is where completed SATB buffers are
4122       made available. Buffers are regularly removed from this queue
4123       and scanned for roots, so that the queue doesn't get too
4124       long. During remark, all completed buffers are processed, as
4125       well as the filled in parts of any uncompleted buffers.
4126 
4127     The do_marking_step() method tries to abort when the time target
4128     has been reached. There are a few other cases when the
4129     do_marking_step() method also aborts:
4130 
4131       (1) When the marking phase has been aborted (after a Full GC).
4132 
4133       (2) When a global overflow (either on the global stack or the
4134       region stack) has been triggered. Before the task aborts, it
4135       will actually sync up with the other tasks to ensure that all
4136       the marking data structures (local queues, stacks, fingers etc.)
4137       are re-initialised so that when do_marking_step() completes,
4138       the marking phase can immediately restart.
4139 
4140       (3) When enough completed SATB buffers are available. The
4141       do_marking_step() method only tries to drain SATB buffers right
4142       at the beginning. So, if enough buffers are available, the
4143       marking step aborts and the SATB buffers are processed at
4144       the beginning of the next invocation.
4145 
4146       (4) To yield. when we have to yield then we abort and yield
4147       right at the end of do_marking_step(). This saves us from a lot
4148       of hassle as, by yielding we might allow a Full GC. If this
4149       happens then objects will be compacted underneath our feet, the
4150       heap might shrink, etc. We save checking for this by just
4151       aborting and doing the yield right at the end.
4152 
4153     From the above it follows that the do_marking_step() method should
4154     be called in a loop (or, otherwise, regularly) until it completes.
4155 
4156     If a marking step completes without its has_aborted() flag being
4157     true, it means it has completed the current marking phase (and
4158     also all other marking tasks have done so and have all synced up).
4159 
4160     A method called regular_clock_call() is invoked "regularly" (in
4161     sub ms intervals) throughout marking. It is this clock method that
4162     checks all the abort conditions which were mentioned above and
4163     decides when the task should abort. A work-based scheme is used to
4164     trigger this clock method: when the number of object words the
4165     marking phase has scanned or the number of references the marking
4166     phase has visited reach a given limit. Additional invocations to
4167     the method clock have been planted in a few other strategic places
4168     too. The initial reason for the clock method was to avoid calling
4169     vtime too regularly, as it is quite expensive. So, once it was in
4170     place, it was natural to piggy-back all the other conditions on it
4171     too and not constantly check them throughout the code.
4172 
4173  *****************************************************************************/
4174 
4175 void CMTask::do_marking_step(double time_target_ms,
4176                              bool do_stealing,
4177                              bool do_termination) {
4178   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4179   assert(concurrent() == _cm->concurrent(), "they should be the same");
4180 
4181   assert(concurrent() || _cm->region_stack_empty(),
4182          "the region stack should have been cleared before remark");
4183   assert(concurrent() || !_cm->has_aborted_regions(),
4184          "aborted regions should have been cleared before remark");
4185   assert(_region_finger == NULL,
4186          "this should be non-null only when a region is being scanned");
4187 
4188   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4189   assert(_task_queues != NULL, "invariant");
4190   assert(_task_queue != NULL, "invariant");
4191   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
4192 
4193   assert(!_claimed,
4194          "only one thread should claim this task at any one time");
4195 
4196   // OK, this doesn't safeguard again all possible scenarios, as it is
4197   // possible for two threads to set the _claimed flag at the same
4198   // time. But it is only for debugging purposes anyway and it will
4199   // catch most problems.
4200   _claimed = true;
4201 
4202   _start_time_ms = os::elapsedVTime() * 1000.0;
4203   statsOnly( _interval_start_time_ms = _start_time_ms );
4204 
4205   double diff_prediction_ms =
4206     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4207   _time_target_ms = time_target_ms - diff_prediction_ms;
4208 
4209   // set up the variables that are used in the work-based scheme to
4210   // call the regular clock method
4211   _words_scanned = 0;
4212   _refs_reached  = 0;
4213   recalculate_limits();
4214 
4215   // clear all flags
4216   clear_has_aborted();
4217   _has_timed_out = false;
4218   _draining_satb_buffers = false;
4219 
4220   ++_calls;
4221 
4222   if (_cm->verbose_low()) {
4223     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
4224                            "target = %1.2lfms >>>>>>>>>>",
4225                            _task_id, _calls, _time_target_ms);
4226   }
4227 
4228   // Set up the bitmap and oop closures. Anything that uses them is
4229   // eventually called from this method, so it is OK to allocate these
4230   // statically.
4231   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4232   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
4233   set_cm_oop_closure(&cm_oop_closure);
4234 
4235   if (_cm->has_overflown()) {
4236     // This can happen if the region stack or the mark stack overflows
4237     // during a GC pause and this task, after a yield point,
4238     // restarts. We have to abort as we need to get into the overflow
4239     // protocol which happens right at the end of this task.
4240     set_has_aborted();
4241   }
4242 
4243   // First drain any available SATB buffers. After this, we will not
4244   // look at SATB buffers before the next invocation of this method.
4245   // If enough completed SATB buffers are queued up, the regular clock
4246   // will abort this task so that it restarts.
4247   drain_satb_buffers();
4248   // ...then partially drain the local queue and the global stack
4249   drain_local_queue(true);
4250   drain_global_stack(true);
4251 
4252   // Then totally drain the region stack.  We will not look at
4253   // it again before the next invocation of this method. Entries on
4254   // the region stack are only added during evacuation pauses, for
4255   // which we have to yield. When we do, we abort the task anyway so
4256   // it will look at the region stack again when it restarts.
4257   bitmap_closure.set_scanning_heap_region(false);
4258   drain_region_stack(&bitmap_closure);
4259   // ...then partially drain the local queue and the global stack
4260   drain_local_queue(true);
4261   drain_global_stack(true);
4262 
4263   do {
4264     if (!has_aborted() && _curr_region != NULL) {
4265       // This means that we're already holding on to a region.
4266       assert(_finger != NULL, "if region is not NULL, then the finger "
4267              "should not be NULL either");
4268 
4269       // We might have restarted this task after an evacuation pause
4270       // which might have evacuated the region we're holding on to
4271       // underneath our feet. Let's read its limit again to make sure
4272       // that we do not iterate over a region of the heap that
4273       // contains garbage (update_region_limit() will also move
4274       // _finger to the start of the region if it is found empty).
4275       update_region_limit();
4276       // We will start from _finger not from the start of the region,
4277       // as we might be restarting this task after aborting half-way
4278       // through scanning this region. In this case, _finger points to
4279       // the address where we last found a marked object. If this is a
4280       // fresh region, _finger points to start().
4281       MemRegion mr = MemRegion(_finger, _region_limit);
4282 
4283       if (_cm->verbose_low()) {
4284         gclog_or_tty->print_cr("[%d] we're scanning part "
4285                                "["PTR_FORMAT", "PTR_FORMAT") "
4286                                "of region "PTR_FORMAT,
4287                                _task_id, _finger, _region_limit, _curr_region);
4288       }
4289 
4290       // Let's iterate over the bitmap of the part of the
4291       // region that is left.
4292       bitmap_closure.set_scanning_heap_region(true);
4293       if (mr.is_empty() ||
4294           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4295         // We successfully completed iterating over the region. Now,
4296         // let's give up the region.
4297         giveup_current_region();
4298         regular_clock_call();
4299       } else {
4300         assert(has_aborted(), "currently the only way to do so");
4301         // The only way to abort the bitmap iteration is to return
4302         // false from the do_bit() method. However, inside the
4303         // do_bit() method we move the _finger to point to the
4304         // object currently being looked at. So, if we bail out, we
4305         // have definitely set _finger to something non-null.
4306         assert(_finger != NULL, "invariant");
4307 
4308         // Region iteration was actually aborted. So now _finger
4309         // points to the address of the object we last scanned. If we
4310         // leave it there, when we restart this task, we will rescan
4311         // the object. It is easy to avoid this. We move the finger by
4312         // enough to point to the next possible object header (the
4313         // bitmap knows by how much we need to move it as it knows its
4314         // granularity).
4315         assert(_finger < _region_limit, "invariant");
4316         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
4317         // Check if bitmap iteration was aborted while scanning the last object
4318         if (new_finger >= _region_limit) {
4319             giveup_current_region();
4320         } else {
4321             move_finger_to(new_finger);
4322         }
4323       }
4324     }
4325     // At this point we have either completed iterating over the
4326     // region we were holding on to, or we have aborted.
4327 
4328     // We then partially drain the local queue and the global stack.
4329     // (Do we really need this?)
4330     drain_local_queue(true);
4331     drain_global_stack(true);
4332 
4333     // Read the note on the claim_region() method on why it might
4334     // return NULL with potentially more regions available for
4335     // claiming and why we have to check out_of_regions() to determine
4336     // whether we're done or not.
4337     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4338       // We are going to try to claim a new region. We should have
4339       // given up on the previous one.
4340       // Separated the asserts so that we know which one fires.
4341       assert(_curr_region  == NULL, "invariant");
4342       assert(_finger       == NULL, "invariant");
4343       assert(_region_limit == NULL, "invariant");
4344       if (_cm->verbose_low()) {
4345         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
4346       }
4347       HeapRegion* claimed_region = _cm->claim_region(_task_id);
4348       if (claimed_region != NULL) {
4349         // Yes, we managed to claim one
4350         statsOnly( ++_regions_claimed );
4351 
4352         if (_cm->verbose_low()) {
4353           gclog_or_tty->print_cr("[%d] we successfully claimed "
4354                                  "region "PTR_FORMAT,
4355                                  _task_id, claimed_region);
4356         }
4357 
4358         setup_for_region(claimed_region);
4359         assert(_curr_region == claimed_region, "invariant");
4360       }
4361       // It is important to call the regular clock here. It might take
4362       // a while to claim a region if, for example, we hit a large
4363       // block of empty regions. So we need to call the regular clock
4364       // method once round the loop to make sure it's called
4365       // frequently enough.
4366       regular_clock_call();
4367     }
4368 
4369     if (!has_aborted() && _curr_region == NULL) {
4370       assert(_cm->out_of_regions(),
4371              "at this point we should be out of regions");
4372     }
4373   } while ( _curr_region != NULL && !has_aborted());
4374 
4375   if (!has_aborted()) {
4376     // We cannot check whether the global stack is empty, since other
4377     // tasks might be pushing objects to it concurrently. We also cannot
4378     // check if the region stack is empty because if a thread is aborting
4379     // it can push a partially done region back.
4380     assert(_cm->out_of_regions(),
4381            "at this point we should be out of regions");
4382 
4383     if (_cm->verbose_low()) {
4384       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4385     }
4386 
4387     // Try to reduce the number of available SATB buffers so that
4388     // remark has less work to do.
4389     drain_satb_buffers();
4390   }
4391 
4392   // Since we've done everything else, we can now totally drain the
4393   // local queue and global stack.
4394   drain_local_queue(false);
4395   drain_global_stack(false);
4396 
4397   // Attempt at work stealing from other task's queues.
4398   if (do_stealing && !has_aborted()) {
4399     // We have not aborted. This means that we have finished all that
4400     // we could. Let's try to do some stealing...
4401 
4402     // We cannot check whether the global stack is empty, since other
4403     // tasks might be pushing objects to it concurrently. We also cannot
4404     // check if the region stack is empty because if a thread is aborting
4405     // it can push a partially done region back.
4406     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4407            "only way to reach here");
4408 
4409     if (_cm->verbose_low()) {
4410       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4411     }
4412 
4413     while (!has_aborted()) {
4414       oop obj;
4415       statsOnly( ++_steal_attempts );
4416 
4417       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4418         if (_cm->verbose_medium()) {
4419           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4420                                  _task_id, (void*) obj);
4421         }
4422 
4423         statsOnly( ++_steals );
4424 
4425         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4426                "any stolen object should be marked");
4427         scan_object(obj);
4428 
4429         // And since we're towards the end, let's totally drain the
4430         // local queue and global stack.
4431         drain_local_queue(false);
4432         drain_global_stack(false);
4433       } else {
4434         break;
4435       }
4436     }
4437   }
4438 
4439   // If we are about to wrap up and go into termination, check if we
4440   // should raise the overflow flag.
4441   if (do_termination && !has_aborted()) {
4442     if (_cm->force_overflow()->should_force()) {
4443       _cm->set_has_overflown();
4444       regular_clock_call();
4445     }
4446   }
4447 
4448   // We still haven't aborted. Now, let's try to get into the
4449   // termination protocol.
4450   if (do_termination && !has_aborted()) {
4451     // We cannot check whether the global stack is empty, since other
4452     // tasks might be concurrently pushing objects on it. We also cannot
4453     // check if the region stack is empty because if a thread is aborting
4454     // it can push a partially done region back.
4455     // Separated the asserts so that we know which one fires.
4456     assert(_cm->out_of_regions(), "only way to reach here");
4457     assert(_task_queue->size() == 0, "only way to reach here");
4458 
4459     if (_cm->verbose_low()) {
4460       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4461     }
4462 
4463     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4464     // The CMTask class also extends the TerminatorTerminator class,
4465     // hence its should_exit_termination() method will also decide
4466     // whether to exit the termination protocol or not.
4467     bool finished = _cm->terminator()->offer_termination(this);
4468     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4469     _termination_time_ms +=
4470       termination_end_time_ms - _termination_start_time_ms;
4471 
4472     if (finished) {
4473       // We're all done.
4474 
4475       if (_task_id == 0) {
4476         // let's allow task 0 to do this
4477         if (concurrent()) {
4478           assert(_cm->concurrent_marking_in_progress(), "invariant");
4479           // we need to set this to false before the next
4480           // safepoint. This way we ensure that the marking phase
4481           // doesn't observe any more heap expansions.
4482           _cm->clear_concurrent_marking_in_progress();
4483         }
4484       }
4485 
4486       // We can now guarantee that the global stack is empty, since
4487       // all other tasks have finished. We separated the guarantees so
4488       // that, if a condition is false, we can immediately find out
4489       // which one.
4490       guarantee(_cm->out_of_regions(), "only way to reach here");
4491       guarantee(_aborted_region.is_empty(), "only way to reach here");
4492       guarantee(_cm->region_stack_empty(), "only way to reach here");
4493       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4494       guarantee(_task_queue->size() == 0, "only way to reach here");
4495       guarantee(!_cm->has_overflown(), "only way to reach here");
4496       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4497       guarantee(!_cm->region_stack_overflow(), "only way to reach here");
4498 
4499       if (_cm->verbose_low()) {
4500         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4501       }
4502     } else {
4503       // Apparently there's more work to do. Let's abort this task. It
4504       // will restart it and we can hopefully find more things to do.
4505 
4506       if (_cm->verbose_low()) {
4507         gclog_or_tty->print_cr("[%d] apparently there is more work to do",
4508                                _task_id);
4509       }
4510 
4511       set_has_aborted();
4512       statsOnly( ++_aborted_termination );
4513     }
4514   }
4515 
4516   // Mainly for debugging purposes to make sure that a pointer to the
4517   // closure which was statically allocated in this frame doesn't
4518   // escape it by accident.
4519   set_cm_oop_closure(NULL);
4520   double end_time_ms = os::elapsedVTime() * 1000.0;
4521   double elapsed_time_ms = end_time_ms - _start_time_ms;
4522   // Update the step history.
4523   _step_times_ms.add(elapsed_time_ms);
4524 
4525   if (has_aborted()) {
4526     // The task was aborted for some reason.
4527 
4528     statsOnly( ++_aborted );
4529 
4530     if (_has_timed_out) {
4531       double diff_ms = elapsed_time_ms - _time_target_ms;
4532       // Keep statistics of how well we did with respect to hitting
4533       // our target only if we actually timed out (if we aborted for
4534       // other reasons, then the results might get skewed).
4535       _marking_step_diffs_ms.add(diff_ms);
4536     }
4537 
4538     if (_cm->has_overflown()) {
4539       // This is the interesting one. We aborted because a global
4540       // overflow was raised. This means we have to restart the
4541       // marking phase and start iterating over regions. However, in
4542       // order to do this we have to make sure that all tasks stop
4543       // what they are doing and re-initialise in a safe manner. We
4544       // will achieve this with the use of two barrier sync points.
4545 
4546       if (_cm->verbose_low()) {
4547         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4548       }
4549 
4550       _cm->enter_first_sync_barrier(_task_id);
4551       // When we exit this sync barrier we know that all tasks have
4552       // stopped doing marking work. So, it's now safe to
4553       // re-initialise our data structures. At the end of this method,
4554       // task 0 will clear the global data structures.
4555 
4556       statsOnly( ++_aborted_overflow );
4557 
4558       // We clear the local state of this task...
4559       clear_region_fields();
4560 
4561       // ...and enter the second barrier.
4562       _cm->enter_second_sync_barrier(_task_id);
4563       // At this point everything has bee re-initialised and we're
4564       // ready to restart.
4565     }
4566 
4567     if (_cm->verbose_low()) {
4568       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4569                              "elapsed = %1.2lfms <<<<<<<<<<",
4570                              _task_id, _time_target_ms, elapsed_time_ms);
4571       if (_cm->has_aborted()) {
4572         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4573                                _task_id);
4574       }
4575     }
4576   } else {
4577     if (_cm->verbose_low()) {
4578       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4579                              "elapsed = %1.2lfms <<<<<<<<<<",
4580                              _task_id, _time_target_ms, elapsed_time_ms);
4581     }
4582   }
4583 
4584   _claimed = false;
4585 }
4586 
4587 CMTask::CMTask(int task_id,
4588                ConcurrentMark* cm,
4589                CMTaskQueue* task_queue,
4590                CMTaskQueueSet* task_queues)
4591   : _g1h(G1CollectedHeap::heap()),
4592     _task_id(task_id), _cm(cm),
4593     _claimed(false),
4594     _nextMarkBitMap(NULL), _hash_seed(17),
4595     _task_queue(task_queue),
4596     _task_queues(task_queues),
4597     _cm_oop_closure(NULL),
4598     _aborted_region(MemRegion()) {
4599   guarantee(task_queue != NULL, "invariant");
4600   guarantee(task_queues != NULL, "invariant");
4601 
4602   statsOnly( _clock_due_to_scanning = 0;
4603              _clock_due_to_marking  = 0 );
4604 
4605   _marking_step_diffs_ms.add(0.5);
4606 }
4607 
4608 // These are formatting macros that are used below to ensure
4609 // consistent formatting. The *_H_* versions are used to format the
4610 // header for a particular value and they should be kept consistent
4611 // with the corresponding macro. Also note that most of the macros add
4612 // the necessary white space (as a prefix) which makes them a bit
4613 // easier to compose.
4614 
4615 // All the output lines are prefixed with this string to be able to
4616 // identify them easily in a large log file.
4617 #define G1PPRL_LINE_PREFIX            "###"
4618 
4619 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
4620 #ifdef _LP64
4621 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4622 #else // _LP64
4623 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4624 #endif // _LP64
4625 
4626 // For per-region info
4627 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4628 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4629 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
4630 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4631 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4632 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4633 
4634 // For summary info
4635 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
4636 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
4637 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
4638 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
4639 
4640 G1PrintRegionLivenessInfoClosure::
4641 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4642   : _out(out),
4643     _total_used_bytes(0), _total_capacity_bytes(0),
4644     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4645     _hum_used_bytes(0), _hum_capacity_bytes(0),
4646     _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4647   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4648   MemRegion g1_committed = g1h->g1_committed();
4649   MemRegion g1_reserved = g1h->g1_reserved();
4650   double now = os::elapsedTime();
4651 
4652   // Print the header of the output.
4653   _out->cr();
4654   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4655   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4656                  G1PPRL_SUM_ADDR_FORMAT("committed")
4657                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4658                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4659                  g1_committed.start(), g1_committed.end(),
4660                  g1_reserved.start(), g1_reserved.end(),
4661                  HeapRegion::GrainBytes);
4662   _out->print_cr(G1PPRL_LINE_PREFIX);
4663   _out->print_cr(G1PPRL_LINE_PREFIX
4664                  G1PPRL_TYPE_H_FORMAT
4665                  G1PPRL_ADDR_BASE_H_FORMAT
4666                  G1PPRL_BYTE_H_FORMAT
4667                  G1PPRL_BYTE_H_FORMAT
4668                  G1PPRL_BYTE_H_FORMAT
4669                  G1PPRL_DOUBLE_H_FORMAT,
4670                  "type", "address-range",
4671                  "used", "prev-live", "next-live", "gc-eff");
4672   _out->print_cr(G1PPRL_LINE_PREFIX
4673                  G1PPRL_TYPE_H_FORMAT
4674                  G1PPRL_ADDR_BASE_H_FORMAT
4675                  G1PPRL_BYTE_H_FORMAT
4676                  G1PPRL_BYTE_H_FORMAT
4677                  G1PPRL_BYTE_H_FORMAT
4678                  G1PPRL_DOUBLE_H_FORMAT,
4679                  "", "",
4680                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4681 }
4682 
4683 // It takes as a parameter a reference to one of the _hum_* fields, it
4684 // deduces the corresponding value for a region in a humongous region
4685 // series (either the region size, or what's left if the _hum_* field
4686 // is < the region size), and updates the _hum_* field accordingly.
4687 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4688   size_t bytes = 0;
4689   // The > 0 check is to deal with the prev and next live bytes which
4690   // could be 0.
4691   if (*hum_bytes > 0) {
4692     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4693     *hum_bytes -= bytes;
4694   }
4695   return bytes;
4696 }
4697 
4698 // It deduces the values for a region in a humongous region series
4699 // from the _hum_* fields and updates those accordingly. It assumes
4700 // that that _hum_* fields have already been set up from the "starts
4701 // humongous" region and we visit the regions in address order.
4702 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4703                                                      size_t* capacity_bytes,
4704                                                      size_t* prev_live_bytes,
4705                                                      size_t* next_live_bytes) {
4706   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4707   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4708   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4709   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4710   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4711 }
4712 
4713 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4714   const char* type = "";
4715   HeapWord* bottom       = r->bottom();
4716   HeapWord* end          = r->end();
4717   size_t capacity_bytes  = r->capacity();
4718   size_t used_bytes      = r->used();
4719   size_t prev_live_bytes = r->live_bytes();
4720   size_t next_live_bytes = r->next_live_bytes();
4721   double gc_eff          = r->gc_efficiency();
4722   if (r->used() == 0) {
4723     type = "FREE";
4724   } else if (r->is_survivor()) {
4725     type = "SURV";
4726   } else if (r->is_young()) {
4727     type = "EDEN";
4728   } else if (r->startsHumongous()) {
4729     type = "HUMS";
4730 
4731     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4732            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4733            "they should have been zeroed after the last time we used them");
4734     // Set up the _hum_* fields.
4735     _hum_capacity_bytes  = capacity_bytes;
4736     _hum_used_bytes      = used_bytes;
4737     _hum_prev_live_bytes = prev_live_bytes;
4738     _hum_next_live_bytes = next_live_bytes;
4739     get_hum_bytes(&used_bytes, &capacity_bytes,
4740                   &prev_live_bytes, &next_live_bytes);
4741     end = bottom + HeapRegion::GrainWords;
4742   } else if (r->continuesHumongous()) {
4743     type = "HUMC";
4744     get_hum_bytes(&used_bytes, &capacity_bytes,
4745                   &prev_live_bytes, &next_live_bytes);
4746     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4747   } else {
4748     type = "OLD";
4749   }
4750 
4751   _total_used_bytes      += used_bytes;
4752   _total_capacity_bytes  += capacity_bytes;
4753   _total_prev_live_bytes += prev_live_bytes;
4754   _total_next_live_bytes += next_live_bytes;
4755 
4756   // Print a line for this particular region.
4757   _out->print_cr(G1PPRL_LINE_PREFIX
4758                  G1PPRL_TYPE_FORMAT
4759                  G1PPRL_ADDR_BASE_FORMAT
4760                  G1PPRL_BYTE_FORMAT
4761                  G1PPRL_BYTE_FORMAT
4762                  G1PPRL_BYTE_FORMAT
4763                  G1PPRL_DOUBLE_FORMAT,
4764                  type, bottom, end,
4765                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4766 
4767   return false;
4768 }
4769 
4770 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4771   // Print the footer of the output.
4772   _out->print_cr(G1PPRL_LINE_PREFIX);
4773   _out->print_cr(G1PPRL_LINE_PREFIX
4774                  " SUMMARY"
4775                  G1PPRL_SUM_MB_FORMAT("capacity")
4776                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4777                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4778                  G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4779                  bytes_to_mb(_total_capacity_bytes),
4780                  bytes_to_mb(_total_used_bytes),
4781                  perc(_total_used_bytes, _total_capacity_bytes),
4782                  bytes_to_mb(_total_prev_live_bytes),
4783                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4784                  bytes_to_mb(_total_next_live_bytes),
4785                  perc(_total_next_live_bytes, _total_capacity_bytes));
4786   _out->cr();
4787 }