1 /*
   2  * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/symbolTable.hpp"
  27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
  28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
  32 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
  33 #include "gc_implementation/g1/g1RemSet.hpp"
  34 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  35 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
  36 #include "gc_implementation/shared/vmGCOperations.hpp"
  37 #include "memory/genOopClosures.inline.hpp"
  38 #include "memory/referencePolicy.hpp"
  39 #include "memory/resourceArea.hpp"
  40 #include "oops/oop.inline.hpp"
  41 #include "runtime/handles.inline.hpp"
  42 #include "runtime/java.hpp"
  43 
  44 //
  45 // CMS Bit Map Wrapper
  46 
  47 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  48   _bm((uintptr_t*)NULL,0),
  49   _shifter(shifter) {
  50   _bmStartWord = (HeapWord*)(rs.base());
  51   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  52   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
  53                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  54 
  55   guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  56   // For now we'll just commit all of the bit map up fromt.
  57   // Later on we'll try to be more parsimonious with swap.
  58   guarantee(_virtual_space.initialize(brs, brs.size()),
  59             "couldn't reseve backing store for CMS bit map");
  60   assert(_virtual_space.committed_size() == brs.size(),
  61          "didn't reserve backing store for all of CMS bit map?");
  62   _bm.set_map((uintptr_t*)_virtual_space.low());
  63   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
  64          _bmWordSize, "inconsistency in bit map sizing");
  65   _bm.set_size(_bmWordSize >> _shifter);
  66 }
  67 
  68 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  69                                                HeapWord* limit) const {
  70   // First we must round addr *up* to a possible object boundary.
  71   addr = (HeapWord*)align_size_up((intptr_t)addr,
  72                                   HeapWordSize << _shifter);
  73   size_t addrOffset = heapWordToOffset(addr);
  74   if (limit == NULL) {
  75     limit = _bmStartWord + _bmWordSize;
  76   }
  77   size_t limitOffset = heapWordToOffset(limit);
  78   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  79   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  80   assert(nextAddr >= addr, "get_next_one postcondition");
  81   assert(nextAddr == limit || isMarked(nextAddr),
  82          "get_next_one postcondition");
  83   return nextAddr;
  84 }
  85 
  86 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  87                                                  HeapWord* limit) const {
  88   size_t addrOffset = heapWordToOffset(addr);
  89   if (limit == NULL) {
  90     limit = _bmStartWord + _bmWordSize;
  91   }
  92   size_t limitOffset = heapWordToOffset(limit);
  93   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  94   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  95   assert(nextAddr >= addr, "get_next_one postcondition");
  96   assert(nextAddr == limit || !isMarked(nextAddr),
  97          "get_next_one postcondition");
  98   return nextAddr;
  99 }
 100 
 101 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 102   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 103   return (int) (diff >> _shifter);
 104 }
 105 
 106 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
 107   HeapWord* left  = MAX2(_bmStartWord, mr.start());
 108   HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
 109   if (right > left) {
 110     // Right-open interval [leftOffset, rightOffset).
 111     return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
 112   } else {
 113     return true;
 114   }
 115 }
 116 
 117 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
 118                                              size_t    from_start_index,
 119                                              HeapWord* to_start_word,
 120                                              size_t    word_num) {
 121   _bm.mostly_disjoint_range_union(from_bitmap,
 122                                   from_start_index,
 123                                   heapWordToOffset(to_start_word),
 124                                   word_num);
 125 }
 126 
 127 #ifndef PRODUCT
 128 bool CMBitMapRO::covers(ReservedSpace rs) const {
 129   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 130   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
 131          "size inconsistency");
 132   return _bmStartWord == (HeapWord*)(rs.base()) &&
 133          _bmWordSize  == rs.size()>>LogHeapWordSize;
 134 }
 135 #endif
 136 
 137 void CMBitMap::clearAll() {
 138   _bm.clear();
 139   return;
 140 }
 141 
 142 void CMBitMap::markRange(MemRegion mr) {
 143   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 144   assert(!mr.is_empty(), "unexpected empty region");
 145   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 146           ((HeapWord *) mr.end())),
 147          "markRange memory region end is not card aligned");
 148   // convert address range into offset range
 149   _bm.at_put_range(heapWordToOffset(mr.start()),
 150                    heapWordToOffset(mr.end()), true);
 151 }
 152 
 153 void CMBitMap::clearRange(MemRegion mr) {
 154   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 155   assert(!mr.is_empty(), "unexpected empty region");
 156   // convert address range into offset range
 157   _bm.at_put_range(heapWordToOffset(mr.start()),
 158                    heapWordToOffset(mr.end()), false);
 159 }
 160 
 161 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 162                                             HeapWord* end_addr) {
 163   HeapWord* start = getNextMarkedWordAddress(addr);
 164   start = MIN2(start, end_addr);
 165   HeapWord* end   = getNextUnmarkedWordAddress(start);
 166   end = MIN2(end, end_addr);
 167   assert(start <= end, "Consistency check");
 168   MemRegion mr(start, end);
 169   if (!mr.is_empty()) {
 170     clearRange(mr);
 171   }
 172   return mr;
 173 }
 174 
 175 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 176   _base(NULL), _cm(cm)
 177 #ifdef ASSERT
 178   , _drain_in_progress(false)
 179   , _drain_in_progress_yields(false)
 180 #endif
 181 {}
 182 
 183 void CMMarkStack::allocate(size_t size) {
 184   _base = NEW_C_HEAP_ARRAY(oop, size);
 185   if (_base == NULL) {
 186     vm_exit_during_initialization("Failed to allocate "
 187                                   "CM region mark stack");
 188   }
 189   _index = 0;
 190   _capacity = (jint) size;
 191   _oops_do_bound = -1;
 192   NOT_PRODUCT(_max_depth = 0);
 193 }
 194 
 195 CMMarkStack::~CMMarkStack() {
 196   if (_base != NULL) {
 197     FREE_C_HEAP_ARRAY(oop, _base);
 198   }
 199 }
 200 
 201 void CMMarkStack::par_push(oop ptr) {
 202   while (true) {
 203     if (isFull()) {
 204       _overflow = true;
 205       return;
 206     }
 207     // Otherwise...
 208     jint index = _index;
 209     jint next_index = index+1;
 210     jint res = Atomic::cmpxchg(next_index, &_index, index);
 211     if (res == index) {
 212       _base[index] = ptr;
 213       // Note that we don't maintain this atomically.  We could, but it
 214       // doesn't seem necessary.
 215       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 216       return;
 217     }
 218     // Otherwise, we need to try again.
 219   }
 220 }
 221 
 222 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 223   while (true) {
 224     if (isFull()) {
 225       _overflow = true;
 226       return;
 227     }
 228     // Otherwise...
 229     jint index = _index;
 230     jint next_index = index + n;
 231     if (next_index > _capacity) {
 232       _overflow = true;
 233       return;
 234     }
 235     jint res = Atomic::cmpxchg(next_index, &_index, index);
 236     if (res == index) {
 237       for (int i = 0; i < n; i++) {
 238         int ind = index + i;
 239         assert(ind < _capacity, "By overflow test above.");
 240         _base[ind] = ptr_arr[i];
 241       }
 242       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 243       return;
 244     }
 245     // Otherwise, we need to try again.
 246   }
 247 }
 248 
 249 
 250 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 251   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 252   jint start = _index;
 253   jint next_index = start + n;
 254   if (next_index > _capacity) {
 255     _overflow = true;
 256     return;
 257   }
 258   // Otherwise.
 259   _index = next_index;
 260   for (int i = 0; i < n; i++) {
 261     int ind = start + i;
 262     assert(ind < _capacity, "By overflow test above.");
 263     _base[ind] = ptr_arr[i];
 264   }
 265 }
 266 
 267 
 268 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 269   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 270   jint index = _index;
 271   if (index == 0) {
 272     *n = 0;
 273     return false;
 274   } else {
 275     int k = MIN2(max, index);
 276     jint new_ind = index - k;
 277     for (int j = 0; j < k; j++) {
 278       ptr_arr[j] = _base[new_ind + j];
 279     }
 280     _index = new_ind;
 281     *n = k;
 282     return true;
 283   }
 284 }
 285 
 286 
 287 CMRegionStack::CMRegionStack() : _base(NULL) {}
 288 
 289 void CMRegionStack::allocate(size_t size) {
 290   _base = NEW_C_HEAP_ARRAY(MemRegion, size);
 291   if (_base == NULL) {
 292     vm_exit_during_initialization("Failed to allocate CM region mark stack");
 293   }
 294   _index = 0;
 295   _capacity = (jint) size;
 296 }
 297 
 298 CMRegionStack::~CMRegionStack() {
 299   if (_base != NULL) {
 300     FREE_C_HEAP_ARRAY(oop, _base);
 301   }
 302 }
 303 
 304 void CMRegionStack::push_lock_free(MemRegion mr) {
 305   assert(mr.word_size() > 0, "Precondition");
 306   while (true) {
 307     jint index = _index;
 308 
 309     if (index >= _capacity) {
 310       _overflow = true;
 311       return;
 312     }
 313     // Otherwise...
 314     jint next_index = index+1;
 315     jint res = Atomic::cmpxchg(next_index, &_index, index);
 316     if (res == index) {
 317       _base[index] = mr;
 318       return;
 319     }
 320     // Otherwise, we need to try again.
 321   }
 322 }
 323 
 324 // Lock-free pop of the region stack. Called during the concurrent
 325 // marking / remark phases. Should only be called in tandem with
 326 // other lock-free pops.
 327 MemRegion CMRegionStack::pop_lock_free() {
 328   while (true) {
 329     jint index = _index;
 330 
 331     if (index == 0) {
 332       return MemRegion();
 333     }
 334     // Otherwise...
 335     jint next_index = index-1;
 336     jint res = Atomic::cmpxchg(next_index, &_index, index);
 337     if (res == index) {
 338       MemRegion mr = _base[next_index];
 339       if (mr.start() != NULL) {
 340         assert(mr.end() != NULL, "invariant");
 341         assert(mr.word_size() > 0, "invariant");
 342         return mr;
 343       } else {
 344         // that entry was invalidated... let's skip it
 345         assert(mr.end() == NULL, "invariant");
 346       }
 347     }
 348     // Otherwise, we need to try again.
 349   }
 350 }
 351 
 352 #if 0
 353 // The routines that manipulate the region stack with a lock are
 354 // not currently used. They should be retained, however, as a
 355 // diagnostic aid.
 356 
 357 void CMRegionStack::push_with_lock(MemRegion mr) {
 358   assert(mr.word_size() > 0, "Precondition");
 359   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 360 
 361   if (isFull()) {
 362     _overflow = true;
 363     return;
 364   }
 365 
 366   _base[_index] = mr;
 367   _index += 1;
 368 }
 369 
 370 MemRegion CMRegionStack::pop_with_lock() {
 371   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 372 
 373   while (true) {
 374     if (_index == 0) {
 375       return MemRegion();
 376     }
 377     _index -= 1;
 378 
 379     MemRegion mr = _base[_index];
 380     if (mr.start() != NULL) {
 381       assert(mr.end() != NULL, "invariant");
 382       assert(mr.word_size() > 0, "invariant");
 383       return mr;
 384     } else {
 385       // that entry was invalidated... let's skip it
 386       assert(mr.end() == NULL, "invariant");
 387     }
 388   }
 389 }
 390 #endif
 391 
 392 bool CMRegionStack::invalidate_entries_into_cset() {
 393   bool result = false;
 394   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 395   for (int i = 0; i < _oops_do_bound; ++i) {
 396     MemRegion mr = _base[i];
 397     if (mr.start() != NULL) {
 398       assert(mr.end() != NULL, "invariant");
 399       assert(mr.word_size() > 0, "invariant");
 400       HeapRegion* hr = g1h->heap_region_containing(mr.start());
 401       assert(hr != NULL, "invariant");
 402       if (hr->in_collection_set()) {
 403         // The region points into the collection set
 404         _base[i] = MemRegion();
 405         result = true;
 406       }
 407     } else {
 408       // that entry was invalidated... let's skip it
 409       assert(mr.end() == NULL, "invariant");
 410     }
 411   }
 412   return result;
 413 }
 414 
 415 template<class OopClosureClass>
 416 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 417   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 418          || SafepointSynchronize::is_at_safepoint(),
 419          "Drain recursion must be yield-safe.");
 420   bool res = true;
 421   debug_only(_drain_in_progress = true);
 422   debug_only(_drain_in_progress_yields = yield_after);
 423   while (!isEmpty()) {
 424     oop newOop = pop();
 425     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 426     assert(newOop->is_oop(), "Expected an oop");
 427     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 428            "only grey objects on this stack");
 429     // iterate over the oops in this oop, marking and pushing
 430     // the ones in CMS generation.
 431     newOop->oop_iterate(cl);
 432     if (yield_after && _cm->do_yield_check()) {
 433       res = false;
 434       break;
 435     }
 436   }
 437   debug_only(_drain_in_progress = false);
 438   return res;
 439 }
 440 
 441 void CMMarkStack::oops_do(OopClosure* f) {
 442   if (_index == 0) return;
 443   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
 444          "Bound must be set.");
 445   for (int i = 0; i < _oops_do_bound; i++) {
 446     f->do_oop(&_base[i]);
 447   }
 448   _oops_do_bound = -1;
 449 }
 450 
 451 bool ConcurrentMark::not_yet_marked(oop obj) const {
 452   return (_g1h->is_obj_ill(obj)
 453           || (_g1h->is_in_permanent(obj)
 454               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
 455 }
 456 
 457 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 458 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 459 #endif // _MSC_VER
 460 
 461 size_t ConcurrentMark::scale_parallel_threads(size_t n_par_threads) {
 462   return MAX2((n_par_threads + 2) / 4, (size_t)1);
 463 }
 464 
 465 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
 466                                int max_regions) :
 467   _markBitMap1(rs, MinObjAlignment - 1),
 468   _markBitMap2(rs, MinObjAlignment - 1),
 469 
 470   _parallel_marking_threads(0),
 471   _max_parallel_marking_threads(0),
 472   _sleep_factor(0.0),
 473   _marking_task_overhead(1.0),
 474   _cleanup_sleep_factor(0.0),
 475   _cleanup_task_overhead(1.0),
 476   _cleanup_list("Cleanup List"),
 477   _region_bm(max_regions, false /* in_resource_area*/),
 478   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
 479            CardTableModRefBS::card_shift,
 480            false /* in_resource_area*/),
 481   _prevMarkBitMap(&_markBitMap1),
 482   _nextMarkBitMap(&_markBitMap2),
 483   _at_least_one_mark_complete(false),
 484 
 485   _markStack(this),
 486   _regionStack(),
 487   // _finger set in set_non_marking_state
 488 
 489   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
 490   // _active_tasks set in set_non_marking_state
 491   // _tasks set inside the constructor
 492   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
 493   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
 494 
 495   _has_overflown(false),
 496   _concurrent(false),
 497   _has_aborted(false),
 498   _restart_for_overflow(false),
 499   _concurrent_marking_in_progress(false),
 500   _should_gray_objects(false),
 501 
 502   // _verbose_level set below
 503 
 504   _init_times(),
 505   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 506   _cleanup_times(),
 507   _total_counting_time(0.0),
 508   _total_rs_scrub_time(0.0),
 509 
 510   _parallel_workers(NULL) {
 511   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 512   if (verbose_level < no_verbose) {
 513     verbose_level = no_verbose;
 514   }
 515   if (verbose_level > high_verbose) {
 516     verbose_level = high_verbose;
 517   }
 518   _verbose_level = verbose_level;
 519 
 520   if (verbose_low()) {
 521     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 522                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
 523   }
 524 
 525   _markStack.allocate(MarkStackSize);
 526   _regionStack.allocate(G1MarkRegionStackSize);
 527 
 528   // Create & start a ConcurrentMark thread.
 529   _cmThread = new ConcurrentMarkThread(this);
 530   assert(cmThread() != NULL, "CM Thread should have been created");
 531   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 532 
 533   _g1h = G1CollectedHeap::heap();
 534   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 535   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
 536   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
 537 
 538   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 539   satb_qs.set_buffer_size(G1SATBBufferSize);
 540 
 541   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
 542   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
 543 
 544   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 545   _active_tasks = _max_task_num;
 546   for (int i = 0; i < (int) _max_task_num; ++i) {
 547     CMTaskQueue* task_queue = new CMTaskQueue();
 548     task_queue->initialize();
 549     _task_queues->register_queue(i, task_queue);
 550 
 551     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
 552     _accum_task_vtime[i] = 0.0;
 553   }
 554 
 555   if (ConcGCThreads > ParallelGCThreads) {
 556     vm_exit_during_initialization("Can't have more ConcGCThreads "
 557                                   "than ParallelGCThreads.");
 558   }
 559   if (ParallelGCThreads == 0) {
 560     // if we are not running with any parallel GC threads we will not
 561     // spawn any marking threads either
 562     _parallel_marking_threads =       0;
 563     _max_parallel_marking_threads =   0;
 564     _sleep_factor             =     0.0;
 565     _marking_task_overhead    =     1.0;
 566   } else {
 567     if (ConcGCThreads > 0) {
 568       // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
 569       // if both are set
 570 
 571       _parallel_marking_threads = ConcGCThreads;
 572       _max_parallel_marking_threads = _parallel_marking_threads;
 573       _sleep_factor             = 0.0;
 574       _marking_task_overhead    = 1.0;
 575     } else if (G1MarkingOverheadPercent > 0) {
 576       // we will calculate the number of parallel marking threads
 577       // based on a target overhead with respect to the soft real-time
 578       // goal
 579 
 580       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 581       double overall_cm_overhead =
 582         (double) MaxGCPauseMillis * marking_overhead /
 583         (double) GCPauseIntervalMillis;
 584       double cpu_ratio = 1.0 / (double) os::processor_count();
 585       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 586       double marking_task_overhead =
 587         overall_cm_overhead / marking_thread_num *
 588                                                 (double) os::processor_count();
 589       double sleep_factor =
 590                          (1.0 - marking_task_overhead) / marking_task_overhead;
 591 
 592       _parallel_marking_threads = (size_t) marking_thread_num;
 593       _max_parallel_marking_threads = _parallel_marking_threads;
 594       _sleep_factor             = sleep_factor;
 595       _marking_task_overhead    = marking_task_overhead;
 596     } else {
 597       _parallel_marking_threads = scale_parallel_threads(ParallelGCThreads);
 598       _max_parallel_marking_threads = _parallel_marking_threads;
 599       _sleep_factor             = 0.0;
 600       _marking_task_overhead    = 1.0;
 601     }
 602 
 603     if (parallel_marking_threads() > 1) {
 604       _cleanup_task_overhead = 1.0;
 605     } else {
 606       _cleanup_task_overhead = marking_task_overhead();
 607     }
 608     _cleanup_sleep_factor =
 609                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 610 
 611 #if 0
 612     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 613     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 614     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 615     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 616     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 617 #endif
 618 
 619     guarantee(parallel_marking_threads() > 0, "peace of mind");
 620     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
 621          (int) _max_parallel_marking_threads, false, true);
 622     if (_parallel_workers == NULL) {
 623       vm_exit_during_initialization("Failed necessary allocation.");
 624     } else {
 625       _parallel_workers->initialize_workers();
 626     }
 627   }
 628 
 629   // so that the call below can read a sensible value
 630   _heap_start = (HeapWord*) rs.base();
 631   set_non_marking_state();
 632 }
 633 
 634 void ConcurrentMark::update_g1_committed(bool force) {
 635   // If concurrent marking is not in progress, then we do not need to
 636   // update _heap_end. This has a subtle and important
 637   // side-effect. Imagine that two evacuation pauses happen between
 638   // marking completion and remark. The first one can grow the
 639   // heap (hence now the finger is below the heap end). Then, the
 640   // second one could unnecessarily push regions on the region
 641   // stack. This causes the invariant that the region stack is empty
 642   // at the beginning of remark to be false. By ensuring that we do
 643   // not observe heap expansions after marking is complete, then we do
 644   // not have this problem.
 645   if (!concurrent_marking_in_progress() && !force) return;
 646 
 647   MemRegion committed = _g1h->g1_committed();
 648   assert(committed.start() == _heap_start, "start shouldn't change");
 649   HeapWord* new_end = committed.end();
 650   if (new_end > _heap_end) {
 651     // The heap has been expanded.
 652 
 653     _heap_end = new_end;
 654   }
 655   // Notice that the heap can also shrink. However, this only happens
 656   // during a Full GC (at least currently) and the entire marking
 657   // phase will bail out and the task will not be restarted. So, let's
 658   // do nothing.
 659 }
 660 
 661 void ConcurrentMark::reset() {
 662   // Starting values for these two. This should be called in a STW
 663   // phase. CM will be notified of any future g1_committed expansions
 664   // will be at the end of evacuation pauses, when tasks are
 665   // inactive.
 666   MemRegion committed = _g1h->g1_committed();
 667   _heap_start = committed.start();
 668   _heap_end   = committed.end();
 669 
 670   // Separated the asserts so that we know which one fires.
 671   assert(_heap_start != NULL, "heap bounds should look ok");
 672   assert(_heap_end != NULL, "heap bounds should look ok");
 673   assert(_heap_start < _heap_end, "heap bounds should look ok");
 674 
 675   // reset all the marking data structures and any necessary flags
 676   clear_marking_state();
 677 
 678   if (verbose_low()) {
 679     gclog_or_tty->print_cr("[global] resetting");
 680   }
 681 
 682   // We do reset all of them, since different phases will use
 683   // different number of active threads. So, it's easiest to have all
 684   // of them ready.
 685   for (int i = 0; i < (int) _max_task_num; ++i) {
 686     _tasks[i]->reset(_nextMarkBitMap);
 687   }
 688 
 689   // we need this to make sure that the flag is on during the evac
 690   // pause with initial mark piggy-backed
 691   set_concurrent_marking_in_progress();
 692 }
 693 
 694 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
 695   assert(active_tasks <= _max_task_num, "we should not have more");
 696 
 697   _active_tasks = active_tasks;
 698   // Need to update the three data structures below according to the
 699   // number of active threads for this phase.
 700   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 701   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 702   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 703 
 704   _concurrent = concurrent;
 705   // We propagate this to all tasks, not just the active ones.
 706   for (int i = 0; i < (int) _max_task_num; ++i)
 707     _tasks[i]->set_concurrent(concurrent);
 708 
 709   if (concurrent) {
 710     set_concurrent_marking_in_progress();
 711   } else {
 712     // We currently assume that the concurrent flag has been set to
 713     // false before we start remark. At this point we should also be
 714     // in a STW phase.
 715     assert(!concurrent_marking_in_progress(), "invariant");
 716     assert(_finger == _heap_end, "only way to get here");
 717     update_g1_committed(true);
 718   }
 719 }
 720 
 721 void ConcurrentMark::set_non_marking_state() {
 722   // We set the global marking state to some default values when we're
 723   // not doing marking.
 724   clear_marking_state();
 725   _active_tasks = 0;
 726   clear_concurrent_marking_in_progress();
 727 }
 728 
 729 ConcurrentMark::~ConcurrentMark() {
 730   for (int i = 0; i < (int) _max_task_num; ++i) {
 731     delete _task_queues->queue(i);
 732     delete _tasks[i];
 733   }
 734   delete _task_queues;
 735   FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
 736 }
 737 
 738 // This closure is used to mark refs into the g1 generation
 739 // from external roots in the CMS bit map.
 740 // Called at the first checkpoint.
 741 //
 742 
 743 void ConcurrentMark::clearNextBitmap() {
 744   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 745   G1CollectorPolicy* g1p = g1h->g1_policy();
 746 
 747   // Make sure that the concurrent mark thread looks to still be in
 748   // the current cycle.
 749   guarantee(cmThread()->during_cycle(), "invariant");
 750 
 751   // We are finishing up the current cycle by clearing the next
 752   // marking bitmap and getting it ready for the next cycle. During
 753   // this time no other cycle can start. So, let's make sure that this
 754   // is the case.
 755   guarantee(!g1h->mark_in_progress(), "invariant");
 756 
 757   // clear the mark bitmap (no grey objects to start with).
 758   // We need to do this in chunks and offer to yield in between
 759   // each chunk.
 760   HeapWord* start  = _nextMarkBitMap->startWord();
 761   HeapWord* end    = _nextMarkBitMap->endWord();
 762   HeapWord* cur    = start;
 763   size_t chunkSize = M;
 764   while (cur < end) {
 765     HeapWord* next = cur + chunkSize;
 766     if (next > end) {
 767       next = end;
 768     }
 769     MemRegion mr(cur,next);
 770     _nextMarkBitMap->clearRange(mr);
 771     cur = next;
 772     do_yield_check();
 773 
 774     // Repeat the asserts from above. We'll do them as asserts here to
 775     // minimize their overhead on the product. However, we'll have
 776     // them as guarantees at the beginning / end of the bitmap
 777     // clearing to get some checking in the product.
 778     assert(cmThread()->during_cycle(), "invariant");
 779     assert(!g1h->mark_in_progress(), "invariant");
 780   }
 781 
 782   // Repeat the asserts from above.
 783   guarantee(cmThread()->during_cycle(), "invariant");
 784   guarantee(!g1h->mark_in_progress(), "invariant");
 785 }
 786 
 787 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 788 public:
 789   bool doHeapRegion(HeapRegion* r) {
 790     if (!r->continuesHumongous()) {
 791       r->note_start_of_marking(true);
 792     }
 793     return false;
 794   }
 795 };
 796 
 797 void ConcurrentMark::checkpointRootsInitialPre() {
 798   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 799   G1CollectorPolicy* g1p = g1h->g1_policy();
 800 
 801   _has_aborted = false;
 802 
 803 #ifndef PRODUCT
 804   if (G1PrintReachableAtInitialMark) {
 805     print_reachable("at-cycle-start",
 806                     VerifyOption_G1UsePrevMarking, true /* all */);
 807   }
 808 #endif
 809 
 810   // Initialise marking structures. This has to be done in a STW phase.
 811   reset();
 812 }
 813 
 814 
 815 void ConcurrentMark::checkpointRootsInitialPost() {
 816   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 817 
 818   // If we force an overflow during remark, the remark operation will
 819   // actually abort and we'll restart concurrent marking. If we always
 820   // force an oveflow during remark we'll never actually complete the
 821   // marking phase. So, we initilize this here, at the start of the
 822   // cycle, so that at the remaining overflow number will decrease at
 823   // every remark and we'll eventually not need to cause one.
 824   force_overflow_stw()->init();
 825 
 826   // For each region note start of marking.
 827   NoteStartOfMarkHRClosure startcl;
 828   g1h->heap_region_iterate(&startcl);
 829 
 830   // Start Concurrent Marking weak-reference discovery.
 831   ReferenceProcessor* rp = g1h->ref_processor_cm();
 832   // enable ("weak") refs discovery
 833   rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
 834   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 835 
 836   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 837   // This is the start of  the marking cycle, we're expected all
 838   // threads to have SATB queues with active set to false.
 839   satb_mq_set.set_active_all_threads(true, /* new active value */
 840                                      false /* expected_active */);
 841 
 842   // update_g1_committed() will be called at the end of an evac pause
 843   // when marking is on. So, it's also called at the end of the
 844   // initial-mark pause to update the heap end, if the heap expands
 845   // during it. No need to call it here.
 846 }
 847 
 848 /*
 849  * Notice that in the next two methods, we actually leave the STS
 850  * during the barrier sync and join it immediately afterwards. If we
 851  * do not do this, the following deadlock can occur: one thread could
 852  * be in the barrier sync code, waiting for the other thread to also
 853  * sync up, whereas another one could be trying to yield, while also
 854  * waiting for the other threads to sync up too.
 855  *
 856  * Note, however, that this code is also used during remark and in
 857  * this case we should not attempt to leave / enter the STS, otherwise
 858  * we'll either hit an asseert (debug / fastdebug) or deadlock
 859  * (product). So we should only leave / enter the STS if we are
 860  * operating concurrently.
 861  *
 862  * Because the thread that does the sync barrier has left the STS, it
 863  * is possible to be suspended for a Full GC or an evacuation pause
 864  * could occur. This is actually safe, since the entering the sync
 865  * barrier is one of the last things do_marking_step() does, and it
 866  * doesn't manipulate any data structures afterwards.
 867  */
 868 
 869 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
 870   if (verbose_low()) {
 871     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
 872   }
 873 
 874   if (concurrent()) {
 875     ConcurrentGCThread::stsLeave();
 876   }
 877   _first_overflow_barrier_sync.enter();
 878   if (concurrent()) {
 879     ConcurrentGCThread::stsJoin();
 880   }
 881   // at this point everyone should have synced up and not be doing any
 882   // more work
 883 
 884   if (verbose_low()) {
 885     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
 886   }
 887 
 888   // let task 0 do this
 889   if (task_num == 0) {
 890     // task 0 is responsible for clearing the global data structures
 891     // We should be here because of an overflow. During STW we should
 892     // not clear the overflow flag since we rely on it being true when
 893     // we exit this method to abort the pause and restart concurent
 894     // marking.
 895     clear_marking_state(concurrent() /* clear_overflow */);
 896     force_overflow()->update();
 897 
 898     if (PrintGC) {
 899       gclog_or_tty->date_stamp(PrintGCDateStamps);
 900       gclog_or_tty->stamp(PrintGCTimeStamps);
 901       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 902     }
 903   }
 904 
 905   // after this, each task should reset its own data structures then
 906   // then go into the second barrier
 907 }
 908 
 909 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
 910   if (verbose_low()) {
 911     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
 912   }
 913 
 914   if (concurrent()) {
 915     ConcurrentGCThread::stsLeave();
 916   }
 917   _second_overflow_barrier_sync.enter();
 918   if (concurrent()) {
 919     ConcurrentGCThread::stsJoin();
 920   }
 921   // at this point everything should be re-initialised and ready to go
 922 
 923   if (verbose_low()) {
 924     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
 925   }
 926 }
 927 
 928 #ifndef PRODUCT
 929 void ForceOverflowSettings::init() {
 930   _num_remaining = G1ConcMarkForceOverflow;
 931   _force = false;
 932   update();
 933 }
 934 
 935 void ForceOverflowSettings::update() {
 936   if (_num_remaining > 0) {
 937     _num_remaining -= 1;
 938     _force = true;
 939   } else {
 940     _force = false;
 941   }
 942 }
 943 
 944 bool ForceOverflowSettings::should_force() {
 945   if (_force) {
 946     _force = false;
 947     return true;
 948   } else {
 949     return false;
 950   }
 951 }
 952 #endif // !PRODUCT
 953 
 954 void ConcurrentMark::grayRoot(oop p) {
 955   HeapWord* addr = (HeapWord*) p;
 956   // We can't really check against _heap_start and _heap_end, since it
 957   // is possible during an evacuation pause with piggy-backed
 958   // initial-mark that the committed space is expanded during the
 959   // pause without CM observing this change. So the assertions below
 960   // is a bit conservative; but better than nothing.
 961   assert(_g1h->g1_committed().contains(addr),
 962          "address should be within the heap bounds");
 963 
 964   if (!_nextMarkBitMap->isMarked(addr)) {
 965     _nextMarkBitMap->parMark(addr);
 966   }
 967 }
 968 
 969 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
 970   // The objects on the region have already been marked "in bulk" by
 971   // the caller. We only need to decide whether to push the region on
 972   // the region stack or not.
 973 
 974   if (!concurrent_marking_in_progress() || !_should_gray_objects) {
 975     // We're done with marking and waiting for remark. We do not need to
 976     // push anything else on the region stack.
 977     return;
 978   }
 979 
 980   HeapWord* finger = _finger;
 981 
 982   if (verbose_low()) {
 983     gclog_or_tty->print_cr("[global] attempting to push "
 984                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
 985                            PTR_FORMAT, mr.start(), mr.end(), finger);
 986   }
 987 
 988   if (mr.start() < finger) {
 989     // The finger is always heap region aligned and it is not possible
 990     // for mr to span heap regions.
 991     assert(mr.end() <= finger, "invariant");
 992 
 993     // Separated the asserts so that we know which one fires.
 994     assert(mr.start() <= mr.end(),
 995            "region boundaries should fall within the committed space");
 996     assert(_heap_start <= mr.start(),
 997            "region boundaries should fall within the committed space");
 998     assert(mr.end() <= _heap_end,
 999            "region boundaries should fall within the committed space");
1000     if (verbose_low()) {
1001       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
1002                              "below the finger, pushing it",
1003                              mr.start(), mr.end());
1004     }
1005 
1006     if (!region_stack_push_lock_free(mr)) {
1007       if (verbose_low()) {
1008         gclog_or_tty->print_cr("[global] region stack has overflown.");
1009       }
1010     }
1011   }
1012 }
1013 
1014 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1015   // The object is not marked by the caller. We need to at least mark
1016   // it and maybe push in on the stack.
1017 
1018   HeapWord* addr = (HeapWord*)p;
1019   if (!_nextMarkBitMap->isMarked(addr)) {
1020     // We definitely need to mark it, irrespective whether we bail out
1021     // because we're done with marking.
1022     if (_nextMarkBitMap->parMark(addr)) {
1023       if (!concurrent_marking_in_progress() || !_should_gray_objects) {
1024         // If we're done with concurrent marking and we're waiting for
1025         // remark, then we're not pushing anything on the stack.
1026         return;
1027       }
1028 
1029       // No OrderAccess:store_load() is needed. It is implicit in the
1030       // CAS done in parMark(addr) above
1031       HeapWord* finger = _finger;
1032 
1033       if (addr < finger) {
1034         if (!mark_stack_push(oop(addr))) {
1035           if (verbose_low()) {
1036             gclog_or_tty->print_cr("[global] global stack overflow "
1037                                    "during parMark");
1038           }
1039         }
1040       }
1041     }
1042   }
1043 }
1044 
1045 class CMConcurrentMarkingTask: public AbstractGangTask {
1046 private:
1047   ConcurrentMark*       _cm;
1048   ConcurrentMarkThread* _cmt;
1049 
1050 public:
1051   void work(int worker_i) {
1052     assert(Thread::current()->is_ConcurrentGC_thread(),
1053            "this should only be done by a conc GC thread");
1054     ResourceMark rm;
1055 
1056     double start_vtime = os::elapsedVTime();
1057 
1058     ConcurrentGCThread::stsJoin();
1059 
1060     assert((size_t) worker_i < _cm->active_tasks(), "invariant");
1061     CMTask* the_task = _cm->task(worker_i);
1062     the_task->record_start_time();
1063     if (!_cm->has_aborted()) {
1064       do {
1065         double start_vtime_sec = os::elapsedVTime();
1066         double start_time_sec = os::elapsedTime();
1067         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1068 
1069         the_task->do_marking_step(mark_step_duration_ms,
1070                                   true /* do_stealing    */,
1071                                   true /* do_termination */);
1072 
1073         double end_time_sec = os::elapsedTime();
1074         double end_vtime_sec = os::elapsedVTime();
1075         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1076         double elapsed_time_sec = end_time_sec - start_time_sec;
1077         _cm->clear_has_overflown();
1078 
1079         bool ret = _cm->do_yield_check(worker_i);
1080 
1081         jlong sleep_time_ms;
1082         if (!_cm->has_aborted() && the_task->has_aborted()) {
1083           sleep_time_ms =
1084             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1085           ConcurrentGCThread::stsLeave();
1086           os::sleep(Thread::current(), sleep_time_ms, false);
1087           ConcurrentGCThread::stsJoin();
1088         }
1089         double end_time2_sec = os::elapsedTime();
1090         double elapsed_time2_sec = end_time2_sec - start_time_sec;
1091 
1092 #if 0
1093           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1094                                  "overhead %1.4lf",
1095                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1096                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
1097           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1098                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1099 #endif
1100       } while (!_cm->has_aborted() && the_task->has_aborted());
1101     }
1102     the_task->record_end_time();
1103     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1104 
1105     ConcurrentGCThread::stsLeave();
1106 
1107     double end_vtime = os::elapsedVTime();
1108     _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1109   }
1110 
1111   CMConcurrentMarkingTask(ConcurrentMark* cm,
1112                           ConcurrentMarkThread* cmt) :
1113       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1114 
1115   ~CMConcurrentMarkingTask() { }
1116 };
1117 
1118 // Calculates the number of active workers for a concurrent
1119 // phase.
1120 size_t ConcurrentMark::calc_parallel_marking_threads() {
1121   if (G1CollectedHeap::use_parallel_gc_threads()) {
1122     size_t n_conc_workers = 0;
1123     if (!UseDynamicNumberOfGCThreads ||
1124         (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1125          !ForceDynamicNumberOfGCThreads)) {
1126       n_conc_workers = max_parallel_marking_threads();
1127     } else {
1128       n_conc_workers =
1129         AdaptiveSizePolicy::calc_default_active_workers(
1130                                      max_parallel_marking_threads(),
1131                                      1, /* Minimum workers */
1132                                      parallel_marking_threads(),
1133                                      Threads::number_of_non_daemon_threads());
1134       // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1135       // that scaling has already gone into "_max_parallel_marking_threads".
1136     }
1137     assert(n_conc_workers > 0, "Always need at least 1");
1138     return n_conc_workers;
1139   }
1140   // If we are not running with any parallel GC threads we will not
1141   // have spawned any marking threads either. Hence the number of
1142   // concurrent workers should be 0.
1143   return 0;
1144 }
1145 
1146 void ConcurrentMark::markFromRoots() {
1147   // we might be tempted to assert that:
1148   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1149   //        "inconsistent argument?");
1150   // However that wouldn't be right, because it's possible that
1151   // a safepoint is indeed in progress as a younger generation
1152   // stop-the-world GC happens even as we mark in this generation.
1153 
1154   _restart_for_overflow = false;
1155   force_overflow_conc()->init();
1156 
1157   // _g1h has _n_par_threads
1158   _parallel_marking_threads = calc_parallel_marking_threads();
1159   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1160     "Maximum number of marking threads exceeded");
1161 
1162   size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
1163 
1164   // Parallel task terminator is set in "set_phase()"
1165   set_phase(active_workers, true /* concurrent */);
1166 
1167   CMConcurrentMarkingTask markingTask(this, cmThread());
1168   if (parallel_marking_threads() > 0) {
1169     _parallel_workers->set_active_workers((int)active_workers);
1170     // Don't set _n_par_threads because it affects MT in proceess_strong_roots()
1171     // and the decisions on that MT processing is made elsewhere.
1172     assert(_parallel_workers->active_workers() > 0, "Should have been set");
1173     _parallel_workers->run_task(&markingTask);
1174   } else {
1175     markingTask.work(0);
1176   }
1177   print_stats();
1178 }
1179 
1180 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1181   // world is stopped at this checkpoint
1182   assert(SafepointSynchronize::is_at_safepoint(),
1183          "world should be stopped");
1184 
1185   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1186 
1187   // If a full collection has happened, we shouldn't do this.
1188   if (has_aborted()) {
1189     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1190     return;
1191   }
1192 
1193   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1194 
1195   if (VerifyDuringGC) {
1196     HandleMark hm;  // handle scope
1197     gclog_or_tty->print(" VerifyDuringGC:(before)");
1198     Universe::heap()->prepare_for_verify();
1199     Universe::verify(/* allow dirty */ true,
1200                      /* silent      */ false,
1201                      /* option      */ VerifyOption_G1UsePrevMarking);
1202   }
1203 
1204   G1CollectorPolicy* g1p = g1h->g1_policy();
1205   g1p->record_concurrent_mark_remark_start();
1206 
1207   double start = os::elapsedTime();
1208 
1209   checkpointRootsFinalWork();
1210 
1211   double mark_work_end = os::elapsedTime();
1212 
1213   weakRefsWork(clear_all_soft_refs);
1214 
1215   if (has_overflown()) {
1216     // Oops.  We overflowed.  Restart concurrent marking.
1217     _restart_for_overflow = true;
1218     // Clear the flag. We do not need it any more.
1219     clear_has_overflown();
1220     if (G1TraceMarkStackOverflow) {
1221       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1222     }
1223   } else {
1224     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1225     // We're done with marking.
1226     // This is the end of  the marking cycle, we're expected all
1227     // threads to have SATB queues with active set to true.
1228     satb_mq_set.set_active_all_threads(false, /* new active value */
1229                                        true /* expected_active */);
1230 
1231     if (VerifyDuringGC) {
1232 
1233       HandleMark hm;  // handle scope
1234       gclog_or_tty->print(" VerifyDuringGC:(after)");
1235       Universe::heap()->prepare_for_verify();
1236       Universe::verify(/* allow dirty */ true,
1237                        /* silent      */ false,
1238                        /* option      */ VerifyOption_G1UseNextMarking);
1239     }
1240     assert(!restart_for_overflow(), "sanity");
1241   }
1242 
1243   // Reset the marking state if marking completed
1244   if (!restart_for_overflow()) {
1245     set_non_marking_state();
1246   }
1247 
1248 #if VERIFY_OBJS_PROCESSED
1249   _scan_obj_cl.objs_processed = 0;
1250   ThreadLocalObjQueue::objs_enqueued = 0;
1251 #endif
1252 
1253   // Statistics
1254   double now = os::elapsedTime();
1255   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1256   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1257   _remark_times.add((now - start) * 1000.0);
1258 
1259   g1p->record_concurrent_mark_remark_end();
1260 }
1261 
1262 #define CARD_BM_TEST_MODE 0
1263 
1264 class CalcLiveObjectsClosure: public HeapRegionClosure {
1265 
1266   CMBitMapRO* _bm;
1267   ConcurrentMark* _cm;
1268   bool _changed;
1269   bool _yield;
1270   size_t _words_done;
1271   size_t _tot_live;
1272   size_t _tot_used;
1273   size_t _regions_done;
1274   double _start_vtime_sec;
1275 
1276   BitMap* _region_bm;
1277   BitMap* _card_bm;
1278   intptr_t _bottom_card_num;
1279   bool _final;
1280 
1281   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1282     for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1283 #if CARD_BM_TEST_MODE
1284       guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
1285 #else
1286       _card_bm->par_at_put(i - _bottom_card_num, 1);
1287 #endif
1288     }
1289   }
1290 
1291 public:
1292   CalcLiveObjectsClosure(bool final,
1293                          CMBitMapRO *bm, ConcurrentMark *cm,
1294                          BitMap* region_bm, BitMap* card_bm) :
1295     _bm(bm), _cm(cm), _changed(false), _yield(true),
1296     _words_done(0), _tot_live(0), _tot_used(0),
1297     _region_bm(region_bm), _card_bm(card_bm),_final(final),
1298     _regions_done(0), _start_vtime_sec(0.0)
1299   {
1300     _bottom_card_num =
1301       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1302                CardTableModRefBS::card_shift);
1303   }
1304 
1305   // It takes a region that's not empty (i.e., it has at least one
1306   // live object in it and sets its corresponding bit on the region
1307   // bitmap to 1. If the region is "starts humongous" it will also set
1308   // to 1 the bits on the region bitmap that correspond to its
1309   // associated "continues humongous" regions.
1310   void set_bit_for_region(HeapRegion* hr) {
1311     assert(!hr->continuesHumongous(), "should have filtered those out");
1312 
1313     size_t index = hr->hrs_index();
1314     if (!hr->startsHumongous()) {
1315       // Normal (non-humongous) case: just set the bit.
1316       _region_bm->par_at_put((BitMap::idx_t) index, true);
1317     } else {
1318       // Starts humongous case: calculate how many regions are part of
1319       // this humongous region and then set the bit range. It might
1320       // have been a bit more efficient to look at the object that
1321       // spans these humongous regions to calculate their number from
1322       // the object's size. However, it's a good idea to calculate
1323       // this based on the metadata itself, and not the region
1324       // contents, so that this code is not aware of what goes into
1325       // the humongous regions (in case this changes in the future).
1326       G1CollectedHeap* g1h = G1CollectedHeap::heap();
1327       size_t end_index = index + 1;
1328       while (end_index < g1h->n_regions()) {
1329         HeapRegion* chr = g1h->region_at(end_index);
1330         if (!chr->continuesHumongous()) break;
1331         end_index += 1;
1332       }
1333       _region_bm->par_at_put_range((BitMap::idx_t) index,
1334                                    (BitMap::idx_t) end_index, true);
1335     }
1336   }
1337 
1338   bool doHeapRegion(HeapRegion* hr) {
1339     if (!_final && _regions_done == 0) {
1340       _start_vtime_sec = os::elapsedVTime();
1341     }
1342 
1343     if (hr->continuesHumongous()) {
1344       // We will ignore these here and process them when their
1345       // associated "starts humongous" region is processed (see
1346       // set_bit_for_heap_region()). Note that we cannot rely on their
1347       // associated "starts humongous" region to have their bit set to
1348       // 1 since, due to the region chunking in the parallel region
1349       // iteration, a "continues humongous" region might be visited
1350       // before its associated "starts humongous".
1351       return false;
1352     }
1353 
1354     HeapWord* nextTop = hr->next_top_at_mark_start();
1355     HeapWord* start   = hr->top_at_conc_mark_count();
1356     assert(hr->bottom() <= start && start <= hr->end() &&
1357            hr->bottom() <= nextTop && nextTop <= hr->end() &&
1358            start <= nextTop,
1359            "Preconditions.");
1360     // Otherwise, record the number of word's we'll examine.
1361     size_t words_done = (nextTop - start);
1362     // Find the first marked object at or after "start".
1363     start = _bm->getNextMarkedWordAddress(start, nextTop);
1364     size_t marked_bytes = 0;
1365 
1366     // Below, the term "card num" means the result of shifting an address
1367     // by the card shift -- address 0 corresponds to card number 0.  One
1368     // must subtract the card num of the bottom of the heap to obtain a
1369     // card table index.
1370     // The first card num of the sequence of live cards currently being
1371     // constructed.  -1 ==> no sequence.
1372     intptr_t start_card_num = -1;
1373     // The last card num of the sequence of live cards currently being
1374     // constructed.  -1 ==> no sequence.
1375     intptr_t last_card_num = -1;
1376 
1377     while (start < nextTop) {
1378       if (_yield && _cm->do_yield_check()) {
1379         // We yielded.  It might be for a full collection, in which case
1380         // all bets are off; terminate the traversal.
1381         if (_cm->has_aborted()) {
1382           _changed = false;
1383           return true;
1384         } else {
1385           // Otherwise, it might be a collection pause, and the region
1386           // we're looking at might be in the collection set.  We'll
1387           // abandon this region.
1388           return false;
1389         }
1390       }
1391       oop obj = oop(start);
1392       int obj_sz = obj->size();
1393       // The card num of the start of the current object.
1394       intptr_t obj_card_num =
1395         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1396 
1397       HeapWord* obj_last = start + obj_sz - 1;
1398       intptr_t obj_last_card_num =
1399         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1400 
1401       if (obj_card_num != last_card_num) {
1402         if (start_card_num == -1) {
1403           assert(last_card_num == -1, "Both or neither.");
1404           start_card_num = obj_card_num;
1405         } else {
1406           assert(last_card_num != -1, "Both or neither.");
1407           assert(obj_card_num >= last_card_num, "Inv");
1408           if ((obj_card_num - last_card_num) > 1) {
1409             // Mark the last run, and start a new one.
1410             mark_card_num_range(start_card_num, last_card_num);
1411             start_card_num = obj_card_num;
1412           }
1413         }
1414 #if CARD_BM_TEST_MODE
1415         /*
1416         gclog_or_tty->print_cr("Setting bits from %d/%d.",
1417                                obj_card_num - _bottom_card_num,
1418                                obj_last_card_num - _bottom_card_num);
1419         */
1420         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1421           _card_bm->par_at_put(j - _bottom_card_num, 1);
1422         }
1423 #endif
1424       }
1425       // In any case, we set the last card num.
1426       last_card_num = obj_last_card_num;
1427 
1428       marked_bytes += (size_t)obj_sz * HeapWordSize;
1429       // Find the next marked object after this one.
1430       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1431       _changed = true;
1432     }
1433     // Handle the last range, if any.
1434     if (start_card_num != -1) {
1435       mark_card_num_range(start_card_num, last_card_num);
1436     }
1437     if (_final) {
1438       // Mark the allocated-since-marking portion...
1439       HeapWord* tp = hr->top();
1440       if (nextTop < tp) {
1441         start_card_num =
1442           intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1443         last_card_num =
1444           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1445         mark_card_num_range(start_card_num, last_card_num);
1446         // This definitely means the region has live objects.
1447         set_bit_for_region(hr);
1448       }
1449     }
1450 
1451     hr->add_to_marked_bytes(marked_bytes);
1452     // Update the live region bitmap.
1453     if (marked_bytes > 0) {
1454       set_bit_for_region(hr);
1455     }
1456     hr->set_top_at_conc_mark_count(nextTop);
1457     _tot_live += hr->next_live_bytes();
1458     _tot_used += hr->used();
1459     _words_done = words_done;
1460 
1461     if (!_final) {
1462       ++_regions_done;
1463       if (_regions_done % 10 == 0) {
1464         double end_vtime_sec = os::elapsedVTime();
1465         double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1466         if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1467           jlong sleep_time_ms =
1468             (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1469           os::sleep(Thread::current(), sleep_time_ms, false);
1470           _start_vtime_sec = end_vtime_sec;
1471         }
1472       }
1473     }
1474 
1475     return false;
1476   }
1477 
1478   bool changed() { return _changed;  }
1479   void reset()   { _changed = false; _words_done = 0; }
1480   void no_yield() { _yield = false; }
1481   size_t words_done() { return _words_done; }
1482   size_t tot_live() { return _tot_live; }
1483   size_t tot_used() { return _tot_used; }
1484 };
1485 
1486 
1487 void ConcurrentMark::calcDesiredRegions() {
1488   _region_bm.clear();
1489   _card_bm.clear();
1490   CalcLiveObjectsClosure calccl(false /*final*/,
1491                                 nextMarkBitMap(), this,
1492                                 &_region_bm, &_card_bm);
1493   G1CollectedHeap *g1h = G1CollectedHeap::heap();
1494   g1h->heap_region_iterate(&calccl);
1495 
1496   do {
1497     calccl.reset();
1498     g1h->heap_region_iterate(&calccl);
1499   } while (calccl.changed());
1500 }
1501 
1502 class G1ParFinalCountTask: public AbstractGangTask {
1503 protected:
1504   G1CollectedHeap* _g1h;
1505   CMBitMap* _bm;
1506   size_t _n_workers;
1507   size_t *_live_bytes;
1508   size_t *_used_bytes;
1509   BitMap* _region_bm;
1510   BitMap* _card_bm;
1511 public:
1512   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1513                       BitMap* region_bm, BitMap* card_bm)
1514     : AbstractGangTask("G1 final counting"), _g1h(g1h),
1515     _bm(bm), _region_bm(region_bm), _card_bm(card_bm),
1516     _n_workers(0)
1517   {
1518     // Use the value already set as the number of active threads
1519     // in the call to run_task().  Needed for the allocation of
1520     // _live_bytes and _used_bytes.
1521     if (G1CollectedHeap::use_parallel_gc_threads()) {
1522       assert( _g1h->workers()->active_workers() > 0,
1523         "Should have been previously set");
1524       _n_workers = _g1h->workers()->active_workers();
1525     } else {
1526       _n_workers = 1;
1527     }
1528 
1529     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1530     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1531   }
1532 
1533   ~G1ParFinalCountTask() {
1534     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1535     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1536   }
1537 
1538   void work(int i) {
1539     CalcLiveObjectsClosure calccl(true /*final*/,
1540                                   _bm, _g1h->concurrent_mark(),
1541                                   _region_bm, _card_bm);
1542     calccl.no_yield();
1543     if (G1CollectedHeap::use_parallel_gc_threads()) {
1544       _g1h->heap_region_par_iterate_chunked(&calccl, i,
1545                                             (int) _n_workers,
1546                                             HeapRegion::FinalCountClaimValue);
1547     } else {
1548       _g1h->heap_region_iterate(&calccl);
1549     }
1550     assert(calccl.complete(), "Shouldn't have yielded!");
1551 
1552     assert((size_t) i < _n_workers, "invariant");
1553     _live_bytes[i] = calccl.tot_live();
1554     _used_bytes[i] = calccl.tot_used();
1555   }
1556   size_t live_bytes()  {
1557     size_t live_bytes = 0;
1558     for (size_t i = 0; i < _n_workers; ++i)
1559       live_bytes += _live_bytes[i];
1560     return live_bytes;
1561   }
1562   size_t used_bytes()  {
1563     size_t used_bytes = 0;
1564     for (size_t i = 0; i < _n_workers; ++i)
1565       used_bytes += _used_bytes[i];
1566     return used_bytes;
1567   }
1568 };
1569 
1570 class G1ParNoteEndTask;
1571 
1572 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1573   G1CollectedHeap* _g1;
1574   int _worker_num;
1575   size_t _max_live_bytes;
1576   size_t _regions_claimed;
1577   size_t _freed_bytes;
1578   FreeRegionList* _local_cleanup_list;
1579   OldRegionSet* _old_proxy_set;
1580   HumongousRegionSet* _humongous_proxy_set;
1581   HRRSCleanupTask* _hrrs_cleanup_task;
1582   double _claimed_region_time;
1583   double _max_region_time;
1584 
1585 public:
1586   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1587                              int worker_num,
1588                              FreeRegionList* local_cleanup_list,
1589                              OldRegionSet* old_proxy_set,
1590                              HumongousRegionSet* humongous_proxy_set,
1591                              HRRSCleanupTask* hrrs_cleanup_task) :
1592     _g1(g1), _worker_num(worker_num),
1593     _max_live_bytes(0), _regions_claimed(0),
1594     _freed_bytes(0),
1595     _claimed_region_time(0.0), _max_region_time(0.0),
1596     _local_cleanup_list(local_cleanup_list),
1597     _old_proxy_set(old_proxy_set),
1598     _humongous_proxy_set(humongous_proxy_set),
1599     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1600 
1601   size_t freed_bytes() { return _freed_bytes; }
1602 
1603   bool doHeapRegion(HeapRegion *hr) {
1604     // We use a claim value of zero here because all regions
1605     // were claimed with value 1 in the FinalCount task.
1606     hr->reset_gc_time_stamp();
1607     if (!hr->continuesHumongous()) {
1608       double start = os::elapsedTime();
1609       _regions_claimed++;
1610       hr->note_end_of_marking();
1611       _max_live_bytes += hr->max_live_bytes();
1612       _g1->free_region_if_empty(hr,
1613                                 &_freed_bytes,
1614                                 _local_cleanup_list,
1615                                 _old_proxy_set,
1616                                 _humongous_proxy_set,
1617                                 _hrrs_cleanup_task,
1618                                 true /* par */);
1619       double region_time = (os::elapsedTime() - start);
1620       _claimed_region_time += region_time;
1621       if (region_time > _max_region_time) {
1622         _max_region_time = region_time;
1623       }
1624     }
1625     return false;
1626   }
1627 
1628   size_t max_live_bytes() { return _max_live_bytes; }
1629   size_t regions_claimed() { return _regions_claimed; }
1630   double claimed_region_time_sec() { return _claimed_region_time; }
1631   double max_region_time_sec() { return _max_region_time; }
1632 };
1633 
1634 class G1ParNoteEndTask: public AbstractGangTask {
1635   friend class G1NoteEndOfConcMarkClosure;
1636 
1637 protected:
1638   G1CollectedHeap* _g1h;
1639   size_t _max_live_bytes;
1640   size_t _freed_bytes;
1641   FreeRegionList* _cleanup_list;
1642 
1643 public:
1644   G1ParNoteEndTask(G1CollectedHeap* g1h,
1645                    FreeRegionList* cleanup_list) :
1646     AbstractGangTask("G1 note end"), _g1h(g1h),
1647     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1648 
1649   void work(int i) {
1650     double start = os::elapsedTime();
1651     FreeRegionList local_cleanup_list("Local Cleanup List");
1652     OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
1653     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1654     HRRSCleanupTask hrrs_cleanup_task;
1655     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
1656                                            &old_proxy_set,
1657                                            &humongous_proxy_set,
1658                                            &hrrs_cleanup_task);
1659     if (G1CollectedHeap::use_parallel_gc_threads()) {
1660       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1661                                             _g1h->workers()->active_workers(),
1662                                             HeapRegion::NoteEndClaimValue);
1663     } else {
1664       _g1h->heap_region_iterate(&g1_note_end);
1665     }
1666     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1667 
1668     // Now update the lists
1669     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1670                                             NULL /* free_list */,
1671                                             &old_proxy_set,
1672                                             &humongous_proxy_set,
1673                                             true /* par */);
1674     {
1675       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1676       _max_live_bytes += g1_note_end.max_live_bytes();
1677       _freed_bytes += g1_note_end.freed_bytes();
1678 
1679       // If we iterate over the global cleanup list at the end of
1680       // cleanup to do this printing we will not guarantee to only
1681       // generate output for the newly-reclaimed regions (the list
1682       // might not be empty at the beginning of cleanup; we might
1683       // still be working on its previous contents). So we do the
1684       // printing here, before we append the new regions to the global
1685       // cleanup list.
1686 
1687       G1HRPrinter* hr_printer = _g1h->hr_printer();
1688       if (hr_printer->is_active()) {
1689         HeapRegionLinkedListIterator iter(&local_cleanup_list);
1690         while (iter.more_available()) {
1691           HeapRegion* hr = iter.get_next();
1692           hr_printer->cleanup(hr);
1693         }
1694       }
1695 
1696       _cleanup_list->add_as_tail(&local_cleanup_list);
1697       assert(local_cleanup_list.is_empty(), "post-condition");
1698 
1699       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1700     }
1701     double end = os::elapsedTime();
1702     if (G1PrintParCleanupStats) {
1703       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1704                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1705                           i, start, end, (end-start)*1000.0,
1706                           g1_note_end.regions_claimed(),
1707                           g1_note_end.claimed_region_time_sec()*1000.0,
1708                           g1_note_end.max_region_time_sec()*1000.0);
1709     }
1710   }
1711   size_t max_live_bytes() { return _max_live_bytes; }
1712   size_t freed_bytes() { return _freed_bytes; }
1713 };
1714 
1715 class G1ParScrubRemSetTask: public AbstractGangTask {
1716 protected:
1717   G1RemSet* _g1rs;
1718   BitMap* _region_bm;
1719   BitMap* _card_bm;
1720 public:
1721   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1722                        BitMap* region_bm, BitMap* card_bm) :
1723     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1724     _region_bm(region_bm), _card_bm(card_bm)
1725   {}
1726 
1727   void work(int i) {
1728     if (G1CollectedHeap::use_parallel_gc_threads()) {
1729       _g1rs->scrub_par(_region_bm, _card_bm, i,
1730                        HeapRegion::ScrubRemSetClaimValue);
1731     } else {
1732       _g1rs->scrub(_region_bm, _card_bm);
1733     }
1734   }
1735 
1736 };
1737 
1738 void ConcurrentMark::cleanup() {
1739   // world is stopped at this checkpoint
1740   assert(SafepointSynchronize::is_at_safepoint(),
1741          "world should be stopped");
1742   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1743 
1744   // If a full collection has happened, we shouldn't do this.
1745   if (has_aborted()) {
1746     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1747     return;
1748   }
1749 
1750   HRSPhaseSetter x(HRSPhaseCleanup);
1751   g1h->verify_region_sets_optional();
1752 
1753   if (VerifyDuringGC) {
1754     HandleMark hm;  // handle scope
1755     gclog_or_tty->print(" VerifyDuringGC:(before)");
1756     Universe::heap()->prepare_for_verify();
1757     Universe::verify(/* allow dirty */ true,
1758                      /* silent      */ false,
1759                      /* option      */ VerifyOption_G1UsePrevMarking);
1760   }
1761 
1762   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1763   g1p->record_concurrent_mark_cleanup_start();
1764 
1765   double start = os::elapsedTime();
1766 
1767   HeapRegionRemSet::reset_for_cleanup_tasks();
1768 
1769   size_t n_workers;
1770 
1771   // Do counting once more with the world stopped for good measure.
1772   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1773                                         &_region_bm, &_card_bm);
1774   if (G1CollectedHeap::use_parallel_gc_threads()) {
1775     assert(g1h->check_heap_region_claim_values(
1776                                                HeapRegion::InitialClaimValue),
1777            "sanity check");
1778 
1779     g1h->set_par_threads();
1780     n_workers = g1h->n_par_threads();
1781     assert(g1h->n_par_threads() == (int) n_workers,
1782            "Should not have been reset");
1783     g1h->workers()->run_task(&g1_par_count_task);
1784     // Done with the parallel phase so reset to 0.
1785     g1h->set_par_threads(0);
1786 
1787     assert(g1h->check_heap_region_claim_values(
1788                                              HeapRegion::FinalCountClaimValue),
1789            "sanity check");
1790   } else {
1791     n_workers = 1;
1792     g1_par_count_task.work(0);
1793   }
1794 
1795   size_t known_garbage_bytes =
1796     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1797   g1p->set_known_garbage_bytes(known_garbage_bytes);
1798 
1799   size_t start_used_bytes = g1h->used();
1800   _at_least_one_mark_complete = true;
1801   g1h->set_marking_complete();
1802 
1803   ergo_verbose4(ErgoConcCycles,
1804            "finish cleanup",
1805            ergo_format_byte("occupancy")
1806            ergo_format_byte("capacity")
1807            ergo_format_byte_perc("known garbage"),
1808            start_used_bytes, g1h->capacity(),
1809            known_garbage_bytes,
1810            ((double) known_garbage_bytes / (double) g1h->capacity()) * 100.0);
1811 
1812   double count_end = os::elapsedTime();
1813   double this_final_counting_time = (count_end - start);
1814   if (G1PrintParCleanupStats) {
1815     gclog_or_tty->print_cr("Cleanup:");
1816     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
1817                            this_final_counting_time*1000.0);
1818   }
1819   _total_counting_time += this_final_counting_time;
1820 
1821   if (G1PrintRegionLivenessInfo) {
1822     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1823     _g1h->heap_region_iterate(&cl);
1824   }
1825 
1826   // Install newly created mark bitMap as "prev".
1827   swapMarkBitMaps();
1828 
1829   g1h->reset_gc_time_stamp();
1830 
1831   // Note end of marking in all heap regions.
1832   double note_end_start = os::elapsedTime();
1833   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1834   if (G1CollectedHeap::use_parallel_gc_threads()) {
1835     g1h->set_par_threads((int)n_workers);
1836     g1h->workers()->run_task(&g1_par_note_end_task);
1837     g1h->set_par_threads(0);
1838 
1839     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1840            "sanity check");
1841   } else {
1842     g1_par_note_end_task.work(0);
1843   }
1844 
1845   if (!cleanup_list_is_empty()) {
1846     // The cleanup list is not empty, so we'll have to process it
1847     // concurrently. Notify anyone else that might be wanting free
1848     // regions that there will be more free regions coming soon.
1849     g1h->set_free_regions_coming();
1850   }
1851   double note_end_end = os::elapsedTime();
1852   if (G1PrintParCleanupStats) {
1853     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
1854                            (note_end_end - note_end_start)*1000.0);
1855   }
1856 
1857   // call below, since it affects the metric by which we sort the heap
1858   // regions.
1859   if (G1ScrubRemSets) {
1860     double rs_scrub_start = os::elapsedTime();
1861     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1862     if (G1CollectedHeap::use_parallel_gc_threads()) {
1863       g1h->set_par_threads((int)n_workers);
1864       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1865       g1h->set_par_threads(0);
1866 
1867       assert(g1h->check_heap_region_claim_values(
1868                                             HeapRegion::ScrubRemSetClaimValue),
1869              "sanity check");
1870     } else {
1871       g1_par_scrub_rs_task.work(0);
1872     }
1873 
1874     double rs_scrub_end = os::elapsedTime();
1875     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1876     _total_rs_scrub_time += this_rs_scrub_time;
1877   }
1878 
1879   // this will also free any regions totally full of garbage objects,
1880   // and sort the regions.
1881   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
1882 
1883   // Statistics.
1884   double end = os::elapsedTime();
1885   _cleanup_times.add((end - start) * 1000.0);
1886 
1887   // G1CollectedHeap::heap()->print();
1888   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1889   // G1CollectedHeap::heap()->get_gc_time_stamp());
1890 
1891   if (PrintGC || PrintGCDetails) {
1892     g1h->print_size_transition(gclog_or_tty,
1893                                start_used_bytes,
1894                                g1h->used(),
1895                                g1h->capacity());
1896   }
1897 
1898   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1899   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1900 
1901   // Clean up will have freed any regions completely full of garbage.
1902   // Update the soft reference policy with the new heap occupancy.
1903   Universe::update_heap_info_at_gc();
1904 
1905   // We need to make this be a "collection" so any collection pause that
1906   // races with it goes around and waits for completeCleanup to finish.
1907   g1h->increment_total_collections();
1908 
1909   if (VerifyDuringGC) {
1910     HandleMark hm;  // handle scope
1911     gclog_or_tty->print(" VerifyDuringGC:(after)");
1912     Universe::heap()->prepare_for_verify();
1913     Universe::verify(/* allow dirty */ true,
1914                      /* silent      */ false,
1915                      /* option      */ VerifyOption_G1UsePrevMarking);
1916   }
1917 
1918   g1h->verify_region_sets_optional();
1919 }
1920 
1921 void ConcurrentMark::completeCleanup() {
1922   if (has_aborted()) return;
1923 
1924   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1925 
1926   _cleanup_list.verify_optional();
1927   FreeRegionList tmp_free_list("Tmp Free List");
1928 
1929   if (G1ConcRegionFreeingVerbose) {
1930     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1931                            "cleanup list has "SIZE_FORMAT" entries",
1932                            _cleanup_list.length());
1933   }
1934 
1935   // Noone else should be accessing the _cleanup_list at this point,
1936   // so it's not necessary to take any locks
1937   while (!_cleanup_list.is_empty()) {
1938     HeapRegion* hr = _cleanup_list.remove_head();
1939     assert(hr != NULL, "the list was not empty");
1940     hr->par_clear();
1941     tmp_free_list.add_as_tail(hr);
1942 
1943     // Instead of adding one region at a time to the secondary_free_list,
1944     // we accumulate them in the local list and move them a few at a
1945     // time. This also cuts down on the number of notify_all() calls
1946     // we do during this process. We'll also append the local list when
1947     // _cleanup_list is empty (which means we just removed the last
1948     // region from the _cleanup_list).
1949     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1950         _cleanup_list.is_empty()) {
1951       if (G1ConcRegionFreeingVerbose) {
1952         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1953                                "appending "SIZE_FORMAT" entries to the "
1954                                "secondary_free_list, clean list still has "
1955                                SIZE_FORMAT" entries",
1956                                tmp_free_list.length(),
1957                                _cleanup_list.length());
1958       }
1959 
1960       {
1961         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1962         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
1963         SecondaryFreeList_lock->notify_all();
1964       }
1965 
1966       if (G1StressConcRegionFreeing) {
1967         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1968           os::sleep(Thread::current(), (jlong) 1, false);
1969         }
1970       }
1971     }
1972   }
1973   assert(tmp_free_list.is_empty(), "post-condition");
1974 }
1975 
1976 // Support closures for reference procssing in G1
1977 
1978 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1979   HeapWord* addr = (HeapWord*)obj;
1980   return addr != NULL &&
1981          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1982 }
1983 
1984 class G1CMKeepAliveClosure: public OopClosure {
1985   G1CollectedHeap* _g1;
1986   ConcurrentMark*  _cm;
1987   CMBitMap*        _bitMap;
1988  public:
1989   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1990                        CMBitMap* bitMap) :
1991     _g1(g1), _cm(cm),
1992     _bitMap(bitMap) {}
1993 
1994   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1995   virtual void do_oop(      oop* p) { do_oop_work(p); }
1996 
1997   template <class T> void do_oop_work(T* p) {
1998     oop obj = oopDesc::load_decode_heap_oop(p);
1999     HeapWord* addr = (HeapWord*)obj;
2000 
2001     if (_cm->verbose_high()) {
2002       gclog_or_tty->print_cr("\t[0] we're looking at location "
2003                              "*"PTR_FORMAT" = "PTR_FORMAT,
2004                              p, (void*) obj);
2005     }
2006 
2007     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2008       _bitMap->mark(addr);
2009       _cm->mark_stack_push(obj);
2010     }
2011   }
2012 };
2013 
2014 class G1CMDrainMarkingStackClosure: public VoidClosure {
2015   CMMarkStack*                  _markStack;
2016   CMBitMap*                     _bitMap;
2017   G1CMKeepAliveClosure*         _oopClosure;
2018  public:
2019   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
2020                                G1CMKeepAliveClosure* oopClosure) :
2021     _bitMap(bitMap),
2022     _markStack(markStack),
2023     _oopClosure(oopClosure)
2024   {}
2025 
2026   void do_void() {
2027     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
2028   }
2029 };
2030 
2031 // 'Keep Alive' closure used by parallel reference processing.
2032 // An instance of this closure is used in the parallel reference processing
2033 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2034 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are
2035 // placed on to discovered ref lists once so we can mark and push with no
2036 // need to check whether the object has already been marked. Using the
2037 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2038 // operating on the global mark stack. This means that an individual
2039 // worker would be doing lock-free pushes while it processes its own
2040 // discovered ref list followed by drain call. If the discovered ref lists
2041 // are unbalanced then this could cause interference with the other
2042 // workers. Using a CMTask (and its embedded local data structures)
2043 // avoids that potential interference.
2044 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2045   ConcurrentMark*  _cm;
2046   CMTask*          _task;
2047   int              _ref_counter_limit;
2048   int              _ref_counter;
2049  public:
2050   G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
2051     _cm(cm), _task(task),
2052     _ref_counter_limit(G1RefProcDrainInterval) {
2053     assert(_ref_counter_limit > 0, "sanity");
2054     _ref_counter = _ref_counter_limit;
2055   }
2056 
2057   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2058   virtual void do_oop(      oop* p) { do_oop_work(p); }
2059 
2060   template <class T> void do_oop_work(T* p) {
2061     if (!_cm->has_overflown()) {
2062       oop obj = oopDesc::load_decode_heap_oop(p);
2063       if (_cm->verbose_high()) {
2064         gclog_or_tty->print_cr("\t[%d] we're looking at location "
2065                                "*"PTR_FORMAT" = "PTR_FORMAT,
2066                                _task->task_id(), p, (void*) obj);
2067       }
2068 
2069       _task->deal_with_reference(obj);
2070       _ref_counter--;
2071 
2072       if (_ref_counter == 0) {
2073         // We have dealt with _ref_counter_limit references, pushing them and objects
2074         // reachable from them on to the local stack (and possibly the global stack).
2075         // Call do_marking_step() to process these entries. We call the routine in a
2076         // loop, which we'll exit if there's nothing more to do (i.e. we're done
2077         // with the entries that we've pushed as a result of the deal_with_reference
2078         // calls above) or we overflow.
2079         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2080         // while there may still be some work to do. (See the comment at the
2081         // beginning of CMTask::do_marking_step() for those conditions - one of which
2082         // is reaching the specified time target.) It is only when
2083         // CMTask::do_marking_step() returns without setting the has_aborted() flag
2084         // that the marking has completed.
2085         do {
2086           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2087           _task->do_marking_step(mark_step_duration_ms,
2088                                  false /* do_stealing    */,
2089                                  false /* do_termination */);
2090         } while (_task->has_aborted() && !_cm->has_overflown());
2091         _ref_counter = _ref_counter_limit;
2092       }
2093     } else {
2094       if (_cm->verbose_high()) {
2095          gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2096       }
2097     }
2098   }
2099 };
2100 
2101 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2102   ConcurrentMark* _cm;
2103   CMTask* _task;
2104  public:
2105   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2106     _cm(cm), _task(task)
2107   {}
2108 
2109   void do_void() {
2110     do {
2111       if (_cm->verbose_high()) {
2112         gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2113                                _task->task_id());
2114       }
2115 
2116       // We call CMTask::do_marking_step() to completely drain the local and
2117       // global marking stacks. The routine is called in a loop, which we'll
2118       // exit if there's nothing more to do (i.e. we'completely drained the
2119       // entries that were pushed as a result of applying the
2120       // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2121       // lists above) or we overflow the global marking stack.
2122       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2123       // while there may still be some work to do. (See the comment at the
2124       // beginning of CMTask::do_marking_step() for those conditions - one of which
2125       // is reaching the specified time target.) It is only when
2126       // CMTask::do_marking_step() returns without setting the has_aborted() flag
2127       // that the marking has completed.
2128 
2129       _task->do_marking_step(1000000000.0 /* something very large */,
2130                              true /* do_stealing    */,
2131                              true /* do_termination */);
2132     } while (_task->has_aborted() && !_cm->has_overflown());
2133   }
2134 };
2135 
2136 // Implementation of AbstractRefProcTaskExecutor for parallel
2137 // reference processing at the end of G1 concurrent marking
2138 
2139 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2140 private:
2141   G1CollectedHeap* _g1h;
2142   ConcurrentMark*  _cm;
2143   WorkGang*        _workers;
2144   int              _active_workers;
2145 
2146 public:
2147   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2148                         ConcurrentMark* cm,
2149                         WorkGang* workers,
2150                         int n_workers) :
2151     _g1h(g1h), _cm(cm),
2152     _workers(workers), _active_workers(n_workers) { }
2153 
2154   // Executes the given task using concurrent marking worker threads.
2155   virtual void execute(ProcessTask& task);
2156   virtual void execute(EnqueueTask& task);
2157 };
2158 
2159 class G1CMRefProcTaskProxy: public AbstractGangTask {
2160   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2161   ProcessTask&     _proc_task;
2162   G1CollectedHeap* _g1h;
2163   ConcurrentMark*  _cm;
2164 
2165 public:
2166   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2167                      G1CollectedHeap* g1h,
2168                      ConcurrentMark* cm) :
2169     AbstractGangTask("Process reference objects in parallel"),
2170     _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
2171 
2172   virtual void work(int i) {
2173     CMTask* marking_task = _cm->task(i);
2174     G1CMIsAliveClosure g1_is_alive(_g1h);
2175     G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
2176     G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2177 
2178     _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2179   }
2180 };
2181 
2182 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2183   assert(_workers != NULL, "Need parallel worker threads.");
2184 
2185   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2186 
2187   // We need to reset the phase for each task execution so that
2188   // the termination protocol of CMTask::do_marking_step works.
2189   _cm->set_phase(_active_workers, false /* concurrent */);
2190   _g1h->set_par_threads(_active_workers);
2191   _workers->run_task(&proc_task_proxy);
2192   _g1h->set_par_threads(0);
2193 }
2194 
2195 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2196   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2197   EnqueueTask& _enq_task;
2198 
2199 public:
2200   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2201     AbstractGangTask("Enqueue reference objects in parallel"),
2202     _enq_task(enq_task) { }
2203 
2204   virtual void work(int i) {
2205     _enq_task.work(i);
2206   }
2207 };
2208 
2209 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2210   assert(_workers != NULL, "Need parallel worker threads.");
2211 
2212   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2213 
2214   _g1h->set_par_threads(_active_workers);
2215   _workers->run_task(&enq_task_proxy);
2216   _g1h->set_par_threads(0);
2217 }
2218 
2219 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2220   ResourceMark rm;
2221   HandleMark   hm;
2222 
2223   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2224 
2225   // Is alive closure.
2226   G1CMIsAliveClosure g1_is_alive(g1h);
2227 
2228   // Inner scope to exclude the cleaning of the string and symbol
2229   // tables from the displayed time.
2230   {
2231     bool verbose = PrintGC && PrintGCDetails;
2232     if (verbose) {
2233       gclog_or_tty->put(' ');
2234     }
2235     TraceTime t("GC ref-proc", verbose, false, gclog_or_tty);
2236 
2237     ReferenceProcessor* rp = g1h->ref_processor_cm();
2238 
2239     // See the comment in G1CollectedHeap::ref_processing_init()
2240     // about how reference processing currently works in G1.
2241 
2242     // Process weak references.
2243     rp->setup_policy(clear_all_soft_refs);
2244     assert(_markStack.isEmpty(), "mark stack should be empty");
2245 
2246     G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
2247     G1CMDrainMarkingStackClosure
2248       g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
2249 
2250     // We use the work gang from the G1CollectedHeap and we utilize all
2251     // the worker threads.
2252     int active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1;
2253     active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
2254 
2255     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2256                                               g1h->workers(), active_workers);
2257 
2258     if (rp->processing_is_mt()) {
2259       // Set the degree of MT here.  If the discovery is done MT, there
2260       // may have been a different number of threads doing the discovery
2261       // and a different number of discovered lists may have Ref objects.
2262       // That is OK as long as the Reference lists are balanced (see
2263       // balance_all_queues() and balance_queues()).
2264       rp->set_active_mt_degree(active_workers);
2265 
2266       rp->process_discovered_references(&g1_is_alive,
2267                                       &g1_keep_alive,
2268                                       &g1_drain_mark_stack,
2269                                       &par_task_executor);
2270 
2271       // The work routines of the parallel keep_alive and drain_marking_stack
2272       // will set the has_overflown flag if we overflow the global marking
2273       // stack.
2274     } else {
2275       rp->process_discovered_references(&g1_is_alive,
2276                                         &g1_keep_alive,
2277                                         &g1_drain_mark_stack,
2278                                         NULL);
2279     }
2280 
2281     assert(_markStack.overflow() || _markStack.isEmpty(),
2282             "mark stack should be empty (unless it overflowed)");
2283     if (_markStack.overflow()) {
2284       // Should have been done already when we tried to push an
2285       // entry on to the global mark stack. But let's do it again.
2286       set_has_overflown();
2287     }
2288 
2289     if (rp->processing_is_mt()) {
2290       assert(rp->num_q() == active_workers, "why not");
2291       rp->enqueue_discovered_references(&par_task_executor);
2292     } else {
2293       rp->enqueue_discovered_references();
2294     }
2295 
2296     rp->verify_no_references_recorded();
2297     assert(!rp->discovery_enabled(), "Post condition");
2298   }
2299 
2300   // Now clean up stale oops in StringTable
2301   StringTable::unlink(&g1_is_alive);
2302   // Clean up unreferenced symbols in symbol table.
2303   SymbolTable::unlink();
2304 }
2305 
2306 void ConcurrentMark::swapMarkBitMaps() {
2307   CMBitMapRO* temp = _prevMarkBitMap;
2308   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2309   _nextMarkBitMap  = (CMBitMap*)  temp;
2310 }
2311 
2312 class CMRemarkTask: public AbstractGangTask {
2313 private:
2314   ConcurrentMark *_cm;
2315 
2316 public:
2317   void work(int worker_i) {
2318     // Since all available tasks are actually started, we should
2319     // only proceed if we're supposed to be actived.
2320     if ((size_t)worker_i < _cm->active_tasks()) {
2321       CMTask* task = _cm->task(worker_i);
2322       task->record_start_time();
2323       do {
2324         task->do_marking_step(1000000000.0 /* something very large */,
2325                               true /* do_stealing    */,
2326                               true /* do_termination */);
2327       } while (task->has_aborted() && !_cm->has_overflown());
2328       // If we overflow, then we do not want to restart. We instead
2329       // want to abort remark and do concurrent marking again.
2330       task->record_end_time();
2331     }
2332   }
2333 
2334   CMRemarkTask(ConcurrentMark* cm, int active_workers) :
2335     AbstractGangTask("Par Remark"), _cm(cm) {
2336     _cm->terminator()->reset_for_reuse(active_workers);
2337   }
2338 };
2339 
2340 void ConcurrentMark::checkpointRootsFinalWork() {
2341   ResourceMark rm;
2342   HandleMark   hm;
2343   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2344 
2345   g1h->ensure_parsability(false);
2346 
2347   if (G1CollectedHeap::use_parallel_gc_threads()) {
2348     G1CollectedHeap::StrongRootsScope srs(g1h);
2349     // this is remark, so we'll use up all active threads
2350     int active_workers = g1h->workers()->active_workers();
2351     if (active_workers == 0) {
2352       assert(active_workers > 0, "Should have been set earlier");
2353       active_workers = ParallelGCThreads;
2354       g1h->workers()->set_active_workers(active_workers);
2355     }
2356     set_phase(active_workers, false /* concurrent */);
2357     // Leave _parallel_marking_threads at it's
2358     // value originally calculated in the ConcurrentMark
2359     // constructor and pass values of the active workers
2360     // through the gang in the task.
2361 
2362     CMRemarkTask remarkTask(this, active_workers);
2363     g1h->set_par_threads(active_workers);
2364     g1h->workers()->run_task(&remarkTask);
2365     g1h->set_par_threads(0);
2366   } else {
2367     G1CollectedHeap::StrongRootsScope srs(g1h);
2368     // this is remark, so we'll use up all available threads
2369     int active_workers = 1;
2370     set_phase(active_workers, false /* concurrent */);
2371 
2372     CMRemarkTask remarkTask(this, active_workers);
2373     // We will start all available threads, even if we decide that the
2374     // active_workers will be fewer. The extra ones will just bail out
2375     // immediately.
2376     remarkTask.work(0);
2377   }
2378   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2379   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2380 
2381   print_stats();
2382 
2383 #if VERIFY_OBJS_PROCESSED
2384   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2385     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2386                            _scan_obj_cl.objs_processed,
2387                            ThreadLocalObjQueue::objs_enqueued);
2388     guarantee(_scan_obj_cl.objs_processed ==
2389               ThreadLocalObjQueue::objs_enqueued,
2390               "Different number of objs processed and enqueued.");
2391   }
2392 #endif
2393 }
2394 
2395 #ifndef PRODUCT
2396 
2397 class PrintReachableOopClosure: public OopClosure {
2398 private:
2399   G1CollectedHeap* _g1h;
2400   outputStream*    _out;
2401   VerifyOption     _vo;
2402   bool             _all;
2403 
2404 public:
2405   PrintReachableOopClosure(outputStream* out,
2406                            VerifyOption  vo,
2407                            bool          all) :
2408     _g1h(G1CollectedHeap::heap()),
2409     _out(out), _vo(vo), _all(all) { }
2410 
2411   void do_oop(narrowOop* p) { do_oop_work(p); }
2412   void do_oop(      oop* p) { do_oop_work(p); }
2413 
2414   template <class T> void do_oop_work(T* p) {
2415     oop         obj = oopDesc::load_decode_heap_oop(p);
2416     const char* str = NULL;
2417     const char* str2 = "";
2418 
2419     if (obj == NULL) {
2420       str = "";
2421     } else if (!_g1h->is_in_g1_reserved(obj)) {
2422       str = " O";
2423     } else {
2424       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2425       guarantee(hr != NULL, "invariant");
2426       bool over_tams = false;
2427       bool marked = false;
2428 
2429       switch (_vo) {
2430         case VerifyOption_G1UsePrevMarking:
2431           over_tams = hr->obj_allocated_since_prev_marking(obj);
2432           marked = _g1h->isMarkedPrev(obj);
2433           break;
2434         case VerifyOption_G1UseNextMarking:
2435           over_tams = hr->obj_allocated_since_next_marking(obj);
2436           marked = _g1h->isMarkedNext(obj);
2437           break;
2438         case VerifyOption_G1UseMarkWord:
2439           marked = obj->is_gc_marked();
2440           break;
2441         default:
2442           ShouldNotReachHere();
2443       }
2444 
2445       if (over_tams) {
2446         str = " >";
2447         if (marked) {
2448           str2 = " AND MARKED";
2449         }
2450       } else if (marked) {
2451         str = " M";
2452       } else {
2453         str = " NOT";
2454       }
2455     }
2456 
2457     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2458                    p, (void*) obj, str, str2);
2459   }
2460 };
2461 
2462 class PrintReachableObjectClosure : public ObjectClosure {
2463 private:
2464   G1CollectedHeap* _g1h;
2465   outputStream*    _out;
2466   VerifyOption     _vo;
2467   bool             _all;
2468   HeapRegion*      _hr;
2469 
2470 public:
2471   PrintReachableObjectClosure(outputStream* out,
2472                               VerifyOption  vo,
2473                               bool          all,
2474                               HeapRegion*   hr) :
2475     _g1h(G1CollectedHeap::heap()),
2476     _out(out), _vo(vo), _all(all), _hr(hr) { }
2477 
2478   void do_object(oop o) {
2479     bool over_tams = false;
2480     bool marked = false;
2481 
2482     switch (_vo) {
2483       case VerifyOption_G1UsePrevMarking:
2484         over_tams = _hr->obj_allocated_since_prev_marking(o);
2485         marked = _g1h->isMarkedPrev(o);
2486         break;
2487       case VerifyOption_G1UseNextMarking:
2488         over_tams = _hr->obj_allocated_since_next_marking(o);
2489         marked = _g1h->isMarkedNext(o);
2490         break;
2491       case VerifyOption_G1UseMarkWord:
2492         marked = o->is_gc_marked();
2493         break;
2494       default:
2495         ShouldNotReachHere();
2496     }
2497     bool print_it = _all || over_tams || marked;
2498 
2499     if (print_it) {
2500       _out->print_cr(" "PTR_FORMAT"%s",
2501                      o, (over_tams) ? " >" : (marked) ? " M" : "");
2502       PrintReachableOopClosure oopCl(_out, _vo, _all);
2503       o->oop_iterate(&oopCl);
2504     }
2505   }
2506 };
2507 
2508 class PrintReachableRegionClosure : public HeapRegionClosure {
2509 private:
2510   outputStream* _out;
2511   VerifyOption  _vo;
2512   bool          _all;
2513 
2514 public:
2515   bool doHeapRegion(HeapRegion* hr) {
2516     HeapWord* b = hr->bottom();
2517     HeapWord* e = hr->end();
2518     HeapWord* t = hr->top();
2519     HeapWord* p = NULL;
2520 
2521     switch (_vo) {
2522       case VerifyOption_G1UsePrevMarking:
2523         p = hr->prev_top_at_mark_start();
2524         break;
2525       case VerifyOption_G1UseNextMarking:
2526         p = hr->next_top_at_mark_start();
2527         break;
2528       case VerifyOption_G1UseMarkWord:
2529         // When we are verifying marking using the mark word
2530         // TAMS has no relevance.
2531         assert(p == NULL, "post-condition");
2532         break;
2533       default:
2534         ShouldNotReachHere();
2535     }
2536     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2537                    "TAMS: "PTR_FORMAT, b, e, t, p);
2538     _out->cr();
2539 
2540     HeapWord* from = b;
2541     HeapWord* to   = t;
2542 
2543     if (to > from) {
2544       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2545       _out->cr();
2546       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2547       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2548       _out->cr();
2549     }
2550 
2551     return false;
2552   }
2553 
2554   PrintReachableRegionClosure(outputStream* out,
2555                               VerifyOption  vo,
2556                               bool          all) :
2557     _out(out), _vo(vo), _all(all) { }
2558 };
2559 
2560 static const char* verify_option_to_tams(VerifyOption vo) {
2561   switch (vo) {
2562     case VerifyOption_G1UsePrevMarking:
2563       return "PTAMS";
2564     case VerifyOption_G1UseNextMarking:
2565       return "NTAMS";
2566     default:
2567       return "NONE";
2568   }
2569 }
2570 
2571 void ConcurrentMark::print_reachable(const char* str,
2572                                      VerifyOption vo,
2573                                      bool all) {
2574   gclog_or_tty->cr();
2575   gclog_or_tty->print_cr("== Doing heap dump... ");
2576 
2577   if (G1PrintReachableBaseFile == NULL) {
2578     gclog_or_tty->print_cr("  #### error: no base file defined");
2579     return;
2580   }
2581 
2582   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2583       (JVM_MAXPATHLEN - 1)) {
2584     gclog_or_tty->print_cr("  #### error: file name too long");
2585     return;
2586   }
2587 
2588   char file_name[JVM_MAXPATHLEN];
2589   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2590   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2591 
2592   fileStream fout(file_name);
2593   if (!fout.is_open()) {
2594     gclog_or_tty->print_cr("  #### error: could not open file");
2595     return;
2596   }
2597 
2598   outputStream* out = &fout;
2599   out->print_cr("-- USING %s", verify_option_to_tams(vo));
2600   out->cr();
2601 
2602   out->print_cr("--- ITERATING OVER REGIONS");
2603   out->cr();
2604   PrintReachableRegionClosure rcl(out, vo, all);
2605   _g1h->heap_region_iterate(&rcl);
2606   out->cr();
2607 
2608   gclog_or_tty->print_cr("  done");
2609   gclog_or_tty->flush();
2610 }
2611 
2612 #endif // PRODUCT
2613 
2614 // This note is for drainAllSATBBuffers and the code in between.
2615 // In the future we could reuse a task to do this work during an
2616 // evacuation pause (since now tasks are not active and can be claimed
2617 // during an evacuation pause). This was a late change to the code and
2618 // is currently not being taken advantage of.
2619 
2620 class CMGlobalObjectClosure : public ObjectClosure {
2621 private:
2622   ConcurrentMark* _cm;
2623 
2624 public:
2625   void do_object(oop obj) {
2626     _cm->deal_with_reference(obj);
2627   }
2628 
2629   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2630 };
2631 
2632 void ConcurrentMark::deal_with_reference(oop obj) {
2633   if (verbose_high()) {
2634     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2635                            (void*) obj);
2636   }
2637 
2638   HeapWord* objAddr = (HeapWord*) obj;
2639   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2640   if (_g1h->is_in_g1_reserved(objAddr)) {
2641     assert(obj != NULL, "null check is implicit");
2642     if (!_nextMarkBitMap->isMarked(objAddr)) {
2643       // Only get the containing region if the object is not marked on the
2644       // bitmap (otherwise, it's a waste of time since we won't do
2645       // anything with it).
2646       HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
2647       if (!hr->obj_allocated_since_next_marking(obj)) {
2648         if (verbose_high()) {
2649           gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2650                                  "marked", (void*) obj);
2651         }
2652 
2653         // we need to mark it first
2654         if (_nextMarkBitMap->parMark(objAddr)) {
2655           // No OrderAccess:store_load() is needed. It is implicit in the
2656           // CAS done in parMark(objAddr) above
2657           HeapWord* finger = _finger;
2658           if (objAddr < finger) {
2659             if (verbose_high()) {
2660               gclog_or_tty->print_cr("[global] below the global finger "
2661                                      "("PTR_FORMAT"), pushing it", finger);
2662             }
2663             if (!mark_stack_push(obj)) {
2664               if (verbose_low()) {
2665                 gclog_or_tty->print_cr("[global] global stack overflow during "
2666                                        "deal_with_reference");
2667               }
2668             }
2669           }
2670         }
2671       }
2672     }
2673   }
2674 }
2675 
2676 void ConcurrentMark::drainAllSATBBuffers() {
2677   CMGlobalObjectClosure oc(this);
2678   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2679   satb_mq_set.set_closure(&oc);
2680 
2681   while (satb_mq_set.apply_closure_to_completed_buffer()) {
2682     if (verbose_medium()) {
2683       gclog_or_tty->print_cr("[global] processed an SATB buffer");
2684     }
2685   }
2686 
2687   // no need to check whether we should do this, as this is only
2688   // called during an evacuation pause
2689   satb_mq_set.iterate_closure_all_threads();
2690 
2691   satb_mq_set.set_closure(NULL);
2692   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2693 }
2694 
2695 void ConcurrentMark::markPrev(oop p) {
2696   // Note we are overriding the read-only view of the prev map here, via
2697   // the cast.
2698   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2699 }
2700 
2701 void ConcurrentMark::clear(oop p) {
2702   assert(p != NULL && p->is_oop(), "expected an oop");
2703   HeapWord* addr = (HeapWord*)p;
2704   assert(addr >= _nextMarkBitMap->startWord() ||
2705          addr < _nextMarkBitMap->endWord(), "in a region");
2706 
2707   _nextMarkBitMap->clear(addr);
2708 }
2709 
2710 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2711   // Note we are overriding the read-only view of the prev map here, via
2712   // the cast.
2713   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2714   _nextMarkBitMap->clearRange(mr);
2715 }
2716 
2717 HeapRegion*
2718 ConcurrentMark::claim_region(int task_num) {
2719   // "checkpoint" the finger
2720   HeapWord* finger = _finger;
2721 
2722   // _heap_end will not change underneath our feet; it only changes at
2723   // yield points.
2724   while (finger < _heap_end) {
2725     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2726 
2727     // Note on how this code handles humongous regions. In the
2728     // normal case the finger will reach the start of a "starts
2729     // humongous" (SH) region. Its end will either be the end of the
2730     // last "continues humongous" (CH) region in the sequence, or the
2731     // standard end of the SH region (if the SH is the only region in
2732     // the sequence). That way claim_region() will skip over the CH
2733     // regions. However, there is a subtle race between a CM thread
2734     // executing this method and a mutator thread doing a humongous
2735     // object allocation. The two are not mutually exclusive as the CM
2736     // thread does not need to hold the Heap_lock when it gets
2737     // here. So there is a chance that claim_region() will come across
2738     // a free region that's in the progress of becoming a SH or a CH
2739     // region. In the former case, it will either
2740     //   a) Miss the update to the region's end, in which case it will
2741     //      visit every subsequent CH region, will find their bitmaps
2742     //      empty, and do nothing, or
2743     //   b) Will observe the update of the region's end (in which case
2744     //      it will skip the subsequent CH regions).
2745     // If it comes across a region that suddenly becomes CH, the
2746     // scenario will be similar to b). So, the race between
2747     // claim_region() and a humongous object allocation might force us
2748     // to do a bit of unnecessary work (due to some unnecessary bitmap
2749     // iterations) but it should not introduce and correctness issues.
2750     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
2751     HeapWord*   bottom        = curr_region->bottom();
2752     HeapWord*   end           = curr_region->end();
2753     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2754 
2755     if (verbose_low()) {
2756       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2757                              "["PTR_FORMAT", "PTR_FORMAT"), "
2758                              "limit = "PTR_FORMAT,
2759                              task_num, curr_region, bottom, end, limit);
2760     }
2761 
2762     // Is the gap between reading the finger and doing the CAS too long?
2763     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2764     if (res == finger) {
2765       // we succeeded
2766 
2767       // notice that _finger == end cannot be guaranteed here since,
2768       // someone else might have moved the finger even further
2769       assert(_finger >= end, "the finger should have moved forward");
2770 
2771       if (verbose_low()) {
2772         gclog_or_tty->print_cr("[%d] we were successful with region = "
2773                                PTR_FORMAT, task_num, curr_region);
2774       }
2775 
2776       if (limit > bottom) {
2777         if (verbose_low()) {
2778           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2779                                  "returning it ", task_num, curr_region);
2780         }
2781         return curr_region;
2782       } else {
2783         assert(limit == bottom,
2784                "the region limit should be at bottom");
2785         if (verbose_low()) {
2786           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2787                                  "returning NULL", task_num, curr_region);
2788         }
2789         // we return NULL and the caller should try calling
2790         // claim_region() again.
2791         return NULL;
2792       }
2793     } else {
2794       assert(_finger > finger, "the finger should have moved forward");
2795       if (verbose_low()) {
2796         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2797                                "global finger = "PTR_FORMAT", "
2798                                "our finger = "PTR_FORMAT,
2799                                task_num, _finger, finger);
2800       }
2801 
2802       // read it again
2803       finger = _finger;
2804     }
2805   }
2806 
2807   return NULL;
2808 }
2809 
2810 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
2811   bool result = false;
2812   for (int i = 0; i < (int)_max_task_num; ++i) {
2813     CMTask* the_task = _tasks[i];
2814     MemRegion mr = the_task->aborted_region();
2815     if (mr.start() != NULL) {
2816       assert(mr.end() != NULL, "invariant");
2817       assert(mr.word_size() > 0, "invariant");
2818       HeapRegion* hr = _g1h->heap_region_containing(mr.start());
2819       assert(hr != NULL, "invariant");
2820       if (hr->in_collection_set()) {
2821         // The region points into the collection set
2822         the_task->set_aborted_region(MemRegion());
2823         result = true;
2824       }
2825     }
2826   }
2827   return result;
2828 }
2829 
2830 bool ConcurrentMark::has_aborted_regions() {
2831   for (int i = 0; i < (int)_max_task_num; ++i) {
2832     CMTask* the_task = _tasks[i];
2833     MemRegion mr = the_task->aborted_region();
2834     if (mr.start() != NULL) {
2835       assert(mr.end() != NULL, "invariant");
2836       assert(mr.word_size() > 0, "invariant");
2837       return true;
2838     }
2839   }
2840   return false;
2841 }
2842 
2843 void ConcurrentMark::oops_do(OopClosure* cl) {
2844   if (_markStack.size() > 0 && verbose_low()) {
2845     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2846                            "size = %d", _markStack.size());
2847   }
2848   // we first iterate over the contents of the mark stack...
2849   _markStack.oops_do(cl);
2850 
2851   for (int i = 0; i < (int)_max_task_num; ++i) {
2852     OopTaskQueue* queue = _task_queues->queue((int)i);
2853 
2854     if (queue->size() > 0 && verbose_low()) {
2855       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2856                              "size = %d", i, queue->size());
2857     }
2858 
2859     // ...then over the contents of the all the task queues.
2860     queue->oops_do(cl);
2861   }
2862 
2863   // Invalidate any entries, that are in the region stack, that
2864   // point into the collection set
2865   if (_regionStack.invalidate_entries_into_cset()) {
2866     // otherwise, any gray objects copied during the evacuation pause
2867     // might not be visited.
2868     assert(_should_gray_objects, "invariant");
2869   }
2870 
2871   // Invalidate any aborted regions, recorded in the individual CM
2872   // tasks, that point into the collection set.
2873   if (invalidate_aborted_regions_in_cset()) {
2874     // otherwise, any gray objects copied during the evacuation pause
2875     // might not be visited.
2876     assert(_should_gray_objects, "invariant");
2877   }
2878 
2879 }
2880 
2881 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2882   _markStack.setEmpty();
2883   _markStack.clear_overflow();
2884   _regionStack.setEmpty();
2885   _regionStack.clear_overflow();
2886   if (clear_overflow) {
2887     clear_has_overflown();
2888   } else {
2889     assert(has_overflown(), "pre-condition");
2890   }
2891   _finger = _heap_start;
2892 
2893   for (int i = 0; i < (int)_max_task_num; ++i) {
2894     OopTaskQueue* queue = _task_queues->queue(i);
2895     queue->set_empty();
2896     // Clear any partial regions from the CMTasks
2897     _tasks[i]->clear_aborted_region();
2898   }
2899 }
2900 
2901 void ConcurrentMark::print_stats() {
2902   if (verbose_stats()) {
2903     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2904     for (size_t i = 0; i < _active_tasks; ++i) {
2905       _tasks[i]->print_stats();
2906       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2907     }
2908   }
2909 }
2910 
2911 // Closures used by ConcurrentMark::complete_marking_in_collection_set().
2912 
2913 class CSetMarkOopClosure: public OopClosure {
2914   friend class CSetMarkBitMapClosure;
2915 
2916   G1CollectedHeap* _g1h;
2917   CMBitMap*        _bm;
2918   ConcurrentMark*  _cm;
2919   oop*             _ms;
2920   jint*            _array_ind_stack;
2921   int              _ms_size;
2922   int              _ms_ind;
2923   int              _array_increment;
2924   int              _worker_i;
2925 
2926   bool push(oop obj, int arr_ind = 0) {
2927     if (_ms_ind == _ms_size) {
2928       gclog_or_tty->print_cr("Mark stack is full.");
2929       return false;
2930     }
2931     _ms[_ms_ind] = obj;
2932     if (obj->is_objArray()) {
2933       _array_ind_stack[_ms_ind] = arr_ind;
2934     }
2935     _ms_ind++;
2936     return true;
2937   }
2938 
2939   oop pop() {
2940     if (_ms_ind == 0) {
2941       return NULL;
2942     } else {
2943       _ms_ind--;
2944       return _ms[_ms_ind];
2945     }
2946   }
2947 
2948   template <class T> bool drain() {
2949     while (_ms_ind > 0) {
2950       oop obj = pop();
2951       assert(obj != NULL, "Since index was non-zero.");
2952       if (obj->is_objArray()) {
2953         jint arr_ind = _array_ind_stack[_ms_ind];
2954         objArrayOop aobj = objArrayOop(obj);
2955         jint len = aobj->length();
2956         jint next_arr_ind = arr_ind + _array_increment;
2957         if (next_arr_ind < len) {
2958           push(obj, next_arr_ind);
2959         }
2960         // Now process this portion of this one.
2961         int lim = MIN2(next_arr_ind, len);
2962         for (int j = arr_ind; j < lim; j++) {
2963           do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2964         }
2965       } else {
2966         obj->oop_iterate(this);
2967       }
2968       if (abort()) return false;
2969     }
2970     return true;
2971   }
2972 
2973 public:
2974   CSetMarkOopClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
2975     _g1h(G1CollectedHeap::heap()),
2976     _cm(cm),
2977     _bm(cm->nextMarkBitMap()),
2978     _ms_size(ms_size), _ms_ind(0),
2979     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2980     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2981     _array_increment(MAX2(ms_size/8, 16)),
2982     _worker_i(worker_i) { }
2983 
2984   ~CSetMarkOopClosure() {
2985     FREE_C_HEAP_ARRAY(oop, _ms);
2986     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2987   }
2988 
2989   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2990   virtual void do_oop(      oop* p) { do_oop_work(p); }
2991 
2992   template <class T> void do_oop_work(T* p) {
2993     T heap_oop = oopDesc::load_heap_oop(p);
2994     if (oopDesc::is_null(heap_oop)) return;
2995     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2996     if (obj->is_forwarded()) {
2997       // If the object has already been forwarded, we have to make sure
2998       // that it's marked.  So follow the forwarding pointer.  Note that
2999       // this does the right thing for self-forwarding pointers in the
3000       // evacuation failure case.
3001       obj = obj->forwardee();
3002     }
3003     HeapRegion* hr = _g1h->heap_region_containing(obj);
3004     if (hr != NULL) {
3005       if (hr->in_collection_set()) {
3006         if (_g1h->is_obj_ill(obj)) {
3007           if (_bm->parMark((HeapWord*)obj)) {
3008             if (!push(obj)) {
3009               gclog_or_tty->print_cr("Setting abort in CSetMarkOopClosure because push failed.");
3010               set_abort();
3011             }
3012           }
3013         }
3014       } else {
3015         // Outside the collection set; we need to gray it
3016         _cm->deal_with_reference(obj);
3017       }
3018     }
3019   }
3020 };
3021 
3022 class CSetMarkBitMapClosure: public BitMapClosure {
3023   G1CollectedHeap*   _g1h;
3024   CMBitMap*          _bitMap;
3025   ConcurrentMark*    _cm;
3026   CSetMarkOopClosure _oop_cl;
3027   int                _worker_i;
3028 
3029 public:
3030   CSetMarkBitMapClosure(ConcurrentMark* cm, int ms_size, int worker_i) :
3031     _g1h(G1CollectedHeap::heap()),
3032     _bitMap(cm->nextMarkBitMap()),
3033     _oop_cl(cm, ms_size, worker_i),
3034     _worker_i(worker_i) { }
3035 
3036   bool do_bit(size_t offset) {
3037     // convert offset into a HeapWord*
3038     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
3039     assert(_bitMap->endWord() && addr < _bitMap->endWord(),
3040            "address out of range");
3041     assert(_bitMap->isMarked(addr), "tautology");
3042     oop obj = oop(addr);
3043     if (!obj->is_forwarded()) {
3044       if (!_oop_cl.push(obj)) return false;
3045       if (UseCompressedOops) {
3046         if (!_oop_cl.drain<narrowOop>()) return false;
3047       } else {
3048         if (!_oop_cl.drain<oop>()) return false;
3049       }
3050     }
3051     // Otherwise...
3052     return true;
3053   }
3054 };
3055 
3056 class CompleteMarkingInCSetHRClosure: public HeapRegionClosure {
3057   CMBitMap*             _bm;
3058   CSetMarkBitMapClosure _bit_cl;
3059   int                   _worker_i;
3060 
3061   enum SomePrivateConstants {
3062     MSSize = 1000
3063   };
3064 
3065 public:
3066   CompleteMarkingInCSetHRClosure(ConcurrentMark* cm, int worker_i) :
3067     _bm(cm->nextMarkBitMap()),
3068     _bit_cl(cm, MSSize, worker_i),
3069     _worker_i(worker_i) { }
3070 
3071   bool doHeapRegion(HeapRegion* hr) {
3072     if (hr->claimHeapRegion(HeapRegion::CompleteMarkCSetClaimValue)) {
3073       // The current worker has successfully claimed the region.
3074       if (!hr->evacuation_failed()) {
3075         MemRegion mr = MemRegion(hr->bottom(), hr->next_top_at_mark_start());
3076         if (!mr.is_empty()) {
3077           bool done = false;
3078           while (!done) {
3079             done = _bm->iterate(&_bit_cl, mr);
3080           }
3081         }
3082       }
3083     }
3084     return false;
3085   }
3086 };
3087 
3088 class SetClaimValuesInCSetHRClosure: public HeapRegionClosure {
3089   jint _claim_value;
3090 
3091 public:
3092   SetClaimValuesInCSetHRClosure(jint claim_value) :
3093     _claim_value(claim_value) { }
3094 
3095   bool doHeapRegion(HeapRegion* hr) {
3096     hr->set_claim_value(_claim_value);
3097     return false;
3098   }
3099 };
3100 
3101 class G1ParCompleteMarkInCSetTask: public AbstractGangTask {
3102 protected:
3103   G1CollectedHeap* _g1h;
3104   ConcurrentMark*  _cm;
3105 
3106 public:
3107   G1ParCompleteMarkInCSetTask(G1CollectedHeap* g1h,
3108                               ConcurrentMark* cm) :
3109     AbstractGangTask("Complete Mark in CSet"),
3110     _g1h(g1h), _cm(cm) { }
3111 
3112   void work(int worker_i) {
3113     CompleteMarkingInCSetHRClosure cmplt(_cm, worker_i);
3114     HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_i);
3115     _g1h->collection_set_iterate_from(hr, &cmplt);
3116   }
3117 };
3118 
3119 void ConcurrentMark::complete_marking_in_collection_set() {
3120   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
3121 
3122   if (!g1h->mark_in_progress()) {
3123     g1h->g1_policy()->record_mark_closure_time(0.0);
3124     return;
3125   }
3126 
3127   double start = os::elapsedTime();
3128   G1ParCompleteMarkInCSetTask complete_mark_task(g1h, this);
3129 
3130   assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
3131 
3132   if (G1CollectedHeap::use_parallel_gc_threads()) {
3133     int n_workers = g1h->workers()->active_workers();
3134     g1h->set_par_threads(n_workers);
3135     g1h->workers()->run_task(&complete_mark_task);
3136     g1h->set_par_threads(0);
3137   } else {
3138     complete_mark_task.work(0);
3139   }
3140 
3141   assert(g1h->check_cset_heap_region_claim_values(HeapRegion::CompleteMarkCSetClaimValue), "sanity");
3142 
3143   // Now reset the claim values in the regions in the collection set.
3144   SetClaimValuesInCSetHRClosure set_cv_cl(HeapRegion::InitialClaimValue);
3145   g1h->collection_set_iterate(&set_cv_cl);
3146 
3147   assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
3148 
3149   double end_time = os::elapsedTime();
3150   double elapsed_time_ms = (end_time - start) * 1000.0;
3151   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
3152 }
3153 
3154 // The next two methods deal with the following optimisation. Some
3155 // objects are gray by being marked and located above the finger. If
3156 // they are copied, during an evacuation pause, below the finger then
3157 // the need to be pushed on the stack. The observation is that, if
3158 // there are no regions in the collection set located above the
3159 // finger, then the above cannot happen, hence we do not need to
3160 // explicitly gray any objects when copying them to below the
3161 // finger. The global stack will be scanned to ensure that, if it
3162 // points to objects being copied, it will update their
3163 // location. There is a tricky situation with the gray objects in
3164 // region stack that are being coped, however. See the comment in
3165 // newCSet().
3166 
3167 void ConcurrentMark::newCSet() {
3168   if (!concurrent_marking_in_progress()) {
3169     // nothing to do if marking is not in progress
3170     return;
3171   }
3172 
3173   // find what the lowest finger is among the global and local fingers
3174   _min_finger = _finger;
3175   for (int i = 0; i < (int)_max_task_num; ++i) {
3176     CMTask* task = _tasks[i];
3177     HeapWord* task_finger = task->finger();
3178     if (task_finger != NULL && task_finger < _min_finger) {
3179       _min_finger = task_finger;
3180     }
3181   }
3182 
3183   _should_gray_objects = false;
3184 
3185   // This fixes a very subtle and fustrating bug. It might be the case
3186   // that, during en evacuation pause, heap regions that contain
3187   // objects that are gray (by being in regions contained in the
3188   // region stack) are included in the collection set. Since such gray
3189   // objects will be moved, and because it's not easy to redirect
3190   // region stack entries to point to a new location (because objects
3191   // in one region might be scattered to multiple regions after they
3192   // are copied), one option is to ensure that all marked objects
3193   // copied during a pause are pushed on the stack. Notice, however,
3194   // that this problem can only happen when the region stack is not
3195   // empty during an evacuation pause. So, we make the fix a bit less
3196   // conservative and ensure that regions are pushed on the stack,
3197   // irrespective whether all collection set regions are below the
3198   // finger, if the region stack is not empty. This is expected to be
3199   // a rare case, so I don't think it's necessary to be smarted about it.
3200   if (!region_stack_empty() || has_aborted_regions()) {
3201     _should_gray_objects = true;
3202   }
3203 }
3204 
3205 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
3206   if (!concurrent_marking_in_progress()) return;
3207 
3208   HeapWord* region_end = hr->end();
3209   if (region_end > _min_finger) {
3210     _should_gray_objects = true;
3211   }
3212 }
3213 
3214 // Resets the region fields of active CMTasks whose values point
3215 // into the collection set.
3216 void ConcurrentMark::reset_active_task_region_fields_in_cset() {
3217   assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
3218   assert(parallel_marking_threads() <= _max_task_num, "sanity");
3219 
3220   for (int i = 0; i < (int)parallel_marking_threads(); i += 1) {
3221     CMTask* task = _tasks[i];
3222     HeapWord* task_finger = task->finger();
3223     if (task_finger != NULL) {
3224       assert(_g1h->is_in_g1_reserved(task_finger), "not in heap");
3225       HeapRegion* finger_region = _g1h->heap_region_containing(task_finger);
3226       if (finger_region->in_collection_set()) {
3227         // The task's current region is in the collection set.
3228         // This region will be evacuated in the current GC and
3229         // the region fields in the task will be stale.
3230         task->giveup_current_region();
3231       }
3232     }
3233   }
3234 }
3235 
3236 // abandon current marking iteration due to a Full GC
3237 void ConcurrentMark::abort() {
3238   // Clear all marks to force marking thread to do nothing
3239   _nextMarkBitMap->clearAll();
3240   // Empty mark stack
3241   clear_marking_state();
3242   for (int i = 0; i < (int)_max_task_num; ++i) {
3243     _tasks[i]->clear_region_fields();
3244   }
3245   _has_aborted = true;
3246 
3247   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3248   satb_mq_set.abandon_partial_marking();
3249   // This can be called either during or outside marking, we'll read
3250   // the expected_active value from the SATB queue set.
3251   satb_mq_set.set_active_all_threads(
3252                                  false, /* new active value */
3253                                  satb_mq_set.is_active() /* expected_active */);
3254 }
3255 
3256 static void print_ms_time_info(const char* prefix, const char* name,
3257                                NumberSeq& ns) {
3258   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3259                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3260   if (ns.num() > 0) {
3261     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3262                            prefix, ns.sd(), ns.maximum());
3263   }
3264 }
3265 
3266 void ConcurrentMark::print_summary_info() {
3267   gclog_or_tty->print_cr(" Concurrent marking:");
3268   print_ms_time_info("  ", "init marks", _init_times);
3269   print_ms_time_info("  ", "remarks", _remark_times);
3270   {
3271     print_ms_time_info("     ", "final marks", _remark_mark_times);
3272     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3273 
3274   }
3275   print_ms_time_info("  ", "cleanups", _cleanup_times);
3276   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3277                          _total_counting_time,
3278                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3279                           (double)_cleanup_times.num()
3280                          : 0.0));
3281   if (G1ScrubRemSets) {
3282     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3283                            _total_rs_scrub_time,
3284                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3285                             (double)_cleanup_times.num()
3286                            : 0.0));
3287   }
3288   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3289                          (_init_times.sum() + _remark_times.sum() +
3290                           _cleanup_times.sum())/1000.0);
3291   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3292                 "(%8.2f s marking, %8.2f s counting).",
3293                 cmThread()->vtime_accum(),
3294                 cmThread()->vtime_mark_accum(),
3295                 cmThread()->vtime_count_accum());
3296 }
3297 
3298 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3299   _parallel_workers->print_worker_threads_on(st);
3300 }
3301 
3302 // Closures
3303 // XXX: there seems to be a lot of code  duplication here;
3304 // should refactor and consolidate the shared code.
3305 
3306 // This closure is used to mark refs into the CMS generation in
3307 // the CMS bit map. Called at the first checkpoint.
3308 
3309 // We take a break if someone is trying to stop the world.
3310 bool ConcurrentMark::do_yield_check(int worker_i) {
3311   if (should_yield()) {
3312     if (worker_i == 0) {
3313       _g1h->g1_policy()->record_concurrent_pause();
3314     }
3315     cmThread()->yield();
3316     if (worker_i == 0) {
3317       _g1h->g1_policy()->record_concurrent_pause_end();
3318     }
3319     return true;
3320   } else {
3321     return false;
3322   }
3323 }
3324 
3325 bool ConcurrentMark::should_yield() {
3326   return cmThread()->should_yield();
3327 }
3328 
3329 bool ConcurrentMark::containing_card_is_marked(void* p) {
3330   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3331   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3332 }
3333 
3334 bool ConcurrentMark::containing_cards_are_marked(void* start,
3335                                                  void* last) {
3336   return containing_card_is_marked(start) &&
3337          containing_card_is_marked(last);
3338 }
3339 
3340 #ifndef PRODUCT
3341 // for debugging purposes
3342 void ConcurrentMark::print_finger() {
3343   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3344                          _heap_start, _heap_end, _finger);
3345   for (int i = 0; i < (int) _max_task_num; ++i) {
3346     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
3347   }
3348   gclog_or_tty->print_cr("");
3349 }
3350 #endif
3351 
3352 void CMTask::scan_object(oop obj) {
3353   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3354 
3355   if (_cm->verbose_high()) {
3356     gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
3357                            _task_id, (void*) obj);
3358   }
3359 
3360   size_t obj_size = obj->size();
3361   _words_scanned += obj_size;
3362 
3363   obj->oop_iterate(_cm_oop_closure);
3364   statsOnly( ++_objs_scanned );
3365   check_limits();
3366 }
3367 
3368 // Closure for iteration over bitmaps
3369 class CMBitMapClosure : public BitMapClosure {
3370 private:
3371   // the bitmap that is being iterated over
3372   CMBitMap*                   _nextMarkBitMap;
3373   ConcurrentMark*             _cm;
3374   CMTask*                     _task;
3375   // true if we're scanning a heap region claimed by the task (so that
3376   // we move the finger along), false if we're not, i.e. currently when
3377   // scanning a heap region popped from the region stack (so that we
3378   // do not move the task finger along; it'd be a mistake if we did so).
3379   bool                        _scanning_heap_region;
3380 
3381 public:
3382   CMBitMapClosure(CMTask *task,
3383                   ConcurrentMark* cm,
3384                   CMBitMap* nextMarkBitMap)
3385     :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3386 
3387   void set_scanning_heap_region(bool scanning_heap_region) {
3388     _scanning_heap_region = scanning_heap_region;
3389   }
3390 
3391   bool do_bit(size_t offset) {
3392     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3393     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3394     assert( addr < _cm->finger(), "invariant");
3395 
3396     if (_scanning_heap_region) {
3397       statsOnly( _task->increase_objs_found_on_bitmap() );
3398       assert(addr >= _task->finger(), "invariant");
3399       // We move that task's local finger along.
3400       _task->move_finger_to(addr);
3401     } else {
3402       // We move the task's region finger along.
3403       _task->move_region_finger_to(addr);
3404     }
3405 
3406     _task->scan_object(oop(addr));
3407     // we only partially drain the local queue and global stack
3408     _task->drain_local_queue(true);
3409     _task->drain_global_stack(true);
3410 
3411     // if the has_aborted flag has been raised, we need to bail out of
3412     // the iteration
3413     return !_task->has_aborted();
3414   }
3415 };
3416 
3417 // Closure for iterating over objects, currently only used for
3418 // processing SATB buffers.
3419 class CMObjectClosure : public ObjectClosure {
3420 private:
3421   CMTask* _task;
3422 
3423 public:
3424   void do_object(oop obj) {
3425     _task->deal_with_reference(obj);
3426   }
3427 
3428   CMObjectClosure(CMTask* task) : _task(task) { }
3429 };
3430 
3431 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3432                                ConcurrentMark* cm,
3433                                CMTask* task)
3434   : _g1h(g1h), _cm(cm), _task(task) {
3435   assert(_ref_processor == NULL, "should be initialized to NULL");
3436 
3437   if (G1UseConcMarkReferenceProcessing) {
3438     _ref_processor = g1h->ref_processor_cm();
3439     assert(_ref_processor != NULL, "should not be NULL");
3440   }
3441 }
3442 
3443 void CMTask::setup_for_region(HeapRegion* hr) {
3444   // Separated the asserts so that we know which one fires.
3445   assert(hr != NULL,
3446         "claim_region() should have filtered out continues humongous regions");
3447   assert(!hr->continuesHumongous(),
3448         "claim_region() should have filtered out continues humongous regions");
3449 
3450   if (_cm->verbose_low()) {
3451     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3452                            _task_id, hr);
3453   }
3454 
3455   _curr_region  = hr;
3456   _finger       = hr->bottom();
3457   update_region_limit();
3458 }
3459 
3460 void CMTask::update_region_limit() {
3461   HeapRegion* hr            = _curr_region;
3462   HeapWord* bottom          = hr->bottom();
3463   HeapWord* limit           = hr->next_top_at_mark_start();
3464 
3465   if (limit == bottom) {
3466     if (_cm->verbose_low()) {
3467       gclog_or_tty->print_cr("[%d] found an empty region "
3468                              "["PTR_FORMAT", "PTR_FORMAT")",
3469                              _task_id, bottom, limit);
3470     }
3471     // The region was collected underneath our feet.
3472     // We set the finger to bottom to ensure that the bitmap
3473     // iteration that will follow this will not do anything.
3474     // (this is not a condition that holds when we set the region up,
3475     // as the region is not supposed to be empty in the first place)
3476     _finger = bottom;
3477   } else if (limit >= _region_limit) {
3478     assert(limit >= _finger, "peace of mind");
3479   } else {
3480     assert(limit < _region_limit, "only way to get here");
3481     // This can happen under some pretty unusual circumstances.  An
3482     // evacuation pause empties the region underneath our feet (NTAMS
3483     // at bottom). We then do some allocation in the region (NTAMS
3484     // stays at bottom), followed by the region being used as a GC
3485     // alloc region (NTAMS will move to top() and the objects
3486     // originally below it will be grayed). All objects now marked in
3487     // the region are explicitly grayed, if below the global finger,
3488     // and we do not need in fact to scan anything else. So, we simply
3489     // set _finger to be limit to ensure that the bitmap iteration
3490     // doesn't do anything.
3491     _finger = limit;
3492   }
3493 
3494   _region_limit = limit;
3495 }
3496 
3497 void CMTask::giveup_current_region() {
3498   assert(_curr_region != NULL, "invariant");
3499   if (_cm->verbose_low()) {
3500     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3501                            _task_id, _curr_region);
3502   }
3503   clear_region_fields();
3504 }
3505 
3506 void CMTask::clear_region_fields() {
3507   // Values for these three fields that indicate that we're not
3508   // holding on to a region.
3509   _curr_region   = NULL;
3510   _finger        = NULL;
3511   _region_limit  = NULL;
3512 
3513   _region_finger = NULL;
3514 }
3515 
3516 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3517   if (cm_oop_closure == NULL) {
3518     assert(_cm_oop_closure != NULL, "invariant");
3519   } else {
3520     assert(_cm_oop_closure == NULL, "invariant");
3521   }
3522   _cm_oop_closure = cm_oop_closure;
3523 }
3524 
3525 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3526   guarantee(nextMarkBitMap != NULL, "invariant");
3527 
3528   if (_cm->verbose_low()) {
3529     gclog_or_tty->print_cr("[%d] resetting", _task_id);
3530   }
3531 
3532   _nextMarkBitMap                = nextMarkBitMap;
3533   clear_region_fields();
3534   assert(_aborted_region.is_empty(), "should have been cleared");
3535 
3536   _calls                         = 0;
3537   _elapsed_time_ms               = 0.0;
3538   _termination_time_ms           = 0.0;
3539   _termination_start_time_ms     = 0.0;
3540 
3541 #if _MARKING_STATS_
3542   _local_pushes                  = 0;
3543   _local_pops                    = 0;
3544   _local_max_size                = 0;
3545   _objs_scanned                  = 0;
3546   _global_pushes                 = 0;
3547   _global_pops                   = 0;
3548   _global_max_size               = 0;
3549   _global_transfers_to           = 0;
3550   _global_transfers_from         = 0;
3551   _region_stack_pops             = 0;
3552   _regions_claimed               = 0;
3553   _objs_found_on_bitmap          = 0;
3554   _satb_buffers_processed        = 0;
3555   _steal_attempts                = 0;
3556   _steals                        = 0;
3557   _aborted                       = 0;
3558   _aborted_overflow              = 0;
3559   _aborted_cm_aborted            = 0;
3560   _aborted_yield                 = 0;
3561   _aborted_timed_out             = 0;
3562   _aborted_satb                  = 0;
3563   _aborted_termination           = 0;
3564 #endif // _MARKING_STATS_
3565 }
3566 
3567 bool CMTask::should_exit_termination() {
3568   regular_clock_call();
3569   // This is called when we are in the termination protocol. We should
3570   // quit if, for some reason, this task wants to abort or the global
3571   // stack is not empty (this means that we can get work from it).
3572   return !_cm->mark_stack_empty() || has_aborted();
3573 }
3574 
3575 void CMTask::reached_limit() {
3576   assert(_words_scanned >= _words_scanned_limit ||
3577          _refs_reached >= _refs_reached_limit ,
3578          "shouldn't have been called otherwise");
3579   regular_clock_call();
3580 }
3581 
3582 void CMTask::regular_clock_call() {
3583   if (has_aborted()) return;
3584 
3585   // First, we need to recalculate the words scanned and refs reached
3586   // limits for the next clock call.
3587   recalculate_limits();
3588 
3589   // During the regular clock call we do the following
3590 
3591   // (1) If an overflow has been flagged, then we abort.
3592   if (_cm->has_overflown()) {
3593     set_has_aborted();
3594     return;
3595   }
3596 
3597   // If we are not concurrent (i.e. we're doing remark) we don't need
3598   // to check anything else. The other steps are only needed during
3599   // the concurrent marking phase.
3600   if (!concurrent()) return;
3601 
3602   // (2) If marking has been aborted for Full GC, then we also abort.
3603   if (_cm->has_aborted()) {
3604     set_has_aborted();
3605     statsOnly( ++_aborted_cm_aborted );
3606     return;
3607   }
3608 
3609   double curr_time_ms = os::elapsedVTime() * 1000.0;
3610 
3611   // (3) If marking stats are enabled, then we update the step history.
3612 #if _MARKING_STATS_
3613   if (_words_scanned >= _words_scanned_limit) {
3614     ++_clock_due_to_scanning;
3615   }
3616   if (_refs_reached >= _refs_reached_limit) {
3617     ++_clock_due_to_marking;
3618   }
3619 
3620   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3621   _interval_start_time_ms = curr_time_ms;
3622   _all_clock_intervals_ms.add(last_interval_ms);
3623 
3624   if (_cm->verbose_medium()) {
3625       gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3626                         "scanned = %d%s, refs reached = %d%s",
3627                         _task_id, last_interval_ms,
3628                         _words_scanned,
3629                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3630                         _refs_reached,
3631                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3632   }
3633 #endif // _MARKING_STATS_
3634 
3635   // (4) We check whether we should yield. If we have to, then we abort.
3636   if (_cm->should_yield()) {
3637     // We should yield. To do this we abort the task. The caller is
3638     // responsible for yielding.
3639     set_has_aborted();
3640     statsOnly( ++_aborted_yield );
3641     return;
3642   }
3643 
3644   // (5) We check whether we've reached our time quota. If we have,
3645   // then we abort.
3646   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3647   if (elapsed_time_ms > _time_target_ms) {
3648     set_has_aborted();
3649     _has_timed_out = true;
3650     statsOnly( ++_aborted_timed_out );
3651     return;
3652   }
3653 
3654   // (6) Finally, we check whether there are enough completed STAB
3655   // buffers available for processing. If there are, we abort.
3656   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3657   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3658     if (_cm->verbose_low()) {
3659       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3660                              _task_id);
3661     }
3662     // we do need to process SATB buffers, we'll abort and restart
3663     // the marking task to do so
3664     set_has_aborted();
3665     statsOnly( ++_aborted_satb );
3666     return;
3667   }
3668 }
3669 
3670 void CMTask::recalculate_limits() {
3671   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3672   _words_scanned_limit      = _real_words_scanned_limit;
3673 
3674   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3675   _refs_reached_limit       = _real_refs_reached_limit;
3676 }
3677 
3678 void CMTask::decrease_limits() {
3679   // This is called when we believe that we're going to do an infrequent
3680   // operation which will increase the per byte scanned cost (i.e. move
3681   // entries to/from the global stack). It basically tries to decrease the
3682   // scanning limit so that the clock is called earlier.
3683 
3684   if (_cm->verbose_medium()) {
3685     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3686   }
3687 
3688   _words_scanned_limit = _real_words_scanned_limit -
3689     3 * words_scanned_period / 4;
3690   _refs_reached_limit  = _real_refs_reached_limit -
3691     3 * refs_reached_period / 4;
3692 }
3693 
3694 void CMTask::move_entries_to_global_stack() {
3695   // local array where we'll store the entries that will be popped
3696   // from the local queue
3697   oop buffer[global_stack_transfer_size];
3698 
3699   int n = 0;
3700   oop obj;
3701   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3702     buffer[n] = obj;
3703     ++n;
3704   }
3705 
3706   if (n > 0) {
3707     // we popped at least one entry from the local queue
3708 
3709     statsOnly( ++_global_transfers_to; _local_pops += n );
3710 
3711     if (!_cm->mark_stack_push(buffer, n)) {
3712       if (_cm->verbose_low()) {
3713         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
3714                                _task_id);
3715       }
3716       set_has_aborted();
3717     } else {
3718       // the transfer was successful
3719 
3720       if (_cm->verbose_medium()) {
3721         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3722                                _task_id, n);
3723       }
3724       statsOnly( int tmp_size = _cm->mark_stack_size();
3725                  if (tmp_size > _global_max_size) {
3726                    _global_max_size = tmp_size;
3727                  }
3728                  _global_pushes += n );
3729     }
3730   }
3731 
3732   // this operation was quite expensive, so decrease the limits
3733   decrease_limits();
3734 }
3735 
3736 void CMTask::get_entries_from_global_stack() {
3737   // local array where we'll store the entries that will be popped
3738   // from the global stack.
3739   oop buffer[global_stack_transfer_size];
3740   int n;
3741   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3742   assert(n <= global_stack_transfer_size,
3743          "we should not pop more than the given limit");
3744   if (n > 0) {
3745     // yes, we did actually pop at least one entry
3746 
3747     statsOnly( ++_global_transfers_from; _global_pops += n );
3748     if (_cm->verbose_medium()) {
3749       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3750                              _task_id, n);
3751     }
3752     for (int i = 0; i < n; ++i) {
3753       bool success = _task_queue->push(buffer[i]);
3754       // We only call this when the local queue is empty or under a
3755       // given target limit. So, we do not expect this push to fail.
3756       assert(success, "invariant");
3757     }
3758 
3759     statsOnly( int tmp_size = _task_queue->size();
3760                if (tmp_size > _local_max_size) {
3761                  _local_max_size = tmp_size;
3762                }
3763                _local_pushes += n );
3764   }
3765 
3766   // this operation was quite expensive, so decrease the limits
3767   decrease_limits();
3768 }
3769 
3770 void CMTask::drain_local_queue(bool partially) {
3771   if (has_aborted()) return;
3772 
3773   // Decide what the target size is, depending whether we're going to
3774   // drain it partially (so that other tasks can steal if they run out
3775   // of things to do) or totally (at the very end).
3776   size_t target_size;
3777   if (partially) {
3778     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3779   } else {
3780     target_size = 0;
3781   }
3782 
3783   if (_task_queue->size() > target_size) {
3784     if (_cm->verbose_high()) {
3785       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3786                              _task_id, target_size);
3787     }
3788 
3789     oop obj;
3790     bool ret = _task_queue->pop_local(obj);
3791     while (ret) {
3792       statsOnly( ++_local_pops );
3793 
3794       if (_cm->verbose_high()) {
3795         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3796                                (void*) obj);
3797       }
3798 
3799       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3800       assert(!_g1h->is_on_master_free_list(
3801                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3802 
3803       scan_object(obj);
3804 
3805       if (_task_queue->size() <= target_size || has_aborted()) {
3806         ret = false;
3807       } else {
3808         ret = _task_queue->pop_local(obj);
3809       }
3810     }
3811 
3812     if (_cm->verbose_high()) {
3813       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3814                              _task_id, _task_queue->size());
3815     }
3816   }
3817 }
3818 
3819 void CMTask::drain_global_stack(bool partially) {
3820   if (has_aborted()) return;
3821 
3822   // We have a policy to drain the local queue before we attempt to
3823   // drain the global stack.
3824   assert(partially || _task_queue->size() == 0, "invariant");
3825 
3826   // Decide what the target size is, depending whether we're going to
3827   // drain it partially (so that other tasks can steal if they run out
3828   // of things to do) or totally (at the very end).  Notice that,
3829   // because we move entries from the global stack in chunks or
3830   // because another task might be doing the same, we might in fact
3831   // drop below the target. But, this is not a problem.
3832   size_t target_size;
3833   if (partially) {
3834     target_size = _cm->partial_mark_stack_size_target();
3835   } else {
3836     target_size = 0;
3837   }
3838 
3839   if (_cm->mark_stack_size() > target_size) {
3840     if (_cm->verbose_low()) {
3841       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3842                              _task_id, target_size);
3843     }
3844 
3845     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3846       get_entries_from_global_stack();
3847       drain_local_queue(partially);
3848     }
3849 
3850     if (_cm->verbose_low()) {
3851       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3852                              _task_id, _cm->mark_stack_size());
3853     }
3854   }
3855 }
3856 
3857 // SATB Queue has several assumptions on whether to call the par or
3858 // non-par versions of the methods. this is why some of the code is
3859 // replicated. We should really get rid of the single-threaded version
3860 // of the code to simplify things.
3861 void CMTask::drain_satb_buffers() {
3862   if (has_aborted()) return;
3863 
3864   // We set this so that the regular clock knows that we're in the
3865   // middle of draining buffers and doesn't set the abort flag when it
3866   // notices that SATB buffers are available for draining. It'd be
3867   // very counter productive if it did that. :-)
3868   _draining_satb_buffers = true;
3869 
3870   CMObjectClosure oc(this);
3871   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3872   if (G1CollectedHeap::use_parallel_gc_threads()) {
3873     satb_mq_set.set_par_closure(_task_id, &oc);
3874   } else {
3875     satb_mq_set.set_closure(&oc);
3876   }
3877 
3878   // This keeps claiming and applying the closure to completed buffers
3879   // until we run out of buffers or we need to abort.
3880   if (G1CollectedHeap::use_parallel_gc_threads()) {
3881     while (!has_aborted() &&
3882            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3883       if (_cm->verbose_medium()) {
3884         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3885       }
3886       statsOnly( ++_satb_buffers_processed );
3887       regular_clock_call();
3888     }
3889   } else {
3890     while (!has_aborted() &&
3891            satb_mq_set.apply_closure_to_completed_buffer()) {
3892       if (_cm->verbose_medium()) {
3893         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3894       }
3895       statsOnly( ++_satb_buffers_processed );
3896       regular_clock_call();
3897     }
3898   }
3899 
3900   if (!concurrent() && !has_aborted()) {
3901     // We should only do this during remark.
3902     if (G1CollectedHeap::use_parallel_gc_threads()) {
3903       satb_mq_set.par_iterate_closure_all_threads(_task_id);
3904     } else {
3905       satb_mq_set.iterate_closure_all_threads();
3906     }
3907   }
3908 
3909   _draining_satb_buffers = false;
3910 
3911   assert(has_aborted() ||
3912          concurrent() ||
3913          satb_mq_set.completed_buffers_num() == 0, "invariant");
3914 
3915   if (G1CollectedHeap::use_parallel_gc_threads()) {
3916     satb_mq_set.set_par_closure(_task_id, NULL);
3917   } else {
3918     satb_mq_set.set_closure(NULL);
3919   }
3920 
3921   // again, this was a potentially expensive operation, decrease the
3922   // limits to get the regular clock call early
3923   decrease_limits();
3924 }
3925 
3926 void CMTask::drain_region_stack(BitMapClosure* bc) {
3927   if (has_aborted()) return;
3928 
3929   assert(_region_finger == NULL,
3930          "it should be NULL when we're not scanning a region");
3931 
3932   if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
3933     if (_cm->verbose_low()) {
3934       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3935                              _task_id, _cm->region_stack_size());
3936     }
3937 
3938     MemRegion mr;
3939 
3940     if (!_aborted_region.is_empty()) {
3941       mr = _aborted_region;
3942       _aborted_region = MemRegion();
3943 
3944       if (_cm->verbose_low()) {
3945         gclog_or_tty->print_cr("[%d] scanning aborted region "
3946                                "[ " PTR_FORMAT ", " PTR_FORMAT " )",
3947                                _task_id, mr.start(), mr.end());
3948       }
3949     } else {
3950       mr = _cm->region_stack_pop_lock_free();
3951       // it returns MemRegion() if the pop fails
3952       statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3953     }
3954 
3955     while (mr.start() != NULL) {
3956       if (_cm->verbose_medium()) {
3957         gclog_or_tty->print_cr("[%d] we are scanning region "
3958                                "["PTR_FORMAT", "PTR_FORMAT")",
3959                                _task_id, mr.start(), mr.end());
3960       }
3961 
3962       assert(mr.end() <= _cm->finger(),
3963              "otherwise the region shouldn't be on the stack");
3964       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3965       if (_nextMarkBitMap->iterate(bc, mr)) {
3966         assert(!has_aborted(),
3967                "cannot abort the task without aborting the bitmap iteration");
3968 
3969         // We finished iterating over the region without aborting.
3970         regular_clock_call();
3971         if (has_aborted()) {
3972           mr = MemRegion();
3973         } else {
3974           mr = _cm->region_stack_pop_lock_free();
3975           // it returns MemRegion() if the pop fails
3976           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3977         }
3978       } else {
3979         assert(has_aborted(), "currently the only way to do so");
3980 
3981         // The only way to abort the bitmap iteration is to return
3982         // false from the do_bit() method. However, inside the
3983         // do_bit() method we move the _region_finger to point to the
3984         // object currently being looked at. So, if we bail out, we
3985         // have definitely set _region_finger to something non-null.
3986         assert(_region_finger != NULL, "invariant");
3987 
3988         // Make sure that any previously aborted region has been
3989         // cleared.
3990         assert(_aborted_region.is_empty(), "aborted region not cleared");
3991 
3992         // The iteration was actually aborted. So now _region_finger
3993         // points to the address of the object we last scanned. If we
3994         // leave it there, when we restart this task, we will rescan
3995         // the object. It is easy to avoid this. We move the finger by
3996         // enough to point to the next possible object header (the
3997         // bitmap knows by how much we need to move it as it knows its
3998         // granularity).
3999         MemRegion newRegion =
4000           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
4001 
4002         if (!newRegion.is_empty()) {
4003           if (_cm->verbose_low()) {
4004             gclog_or_tty->print_cr("[%d] recording unscanned region"
4005                                    "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
4006                                    _task_id,
4007                                    newRegion.start(), newRegion.end());
4008           }
4009           // Now record the part of the region we didn't scan to
4010           // make sure this task scans it later.
4011           _aborted_region = newRegion;
4012         }
4013         // break from while
4014         mr = MemRegion();
4015       }
4016       _region_finger = NULL;
4017     }
4018 
4019     if (_cm->verbose_low()) {
4020       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
4021                              _task_id, _cm->region_stack_size());
4022     }
4023   }
4024 }
4025 
4026 void CMTask::print_stats() {
4027   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
4028                          _task_id, _calls);
4029   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
4030                          _elapsed_time_ms, _termination_time_ms);
4031   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4032                          _step_times_ms.num(), _step_times_ms.avg(),
4033                          _step_times_ms.sd());
4034   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
4035                          _step_times_ms.maximum(), _step_times_ms.sum());
4036 
4037 #if _MARKING_STATS_
4038   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4039                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
4040                          _all_clock_intervals_ms.sd());
4041   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
4042                          _all_clock_intervals_ms.maximum(),
4043                          _all_clock_intervals_ms.sum());
4044   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
4045                          _clock_due_to_scanning, _clock_due_to_marking);
4046   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
4047                          _objs_scanned, _objs_found_on_bitmap);
4048   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
4049                          _local_pushes, _local_pops, _local_max_size);
4050   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
4051                          _global_pushes, _global_pops, _global_max_size);
4052   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
4053                          _global_transfers_to,_global_transfers_from);
4054   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
4055                          _regions_claimed, _region_stack_pops);
4056   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
4057   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
4058                          _steal_attempts, _steals);
4059   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
4060   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
4061                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
4062   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
4063                          _aborted_timed_out, _aborted_satb, _aborted_termination);
4064 #endif // _MARKING_STATS_
4065 }
4066 
4067 /*****************************************************************************
4068 
4069     The do_marking_step(time_target_ms) method is the building block
4070     of the parallel marking framework. It can be called in parallel
4071     with other invocations of do_marking_step() on different tasks
4072     (but only one per task, obviously) and concurrently with the
4073     mutator threads, or during remark, hence it eliminates the need
4074     for two versions of the code. When called during remark, it will
4075     pick up from where the task left off during the concurrent marking
4076     phase. Interestingly, tasks are also claimable during evacuation
4077     pauses too, since do_marking_step() ensures that it aborts before
4078     it needs to yield.
4079 
4080     The data structures that is uses to do marking work are the
4081     following:
4082 
4083       (1) Marking Bitmap. If there are gray objects that appear only
4084       on the bitmap (this happens either when dealing with an overflow
4085       or when the initial marking phase has simply marked the roots
4086       and didn't push them on the stack), then tasks claim heap
4087       regions whose bitmap they then scan to find gray objects. A
4088       global finger indicates where the end of the last claimed region
4089       is. A local finger indicates how far into the region a task has
4090       scanned. The two fingers are used to determine how to gray an
4091       object (i.e. whether simply marking it is OK, as it will be
4092       visited by a task in the future, or whether it needs to be also
4093       pushed on a stack).
4094 
4095       (2) Local Queue. The local queue of the task which is accessed
4096       reasonably efficiently by the task. Other tasks can steal from
4097       it when they run out of work. Throughout the marking phase, a
4098       task attempts to keep its local queue short but not totally
4099       empty, so that entries are available for stealing by other
4100       tasks. Only when there is no more work, a task will totally
4101       drain its local queue.
4102 
4103       (3) Global Mark Stack. This handles local queue overflow. During
4104       marking only sets of entries are moved between it and the local
4105       queues, as access to it requires a mutex and more fine-grain
4106       interaction with it which might cause contention. If it
4107       overflows, then the marking phase should restart and iterate
4108       over the bitmap to identify gray objects. Throughout the marking
4109       phase, tasks attempt to keep the global mark stack at a small
4110       length but not totally empty, so that entries are available for
4111       popping by other tasks. Only when there is no more work, tasks
4112       will totally drain the global mark stack.
4113 
4114       (4) Global Region Stack. Entries on it correspond to areas of
4115       the bitmap that need to be scanned since they contain gray
4116       objects. Pushes on the region stack only happen during
4117       evacuation pauses and typically correspond to areas covered by
4118       GC LABS. If it overflows, then the marking phase should restart
4119       and iterate over the bitmap to identify gray objects. Tasks will
4120       try to totally drain the region stack as soon as possible.
4121 
4122       (5) SATB Buffer Queue. This is where completed SATB buffers are
4123       made available. Buffers are regularly removed from this queue
4124       and scanned for roots, so that the queue doesn't get too
4125       long. During remark, all completed buffers are processed, as
4126       well as the filled in parts of any uncompleted buffers.
4127 
4128     The do_marking_step() method tries to abort when the time target
4129     has been reached. There are a few other cases when the
4130     do_marking_step() method also aborts:
4131 
4132       (1) When the marking phase has been aborted (after a Full GC).
4133 
4134       (2) When a global overflow (either on the global stack or the
4135       region stack) has been triggered. Before the task aborts, it
4136       will actually sync up with the other tasks to ensure that all
4137       the marking data structures (local queues, stacks, fingers etc.)
4138       are re-initialised so that when do_marking_step() completes,
4139       the marking phase can immediately restart.
4140 
4141       (3) When enough completed SATB buffers are available. The
4142       do_marking_step() method only tries to drain SATB buffers right
4143       at the beginning. So, if enough buffers are available, the
4144       marking step aborts and the SATB buffers are processed at
4145       the beginning of the next invocation.
4146 
4147       (4) To yield. when we have to yield then we abort and yield
4148       right at the end of do_marking_step(). This saves us from a lot
4149       of hassle as, by yielding we might allow a Full GC. If this
4150       happens then objects will be compacted underneath our feet, the
4151       heap might shrink, etc. We save checking for this by just
4152       aborting and doing the yield right at the end.
4153 
4154     From the above it follows that the do_marking_step() method should
4155     be called in a loop (or, otherwise, regularly) until it completes.
4156 
4157     If a marking step completes without its has_aborted() flag being
4158     true, it means it has completed the current marking phase (and
4159     also all other marking tasks have done so and have all synced up).
4160 
4161     A method called regular_clock_call() is invoked "regularly" (in
4162     sub ms intervals) throughout marking. It is this clock method that
4163     checks all the abort conditions which were mentioned above and
4164     decides when the task should abort. A work-based scheme is used to
4165     trigger this clock method: when the number of object words the
4166     marking phase has scanned or the number of references the marking
4167     phase has visited reach a given limit. Additional invocations to
4168     the method clock have been planted in a few other strategic places
4169     too. The initial reason for the clock method was to avoid calling
4170     vtime too regularly, as it is quite expensive. So, once it was in
4171     place, it was natural to piggy-back all the other conditions on it
4172     too and not constantly check them throughout the code.
4173 
4174  *****************************************************************************/
4175 
4176 void CMTask::do_marking_step(double time_target_ms,
4177                              bool do_stealing,
4178                              bool do_termination) {
4179   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4180   assert(concurrent() == _cm->concurrent(), "they should be the same");
4181 
4182   assert(concurrent() || _cm->region_stack_empty(),
4183          "the region stack should have been cleared before remark");
4184   assert(concurrent() || !_cm->has_aborted_regions(),
4185          "aborted regions should have been cleared before remark");
4186   assert(_region_finger == NULL,
4187          "this should be non-null only when a region is being scanned");
4188 
4189   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4190   assert(_task_queues != NULL, "invariant");
4191   assert(_task_queue != NULL, "invariant");
4192   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
4193 
4194   assert(!_claimed,
4195          "only one thread should claim this task at any one time");
4196 
4197   // OK, this doesn't safeguard again all possible scenarios, as it is
4198   // possible for two threads to set the _claimed flag at the same
4199   // time. But it is only for debugging purposes anyway and it will
4200   // catch most problems.
4201   _claimed = true;
4202 
4203   _start_time_ms = os::elapsedVTime() * 1000.0;
4204   statsOnly( _interval_start_time_ms = _start_time_ms );
4205 
4206   double diff_prediction_ms =
4207     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4208   _time_target_ms = time_target_ms - diff_prediction_ms;
4209 
4210   // set up the variables that are used in the work-based scheme to
4211   // call the regular clock method
4212   _words_scanned = 0;
4213   _refs_reached  = 0;
4214   recalculate_limits();
4215 
4216   // clear all flags
4217   clear_has_aborted();
4218   _has_timed_out = false;
4219   _draining_satb_buffers = false;
4220 
4221   ++_calls;
4222 
4223   if (_cm->verbose_low()) {
4224     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
4225                            "target = %1.2lfms >>>>>>>>>>",
4226                            _task_id, _calls, _time_target_ms);
4227   }
4228 
4229   // Set up the bitmap and oop closures. Anything that uses them is
4230   // eventually called from this method, so it is OK to allocate these
4231   // statically.
4232   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4233   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
4234   set_cm_oop_closure(&cm_oop_closure);
4235 
4236   if (_cm->has_overflown()) {
4237     // This can happen if the region stack or the mark stack overflows
4238     // during a GC pause and this task, after a yield point,
4239     // restarts. We have to abort as we need to get into the overflow
4240     // protocol which happens right at the end of this task.
4241     set_has_aborted();
4242   }
4243 
4244   // First drain any available SATB buffers. After this, we will not
4245   // look at SATB buffers before the next invocation of this method.
4246   // If enough completed SATB buffers are queued up, the regular clock
4247   // will abort this task so that it restarts.
4248   drain_satb_buffers();
4249   // ...then partially drain the local queue and the global stack
4250   drain_local_queue(true);
4251   drain_global_stack(true);
4252 
4253   // Then totally drain the region stack.  We will not look at
4254   // it again before the next invocation of this method. Entries on
4255   // the region stack are only added during evacuation pauses, for
4256   // which we have to yield. When we do, we abort the task anyway so
4257   // it will look at the region stack again when it restarts.
4258   bitmap_closure.set_scanning_heap_region(false);
4259   drain_region_stack(&bitmap_closure);
4260   // ...then partially drain the local queue and the global stack
4261   drain_local_queue(true);
4262   drain_global_stack(true);
4263 
4264   do {
4265     if (!has_aborted() && _curr_region != NULL) {
4266       // This means that we're already holding on to a region.
4267       assert(_finger != NULL, "if region is not NULL, then the finger "
4268              "should not be NULL either");
4269 
4270       // We might have restarted this task after an evacuation pause
4271       // which might have evacuated the region we're holding on to
4272       // underneath our feet. Let's read its limit again to make sure
4273       // that we do not iterate over a region of the heap that
4274       // contains garbage (update_region_limit() will also move
4275       // _finger to the start of the region if it is found empty).
4276       update_region_limit();
4277       // We will start from _finger not from the start of the region,
4278       // as we might be restarting this task after aborting half-way
4279       // through scanning this region. In this case, _finger points to
4280       // the address where we last found a marked object. If this is a
4281       // fresh region, _finger points to start().
4282       MemRegion mr = MemRegion(_finger, _region_limit);
4283 
4284       if (_cm->verbose_low()) {
4285         gclog_or_tty->print_cr("[%d] we're scanning part "
4286                                "["PTR_FORMAT", "PTR_FORMAT") "
4287                                "of region "PTR_FORMAT,
4288                                _task_id, _finger, _region_limit, _curr_region);
4289       }
4290 
4291       // Let's iterate over the bitmap of the part of the
4292       // region that is left.
4293       bitmap_closure.set_scanning_heap_region(true);
4294       if (mr.is_empty() ||
4295           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4296         // We successfully completed iterating over the region. Now,
4297         // let's give up the region.
4298         giveup_current_region();
4299         regular_clock_call();
4300       } else {
4301         assert(has_aborted(), "currently the only way to do so");
4302         // The only way to abort the bitmap iteration is to return
4303         // false from the do_bit() method. However, inside the
4304         // do_bit() method we move the _finger to point to the
4305         // object currently being looked at. So, if we bail out, we
4306         // have definitely set _finger to something non-null.
4307         assert(_finger != NULL, "invariant");
4308 
4309         // Region iteration was actually aborted. So now _finger
4310         // points to the address of the object we last scanned. If we
4311         // leave it there, when we restart this task, we will rescan
4312         // the object. It is easy to avoid this. We move the finger by
4313         // enough to point to the next possible object header (the
4314         // bitmap knows by how much we need to move it as it knows its
4315         // granularity).
4316         assert(_finger < _region_limit, "invariant");
4317         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
4318         // Check if bitmap iteration was aborted while scanning the last object
4319         if (new_finger >= _region_limit) {
4320             giveup_current_region();
4321         } else {
4322             move_finger_to(new_finger);
4323         }
4324       }
4325     }
4326     // At this point we have either completed iterating over the
4327     // region we were holding on to, or we have aborted.
4328 
4329     // We then partially drain the local queue and the global stack.
4330     // (Do we really need this?)
4331     drain_local_queue(true);
4332     drain_global_stack(true);
4333 
4334     // Read the note on the claim_region() method on why it might
4335     // return NULL with potentially more regions available for
4336     // claiming and why we have to check out_of_regions() to determine
4337     // whether we're done or not.
4338     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4339       // We are going to try to claim a new region. We should have
4340       // given up on the previous one.
4341       // Separated the asserts so that we know which one fires.
4342       assert(_curr_region  == NULL, "invariant");
4343       assert(_finger       == NULL, "invariant");
4344       assert(_region_limit == NULL, "invariant");
4345       if (_cm->verbose_low()) {
4346         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
4347       }
4348       HeapRegion* claimed_region = _cm->claim_region(_task_id);
4349       if (claimed_region != NULL) {
4350         // Yes, we managed to claim one
4351         statsOnly( ++_regions_claimed );
4352 
4353         if (_cm->verbose_low()) {
4354           gclog_or_tty->print_cr("[%d] we successfully claimed "
4355                                  "region "PTR_FORMAT,
4356                                  _task_id, claimed_region);
4357         }
4358 
4359         setup_for_region(claimed_region);
4360         assert(_curr_region == claimed_region, "invariant");
4361       }
4362       // It is important to call the regular clock here. It might take
4363       // a while to claim a region if, for example, we hit a large
4364       // block of empty regions. So we need to call the regular clock
4365       // method once round the loop to make sure it's called
4366       // frequently enough.
4367       regular_clock_call();
4368     }
4369 
4370     if (!has_aborted() && _curr_region == NULL) {
4371       assert(_cm->out_of_regions(),
4372              "at this point we should be out of regions");
4373     }
4374   } while ( _curr_region != NULL && !has_aborted());
4375 
4376   if (!has_aborted()) {
4377     // We cannot check whether the global stack is empty, since other
4378     // tasks might be pushing objects to it concurrently. We also cannot
4379     // check if the region stack is empty because if a thread is aborting
4380     // it can push a partially done region back.
4381     assert(_cm->out_of_regions(),
4382            "at this point we should be out of regions");
4383 
4384     if (_cm->verbose_low()) {
4385       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4386     }
4387 
4388     // Try to reduce the number of available SATB buffers so that
4389     // remark has less work to do.
4390     drain_satb_buffers();
4391   }
4392 
4393   // Since we've done everything else, we can now totally drain the
4394   // local queue and global stack.
4395   drain_local_queue(false);
4396   drain_global_stack(false);
4397 
4398   // Attempt at work stealing from other task's queues.
4399   if (do_stealing && !has_aborted()) {
4400     // We have not aborted. This means that we have finished all that
4401     // we could. Let's try to do some stealing...
4402 
4403     // We cannot check whether the global stack is empty, since other
4404     // tasks might be pushing objects to it concurrently. We also cannot
4405     // check if the region stack is empty because if a thread is aborting
4406     // it can push a partially done region back.
4407     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4408            "only way to reach here");
4409 
4410     if (_cm->verbose_low()) {
4411       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4412     }
4413 
4414     while (!has_aborted()) {
4415       oop obj;
4416       statsOnly( ++_steal_attempts );
4417 
4418       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4419         if (_cm->verbose_medium()) {
4420           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4421                                  _task_id, (void*) obj);
4422         }
4423 
4424         statsOnly( ++_steals );
4425 
4426         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4427                "any stolen object should be marked");
4428         scan_object(obj);
4429 
4430         // And since we're towards the end, let's totally drain the
4431         // local queue and global stack.
4432         drain_local_queue(false);
4433         drain_global_stack(false);
4434       } else {
4435         break;
4436       }
4437     }
4438   }
4439 
4440   // If we are about to wrap up and go into termination, check if we
4441   // should raise the overflow flag.
4442   if (do_termination && !has_aborted()) {
4443     if (_cm->force_overflow()->should_force()) {
4444       _cm->set_has_overflown();
4445       regular_clock_call();
4446     }
4447   }
4448 
4449   // We still haven't aborted. Now, let's try to get into the
4450   // termination protocol.
4451   if (do_termination && !has_aborted()) {
4452     // We cannot check whether the global stack is empty, since other
4453     // tasks might be concurrently pushing objects on it. We also cannot
4454     // check if the region stack is empty because if a thread is aborting
4455     // it can push a partially done region back.
4456     // Separated the asserts so that we know which one fires.
4457     assert(_cm->out_of_regions(), "only way to reach here");
4458     assert(_task_queue->size() == 0, "only way to reach here");
4459 
4460     if (_cm->verbose_low()) {
4461       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4462     }
4463 
4464     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4465     // The CMTask class also extends the TerminatorTerminator class,
4466     // hence its should_exit_termination() method will also decide
4467     // whether to exit the termination protocol or not.
4468     bool finished = _cm->terminator()->offer_termination(this);
4469     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4470     _termination_time_ms +=
4471       termination_end_time_ms - _termination_start_time_ms;
4472 
4473     if (finished) {
4474       // We're all done.
4475 
4476       if (_task_id == 0) {
4477         // let's allow task 0 to do this
4478         if (concurrent()) {
4479           assert(_cm->concurrent_marking_in_progress(), "invariant");
4480           // we need to set this to false before the next
4481           // safepoint. This way we ensure that the marking phase
4482           // doesn't observe any more heap expansions.
4483           _cm->clear_concurrent_marking_in_progress();
4484         }
4485       }
4486 
4487       // We can now guarantee that the global stack is empty, since
4488       // all other tasks have finished. We separated the guarantees so
4489       // that, if a condition is false, we can immediately find out
4490       // which one.
4491       guarantee(_cm->out_of_regions(), "only way to reach here");
4492       guarantee(_aborted_region.is_empty(), "only way to reach here");
4493       guarantee(_cm->region_stack_empty(), "only way to reach here");
4494       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4495       guarantee(_task_queue->size() == 0, "only way to reach here");
4496       guarantee(!_cm->has_overflown(), "only way to reach here");
4497       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4498       guarantee(!_cm->region_stack_overflow(), "only way to reach here");
4499 
4500       if (_cm->verbose_low()) {
4501         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4502       }
4503     } else {
4504       // Apparently there's more work to do. Let's abort this task. It
4505       // will restart it and we can hopefully find more things to do.
4506 
4507       if (_cm->verbose_low()) {
4508         gclog_or_tty->print_cr("[%d] apparently there is more work to do",
4509                                _task_id);
4510       }
4511 
4512       set_has_aborted();
4513       statsOnly( ++_aborted_termination );
4514     }
4515   }
4516 
4517   // Mainly for debugging purposes to make sure that a pointer to the
4518   // closure which was statically allocated in this frame doesn't
4519   // escape it by accident.
4520   set_cm_oop_closure(NULL);
4521   double end_time_ms = os::elapsedVTime() * 1000.0;
4522   double elapsed_time_ms = end_time_ms - _start_time_ms;
4523   // Update the step history.
4524   _step_times_ms.add(elapsed_time_ms);
4525 
4526   if (has_aborted()) {
4527     // The task was aborted for some reason.
4528 
4529     statsOnly( ++_aborted );
4530 
4531     if (_has_timed_out) {
4532       double diff_ms = elapsed_time_ms - _time_target_ms;
4533       // Keep statistics of how well we did with respect to hitting
4534       // our target only if we actually timed out (if we aborted for
4535       // other reasons, then the results might get skewed).
4536       _marking_step_diffs_ms.add(diff_ms);
4537     }
4538 
4539     if (_cm->has_overflown()) {
4540       // This is the interesting one. We aborted because a global
4541       // overflow was raised. This means we have to restart the
4542       // marking phase and start iterating over regions. However, in
4543       // order to do this we have to make sure that all tasks stop
4544       // what they are doing and re-initialise in a safe manner. We
4545       // will achieve this with the use of two barrier sync points.
4546 
4547       if (_cm->verbose_low()) {
4548         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4549       }
4550 
4551       _cm->enter_first_sync_barrier(_task_id);
4552       // When we exit this sync barrier we know that all tasks have
4553       // stopped doing marking work. So, it's now safe to
4554       // re-initialise our data structures. At the end of this method,
4555       // task 0 will clear the global data structures.
4556 
4557       statsOnly( ++_aborted_overflow );
4558 
4559       // We clear the local state of this task...
4560       clear_region_fields();
4561 
4562       // ...and enter the second barrier.
4563       _cm->enter_second_sync_barrier(_task_id);
4564       // At this point everything has bee re-initialised and we're
4565       // ready to restart.
4566     }
4567 
4568     if (_cm->verbose_low()) {
4569       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4570                              "elapsed = %1.2lfms <<<<<<<<<<",
4571                              _task_id, _time_target_ms, elapsed_time_ms);
4572       if (_cm->has_aborted()) {
4573         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4574                                _task_id);
4575       }
4576     }
4577   } else {
4578     if (_cm->verbose_low()) {
4579       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4580                              "elapsed = %1.2lfms <<<<<<<<<<",
4581                              _task_id, _time_target_ms, elapsed_time_ms);
4582     }
4583   }
4584 
4585   _claimed = false;
4586 }
4587 
4588 CMTask::CMTask(int task_id,
4589                ConcurrentMark* cm,
4590                CMTaskQueue* task_queue,
4591                CMTaskQueueSet* task_queues)
4592   : _g1h(G1CollectedHeap::heap()),
4593     _task_id(task_id), _cm(cm),
4594     _claimed(false),
4595     _nextMarkBitMap(NULL), _hash_seed(17),
4596     _task_queue(task_queue),
4597     _task_queues(task_queues),
4598     _cm_oop_closure(NULL),
4599     _aborted_region(MemRegion()) {
4600   guarantee(task_queue != NULL, "invariant");
4601   guarantee(task_queues != NULL, "invariant");
4602 
4603   statsOnly( _clock_due_to_scanning = 0;
4604              _clock_due_to_marking  = 0 );
4605 
4606   _marking_step_diffs_ms.add(0.5);
4607 }
4608 
4609 // These are formatting macros that are used below to ensure
4610 // consistent formatting. The *_H_* versions are used to format the
4611 // header for a particular value and they should be kept consistent
4612 // with the corresponding macro. Also note that most of the macros add
4613 // the necessary white space (as a prefix) which makes them a bit
4614 // easier to compose.
4615 
4616 // All the output lines are prefixed with this string to be able to
4617 // identify them easily in a large log file.
4618 #define G1PPRL_LINE_PREFIX            "###"
4619 
4620 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
4621 #ifdef _LP64
4622 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4623 #else // _LP64
4624 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4625 #endif // _LP64
4626 
4627 // For per-region info
4628 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4629 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4630 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
4631 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4632 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4633 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4634 
4635 // For summary info
4636 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
4637 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
4638 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
4639 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
4640 
4641 G1PrintRegionLivenessInfoClosure::
4642 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4643   : _out(out),
4644     _total_used_bytes(0), _total_capacity_bytes(0),
4645     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4646     _hum_used_bytes(0), _hum_capacity_bytes(0),
4647     _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4648   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4649   MemRegion g1_committed = g1h->g1_committed();
4650   MemRegion g1_reserved = g1h->g1_reserved();
4651   double now = os::elapsedTime();
4652 
4653   // Print the header of the output.
4654   _out->cr();
4655   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4656   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4657                  G1PPRL_SUM_ADDR_FORMAT("committed")
4658                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4659                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4660                  g1_committed.start(), g1_committed.end(),
4661                  g1_reserved.start(), g1_reserved.end(),
4662                  HeapRegion::GrainBytes);
4663   _out->print_cr(G1PPRL_LINE_PREFIX);
4664   _out->print_cr(G1PPRL_LINE_PREFIX
4665                  G1PPRL_TYPE_H_FORMAT
4666                  G1PPRL_ADDR_BASE_H_FORMAT
4667                  G1PPRL_BYTE_H_FORMAT
4668                  G1PPRL_BYTE_H_FORMAT
4669                  G1PPRL_BYTE_H_FORMAT
4670                  G1PPRL_DOUBLE_H_FORMAT,
4671                  "type", "address-range",
4672                  "used", "prev-live", "next-live", "gc-eff");
4673   _out->print_cr(G1PPRL_LINE_PREFIX
4674                  G1PPRL_TYPE_H_FORMAT
4675                  G1PPRL_ADDR_BASE_H_FORMAT
4676                  G1PPRL_BYTE_H_FORMAT
4677                  G1PPRL_BYTE_H_FORMAT
4678                  G1PPRL_BYTE_H_FORMAT
4679                  G1PPRL_DOUBLE_H_FORMAT,
4680                  "", "",
4681                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4682 }
4683 
4684 // It takes as a parameter a reference to one of the _hum_* fields, it
4685 // deduces the corresponding value for a region in a humongous region
4686 // series (either the region size, or what's left if the _hum_* field
4687 // is < the region size), and updates the _hum_* field accordingly.
4688 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4689   size_t bytes = 0;
4690   // The > 0 check is to deal with the prev and next live bytes which
4691   // could be 0.
4692   if (*hum_bytes > 0) {
4693     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4694     *hum_bytes -= bytes;
4695   }
4696   return bytes;
4697 }
4698 
4699 // It deduces the values for a region in a humongous region series
4700 // from the _hum_* fields and updates those accordingly. It assumes
4701 // that that _hum_* fields have already been set up from the "starts
4702 // humongous" region and we visit the regions in address order.
4703 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4704                                                      size_t* capacity_bytes,
4705                                                      size_t* prev_live_bytes,
4706                                                      size_t* next_live_bytes) {
4707   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4708   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4709   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4710   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4711   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4712 }
4713 
4714 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4715   const char* type = "";
4716   HeapWord* bottom       = r->bottom();
4717   HeapWord* end          = r->end();
4718   size_t capacity_bytes  = r->capacity();
4719   size_t used_bytes      = r->used();
4720   size_t prev_live_bytes = r->live_bytes();
4721   size_t next_live_bytes = r->next_live_bytes();
4722   double gc_eff          = r->gc_efficiency();
4723   if (r->used() == 0) {
4724     type = "FREE";
4725   } else if (r->is_survivor()) {
4726     type = "SURV";
4727   } else if (r->is_young()) {
4728     type = "EDEN";
4729   } else if (r->startsHumongous()) {
4730     type = "HUMS";
4731 
4732     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4733            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4734            "they should have been zeroed after the last time we used them");
4735     // Set up the _hum_* fields.
4736     _hum_capacity_bytes  = capacity_bytes;
4737     _hum_used_bytes      = used_bytes;
4738     _hum_prev_live_bytes = prev_live_bytes;
4739     _hum_next_live_bytes = next_live_bytes;
4740     get_hum_bytes(&used_bytes, &capacity_bytes,
4741                   &prev_live_bytes, &next_live_bytes);
4742     end = bottom + HeapRegion::GrainWords;
4743   } else if (r->continuesHumongous()) {
4744     type = "HUMC";
4745     get_hum_bytes(&used_bytes, &capacity_bytes,
4746                   &prev_live_bytes, &next_live_bytes);
4747     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4748   } else {
4749     type = "OLD";
4750   }
4751 
4752   _total_used_bytes      += used_bytes;
4753   _total_capacity_bytes  += capacity_bytes;
4754   _total_prev_live_bytes += prev_live_bytes;
4755   _total_next_live_bytes += next_live_bytes;
4756 
4757   // Print a line for this particular region.
4758   _out->print_cr(G1PPRL_LINE_PREFIX
4759                  G1PPRL_TYPE_FORMAT
4760                  G1PPRL_ADDR_BASE_FORMAT
4761                  G1PPRL_BYTE_FORMAT
4762                  G1PPRL_BYTE_FORMAT
4763                  G1PPRL_BYTE_FORMAT
4764                  G1PPRL_DOUBLE_FORMAT,
4765                  type, bottom, end,
4766                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4767 
4768   return false;
4769 }
4770 
4771 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4772   // Print the footer of the output.
4773   _out->print_cr(G1PPRL_LINE_PREFIX);
4774   _out->print_cr(G1PPRL_LINE_PREFIX
4775                  " SUMMARY"
4776                  G1PPRL_SUM_MB_FORMAT("capacity")
4777                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4778                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4779                  G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4780                  bytes_to_mb(_total_capacity_bytes),
4781                  bytes_to_mb(_total_used_bytes),
4782                  perc(_total_used_bytes, _total_capacity_bytes),
4783                  bytes_to_mb(_total_prev_live_bytes),
4784                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4785                  bytes_to_mb(_total_next_live_bytes),
4786                  perc(_total_next_live_bytes, _total_capacity_bytes));
4787   _out->cr();
4788 }