1 /*
   2  * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/symbolTable.hpp"
  27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
  28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
  32 #include "gc_implementation/g1/g1Log.hpp"
  33 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
  34 #include "gc_implementation/g1/g1RemSet.hpp"
  35 #include "gc_implementation/g1/heapRegion.inline.hpp"
  36 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
  38 #include "gc_implementation/shared/vmGCOperations.hpp"
  39 #include "memory/genOopClosures.inline.hpp"
  40 #include "memory/referencePolicy.hpp"
  41 #include "memory/resourceArea.hpp"
  42 #include "oops/oop.inline.hpp"
  43 #include "runtime/handles.inline.hpp"
  44 #include "runtime/java.hpp"
  45 #include "services/memTracker.hpp"
  46 
  47 // Concurrent marking bit map wrapper
  48 
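// A note on sizing (illustrative, derived from the constructor below): the
// bitmap keeps one bit per (HeapWordSize << _shifter) bytes of heap, so the
// backing store needs roughly
//
//   (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1
//
// bytes; e.g. with _shifter == 0 that is one bit per heap word, i.e.
// _bmWordSize / BitsPerByte bytes plus one byte for rounding.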
  49 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  50   _bm((uintptr_t*)NULL,0),
  51   _shifter(shifter) {
  52   _bmStartWord = (HeapWord*)(rs.base());
  53   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  54   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
  55                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  56 
  57   MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
  58 
  59   guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
   60   // For now we'll just commit all of the bit map up front.
  61   // Later on we'll try to be more parsimonious with swap.
  62   guarantee(_virtual_space.initialize(brs, brs.size()),
   63             "couldn't reserve backing store for concurrent marking bit map");
  64   assert(_virtual_space.committed_size() == brs.size(),
  65          "didn't reserve backing store for all of concurrent marking bit map?");
  66   _bm.set_map((uintptr_t*)_virtual_space.low());
  67   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
  68          _bmWordSize, "inconsistency in bit map sizing");
  69   _bm.set_size(_bmWordSize >> _shifter);
  70 }
  71 
  72 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  73                                                HeapWord* limit) const {
  74   // First we must round addr *up* to a possible object boundary.
  75   addr = (HeapWord*)align_size_up((intptr_t)addr,
  76                                   HeapWordSize << _shifter);
  77   size_t addrOffset = heapWordToOffset(addr);
  78   if (limit == NULL) {
  79     limit = _bmStartWord + _bmWordSize;
  80   }
  81   size_t limitOffset = heapWordToOffset(limit);
  82   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  83   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  84   assert(nextAddr >= addr, "get_next_one postcondition");
  85   assert(nextAddr == limit || isMarked(nextAddr),
  86          "get_next_one postcondition");
  87   return nextAddr;
  88 }
  89 
  90 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  91                                                  HeapWord* limit) const {
  92   size_t addrOffset = heapWordToOffset(addr);
  93   if (limit == NULL) {
  94     limit = _bmStartWord + _bmWordSize;
  95   }
  96   size_t limitOffset = heapWordToOffset(limit);
  97   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  98   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  99   assert(nextAddr >= addr, "get_next_one postcondition");
 100   assert(nextAddr == limit || !isMarked(nextAddr),
 101          "get_next_one postcondition");
 102   return nextAddr;
 103 }
 104 
 105 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 106   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 107   return (int) (diff >> _shifter);
 108 }
 109 
 110 #ifndef PRODUCT
 111 bool CMBitMapRO::covers(ReservedSpace rs) const {
 112   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 113   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
 114          "size inconsistency");
 115   return _bmStartWord == (HeapWord*)(rs.base()) &&
 116          _bmWordSize  == rs.size()>>LogHeapWordSize;
 117 }
 118 #endif
 119 
 120 void CMBitMap::clearAll() {
 121   _bm.clear();
 122   return;
 123 }
 124 
 125 void CMBitMap::markRange(MemRegion mr) {
  126   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 127   assert(!mr.is_empty(), "unexpected empty region");
 128   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 129           ((HeapWord *) mr.end())),
 130          "markRange memory region end is not card aligned");
 131   // convert address range into offset range
 132   _bm.at_put_range(heapWordToOffset(mr.start()),
 133                    heapWordToOffset(mr.end()), true);
 134 }
 135 
 136 void CMBitMap::clearRange(MemRegion mr) {
  137   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 138   assert(!mr.is_empty(), "unexpected empty region");
 139   // convert address range into offset range
 140   _bm.at_put_range(heapWordToOffset(mr.start()),
 141                    heapWordToOffset(mr.end()), false);
 142 }
 143 
 144 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 145                                             HeapWord* end_addr) {
 146   HeapWord* start = getNextMarkedWordAddress(addr);
 147   start = MIN2(start, end_addr);
 148   HeapWord* end   = getNextUnmarkedWordAddress(start);
 149   end = MIN2(end, end_addr);
 150   assert(start <= end, "Consistency check");
 151   MemRegion mr(start, end);
 152   if (!mr.is_empty()) {
 153     clearRange(mr);
 154   }
 155   return mr;
 156 }
 157 
 158 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 159   _base(NULL), _cm(cm)
 160 #ifdef ASSERT
 161   , _drain_in_progress(false)
 162   , _drain_in_progress_yields(false)
 163 #endif
 164 {}
 165 
 166 void CMMarkStack::allocate(size_t size) {
 167   _base = NEW_C_HEAP_ARRAY(oop, size, mtGC);
 168   if (_base == NULL) {
 169     vm_exit_during_initialization("Failed to allocate CM region mark stack");
 170   }
 171   _index = 0;
 172   _capacity = (jint) size;
 173   _saved_index = -1;
 174   NOT_PRODUCT(_max_depth = 0);
 175 }
 176 
 177 CMMarkStack::~CMMarkStack() {
 178   if (_base != NULL) {
 179     FREE_C_HEAP_ARRAY(oop, _base, mtGC);
 180   }
 181 }
 182 
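// A sketch of the lock-free push protocol used below: a pusher claims a slot
// by CAS-ing _index forward and only then stores the entry, e.g.
//
//   jint index = _index;
//   if (Atomic::cmpxchg(index + 1, &_index, index) == index) {
//     _base[index] = ptr;   // slot claimed, safe to fill in
//   }
//
// If the CAS fails another thread pushed concurrently, so we simply retry
// with the freshly read index.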
 183 void CMMarkStack::par_push(oop ptr) {
 184   while (true) {
 185     if (isFull()) {
 186       _overflow = true;
 187       return;
 188     }
 189     // Otherwise...
 190     jint index = _index;
 191     jint next_index = index+1;
 192     jint res = Atomic::cmpxchg(next_index, &_index, index);
 193     if (res == index) {
 194       _base[index] = ptr;
 195       // Note that we don't maintain this atomically.  We could, but it
 196       // doesn't seem necessary.
 197       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 198       return;
 199     }
 200     // Otherwise, we need to try again.
 201   }
 202 }
 203 
 204 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 205   while (true) {
 206     if (isFull()) {
 207       _overflow = true;
 208       return;
 209     }
 210     // Otherwise...
 211     jint index = _index;
 212     jint next_index = index + n;
 213     if (next_index > _capacity) {
 214       _overflow = true;
 215       return;
 216     }
 217     jint res = Atomic::cmpxchg(next_index, &_index, index);
 218     if (res == index) {
 219       for (int i = 0; i < n; i++) {
 220         int ind = index + i;
 221         assert(ind < _capacity, "By overflow test above.");
 222         _base[ind] = ptr_arr[i];
 223       }
 224       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 225       return;
 226     }
 227     // Otherwise, we need to try again.
 228   }
 229 }
 230 
 231 
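// Unlike par_push() and par_adjoin_arr() above, the two operations below
// serialize on ParGCRareEvent_lock; as the lock's name suggests they are only
// expected on rare paths, so the coarse locking is not a scalability concern.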
 232 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 233   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 234   jint start = _index;
 235   jint next_index = start + n;
 236   if (next_index > _capacity) {
 237     _overflow = true;
 238     return;
 239   }
 240   // Otherwise.
 241   _index = next_index;
 242   for (int i = 0; i < n; i++) {
 243     int ind = start + i;
 244     assert(ind < _capacity, "By overflow test above.");
 245     _base[ind] = ptr_arr[i];
 246   }
 247 }
 248 
 249 
 250 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 251   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 252   jint index = _index;
 253   if (index == 0) {
 254     *n = 0;
 255     return false;
 256   } else {
 257     int k = MIN2(max, index);
 258     jint new_ind = index - k;
 259     for (int j = 0; j < k; j++) {
 260       ptr_arr[j] = _base[new_ind + j];
 261     }
 262     _index = new_ind;
 263     *n = k;
 264     return true;
 265   }
 266 }
 267 
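// drain() pops entries until the stack is empty (or until it decides to
// yield) and applies the given closure to each one; every popped object is
// expected to be grey, i.e. marked on the supplied bitmap but not yet fully
// scanned. When yield_after is true the loop offers to yield after each
// object and returns false if it actually gave up before emptying the stack.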
 268 template<class OopClosureClass>
 269 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 270   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 271          || SafepointSynchronize::is_at_safepoint(),
 272          "Drain recursion must be yield-safe.");
 273   bool res = true;
 274   debug_only(_drain_in_progress = true);
 275   debug_only(_drain_in_progress_yields = yield_after);
 276   while (!isEmpty()) {
 277     oop newOop = pop();
 278     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 279     assert(newOop->is_oop(), "Expected an oop");
 280     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 281            "only grey objects on this stack");
 282     newOop->oop_iterate(cl);
 283     if (yield_after && _cm->do_yield_check()) {
 284       res = false;
 285       break;
 286     }
 287   }
 288   debug_only(_drain_in_progress = false);
 289   return res;
 290 }
 291 
 292 void CMMarkStack::note_start_of_gc() {
 293   assert(_saved_index == -1,
 294          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 295   _saved_index = _index;
 296 }
 297 
 298 void CMMarkStack::note_end_of_gc() {
 299   // This is intentionally a guarantee, instead of an assert. If we
 300   // accidentally add something to the mark stack during GC, it
  301   // will be a correctness issue so it's better if we crash. We'll
 302   // only check this once per GC anyway, so it won't be a performance
 303   // issue in any way.
 304   guarantee(_saved_index == _index,
 305             err_msg("saved index: %d index: %d", _saved_index, _index));
 306   _saved_index = -1;
 307 }
 308 
 309 void CMMarkStack::oops_do(OopClosure* f) {
 310   assert(_saved_index == _index,
 311          err_msg("saved index: %d index: %d", _saved_index, _index));
 312   for (int i = 0; i < _index; i += 1) {
 313     f->do_oop(&_base[i]);
 314   }
 315 }
 316 
 317 bool ConcurrentMark::not_yet_marked(oop obj) const {
 318   return _g1h->is_obj_ill(obj);
 319 }
 320 
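// CMRootRegions tracks the regions that must be scanned at the start of a
// marking cycle, before the rest of concurrent marking can safely proceed.
// Currently only the survivor regions of the initial-mark pause act as root
// regions; scanning them marks the objects they reference so that those
// objects are not missed by the remainder of the concurrent mark.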
 321 CMRootRegions::CMRootRegions() :
 322   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 323   _should_abort(false),  _next_survivor(NULL) { }
 324 
 325 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 326   _young_list = g1h->young_list();
 327   _cm = cm;
 328 }
 329 
 330 void CMRootRegions::prepare_for_scan() {
 331   assert(!scan_in_progress(), "pre-condition");
 332 
 333   // Currently, only survivors can be root regions.
 334   assert(_next_survivor == NULL, "pre-condition");
 335   _next_survivor = _young_list->first_survivor_region();
 336   _scan_in_progress = (_next_survivor != NULL);
 337   _should_abort = false;
 338 }
 339 
 340 HeapRegion* CMRootRegions::claim_next() {
 341   if (_should_abort) {
 342     // If someone has set the should_abort flag, we return NULL to
 343     // force the caller to bail out of their loop.
 344     return NULL;
 345   }
 346 
 347   // Currently, only survivors can be root regions.
 348   HeapRegion* res = _next_survivor;
 349   if (res != NULL) {
 350     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 351     // Read it again in case it changed while we were waiting for the lock.
 352     res = _next_survivor;
 353     if (res != NULL) {
 354       if (res == _young_list->last_survivor_region()) {
 355         // We just claimed the last survivor so store NULL to indicate
 356         // that we're done.
 357         _next_survivor = NULL;
 358       } else {
 359         _next_survivor = res->get_next_young_region();
 360       }
 361     } else {
 362       // Someone else claimed the last survivor while we were trying
 363       // to take the lock so nothing else to do.
 364     }
 365   }
 366   assert(res == NULL || res->is_survivor(), "post-condition");
 367 
 368   return res;
 369 }
 370 
 371 void CMRootRegions::scan_finished() {
 372   assert(scan_in_progress(), "pre-condition");
 373 
 374   // Currently, only survivors can be root regions.
 375   if (!_should_abort) {
 376     assert(_next_survivor == NULL, "we should have claimed all survivors");
 377   }
 378   _next_survivor = NULL;
 379 
 380   {
 381     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 382     _scan_in_progress = false;
 383     RootRegionScan_lock->notify_all();
 384   }
 385 }
 386 
 387 bool CMRootRegions::wait_until_scan_finished() {
 388   if (!scan_in_progress()) return false;
 389 
 390   {
 391     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 392     while (scan_in_progress()) {
 393       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 394     }
 395   }
 396   return true;
 397 }
 398 
 399 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 400 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 401 #endif // _MSC_VER
 402 
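// scale_parallel_threads() maps the number of parallel GC threads to a
// default number of concurrent marking threads. For example (illustrative):
// with 8 parallel GC threads it yields (8 + 2) / 4 = 2, and the MAX2
// guarantees at least one marking thread.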
 403 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 404   return MAX2((n_par_threads + 2) / 4, 1U);
 405 }
 406 
 407 ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
 408   _markBitMap1(rs, MinObjAlignment - 1),
 409   _markBitMap2(rs, MinObjAlignment - 1),
 410 
 411   _parallel_marking_threads(0),
 412   _max_parallel_marking_threads(0),
 413   _sleep_factor(0.0),
 414   _marking_task_overhead(1.0),
 415   _cleanup_sleep_factor(0.0),
 416   _cleanup_task_overhead(1.0),
 417   _cleanup_list("Cleanup List"),
 418   _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
 419   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
 420            CardTableModRefBS::card_shift,
 421            false /* in_resource_area*/),
 422 
 423   _prevMarkBitMap(&_markBitMap1),
 424   _nextMarkBitMap(&_markBitMap2),
 425 
 426   _markStack(this),
 427   // _finger set in set_non_marking_state
 428 
 429   _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
 430   // _active_tasks set in set_non_marking_state
 431   // _tasks set inside the constructor
 432   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 433   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 434 
 435   _has_overflown(false),
 436   _concurrent(false),
 437   _has_aborted(false),
 438   _restart_for_overflow(false),
 439   _concurrent_marking_in_progress(false),
 440 
 441   // _verbose_level set below
 442 
 443   _init_times(),
 444   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 445   _cleanup_times(),
 446   _total_counting_time(0.0),
 447   _total_rs_scrub_time(0.0),
 448 
 449   _parallel_workers(NULL),
 450 
 451   _count_card_bitmaps(NULL),
 452   _count_marked_bytes(NULL) {
 453   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 454   if (verbose_level < no_verbose) {
 455     verbose_level = no_verbose;
 456   }
 457   if (verbose_level > high_verbose) {
 458     verbose_level = high_verbose;
 459   }
 460   _verbose_level = verbose_level;
 461 
 462   if (verbose_low()) {
 463     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 464                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
 465   }
 466 
 467   _markStack.allocate(MarkStackSize);
 468 
 469   // Create & start a ConcurrentMark thread.
 470   _cmThread = new ConcurrentMarkThread(this);
 471   assert(cmThread() != NULL, "CM Thread should have been created");
 472   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 473 
 474   _g1h = G1CollectedHeap::heap();
 475   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 476   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
 477   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
 478 
 479   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 480   satb_qs.set_buffer_size(G1SATBBufferSize);
 481 
 482   _root_regions.init(_g1h, this);
 483 
 484   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 485   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 486 
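  // Per-worker liveness counting data: each worker gets its own card bitmap
  // and marked-bytes array, which are merged into the global counting data
  // during the remark pause (see aggregate_count_data()).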
 487   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 488   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 489 
 490   BitMap::idx_t card_bm_size = _card_bm.size();
 491 
 492   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 493   _active_tasks = _max_worker_id;
 494   for (uint i = 0; i < _max_worker_id; ++i) {
 495     CMTaskQueue* task_queue = new CMTaskQueue();
 496     task_queue->initialize();
 497     _task_queues->register_queue(i, task_queue);
 498 
 499     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 500     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC);
 501 
 502     _tasks[i] = new CMTask(i, this,
 503                            _count_marked_bytes[i],
 504                            &_count_card_bitmaps[i],
 505                            task_queue, _task_queues);
 506 
 507     _accum_task_vtime[i] = 0.0;
 508   }
 509 
 510   // Calculate the card number for the bottom of the heap. Used
 511   // in biasing indexes into the accounting card bitmaps.
 512   _heap_bottom_card_num =
 513     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 514                                 CardTableModRefBS::card_shift);
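  // (card_bitmap_index_for() is expected to subtract this value from an
  // address's card number, so that index 0 in each counting card bitmap
  // corresponds to the first card of the heap.)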
 515 
 516   // Clear all the liveness counting data
 517   clear_all_count_data();
 518 
 519   if (ConcGCThreads > ParallelGCThreads) {
 520     vm_exit_during_initialization("Can't have more ConcGCThreads "
 521                                   "than ParallelGCThreads.");
 522   }
 523   if (ParallelGCThreads == 0) {
 524     // if we are not running with any parallel GC threads we will not
 525     // spawn any marking threads either
 526     _parallel_marking_threads =       0;
 527     _max_parallel_marking_threads =   0;
 528     _sleep_factor             =     0.0;
 529     _marking_task_overhead    =     1.0;
 530   } else {
 531     if (ConcGCThreads > 0) {
  532       // notice that ConcGCThreads overrides G1MarkingOverheadPercent
 533       // if both are set
 534 
 535       _parallel_marking_threads = (uint) ConcGCThreads;
 536       _max_parallel_marking_threads = _parallel_marking_threads;
 537       _sleep_factor             = 0.0;
 538       _marking_task_overhead    = 1.0;
 539     } else if (G1MarkingOverheadPercent > 0) {
 540       // we will calculate the number of parallel marking threads
 541       // based on a target overhead with respect to the soft real-time
 542       // goal
 543 
 544       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 545       double overall_cm_overhead =
 546         (double) MaxGCPauseMillis * marking_overhead /
 547         (double) GCPauseIntervalMillis;
 548       double cpu_ratio = 1.0 / (double) os::processor_count();
 549       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 550       double marking_task_overhead =
 551         overall_cm_overhead / marking_thread_num *
 552                                                 (double) os::processor_count();
 553       double sleep_factor =
 554                          (1.0 - marking_task_overhead) / marking_task_overhead;
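      // Illustrative example (assumed values, not necessarily defaults): with
      // G1MarkingOverheadPercent = 10, MaxGCPauseMillis = 200,
      // GCPauseIntervalMillis = 1000 and 8 processors:
      //   marking_overhead      = 0.10
      //   overall_cm_overhead   = 200 * 0.10 / 1000   = 0.02
      //   cpu_ratio             = 1 / 8               = 0.125
      //   marking_thread_num    = ceil(0.02 / 0.125)  = 1
      //   marking_task_overhead = 0.02 / 1 * 8        = 0.16
      //   sleep_factor          = (1 - 0.16) / 0.16   = 5.25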
 555 
 556       _parallel_marking_threads = (uint) marking_thread_num;
 557       _max_parallel_marking_threads = _parallel_marking_threads;
 558       _sleep_factor             = sleep_factor;
 559       _marking_task_overhead    = marking_task_overhead;
 560     } else {
 561       _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
 562       _max_parallel_marking_threads = _parallel_marking_threads;
 563       _sleep_factor             = 0.0;
 564       _marking_task_overhead    = 1.0;
 565     }
 566 
 567     if (parallel_marking_threads() > 1) {
 568       _cleanup_task_overhead = 1.0;
 569     } else {
 570       _cleanup_task_overhead = marking_task_overhead();
 571     }
 572     _cleanup_sleep_factor =
 573                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 574 
 575 #if 0
 576     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 577     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 578     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 579     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 580     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 581 #endif
 582 
 583     guarantee(parallel_marking_threads() > 0, "peace of mind");
 584     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
 585          _max_parallel_marking_threads, false, true);
 586     if (_parallel_workers == NULL) {
 587       vm_exit_during_initialization("Failed necessary allocation.");
 588     } else {
 589       _parallel_workers->initialize_workers();
 590     }
 591   }
 592 
 593   // so that the call below can read a sensible value
 594   _heap_start = (HeapWord*) rs.base();
 595   set_non_marking_state();
 596 }
 597 
 598 void ConcurrentMark::update_g1_committed(bool force) {
 599   // If concurrent marking is not in progress, then we do not need to
 600   // update _heap_end.
 601   if (!concurrent_marking_in_progress() && !force) return;
 602 
 603   MemRegion committed = _g1h->g1_committed();
 604   assert(committed.start() == _heap_start, "start shouldn't change");
 605   HeapWord* new_end = committed.end();
 606   if (new_end > _heap_end) {
 607     // The heap has been expanded.
 608 
 609     _heap_end = new_end;
 610   }
 611   // Notice that the heap can also shrink. However, this only happens
 612   // during a Full GC (at least currently) and the entire marking
 613   // phase will bail out and the task will not be restarted. So, let's
 614   // do nothing.
 615 }
 616 
 617 void ConcurrentMark::reset() {
 618   // Starting values for these two. This should be called in a STW
  619   // phase. CM will be notified of any future g1_committed expansions;
  620   // this will happen at the end of evacuation pauses, when tasks are
 621   // inactive.
 622   MemRegion committed = _g1h->g1_committed();
 623   _heap_start = committed.start();
 624   _heap_end   = committed.end();
 625 
 626   // Separated the asserts so that we know which one fires.
 627   assert(_heap_start != NULL, "heap bounds should look ok");
 628   assert(_heap_end != NULL, "heap bounds should look ok");
 629   assert(_heap_start < _heap_end, "heap bounds should look ok");
 630 
 631   // reset all the marking data structures and any necessary flags
 632   clear_marking_state();
 633 
 634   if (verbose_low()) {
 635     gclog_or_tty->print_cr("[global] resetting");
 636   }
 637 
 638   // We do reset all of them, since different phases will use
 639   // different number of active threads. So, it's easiest to have all
 640   // of them ready.
 641   for (uint i = 0; i < _max_worker_id; ++i) {
 642     _tasks[i]->reset(_nextMarkBitMap);
 643   }
 644 
 645   // we need this to make sure that the flag is on during the evac
 646   // pause with initial mark piggy-backed
 647   set_concurrent_marking_in_progress();
 648 }
 649 
 650 void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
 651   assert(active_tasks <= _max_worker_id, "we should not have more");
 652 
 653   _active_tasks = active_tasks;
 654   // Need to update the three data structures below according to the
 655   // number of active threads for this phase.
 656   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 657   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 658   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 659 
 660   _concurrent = concurrent;
 661   // We propagate this to all tasks, not just the active ones.
 662   for (uint i = 0; i < _max_worker_id; ++i)
 663     _tasks[i]->set_concurrent(concurrent);
 664 
 665   if (concurrent) {
 666     set_concurrent_marking_in_progress();
 667   } else {
 668     // We currently assume that the concurrent flag has been set to
 669     // false before we start remark. At this point we should also be
 670     // in a STW phase.
 671     assert(!concurrent_marking_in_progress(), "invariant");
 672     assert(_finger == _heap_end, "only way to get here");
 673     update_g1_committed(true);
 674   }
 675 }
 676 
 677 void ConcurrentMark::set_non_marking_state() {
 678   // We set the global marking state to some default values when we're
 679   // not doing marking.
 680   clear_marking_state();
 681   _active_tasks = 0;
 682   clear_concurrent_marking_in_progress();
 683 }
 684 
 685 ConcurrentMark::~ConcurrentMark() {
 686   // The ConcurrentMark instance is never freed.
 687   ShouldNotReachHere();
 688 }
 689 
 690 void ConcurrentMark::clearNextBitmap() {
 691   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 692   G1CollectorPolicy* g1p = g1h->g1_policy();
 693 
 694   // Make sure that the concurrent mark thread looks to still be in
 695   // the current cycle.
 696   guarantee(cmThread()->during_cycle(), "invariant");
 697 
 698   // We are finishing up the current cycle by clearing the next
 699   // marking bitmap and getting it ready for the next cycle. During
 700   // this time no other cycle can start. So, let's make sure that this
 701   // is the case.
 702   guarantee(!g1h->mark_in_progress(), "invariant");
 703 
 704   // clear the mark bitmap (no grey objects to start with).
 705   // We need to do this in chunks and offer to yield in between
 706   // each chunk.
 707   HeapWord* start  = _nextMarkBitMap->startWord();
 708   HeapWord* end    = _nextMarkBitMap->endWord();
 709   HeapWord* cur    = start;
 710   size_t chunkSize = M;
 711   while (cur < end) {
 712     HeapWord* next = cur + chunkSize;
 713     if (next > end) {
 714       next = end;
 715     }
 716     MemRegion mr(cur,next);
 717     _nextMarkBitMap->clearRange(mr);
 718     cur = next;
 719     do_yield_check();
 720 
 721     // Repeat the asserts from above. We'll do them as asserts here to
 722     // minimize their overhead on the product. However, we'll have
 723     // them as guarantees at the beginning / end of the bitmap
 724     // clearing to get some checking in the product.
 725     assert(cmThread()->during_cycle(), "invariant");
 726     assert(!g1h->mark_in_progress(), "invariant");
 727   }
 728 
 729   // Clear the liveness counting data
 730   clear_all_count_data();
 731 
 732   // Repeat the asserts from above.
 733   guarantee(cmThread()->during_cycle(), "invariant");
 734   guarantee(!g1h->mark_in_progress(), "invariant");
 735 }
 736 
 737 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 738 public:
 739   bool doHeapRegion(HeapRegion* r) {
 740     if (!r->continuesHumongous()) {
 741       r->note_start_of_marking();
 742     }
 743     return false;
 744   }
 745 };
 746 
 747 void ConcurrentMark::checkpointRootsInitialPre() {
 748   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 749   G1CollectorPolicy* g1p = g1h->g1_policy();
 750 
 751   _has_aborted = false;
 752 
 753 #ifndef PRODUCT
 754   if (G1PrintReachableAtInitialMark) {
 755     print_reachable("at-cycle-start",
 756                     VerifyOption_G1UsePrevMarking, true /* all */);
 757   }
 758 #endif
 759 
 760   // Initialise marking structures. This has to be done in a STW phase.
 761   reset();
 762 
 763   // For each region note start of marking.
 764   NoteStartOfMarkHRClosure startcl;
 765   g1h->heap_region_iterate(&startcl);
 766 }
 767 
 768 
 769 void ConcurrentMark::checkpointRootsInitialPost() {
 770   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 771 
 772   // If we force an overflow during remark, the remark operation will
 773   // actually abort and we'll restart concurrent marking. If we always
  774   // force an overflow during remark we'll never actually complete the
  775   // marking phase. So, we initialize this here, at the start of the
  776   // cycle, so that the remaining overflow number will decrease at
 777   // every remark and we'll eventually not need to cause one.
 778   force_overflow_stw()->init();
 779 
 780   // Start Concurrent Marking weak-reference discovery.
 781   ReferenceProcessor* rp = g1h->ref_processor_cm();
 782   // enable ("weak") refs discovery
 783   rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
 784   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 785 
 786   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  787   // This is the start of the marking cycle; we expect all
  788   // threads to have SATB queues with active set to false.
 789   satb_mq_set.set_active_all_threads(true, /* new active value */
 790                                      false /* expected_active */);
 791 
 792   _root_regions.prepare_for_scan();
 793 
 794   // update_g1_committed() will be called at the end of an evac pause
 795   // when marking is on. So, it's also called at the end of the
 796   // initial-mark pause to update the heap end, if the heap expands
 797   // during it. No need to call it here.
 798 }
 799 
 800 /*
 801  * Notice that in the next two methods, we actually leave the STS
 802  * during the barrier sync and join it immediately afterwards. If we
 803  * do not do this, the following deadlock can occur: one thread could
 804  * be in the barrier sync code, waiting for the other thread to also
 805  * sync up, whereas another one could be trying to yield, while also
 806  * waiting for the other threads to sync up too.
 807  *
 808  * Note, however, that this code is also used during remark and in
 809  * this case we should not attempt to leave / enter the STS, otherwise
  810  * we'll either hit an assert (debug / fastdebug) or deadlock
 811  * (product). So we should only leave / enter the STS if we are
 812  * operating concurrently.
 813  *
 814  * Because the thread that does the sync barrier has left the STS, it
  815  * is possible for a Full GC or an evacuation pause to occur while
  816  * it is suspended. This is actually safe, since entering the sync
 817  * barrier is one of the last things do_marking_step() does, and it
 818  * doesn't manipulate any data structures afterwards.
 819  */
 820 
 821 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 822   if (verbose_low()) {
 823     gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
 824   }
 825 
 826   if (concurrent()) {
 827     ConcurrentGCThread::stsLeave();
 828   }
 829   _first_overflow_barrier_sync.enter();
 830   if (concurrent()) {
 831     ConcurrentGCThread::stsJoin();
 832   }
 833   // at this point everyone should have synced up and not be doing any
 834   // more work
 835 
 836   if (verbose_low()) {
 837     gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
 838   }
 839 
  840   // let the task associated with worker 0 do this
 841   if (worker_id == 0) {
 842     // task 0 is responsible for clearing the global data structures
 843     // We should be here because of an overflow. During STW we should
 844     // not clear the overflow flag since we rely on it being true when
  845     // we exit this method to abort the pause and restart concurrent
 846     // marking.
 847     clear_marking_state(concurrent() /* clear_overflow */);
 848     force_overflow()->update();
 849 
 850     if (G1Log::fine()) {
 851       gclog_or_tty->date_stamp(PrintGCDateStamps);
 852       gclog_or_tty->stamp(PrintGCTimeStamps);
 853       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 854     }
 855   }
 856 
  857   // after this, each task should reset its own data structures and
  858   // then go into the second barrier
 859 }
 860 
 861 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 862   if (verbose_low()) {
 863     gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
 864   }
 865 
 866   if (concurrent()) {
 867     ConcurrentGCThread::stsLeave();
 868   }
 869   _second_overflow_barrier_sync.enter();
 870   if (concurrent()) {
 871     ConcurrentGCThread::stsJoin();
 872   }
 873   // at this point everything should be re-initialised and ready to go
 874 
 875   if (verbose_low()) {
 876     gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
 877   }
 878 }
 879 
 880 #ifndef PRODUCT
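// ForceOverflowSettings is a debugging aid (non-product only): when
// G1ConcMarkForceOverflow is set to N, should_force() reports true once for
// each of the first N update() cycles, artificially forcing a marking-stack
// overflow so that the overflow/restart machinery gets exercised.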
 881 void ForceOverflowSettings::init() {
 882   _num_remaining = G1ConcMarkForceOverflow;
 883   _force = false;
 884   update();
 885 }
 886 
 887 void ForceOverflowSettings::update() {
 888   if (_num_remaining > 0) {
 889     _num_remaining -= 1;
 890     _force = true;
 891   } else {
 892     _force = false;
 893   }
 894 }
 895 
 896 bool ForceOverflowSettings::should_force() {
 897   if (_force) {
 898     _force = false;
 899     return true;
 900   } else {
 901     return false;
 902   }
 903 }
 904 #endif // !PRODUCT
 905 
 906 class CMConcurrentMarkingTask: public AbstractGangTask {
 907 private:
 908   ConcurrentMark*       _cm;
 909   ConcurrentMarkThread* _cmt;
 910 
 911 public:
 912   void work(uint worker_id) {
 913     assert(Thread::current()->is_ConcurrentGC_thread(),
 914            "this should only be done by a conc GC thread");
 915     ResourceMark rm;
 916 
 917     double start_vtime = os::elapsedVTime();
 918 
 919     ConcurrentGCThread::stsJoin();
 920 
 921     assert(worker_id < _cm->active_tasks(), "invariant");
 922     CMTask* the_task = _cm->task(worker_id);
 923     the_task->record_start_time();
 924     if (!_cm->has_aborted()) {
 925       do {
 926         double start_vtime_sec = os::elapsedVTime();
 927         double start_time_sec = os::elapsedTime();
 928         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 929 
 930         the_task->do_marking_step(mark_step_duration_ms,
 931                                   true /* do_stealing    */,
 932                                   true /* do_termination */);
 933 
 934         double end_time_sec = os::elapsedTime();
 935         double end_vtime_sec = os::elapsedVTime();
 936         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
 937         double elapsed_time_sec = end_time_sec - start_time_sec;
 938         _cm->clear_has_overflown();
 939 
 940         bool ret = _cm->do_yield_check(worker_id);
 941 
 942         jlong sleep_time_ms;
 943         if (!_cm->has_aborted() && the_task->has_aborted()) {
 944           sleep_time_ms =
 945             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
 946           ConcurrentGCThread::stsLeave();
 947           os::sleep(Thread::current(), sleep_time_ms, false);
 948           ConcurrentGCThread::stsJoin();
 949         }
 950         double end_time2_sec = os::elapsedTime();
 951         double elapsed_time2_sec = end_time2_sec - start_time_sec;
 952 
 953 #if 0
 954           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
 955                                  "overhead %1.4lf",
 956                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
 957                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
 958           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
 959                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
 960 #endif
 961       } while (!_cm->has_aborted() && the_task->has_aborted());
 962     }
 963     the_task->record_end_time();
 964     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
 965 
 966     ConcurrentGCThread::stsLeave();
 967 
 968     double end_vtime = os::elapsedVTime();
 969     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 970   }
 971 
 972   CMConcurrentMarkingTask(ConcurrentMark* cm,
 973                           ConcurrentMarkThread* cmt) :
 974       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
 975 
 976   ~CMConcurrentMarkingTask() { }
 977 };
 978 
 979 // Calculates the number of active workers for a concurrent
 980 // phase.
 981 uint ConcurrentMark::calc_parallel_marking_threads() {
 982   if (G1CollectedHeap::use_parallel_gc_threads()) {
 983     uint n_conc_workers = 0;
 984     if (!UseDynamicNumberOfGCThreads ||
 985         (!FLAG_IS_DEFAULT(ConcGCThreads) &&
 986          !ForceDynamicNumberOfGCThreads)) {
 987       n_conc_workers = max_parallel_marking_threads();
 988     } else {
 989       n_conc_workers =
 990         AdaptiveSizePolicy::calc_default_active_workers(
 991                                      max_parallel_marking_threads(),
 992                                      1, /* Minimum workers */
 993                                      parallel_marking_threads(),
 994                                      Threads::number_of_non_daemon_threads());
 995       // Don't scale down "n_conc_workers" by scale_parallel_threads() because
 996       // that scaling has already gone into "_max_parallel_marking_threads".
 997     }
 998     assert(n_conc_workers > 0, "Always need at least 1");
 999     return n_conc_workers;
1000   }
1001   // If we are not running with any parallel GC threads we will not
1002   // have spawned any marking threads either. Hence the number of
1003   // concurrent workers should be 0.
1004   return 0;
1005 }
1006 
1007 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1008   // Currently, only survivors can be root regions.
1009   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1010   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1011 
1012   const uintx interval = PrefetchScanIntervalInBytes;
1013   HeapWord* curr = hr->bottom();
1014   const HeapWord* end = hr->top();
1015   while (curr < end) {
1016     Prefetch::read(curr, interval);
1017     oop obj = oop(curr);
1018     int size = obj->oop_iterate(&cl);
1019     assert(size == obj->size(), "sanity");
1020     curr += size;
1021   }
1022 }
1023 
1024 class CMRootRegionScanTask : public AbstractGangTask {
1025 private:
1026   ConcurrentMark* _cm;
1027 
1028 public:
1029   CMRootRegionScanTask(ConcurrentMark* cm) :
1030     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1031 
1032   void work(uint worker_id) {
1033     assert(Thread::current()->is_ConcurrentGC_thread(),
1034            "this should only be done by a conc GC thread");
1035 
1036     CMRootRegions* root_regions = _cm->root_regions();
1037     HeapRegion* hr = root_regions->claim_next();
1038     while (hr != NULL) {
1039       _cm->scanRootRegion(hr, worker_id);
1040       hr = root_regions->claim_next();
1041     }
1042   }
1043 };
1044 
1045 void ConcurrentMark::scanRootRegions() {
1046   // scan_in_progress() will have been set to true only if there was
1047   // at least one root region to scan. So, if it's false, we
1048   // should not attempt to do any further work.
1049   if (root_regions()->scan_in_progress()) {
1050     _parallel_marking_threads = calc_parallel_marking_threads();
1051     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1052            "Maximum number of marking threads exceeded");
1053     uint active_workers = MAX2(1U, parallel_marking_threads());
1054 
1055     CMRootRegionScanTask task(this);
1056     if (parallel_marking_threads() > 0) {
1057       _parallel_workers->set_active_workers((int) active_workers);
1058       _parallel_workers->run_task(&task);
1059     } else {
1060       task.work(0);
1061     }
1062 
1063     // It's possible that has_aborted() is true here without actually
1064     // aborting the survivor scan earlier. This is OK as it's
1065     // mainly used for sanity checking.
1066     root_regions()->scan_finished();
1067   }
1068 }
1069 
1070 void ConcurrentMark::markFromRoots() {
1071   // we might be tempted to assert that:
1072   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1073   //        "inconsistent argument?");
1074   // However that wouldn't be right, because it's possible that
1075   // a safepoint is indeed in progress as a younger generation
1076   // stop-the-world GC happens even as we mark in this generation.
1077 
1078   _restart_for_overflow = false;
1079   force_overflow_conc()->init();
1080 
1081   // _g1h has _n_par_threads
1082   _parallel_marking_threads = calc_parallel_marking_threads();
1083   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1084     "Maximum number of marking threads exceeded");
1085 
1086   uint active_workers = MAX2(1U, parallel_marking_threads());
1087 
1088   // Parallel task terminator is set in "set_phase()"
1089   set_phase(active_workers, true /* concurrent */);
1090 
1091   CMConcurrentMarkingTask markingTask(this, cmThread());
1092   if (parallel_marking_threads() > 0) {
1093     _parallel_workers->set_active_workers((int)active_workers);
 1094     // Don't set _n_par_threads because it affects MT in process_strong_roots()
 1095     // and the decisions on that MT processing are made elsewhere.
1096     assert(_parallel_workers->active_workers() > 0, "Should have been set");
1097     _parallel_workers->run_task(&markingTask);
1098   } else {
1099     markingTask.work(0);
1100   }
1101   print_stats();
1102 }
1103 
1104 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1105   // world is stopped at this checkpoint
1106   assert(SafepointSynchronize::is_at_safepoint(),
1107          "world should be stopped");
1108 
1109   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1110 
1111   // If a full collection has happened, we shouldn't do this.
1112   if (has_aborted()) {
1113     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1114     return;
1115   }
1116 
1117   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1118 
1119   if (VerifyDuringGC) {
1120     HandleMark hm;  // handle scope
1121     gclog_or_tty->print(" VerifyDuringGC:(before)");
1122     Universe::heap()->prepare_for_verify();
1123     Universe::verify(/* silent      */ false,
1124                      /* option      */ VerifyOption_G1UsePrevMarking);
1125   }
1126 
1127   G1CollectorPolicy* g1p = g1h->g1_policy();
1128   g1p->record_concurrent_mark_remark_start();
1129 
1130   double start = os::elapsedTime();
1131 
1132   checkpointRootsFinalWork();
1133 
1134   double mark_work_end = os::elapsedTime();
1135 
1136   weakRefsWork(clear_all_soft_refs);
1137 
1138   if (has_overflown()) {
1139     // Oops.  We overflowed.  Restart concurrent marking.
1140     _restart_for_overflow = true;
1141     // Clear the flag. We do not need it any more.
1142     clear_has_overflown();
1143     if (G1TraceMarkStackOverflow) {
1144       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1145     }
1146   } else {
1147     // Aggregate the per-task counting data that we have accumulated
1148     // while marking.
1149     aggregate_count_data();
1150 
1151     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1152     // We're done with marking.
 1153     // This is the end of the marking cycle; we expect all
 1154     // threads to have SATB queues with active set to true.
1155     satb_mq_set.set_active_all_threads(false, /* new active value */
1156                                        true /* expected_active */);
1157 
1158     if (VerifyDuringGC) {
1159       HandleMark hm;  // handle scope
1160       gclog_or_tty->print(" VerifyDuringGC:(after)");
1161       Universe::heap()->prepare_for_verify();
1162       Universe::verify(/* silent      */ false,
1163                        /* option      */ VerifyOption_G1UseNextMarking);
1164     }
1165     assert(!restart_for_overflow(), "sanity");
1166   }
1167 
1168   // Reset the marking state if marking completed
1169   if (!restart_for_overflow()) {
1170     set_non_marking_state();
1171   }
1172 
1173 #if VERIFY_OBJS_PROCESSED
1174   _scan_obj_cl.objs_processed = 0;
1175   ThreadLocalObjQueue::objs_enqueued = 0;
1176 #endif
1177 
1178   // Statistics
1179   double now = os::elapsedTime();
1180   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1181   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1182   _remark_times.add((now - start) * 1000.0);
1183 
1184   g1p->record_concurrent_mark_remark_end();
1185 }
1186 
1187 // Base class of the closures that finalize and verify the
1188 // liveness counting data.
1189 class CMCountDataClosureBase: public HeapRegionClosure {
1190 protected:
1191   G1CollectedHeap* _g1h;
1192   ConcurrentMark* _cm;
1193   CardTableModRefBS* _ct_bs;
1194 
1195   BitMap* _region_bm;
1196   BitMap* _card_bm;
1197 
 1198   // Takes a region that's not empty (i.e., it has at least one
 1199   // live object in it) and sets its corresponding bit on the region
1200   // bitmap to 1. If the region is "starts humongous" it will also set
1201   // to 1 the bits on the region bitmap that correspond to its
1202   // associated "continues humongous" regions.
1203   void set_bit_for_region(HeapRegion* hr) {
1204     assert(!hr->continuesHumongous(), "should have filtered those out");
1205 
1206     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1207     if (!hr->startsHumongous()) {
1208       // Normal (non-humongous) case: just set the bit.
1209       _region_bm->par_at_put(index, true);
1210     } else {
1211       // Starts humongous case: calculate how many regions are part of
1212       // this humongous region and then set the bit range.
1213       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1214       _region_bm->par_at_put_range(index, end_index, true);
1215     }
1216   }
1217 
1218 public:
1219   CMCountDataClosureBase(G1CollectedHeap* g1h,
1220                          BitMap* region_bm, BitMap* card_bm):
1221     _g1h(g1h), _cm(g1h->concurrent_mark()),
1222     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
1223     _region_bm(region_bm), _card_bm(card_bm) { }
1224 };
1225 
1226 // Closure that calculates the # live objects per region. Used
1227 // for verification purposes during the cleanup pause.
1228 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1229   CMBitMapRO* _bm;
1230   size_t _region_marked_bytes;
1231 
1232 public:
1233   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1234                          BitMap* region_bm, BitMap* card_bm) :
1235     CMCountDataClosureBase(g1h, region_bm, card_bm),
1236     _bm(bm), _region_marked_bytes(0) { }
1237 
1238   bool doHeapRegion(HeapRegion* hr) {
1239 
1240     if (hr->continuesHumongous()) {
1241       // We will ignore these here and process them when their
1242       // associated "starts humongous" region is processed (see
1243       // set_bit_for_heap_region()). Note that we cannot rely on their
1244       // associated "starts humongous" region to have their bit set to
1245       // 1 since, due to the region chunking in the parallel region
1246       // iteration, a "continues humongous" region might be visited
1247       // before its associated "starts humongous".
1248       return false;
1249     }
1250 
1251     HeapWord* ntams = hr->next_top_at_mark_start();
1252     HeapWord* start = hr->bottom();
1253 
1254     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1255            err_msg("Preconditions not met - "
1256                    "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1257                    start, ntams, hr->end()));
1258 
1259     // Find the first marked object at or after "start".
1260     start = _bm->getNextMarkedWordAddress(start, ntams);
1261 
1262     size_t marked_bytes = 0;
1263 
1264     while (start < ntams) {
1265       oop obj = oop(start);
1266       int obj_sz = obj->size();
1267       HeapWord* obj_end = start + obj_sz;
1268 
1269       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1270       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1271 
1272       // Note: if we're looking at the last region in heap - obj_end
1273       // could be actually just beyond the end of the heap; end_idx
1274       // will then correspond to a (non-existent) card that is also
1275       // just beyond the heap.
1276       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1277         // end of object is not card aligned - increment to cover
1278         // all the cards spanned by the object
1279         end_idx += 1;
1280       }
1281 
1282       // Set the bits in the card BM for the cards spanned by this object.
1283       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1284 
1285       // Add the size of this object to the number of marked bytes.
1286       marked_bytes += (size_t)obj_sz * HeapWordSize;
1287 
1288       // Find the next marked object after this one.
1289       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1290     }
1291 
1292     // Mark the allocated-since-marking portion...
1293     HeapWord* top = hr->top();
1294     if (ntams < top) {
1295       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1296       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1297 
1298       // Note: if we're looking at the last region in heap - top
1299       // could be actually just beyond the end of the heap; end_idx
1300       // will then correspond to a (non-existent) card that is also
1301       // just beyond the heap.
1302       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
 1303         // top is not card aligned - increment to cover
 1304         // all the cards spanned by the allocated-since-marking portion
1305         end_idx += 1;
1306       }
1307       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1308 
1309       // This definitely means the region has live objects.
1310       set_bit_for_region(hr);
1311     }
1312 
1313     // Update the live region bitmap.
1314     if (marked_bytes > 0) {
1315       set_bit_for_region(hr);
1316     }
1317 
1318     // Set the marked bytes for the current region so that
 1319     // it can be queried by a calling verification routine
1320     _region_marked_bytes = marked_bytes;
1321 
1322     return false;
1323   }
1324 
1325   size_t region_marked_bytes() const { return _region_marked_bytes; }
1326 };
1327 
1328 // Heap region closure used for verifying the counting data
1329 // that was accumulated concurrently and aggregated during
1330 // the remark pause. This closure is applied to the heap
1331 // regions during the STW cleanup pause.
1332 
1333 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1334   G1CollectedHeap* _g1h;
1335   ConcurrentMark* _cm;
1336   CalcLiveObjectsClosure _calc_cl;
1337   BitMap* _region_bm;   // Region BM to be verified
1338   BitMap* _card_bm;     // Card BM to be verified
1339   bool _verbose;        // verbose output?
1340 
1341   BitMap* _exp_region_bm; // Expected Region BM values
1342   BitMap* _exp_card_bm;   // Expected card BM values
1343 
1344   int _failures;
1345 
1346 public:
1347   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1348                                 BitMap* region_bm,
1349                                 BitMap* card_bm,
1350                                 BitMap* exp_region_bm,
1351                                 BitMap* exp_card_bm,
1352                                 bool verbose) :
1353     _g1h(g1h), _cm(g1h->concurrent_mark()),
1354     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1355     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1356     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1357     _failures(0) { }
1358 
1359   int failures() const { return _failures; }
1360 
1361   bool doHeapRegion(HeapRegion* hr) {
1362     if (hr->continuesHumongous()) {
1363       // We will ignore these here and process them when their
1364       // associated "starts humongous" region is processed (see
1365       // set_bit_for_heap_region()). Note that we cannot rely on their
1366       // associated "starts humongous" region to have their bit set to
1367       // 1 since, due to the region chunking in the parallel region
1368       // iteration, a "continues humongous" region might be visited
1369       // before its associated "starts humongous".
1370       return false;
1371     }
1372 
1373     int failures = 0;
1374 
1375     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1376     // this region and set the corresponding bits in the expected region
1377     // and card bitmaps.
1378     bool res = _calc_cl.doHeapRegion(hr);
1379     assert(res == false, "should be continuing");
1380 
1381     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1382                     Mutex::_no_safepoint_check_flag);
1383 
1384     // Verify the marked bytes for this region.
1385     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1386     size_t act_marked_bytes = hr->next_marked_bytes();
1387 
1388     // We're not OK if expected marked bytes > actual marked bytes. It means
 1389     // we have missed accounting for some objects during the actual marking.
1390     if (exp_marked_bytes > act_marked_bytes) {
1391       if (_verbose) {
1392         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1393                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1394                                hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1395       }
1396       failures += 1;
1397     }
1398 
1399     // Verify the bit, for this region, in the actual and expected
1400     // (which was just calculated) region bit maps.
1401     // We're not OK if the bit in the calculated expected region
1402     // bitmap is set and the bit in the actual region bitmap is not.
1403     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1404 
1405     bool expected = _exp_region_bm->at(index);
1406     bool actual = _region_bm->at(index);
1407     if (expected && !actual) {
1408       if (_verbose) {
1409         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1410                                "expected: %s, actual: %s",
1411                                hr->hrs_index(),
1412                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1413       }
1414       failures += 1;
1415     }
1416 
1417     // Verify that the card bit maps for the cards spanned by the current
1418     // region match. We have an error if we have a set bit in the expected
1419     // bit map and the corresponding bit in the actual bitmap is not set.
1420 
1421     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1422     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1423 
1424     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1425       expected = _exp_card_bm->at(i);
1426       actual = _card_bm->at(i);
1427 
1428       if (expected && !actual) {
1429         if (_verbose) {
1430           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1431                                  "expected: %s, actual: %s",
1432                                  hr->hrs_index(), i,
1433                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1434         }
1435         failures += 1;
1436       }
1437     }
1438 
1439     if (failures > 0 && _verbose)  {
1440       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1441                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1442                              HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
1443                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1444     }
1445 
1446     _failures += failures;
1447 
1448     // We could stop iteration over the heap when we
1449     // find the first violating region by returning true.
1450     return false;
1451   }
1452 };
1453 
1454 
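     // Gang task used during the cleanup pause (when VerifyDuringGC is
     // enabled) to verify, in parallel, that the liveness counting data
     // gathered during marking agrees with the data recalculated from the
     // marking bitmap by VerifyLiveObjectDataHRClosure.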
1455 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1456 protected:
1457   G1CollectedHeap* _g1h;
1458   ConcurrentMark* _cm;
1459   BitMap* _actual_region_bm;
1460   BitMap* _actual_card_bm;
1461 
1462   uint    _n_workers;
1463 
1464   BitMap* _expected_region_bm;
1465   BitMap* _expected_card_bm;
1466 
1467   int  _failures;
1468   bool _verbose;
1469 
1470 public:
1471   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1472                             BitMap* region_bm, BitMap* card_bm,
1473                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1474     : AbstractGangTask("G1 verify final counting"),
1475       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1476       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1477       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1478       _failures(0), _verbose(false),
1479       _n_workers(0) {
1480     assert(VerifyDuringGC, "don't call this otherwise");
1481 
1482     // Use the value already set as the number of active threads
1483     // in the call to run_task().
1484     if (G1CollectedHeap::use_parallel_gc_threads()) {
1485       assert(_g1h->workers()->active_workers() > 0,
1486              "Should have been previously set");
1487       _n_workers = _g1h->workers()->active_workers();
1488     } else {
1489       _n_workers = 1;
1490     }
1491 
1492     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1493     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1494 
1495     _verbose = _cm->verbose_medium();
1496   }
1497 
1498   void work(uint worker_id) {
1499     assert(worker_id < _n_workers, "invariant");
1500 
1501     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1502                                             _actual_region_bm, _actual_card_bm,
1503                                             _expected_region_bm,
1504                                             _expected_card_bm,
1505                                             _verbose);
1506 
1507     if (G1CollectedHeap::use_parallel_gc_threads()) {
1508       _g1h->heap_region_par_iterate_chunked(&verify_cl,
1509                                             worker_id,
1510                                             _n_workers,
1511                                             HeapRegion::VerifyCountClaimValue);
1512     } else {
1513       _g1h->heap_region_iterate(&verify_cl);
1514     }
1515 
1516     Atomic::add(verify_cl.failures(), &_failures);
1517   }
1518 
1519   int failures() const { return _failures; }
1520 };
1521 
1522 // Closure that finalizes the liveness counting data.
1523 // Used during the cleanup pause.
1524 // Sets the bits corresponding to the interval [NTAMS, top]
1525 // (which contains the implicitly live objects) in the
1526 // card liveness bitmap. Also sets the bit for each region,
1527 // containing live data, in the region liveness bitmap.
1528 
1529 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1530  public:
1531   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1532                               BitMap* region_bm,
1533                               BitMap* card_bm) :
1534     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1535 
1536   bool doHeapRegion(HeapRegion* hr) {
1537 
1538     if (hr->continuesHumongous()) {
1539       // We will ignore these here and process them when their
1540       // associated "starts humongous" region is processed (see
1541       // set_bit_for_heap_region()). Note that we cannot rely on their
1542       // associated "starts humongous" region to have its bit set to
1543       // 1 since, due to the region chunking in the parallel region
1544       // iteration, a "continues humongous" region might be visited
1545       // before its associated "starts humongous".
1546       return false;
1547     }
1548 
1549     HeapWord* ntams = hr->next_top_at_mark_start();
1550     HeapWord* top   = hr->top();
1551 
1552     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1553 
1554     // Mark the allocated-since-marking portion...
1555     if (ntams < top) {
1556       // This definitely means the region has live objects.
1557       set_bit_for_region(hr);
1558 
1559       // Now set the bits in the card bitmap for [ntams, top)
1560       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1561       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1562 
1563       // Note: if we're looking at the last region in the heap, top
1564       // could actually be just beyond the end of the heap; end_idx
1565       // will then correspond to a (non-existent) card that is also
1566       // just beyond the heap.
1567       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1568         // end of object is not card aligned - increment to cover
1569         // all the cards spanned by the object
1570         end_idx += 1;
1571       }
1572 
1573       assert(end_idx <= _card_bm->size(),
1574              err_msg("oob: end_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1575                      end_idx, _card_bm->size()));
1576       assert(start_idx < _card_bm->size(),
1577              err_msg("oob: start_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1578                      start_idx, _card_bm->size()));
1579 
1580       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1581     }
1582 
1583     // Set the bit for the region if it contains live data
1584     if (hr->next_marked_bytes() > 0) {
1585       set_bit_for_region(hr);
1586     }
1587 
1588     return false;
1589   }
1590 };
1591 
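     // Gang task used during the cleanup pause to finalize the liveness
     // counting data, in parallel, by applying FinalCountDataUpdateClosure
     // to every heap region.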
1592 class G1ParFinalCountTask: public AbstractGangTask {
1593 protected:
1594   G1CollectedHeap* _g1h;
1595   ConcurrentMark* _cm;
1596   BitMap* _actual_region_bm;
1597   BitMap* _actual_card_bm;
1598 
1599   uint    _n_workers;
1600 
1601 public:
1602   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1603     : AbstractGangTask("G1 final counting"),
1604       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1605       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1606       _n_workers(0) {
1607     // Use the value already set as the number of active threads
1608     // in the call to run_task().
1609     if (G1CollectedHeap::use_parallel_gc_threads()) {
1610       assert(_g1h->workers()->active_workers() > 0,
1611              "Should have been previously set");
1612       _n_workers = _g1h->workers()->active_workers();
1613     } else {
1614       _n_workers = 1;
1615     }
1616   }
1617 
1618   void work(uint worker_id) {
1619     assert(worker_id < _n_workers, "invariant");
1620 
1621     FinalCountDataUpdateClosure final_update_cl(_g1h,
1622                                                 _actual_region_bm,
1623                                                 _actual_card_bm);
1624 
1625     if (G1CollectedHeap::use_parallel_gc_threads()) {
1626       _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1627                                             worker_id,
1628                                             _n_workers,
1629                                             HeapRegion::FinalCountClaimValue);
1630     } else {
1631       _g1h->heap_region_iterate(&final_update_cl);
1632     }
1633   }
1634 };
1635 
1636 class G1ParNoteEndTask;
1637 
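     // Closure applied to each heap region at the end of marking. It records
     // per-region timing and liveness information and frees any region that
     // is found to be completely empty, adding it to the worker's local
     // cleanup list.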
1638 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1639   G1CollectedHeap* _g1;
1640   int _worker_num;
1641   size_t _max_live_bytes;
1642   uint _regions_claimed;
1643   size_t _freed_bytes;
1644   FreeRegionList* _local_cleanup_list;
1645   OldRegionSet* _old_proxy_set;
1646   HumongousRegionSet* _humongous_proxy_set;
1647   HRRSCleanupTask* _hrrs_cleanup_task;
1648   double _claimed_region_time;
1649   double _max_region_time;
1650 
1651 public:
1652   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1653                              int worker_num,
1654                              FreeRegionList* local_cleanup_list,
1655                              OldRegionSet* old_proxy_set,
1656                              HumongousRegionSet* humongous_proxy_set,
1657                              HRRSCleanupTask* hrrs_cleanup_task) :
1658     _g1(g1), _worker_num(worker_num),
1659     _max_live_bytes(0), _regions_claimed(0),
1660     _freed_bytes(0),
1661     _claimed_region_time(0.0), _max_region_time(0.0),
1662     _local_cleanup_list(local_cleanup_list),
1663     _old_proxy_set(old_proxy_set),
1664     _humongous_proxy_set(humongous_proxy_set),
1665     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1666 
1667   size_t freed_bytes() { return _freed_bytes; }
1668 
1669   bool doHeapRegion(HeapRegion *hr) {
1670     if (hr->continuesHumongous()) {
1671       return false;
1672     }
1673     // We use a claim value of zero here because all regions
1674     // were claimed with value 1 in the FinalCount task.
1675     _g1->reset_gc_time_stamps(hr);
1676     double start = os::elapsedTime();
1677     _regions_claimed++;
1678     hr->note_end_of_marking();
1679     _max_live_bytes += hr->max_live_bytes();
1680     _g1->free_region_if_empty(hr,
1681                               &_freed_bytes,
1682                               _local_cleanup_list,
1683                               _old_proxy_set,
1684                               _humongous_proxy_set,
1685                               _hrrs_cleanup_task,
1686                               true /* par */);
1687     double region_time = (os::elapsedTime() - start);
1688     _claimed_region_time += region_time;
1689     if (region_time > _max_region_time) {
1690       _max_region_time = region_time;
1691     }
1692     return false;
1693   }
1694 
1695   size_t max_live_bytes() { return _max_live_bytes; }
1696   uint regions_claimed() { return _regions_claimed; }
1697   double claimed_region_time_sec() { return _claimed_region_time; }
1698   double max_region_time_sec() { return _max_region_time; }
1699 };
1700 
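     // Gang task that applies G1NoteEndOfConcMarkClosure to the heap regions
     // in parallel and then merges each worker's local cleanup list and
     // statistics into the global state under ParGCRareEvent_lock.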
1701 class G1ParNoteEndTask: public AbstractGangTask {
1702   friend class G1NoteEndOfConcMarkClosure;
1703 
1704 protected:
1705   G1CollectedHeap* _g1h;
1706   size_t _max_live_bytes;
1707   size_t _freed_bytes;
1708   FreeRegionList* _cleanup_list;
1709 
1710 public:
1711   G1ParNoteEndTask(G1CollectedHeap* g1h,
1712                    FreeRegionList* cleanup_list) :
1713     AbstractGangTask("G1 note end"), _g1h(g1h),
1714     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1715 
1716   void work(uint worker_id) {
1717     double start = os::elapsedTime();
1718     FreeRegionList local_cleanup_list("Local Cleanup List");
1719     OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
1720     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1721     HRRSCleanupTask hrrs_cleanup_task;
1722     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
1723                                            &old_proxy_set,
1724                                            &humongous_proxy_set,
1725                                            &hrrs_cleanup_task);
1726     if (G1CollectedHeap::use_parallel_gc_threads()) {
1727       _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1728                                             _g1h->workers()->active_workers(),
1729                                             HeapRegion::NoteEndClaimValue);
1730     } else {
1731       _g1h->heap_region_iterate(&g1_note_end);
1732     }
1733     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1734 
1735     // Now update the lists
1736     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1737                                             NULL /* free_list */,
1738                                             &old_proxy_set,
1739                                             &humongous_proxy_set,
1740                                             true /* par */);
1741     {
1742       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1743       _max_live_bytes += g1_note_end.max_live_bytes();
1744       _freed_bytes += g1_note_end.freed_bytes();
1745 
1746       // If we iterate over the global cleanup list at the end of
1747       // cleanup to do this printing we cannot guarantee that we only
1748       // generate output for the newly-reclaimed regions (the list
1749       // might not be empty at the beginning of cleanup; we might
1750       // still be working on its previous contents). So we do the
1751       // printing here, before we append the new regions to the global
1752       // cleanup list.
1753 
1754       G1HRPrinter* hr_printer = _g1h->hr_printer();
1755       if (hr_printer->is_active()) {
1756         HeapRegionLinkedListIterator iter(&local_cleanup_list);
1757         while (iter.more_available()) {
1758           HeapRegion* hr = iter.get_next();
1759           hr_printer->cleanup(hr);
1760         }
1761       }
1762 
1763       _cleanup_list->add_as_tail(&local_cleanup_list);
1764       assert(local_cleanup_list.is_empty(), "post-condition");
1765 
1766       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1767     }
1768   }
1769   size_t max_live_bytes() { return _max_live_bytes; }
1770   size_t freed_bytes() { return _freed_bytes; }
1771 };
1772 
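     // Gang task used to scrub the remembered sets in parallel, using the
     // region and card liveness bitmaps produced by the counting phase.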
1773 class G1ParScrubRemSetTask: public AbstractGangTask {
1774 protected:
1775   G1RemSet* _g1rs;
1776   BitMap* _region_bm;
1777   BitMap* _card_bm;
1778 public:
1779   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1780                        BitMap* region_bm, BitMap* card_bm) :
1781     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1782     _region_bm(region_bm), _card_bm(card_bm) { }
1783 
1784   void work(uint worker_id) {
1785     if (G1CollectedHeap::use_parallel_gc_threads()) {
1786       _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1787                        HeapRegion::ScrubRemSetClaimValue);
1788     } else {
1789       _g1rs->scrub(_region_bm, _card_bm);
1790     }
1791   }
1792 
1793 };
1794 
1795 void ConcurrentMark::cleanup() {
1796   // world is stopped at this checkpoint
1797   assert(SafepointSynchronize::is_at_safepoint(),
1798          "world should be stopped");
1799   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1800 
1801   // If a full collection has happened, we shouldn't do this.
1802   if (has_aborted()) {
1803     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1804     return;
1805   }
1806 
1807   HRSPhaseSetter x(HRSPhaseCleanup);
1808   g1h->verify_region_sets_optional();
1809 
1810   if (VerifyDuringGC) {
1811     HandleMark hm;  // handle scope
1812     gclog_or_tty->print(" VerifyDuringGC:(before)");
1813     Universe::heap()->prepare_for_verify();
1814     Universe::verify(/* silent      */ false,
1815                      /* option      */ VerifyOption_G1UsePrevMarking);
1816   }
1817 
1818   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1819   g1p->record_concurrent_mark_cleanup_start();
1820 
1821   double start = os::elapsedTime();
1822 
1823   HeapRegionRemSet::reset_for_cleanup_tasks();
1824 
1825   uint n_workers;
1826 
1827   // Do counting once more with the world stopped for good measure.
1828   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1829 
1830   if (G1CollectedHeap::use_parallel_gc_threads()) {
1831     assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
1832            "sanity check");
1833 
1834     g1h->set_par_threads();
1835     n_workers = g1h->n_par_threads();
1836     assert(g1h->n_par_threads() == n_workers,
1837            "Should not have been reset");
1838     g1h->workers()->run_task(&g1_par_count_task);
1839     // Done with the parallel phase so reset to 0.
1840     g1h->set_par_threads(0);
1841 
1842     assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
1843            "sanity check");
1844   } else {
1845     n_workers = 1;
1846     g1_par_count_task.work(0);
1847   }
1848 
1849   if (VerifyDuringGC) {
1850     // Verify that the counting data accumulated during marking matches
1851     // that calculated by walking the marking bitmap.
1852 
1853     // Bitmaps to hold expected values
1854     BitMap expected_region_bm(_region_bm.size(), false);
1855     BitMap expected_card_bm(_card_bm.size(), false);
1856 
1857     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1858                                                  &_region_bm,
1859                                                  &_card_bm,
1860                                                  &expected_region_bm,
1861                                                  &expected_card_bm);
1862 
1863     if (G1CollectedHeap::use_parallel_gc_threads()) {
1864       g1h->set_par_threads((int)n_workers);
1865       g1h->workers()->run_task(&g1_par_verify_task);
1866       // Done with the parallel phase so reset to 0.
1867       g1h->set_par_threads(0);
1868 
1869       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
1870              "sanity check");
1871     } else {
1872       g1_par_verify_task.work(0);
1873     }
1874 
1875     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1876   }
1877 
1878   size_t start_used_bytes = g1h->used();
1879   g1h->set_marking_complete();
1880 
1881   double count_end = os::elapsedTime();
1882   double this_final_counting_time = (count_end - start);
1883   _total_counting_time += this_final_counting_time;
1884 
1885   if (G1PrintRegionLivenessInfo) {
1886     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1887     _g1h->heap_region_iterate(&cl);
1888   }
1889 
1890   // Install newly created mark bitMap as "prev".
1891   swapMarkBitMaps();
1892 
1893   g1h->reset_gc_time_stamp();
1894 
1895   // Note end of marking in all heap regions.
1896   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1897   if (G1CollectedHeap::use_parallel_gc_threads()) {
1898     g1h->set_par_threads((int)n_workers);
1899     g1h->workers()->run_task(&g1_par_note_end_task);
1900     g1h->set_par_threads(0);
1901 
1902     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1903            "sanity check");
1904   } else {
1905     g1_par_note_end_task.work(0);
1906   }
1907   g1h->check_gc_time_stamps();
1908 
1909   if (!cleanup_list_is_empty()) {
1910     // The cleanup list is not empty, so we'll have to process it
1911     // concurrently. Notify anyone else that might be wanting free
1912     // regions that there will be more free regions coming soon.
1913     g1h->set_free_regions_coming();
1914   }
1915 
1916   // Do RS scrubbing before the record_concurrent_mark_cleanup_end() call
1917   // below, since it affects the metric by which we sort the heap regions.
1918   if (G1ScrubRemSets) {
1919     double rs_scrub_start = os::elapsedTime();
1920     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1921     if (G1CollectedHeap::use_parallel_gc_threads()) {
1922       g1h->set_par_threads((int)n_workers);
1923       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1924       g1h->set_par_threads(0);
1925 
1926       assert(g1h->check_heap_region_claim_values(
1927                                             HeapRegion::ScrubRemSetClaimValue),
1928              "sanity check");
1929     } else {
1930       g1_par_scrub_rs_task.work(0);
1931     }
1932 
1933     double rs_scrub_end = os::elapsedTime();
1934     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1935     _total_rs_scrub_time += this_rs_scrub_time;
1936   }
1937 
1938   // this will also free any regions totally full of garbage objects,
1939   // and sort the regions.
1940   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
1941 
1942   // Statistics.
1943   double end = os::elapsedTime();
1944   _cleanup_times.add((end - start) * 1000.0);
1945 
1946   if (G1Log::fine()) {
1947     g1h->print_size_transition(gclog_or_tty,
1948                                start_used_bytes,
1949                                g1h->used(),
1950                                g1h->capacity());
1951   }
1952 
1953   // Clean up will have freed any regions completely full of garbage.
1954   // Update the soft reference policy with the new heap occupancy.
1955   Universe::update_heap_info_at_gc();
1956 
1957   // We need to make this be a "collection" so any collection pause that
1958   // races with it goes around and waits for completeCleanup to finish.
1959   g1h->increment_total_collections();
1960 
1961   // We reclaimed old regions so we should calculate the sizes to make
1962   // sure we update the old gen/space data.
1963   g1h->g1mm()->update_sizes();
1964 
1965   if (VerifyDuringGC) {
1966     HandleMark hm;  // handle scope
1967     gclog_or_tty->print(" VerifyDuringGC:(after)");
1968     Universe::heap()->prepare_for_verify();
1969     Universe::verify(/* silent      */ false,
1970                      /* option      */ VerifyOption_G1UsePrevMarking);
1971   }
1972 
1973   g1h->verify_region_sets_optional();
1974 }
1975 
1976 void ConcurrentMark::completeCleanup() {
1977   if (has_aborted()) return;
1978 
1979   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1980 
1981   _cleanup_list.verify_optional();
1982   FreeRegionList tmp_free_list("Tmp Free List");
1983 
1984   if (G1ConcRegionFreeingVerbose) {
1985     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1986                            "cleanup list has %u entries",
1987                            _cleanup_list.length());
1988   }
1989 
1990   // No one else should be accessing the _cleanup_list at this point,
1991   // so it's not necessary to take any locks
1992   while (!_cleanup_list.is_empty()) {
1993     HeapRegion* hr = _cleanup_list.remove_head();
1994     assert(hr != NULL, "the list was not empty");
1995     hr->par_clear();
1996     tmp_free_list.add_as_tail(hr);
1997 
1998     // Instead of adding one region at a time to the secondary_free_list,
1999     // we accumulate them in the local list and move them a few at a
2000     // time. This also cuts down on the number of notify_all() calls
2001     // we do during this process. We'll also append the local list when
2002     // _cleanup_list is empty (which means we just removed the last
2003     // region from the _cleanup_list).
2004     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2005         _cleanup_list.is_empty()) {
2006       if (G1ConcRegionFreeingVerbose) {
2007         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2008                                "appending %u entries to the secondary_free_list, "
2009                                "cleanup list still has %u entries",
2010                                tmp_free_list.length(),
2011                                _cleanup_list.length());
2012       }
2013 
2014       {
2015         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2016         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
2017         SecondaryFreeList_lock->notify_all();
2018       }
2019 
2020       if (G1StressConcRegionFreeing) {
2021         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2022           os::sleep(Thread::current(), (jlong) 1, false);
2023         }
2024       }
2025     }
2026   }
2027   assert(tmp_free_list.is_empty(), "post-condition");
2028 }
2029 
2030 // Support closures for reference processing in G1
2031 
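     // An object is considered alive if it is outside the G1 reserved heap
     // or if it is not "ill", i.e. has not been found dead with respect to
     // the marking currently in progress.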
2032 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2033   HeapWord* addr = (HeapWord*)obj;
2034   return addr != NULL &&
2035          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2036 }
2037 
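     // 'Keep Alive' closure used by serial (single-threaded) reference
     // processing. It marks, counts and pushes onto the global mark stack
     // any unmarked in-heap object that is still reachable from a reference.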
2038 class G1CMKeepAliveClosure: public ExtendedOopClosure {
2039   G1CollectedHeap* _g1;
2040   ConcurrentMark*  _cm;
2041  public:
2042   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
2043     _g1(g1), _cm(cm) {
2044     assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
2045   }
2046 
2047   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2048   virtual void do_oop(      oop* p) { do_oop_work(p); }
2049 
2050   template <class T> void do_oop_work(T* p) {
2051     oop obj = oopDesc::load_decode_heap_oop(p);
2052     HeapWord* addr = (HeapWord*)obj;
2053 
2054     if (_cm->verbose_high()) {
2055       gclog_or_tty->print_cr("\t[0] we're looking at location "
2056                              "*"PTR_FORMAT" = "PTR_FORMAT,
2057                              p, (void*) obj);
2058     }
2059 
2060     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2061       _cm->mark_and_count(obj);
2062       _cm->mark_stack_push(obj);
2063     }
2064   }
2065 };
2066 
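     // 'Drain' closure used by serial reference processing. It drains the
     // entries on the global mark stack using the 'Keep Alive' closure above.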
2067 class G1CMDrainMarkingStackClosure: public VoidClosure {
2068   ConcurrentMark*               _cm;
2069   CMMarkStack*                  _markStack;
2070   G1CMKeepAliveClosure*         _oopClosure;
2071  public:
2072   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
2073                                G1CMKeepAliveClosure* oopClosure) :
2074     _cm(cm),
2075     _markStack(markStack),
2076     _oopClosure(oopClosure) { }
2077 
2078   void do_void() {
2079     _markStack->drain(_oopClosure, _cm->nextMarkBitMap(), false);
2080   }
2081 };
2082 
2083 // 'Keep Alive' closure used by parallel reference processing.
2084 // An instance of this closure is used in the parallel reference processing
2085 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2086 // the G1CMKeepAliveClosure as it is MT-safe. Also, reference objects are
2087 // only placed onto discovered ref lists once, so we can mark and push with no
2088 // need to check whether the object has already been marked. Using the
2089 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2090 // operating on the global mark stack. This means that an individual
2091 // worker would be doing lock-free pushes while it processes its own
2092 // discovered ref list followed by drain call. If the discovered ref lists
2093 // are unbalanced then this could cause interference with the other
2094 // workers. Using a CMTask (and its embedded local data structures)
2095 // avoids that potential interference.
2096 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2097   ConcurrentMark*  _cm;
2098   CMTask*          _task;
2099   int              _ref_counter_limit;
2100   int              _ref_counter;
2101  public:
2102   G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
2103     _cm(cm), _task(task),
2104     _ref_counter_limit(G1RefProcDrainInterval) {
2105     assert(_ref_counter_limit > 0, "sanity");
2106     _ref_counter = _ref_counter_limit;
2107   }
2108 
2109   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2110   virtual void do_oop(      oop* p) { do_oop_work(p); }
2111 
2112   template <class T> void do_oop_work(T* p) {
2113     if (!_cm->has_overflown()) {
2114       oop obj = oopDesc::load_decode_heap_oop(p);
2115       if (_cm->verbose_high()) {
2116         gclog_or_tty->print_cr("\t[%u] we're looking at location "
2117                                "*"PTR_FORMAT" = "PTR_FORMAT,
2118                                _task->worker_id(), p, (void*) obj);
2119       }
2120 
2121       _task->deal_with_reference(obj);
2122       _ref_counter--;
2123 
2124       if (_ref_counter == 0) {
2125         // We have dealt with _ref_counter_limit references, pushing them and objects
2126         // reachable from them on to the local stack (and possibly the global stack).
2127         // Call do_marking_step() to process these entries. We call the routine in a
2128         // loop, which we'll exit if there's nothing more to do (i.e. we're done
2129         // with the entries that we've pushed as a result of the deal_with_reference
2130         // calls above) or we overflow.
2131         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2132         // while there may still be some work to do. (See the comment at the
2133         // beginning of CMTask::do_marking_step() for those conditions - one of which
2134         // is reaching the specified time target.) It is only when
2135         // CMTask::do_marking_step() returns without setting the has_aborted() flag
2136         // that the marking has completed.
2137         do {
2138           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2139           _task->do_marking_step(mark_step_duration_ms,
2140                                  false /* do_stealing    */,
2141                                  false /* do_termination */);
2142         } while (_task->has_aborted() && !_cm->has_overflown());
2143         _ref_counter = _ref_counter_limit;
2144       }
2145     } else {
2146       if (_cm->verbose_high()) {
2147          gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2148       }
2149     }
2150   }
2151 };
2152 
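     // 'Drain' closure used by parallel reference processing. It completely
     // drains the local and global marking stacks of the given CMTask by
     // calling CMTask::do_marking_step() in a loop until the marking is done
     // or the global mark stack overflows.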
2153 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2154   ConcurrentMark* _cm;
2155   CMTask* _task;
2156  public:
2157   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2158     _cm(cm), _task(task) { }
2159 
2160   void do_void() {
2161     do {
2162       if (_cm->verbose_high()) {
2163         gclog_or_tty->print_cr("\t[%u] Drain: Calling do marking_step",
2164                                _task->worker_id());
2165       }
2166 
2167       // We call CMTask::do_marking_step() to completely drain the local and
2168       // global marking stacks. The routine is called in a loop, which we'll
2169       // exit if there's nothing more to do (i.e. we've completely drained the
2170       // entries that were pushed as a result of applying the
2171       // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2172       // lists above) or we overflow the global marking stack.
2173       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2174       // while there may still be some work to do. (See the comment at the
2175       // beginning of CMTask::do_marking_step() for those conditions - one of which
2176       // is reaching the specified time target.) It is only when
2177       // CMTask::do_marking_step() returns without setting the has_aborted() flag
2178       // that the marking has completed.
2179 
2180       _task->do_marking_step(1000000000.0 /* something very large */,
2181                              true /* do_stealing    */,
2182                              true /* do_termination */);
2183     } while (_task->has_aborted() && !_cm->has_overflown());
2184   }
2185 };
2186 
2187 // Implementation of AbstractRefProcTaskExecutor for parallel
2188 // reference processing at the end of G1 concurrent marking
2189 
2190 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2191 private:
2192   G1CollectedHeap* _g1h;
2193   ConcurrentMark*  _cm;
2194   WorkGang*        _workers;
2195   int              _active_workers;
2196 
2197 public:
2198   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2199                         ConcurrentMark* cm,
2200                         WorkGang* workers,
2201                         int n_workers) :
2202     _g1h(g1h), _cm(cm),
2203     _workers(workers), _active_workers(n_workers) { }
2204 
2205   // Executes the given task using concurrent marking worker threads.
2206   virtual void execute(ProcessTask& task);
2207   virtual void execute(EnqueueTask& task);
2208 };
2209 
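     // Gang task proxy that allows a reference processing ProcessTask to be
     // executed by the G1 worker threads; each worker processes its share of
     // the discovered references using its own CMTask.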
2210 class G1CMRefProcTaskProxy: public AbstractGangTask {
2211   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2212   ProcessTask&     _proc_task;
2213   G1CollectedHeap* _g1h;
2214   ConcurrentMark*  _cm;
2215 
2216 public:
2217   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2218                      G1CollectedHeap* g1h,
2219                      ConcurrentMark* cm) :
2220     AbstractGangTask("Process reference objects in parallel"),
2221     _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
2222 
2223   virtual void work(uint worker_id) {
2224     CMTask* marking_task = _cm->task(worker_id);
2225     G1CMIsAliveClosure g1_is_alive(_g1h);
2226     G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
2227     G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2228 
2229     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2230   }
2231 };
2232 
2233 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2234   assert(_workers != NULL, "Need parallel worker threads.");
2235 
2236   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2237 
2238   // We need to reset the phase for each task execution so that
2239   // the termination protocol of CMTask::do_marking_step works.
2240   _cm->set_phase(_active_workers, false /* concurrent */);
2241   _g1h->set_par_threads(_active_workers);
2242   _workers->run_task(&proc_task_proxy);
2243   _g1h->set_par_threads(0);
2244 }
2245 
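     // Gang task proxy that allows a reference EnqueueTask to be executed by
     // the G1 worker threads.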
2246 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2247   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2248   EnqueueTask& _enq_task;
2249 
2250 public:
2251   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2252     AbstractGangTask("Enqueue reference objects in parallel"),
2253     _enq_task(enq_task) { }
2254 
2255   virtual void work(uint worker_id) {
2256     _enq_task.work(worker_id);
2257   }
2258 };
2259 
2260 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2261   assert(_workers != NULL, "Need parallel worker threads.");
2262 
2263   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2264 
2265   _g1h->set_par_threads(_active_workers);
2266   _workers->run_task(&enq_task_proxy);
2267   _g1h->set_par_threads(0);
2268 }
2269 
2270 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2271   ResourceMark rm;
2272   HandleMark   hm;
2273 
2274   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2275 
2276   // Is alive closure.
2277   G1CMIsAliveClosure g1_is_alive(g1h);
2278 
2279   // Inner scope to exclude the cleaning of the string and symbol
2280   // tables from the displayed time.
2281   {
2282     if (G1Log::finer()) {
2283       gclog_or_tty->put(' ');
2284     }
2285     TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty);
2286 
2287     ReferenceProcessor* rp = g1h->ref_processor_cm();
2288 
2289     // See the comment in G1CollectedHeap::ref_processing_init()
2290     // about how reference processing currently works in G1.
2291 
2292     // Process weak references.
2293     rp->setup_policy(clear_all_soft_refs);
2294     assert(_markStack.isEmpty(), "mark stack should be empty");
2295 
2296     G1CMKeepAliveClosure g1_keep_alive(g1h, this);
2297     G1CMDrainMarkingStackClosure
2298       g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
2299 
2300     // We use the work gang from the G1CollectedHeap and we utilize all
2301     // the worker threads.
2302     uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
2303     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2304 
2305     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2306                                               g1h->workers(), active_workers);
2307 
2308     if (rp->processing_is_mt()) {
2309       // Set the degree of MT here.  If the discovery is done MT, there
2310       // may have been a different number of threads doing the discovery
2311       // and a different number of discovered lists may have Ref objects.
2312       // That is OK as long as the Reference lists are balanced (see
2313       // balance_all_queues() and balance_queues()).
2314       rp->set_active_mt_degree(active_workers);
2315 
2316       rp->process_discovered_references(&g1_is_alive,
2317                                       &g1_keep_alive,
2318                                       &g1_drain_mark_stack,
2319                                       &par_task_executor);
2320 
2321       // The work routines of the parallel keep_alive and drain_marking_stack
2322       // will set the has_overflown flag if we overflow the global marking
2323       // stack.
2324     } else {
2325       rp->process_discovered_references(&g1_is_alive,
2326                                         &g1_keep_alive,
2327                                         &g1_drain_mark_stack,
2328                                         NULL);
2329     }
2330 
2331     assert(_markStack.overflow() || _markStack.isEmpty(),
2332             "mark stack should be empty (unless it overflowed)");
2333     if (_markStack.overflow()) {
2334       // Should have been done already when we tried to push an
2335       // entry on to the global mark stack. But let's do it again.
2336       set_has_overflown();
2337     }
2338 
2339     if (rp->processing_is_mt()) {
2340       assert(rp->num_q() == active_workers, "why not");
2341       rp->enqueue_discovered_references(&par_task_executor);
2342     } else {
2343       rp->enqueue_discovered_references();
2344     }
2345 
2346     rp->verify_no_references_recorded();
2347     assert(!rp->discovery_enabled(), "Post condition");
2348   }
2349 
2350   // Now clean up stale oops in StringTable
2351   StringTable::unlink(&g1_is_alive);
2352   // Clean up unreferenced symbols in symbol table.
2353   SymbolTable::unlink();
2354 }
2355 
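     // Swap the roles of the two marking bitmaps: the bitmap that has just
     // been completed becomes the "prev" bitmap, and the old "prev" bitmap
     // becomes the "next" bitmap for the following marking cycle.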
2356 void ConcurrentMark::swapMarkBitMaps() {
2357   CMBitMapRO* temp = _prevMarkBitMap;
2358   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2359   _nextMarkBitMap  = (CMBitMap*)  temp;
2360 }
2361 
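     // Gang task used during the remark pause. Each active worker completes
     // the remaining marking work by repeatedly calling
     // CMTask::do_marking_step() with stealing and termination enabled.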
2362 class CMRemarkTask: public AbstractGangTask {
2363 private:
2364   ConcurrentMark *_cm;
2365 
2366 public:
2367   void work(uint worker_id) {
2368     // Since all available tasks are actually started, we should
2369     // only proceed if we're supposed to be active.
2370     if (worker_id < _cm->active_tasks()) {
2371       CMTask* task = _cm->task(worker_id);
2372       task->record_start_time();
2373       do {
2374         task->do_marking_step(1000000000.0 /* something very large */,
2375                               true /* do_stealing    */,
2376                               true /* do_termination */);
2377       } while (task->has_aborted() && !_cm->has_overflown());
2378       // If we overflow, then we do not want to restart. We instead
2379       // want to abort remark and do concurrent marking again.
2380       task->record_end_time();
2381     }
2382   }
2383 
2384   CMRemarkTask(ConcurrentMark* cm, int active_workers) :
2385     AbstractGangTask("Par Remark"), _cm(cm) {
2386     _cm->terminator()->reset_for_reuse(active_workers);
2387   }
2388 };
2389 
2390 void ConcurrentMark::checkpointRootsFinalWork() {
2391   ResourceMark rm;
2392   HandleMark   hm;
2393   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2394 
2395   g1h->ensure_parsability(false);
2396 
2397   if (G1CollectedHeap::use_parallel_gc_threads()) {
2398     G1CollectedHeap::StrongRootsScope srs(g1h);
2399     // this is remark, so we'll use up all active threads
2400     uint active_workers = g1h->workers()->active_workers();
2401     if (active_workers == 0) {
2402       assert(active_workers > 0, "Should have been set earlier");
2403       active_workers = (uint) ParallelGCThreads;
2404       g1h->workers()->set_active_workers(active_workers);
2405     }
2406     set_phase(active_workers, false /* concurrent */);
2407     // Leave _parallel_marking_threads at its
2408     // value originally calculated in the ConcurrentMark
2409     // constructor and pass values of the active workers
2410     // through the gang in the task.
2411 
2412     CMRemarkTask remarkTask(this, active_workers);
2413     g1h->set_par_threads(active_workers);
2414     g1h->workers()->run_task(&remarkTask);
2415     g1h->set_par_threads(0);
2416   } else {
2417     G1CollectedHeap::StrongRootsScope srs(g1h);
2418     // this is remark, so we'll use up all available threads
2419     uint active_workers = 1;
2420     set_phase(active_workers, false /* concurrent */);
2421 
2422     CMRemarkTask remarkTask(this, active_workers);
2423     // We will start all available threads, even if we decide that the
2424     // active_workers will be fewer. The extra ones will just bail out
2425     // immediately.
2426     remarkTask.work(0);
2427   }
2428   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2429   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2430 
2431   print_stats();
2432 
2433 #if VERIFY_OBJS_PROCESSED
2434   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2435     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2436                            _scan_obj_cl.objs_processed,
2437                            ThreadLocalObjQueue::objs_enqueued);
2438     guarantee(_scan_obj_cl.objs_processed ==
2439               ThreadLocalObjQueue::objs_enqueued,
2440               "Different number of objs processed and enqueued.");
2441   }
2442 #endif
2443 }
2444 
2445 #ifndef PRODUCT
2446 
2447 class PrintReachableOopClosure: public OopClosure {
2448 private:
2449   G1CollectedHeap* _g1h;
2450   outputStream*    _out;
2451   VerifyOption     _vo;
2452   bool             _all;
2453 
2454 public:
2455   PrintReachableOopClosure(outputStream* out,
2456                            VerifyOption  vo,
2457                            bool          all) :
2458     _g1h(G1CollectedHeap::heap()),
2459     _out(out), _vo(vo), _all(all) { }
2460 
2461   void do_oop(narrowOop* p) { do_oop_work(p); }
2462   void do_oop(      oop* p) { do_oop_work(p); }
2463 
2464   template <class T> void do_oop_work(T* p) {
2465     oop         obj = oopDesc::load_decode_heap_oop(p);
2466     const char* str = NULL;
2467     const char* str2 = "";
2468 
2469     if (obj == NULL) {
2470       str = "";
2471     } else if (!_g1h->is_in_g1_reserved(obj)) {
2472       str = " O";
2473     } else {
2474       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2475       guarantee(hr != NULL, "invariant");
2476       bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2477       bool marked = _g1h->is_marked(obj, _vo);
2478 
2479       if (over_tams) {
2480         str = " >";
2481         if (marked) {
2482           str2 = " AND MARKED";
2483         }
2484       } else if (marked) {
2485         str = " M";
2486       } else {
2487         str = " NOT";
2488       }
2489     }
2490 
2491     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2492                    p, (void*) obj, str, str2);
2493   }
2494 };
2495 
2496 class PrintReachableObjectClosure : public ObjectClosure {
2497 private:
2498   G1CollectedHeap* _g1h;
2499   outputStream*    _out;
2500   VerifyOption     _vo;
2501   bool             _all;
2502   HeapRegion*      _hr;
2503 
2504 public:
2505   PrintReachableObjectClosure(outputStream* out,
2506                               VerifyOption  vo,
2507                               bool          all,
2508                               HeapRegion*   hr) :
2509     _g1h(G1CollectedHeap::heap()),
2510     _out(out), _vo(vo), _all(all), _hr(hr) { }
2511 
2512   void do_object(oop o) {
2513     bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2514     bool marked = _g1h->is_marked(o, _vo);
2515     bool print_it = _all || over_tams || marked;
2516 
2517     if (print_it) {
2518       _out->print_cr(" "PTR_FORMAT"%s",
2519                      o, (over_tams) ? " >" : (marked) ? " M" : "");
2520       PrintReachableOopClosure oopCl(_out, _vo, _all);
2521       o->oop_iterate_no_header(&oopCl);
2522     }
2523   }
2524 };
2525 
2526 class PrintReachableRegionClosure : public HeapRegionClosure {
2527 private:
2528   G1CollectedHeap* _g1h;
2529   outputStream*    _out;
2530   VerifyOption     _vo;
2531   bool             _all;
2532 
2533 public:
2534   bool doHeapRegion(HeapRegion* hr) {
2535     HeapWord* b = hr->bottom();
2536     HeapWord* e = hr->end();
2537     HeapWord* t = hr->top();
2538     HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2539     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2540                    "TAMS: "PTR_FORMAT, b, e, t, p);
2541     _out->cr();
2542 
2543     HeapWord* from = b;
2544     HeapWord* to   = t;
2545 
2546     if (to > from) {
2547       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2548       _out->cr();
2549       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2550       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2551       _out->cr();
2552     }
2553 
2554     return false;
2555   }
2556 
2557   PrintReachableRegionClosure(outputStream* out,
2558                               VerifyOption  vo,
2559                               bool          all) :
2560     _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2561 };
2562 
2563 void ConcurrentMark::print_reachable(const char* str,
2564                                      VerifyOption vo,
2565                                      bool all) {
2566   gclog_or_tty->cr();
2567   gclog_or_tty->print_cr("== Doing heap dump... ");
2568 
2569   if (G1PrintReachableBaseFile == NULL) {
2570     gclog_or_tty->print_cr("  #### error: no base file defined");
2571     return;
2572   }
2573 
2574   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2575       (JVM_MAXPATHLEN - 1)) {
2576     gclog_or_tty->print_cr("  #### error: file name too long");
2577     return;
2578   }
2579 
2580   char file_name[JVM_MAXPATHLEN];
2581   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2582   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2583 
2584   fileStream fout(file_name);
2585   if (!fout.is_open()) {
2586     gclog_or_tty->print_cr("  #### error: could not open file");
2587     return;
2588   }
2589 
2590   outputStream* out = &fout;
2591   out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2592   out->cr();
2593 
2594   out->print_cr("--- ITERATING OVER REGIONS");
2595   out->cr();
2596   PrintReachableRegionClosure rcl(out, vo, all);
2597   _g1h->heap_region_iterate(&rcl);
2598   out->cr();
2599 
2600   gclog_or_tty->print_cr("  done");
2601   gclog_or_tty->flush();
2602 }
2603 
2604 #endif // PRODUCT
2605 
2606 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2607   // Note we are overriding the read-only view of the prev map here, via
2608   // the cast.
2609   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2610 }
2611 
2612 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2613   _nextMarkBitMap->clearRange(mr);
2614 }
2615 
2616 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
2617   clearRangePrevBitmap(mr);
2618   clearRangeNextBitmap(mr);
2619 }
2620 
2621 HeapRegion*
2622 ConcurrentMark::claim_region(uint worker_id) {
2623   // "checkpoint" the finger
2624   HeapWord* finger = _finger;
2625 
2626   // _heap_end will not change underneath our feet; it only changes at
2627   // yield points.
2628   while (finger < _heap_end) {
2629     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2630 
2631     // Note on how this code handles humongous regions. In the
2632     // normal case the finger will reach the start of a "starts
2633     // humongous" (SH) region. Its end will either be the end of the
2634     // last "continues humongous" (CH) region in the sequence, or the
2635     // standard end of the SH region (if the SH is the only region in
2636     // the sequence). That way claim_region() will skip over the CH
2637     // regions. However, there is a subtle race between a CM thread
2638     // executing this method and a mutator thread doing a humongous
2639     // object allocation. The two are not mutually exclusive as the CM
2640     // thread does not need to hold the Heap_lock when it gets
2641     // here. So there is a chance that claim_region() will come across
2642     // a free region that's in the process of becoming a SH or a CH
2643     // region. In the former case, it will either
2644     //   a) Miss the update to the region's end, in which case it will
2645     //      visit every subsequent CH region, will find their bitmaps
2646     //      empty, and do nothing, or
2647     //   b) Will observe the update of the region's end (in which case
2648     //      it will skip the subsequent CH regions).
2649     // If it comes across a region that suddenly becomes CH, the
2650     // scenario will be similar to b). So, the race between
2651     // claim_region() and a humongous object allocation might force us
2652     // to do a bit of unnecessary work (due to some unnecessary bitmap
2653     // iterations) but it should not introduce any correctness issues.
2654     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
2655     HeapWord*   bottom        = curr_region->bottom();
2656     HeapWord*   end           = curr_region->end();
2657     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2658 
2659     if (verbose_low()) {
2660       gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2661                              "["PTR_FORMAT", "PTR_FORMAT"), "
2662                              "limit = "PTR_FORMAT,
2663                              worker_id, curr_region, bottom, end, limit);
2664     }
2665 
2666     // Is the gap between reading the finger and doing the CAS too long?
2667     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2668     if (res == finger) {
2669       // we succeeded
2670 
2671       // notice that _finger == end cannot be guaranteed here since
2672       // someone else might have moved the finger even further
2673       assert(_finger >= end, "the finger should have moved forward");
2674 
2675       if (verbose_low()) {
2676         gclog_or_tty->print_cr("[%u] we were successful with region = "
2677                                PTR_FORMAT, worker_id, curr_region);
2678       }
2679 
2680       if (limit > bottom) {
2681         if (verbose_low()) {
2682           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
2683                                  "returning it ", worker_id, curr_region);
2684         }
2685         return curr_region;
2686       } else {
2687         assert(limit == bottom,
2688                "the region limit should be at bottom");
2689         if (verbose_low()) {
2690           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
2691                                  "returning NULL", worker_id, curr_region);
2692         }
2693         // we return NULL and the caller should try calling
2694         // claim_region() again.
2695         return NULL;
2696       }
2697     } else {
2698       assert(_finger > finger, "the finger should have moved forward");
2699       if (verbose_low()) {
2700         gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
2701                                "global finger = "PTR_FORMAT", "
2702                                "our finger = "PTR_FORMAT,
2703                                worker_id, _finger, finger);
2704       }
2705 
2706       // read it again
2707       finger = _finger;
2708     }
2709   }
2710 
2711   return NULL;
2712 }
2713 
2714 #ifndef PRODUCT
2715 enum VerifyNoCSetOopsPhase {
2716   VerifyNoCSetOopsStack,
2717   VerifyNoCSetOopsQueues,
2718   VerifyNoCSetOopsSATBCompleted,
2719   VerifyNoCSetOopsSATBThread
2720 };
2721 
2722 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
2723 private:
2724   G1CollectedHeap* _g1h;
2725   VerifyNoCSetOopsPhase _phase;
2726   int _info;
2727 
2728   const char* phase_str() {
2729     switch (_phase) {
2730     case VerifyNoCSetOopsStack:         return "Stack";
2731     case VerifyNoCSetOopsQueues:        return "Queue";
2732     case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
2733     case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
2734     default:                            ShouldNotReachHere();
2735     }
2736     return NULL;
2737   }
2738 
2739   void do_object_work(oop obj) {
2740     guarantee(!_g1h->obj_in_cs(obj),
2741               err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2742                       (void*) obj, phase_str(), _info));
2743   }
2744 
2745 public:
2746   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2747 
2748   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2749     _phase = phase;
2750     _info = info;
2751   }
2752 
2753   virtual void do_oop(oop* p) {
2754     oop obj = oopDesc::load_decode_heap_oop(p);
2755     do_object_work(obj);
2756   }
2757 
2758   virtual void do_oop(narrowOop* p) {
2759     // We should not come across narrow oops while scanning marking
2760     // stacks and SATB buffers.
2761     ShouldNotReachHere();
2762   }
2763 
2764   virtual void do_object(oop obj) {
2765     do_object_work(obj);
2766   }
2767 };
2768 
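     // Verify that no entries on the global mark stack, the task queues or
     // the SATB buffers point into the collection set, and sanity check the
     // global and per-task fingers. Only used in non-product builds.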
2769 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
2770                                          bool verify_enqueued_buffers,
2771                                          bool verify_thread_buffers,
2772                                          bool verify_fingers) {
2773   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2774   if (!G1CollectedHeap::heap()->mark_in_progress()) {
2775     return;
2776   }
2777 
2778   VerifyNoCSetOopsClosure cl;
2779 
2780   if (verify_stacks) {
2781     // Verify entries on the global mark stack
2782     cl.set_phase(VerifyNoCSetOopsStack);
2783     _markStack.oops_do(&cl);
2784 
2785     // Verify entries on the task queues
2786     for (uint i = 0; i < _max_worker_id; i += 1) {
2787       cl.set_phase(VerifyNoCSetOopsQueues, i);
2788       OopTaskQueue* queue = _task_queues->queue(i);
2789       queue->oops_do(&cl);
2790     }
2791   }
2792 
2793   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
2794 
2795   // Verify entries on the enqueued SATB buffers
2796   if (verify_enqueued_buffers) {
2797     cl.set_phase(VerifyNoCSetOopsSATBCompleted);
2798     satb_qs.iterate_completed_buffers_read_only(&cl);
2799   }
2800 
2801   // Verify entries on the per-thread SATB buffers
2802   if (verify_thread_buffers) {
2803     cl.set_phase(VerifyNoCSetOopsSATBThread);
2804     satb_qs.iterate_thread_buffers_read_only(&cl);
2805   }
2806 
2807   if (verify_fingers) {
2808     // Verify the global finger
2809     HeapWord* global_finger = finger();
2810     if (global_finger != NULL && global_finger < _heap_end) {
2811       // The global finger always points to a heap region boundary. We
2812       // use heap_region_containing_raw() to get the containing region
2813       // given that the global finger could be pointing to a free region
2814       // which subsequently becomes a continues humongous region. If that
2815       // happens, heap_region_containing() will return the bottom of the
2816       // corresponding starts humongous region and the check below will
2817       // not hold any more.
2818       HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2819       guarantee(global_finger == global_hr->bottom(),
2820                 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
2821                         global_finger, HR_FORMAT_PARAMS(global_hr)));
2822     }
2823 
2824     // Verify the task fingers
2825     assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2826     for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
2827       CMTask* task = _tasks[i];
2828       HeapWord* task_finger = task->finger();
2829       if (task_finger != NULL && task_finger < _heap_end) {
2830         // See above note on the global finger verification.
2831         HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2832         guarantee(task_finger == task_hr->bottom() ||
2833                   !task_hr->in_collection_set(),
2834                   err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
2835                           task_finger, HR_FORMAT_PARAMS(task_hr)));
2836       }
2837     }
2838   }
2839 }
2840 #endif // PRODUCT
2841 
2842 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2843   _markStack.setEmpty();
2844   _markStack.clear_overflow();
2845   if (clear_overflow) {
2846     clear_has_overflown();
2847   } else {
2848     assert(has_overflown(), "pre-condition");
2849   }
2850   _finger = _heap_start;
2851 
2852   for (uint i = 0; i < _max_worker_id; ++i) {
2853     OopTaskQueue* queue = _task_queues->queue(i);
2854     queue->set_empty();
2855   }
2856 }
2857 
2858 // Aggregate the counting data that was constructed concurrently
2859 // with marking.
2860 class AggregateCountDataHRClosure: public HeapRegionClosure {
2861   G1CollectedHeap* _g1h;
2862   ConcurrentMark* _cm;
2863   CardTableModRefBS* _ct_bs;
2864   BitMap* _cm_card_bm;
2865   uint _max_worker_id;
2866 
2867  public:
2868   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2869                               BitMap* cm_card_bm,
2870                               uint max_worker_id) :
2871     _g1h(g1h), _cm(g1h->concurrent_mark()),
2872     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
2873     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2874 
2875   bool doHeapRegion(HeapRegion* hr) {
2876     if (hr->continuesHumongous()) {
2877       // We will ignore these here and process them when their
2878       // associated "starts humongous" region is processed.
2879       // Note that we cannot rely on their associated
2880       // "starts humongous" region to have their bit set to 1
2881       // since, due to the region chunking in the parallel region
2882       // iteration, a "continues humongous" region might be visited
2883       // before its associated "starts humongous".
2884       return false;
2885     }
2886 
2887     HeapWord* start = hr->bottom();
2888     HeapWord* limit = hr->next_top_at_mark_start();
2889     HeapWord* end = hr->end();
2890 
2891     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2892            err_msg("Preconditions not met - "
2893                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
2894                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
2895                    start, limit, hr->top(), hr->end()));
2896 
2897     assert(hr->next_marked_bytes() == 0, "Precondition");
2898 
2899     if (start == limit) {
2900       // NTAMS of this region has not been set so nothing to do.
2901       return false;
2902     }
2903 
2904     // 'start' should be in the heap.
2905     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2906     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2907     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2908 
2909     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2910     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2911     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2912 
2913     // If ntams is not card aligned then we bump the card bitmap index
2914     // for limit so that we get all the cards spanned by
2915     // the object ending at ntams.
2916     // Note: if this is the last region in the heap then ntams
2917     // could actually be just beyond the end of the heap;
2918     // limit_idx will then correspond to a (non-existent) card
2919     // that is also outside the heap.
2920     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2921       limit_idx += 1;
2922     }
2923 
2924     assert(limit_idx <= end_idx, "or else use atomics");
2925 
2926     // Aggregate the "stripe" in the count data associated with hr.
2927     uint hrs_index = hr->hrs_index();
2928     size_t marked_bytes = 0;
2929 
2930     for (uint i = 0; i < _max_worker_id; i += 1) {
2931       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2932       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2933 
2934       // Fetch the marked_bytes in this region for task i and
2935       // add it to the running total for this region.
2936       marked_bytes += marked_bytes_array[hrs_index];
2937 
2938       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2939       // into the global card bitmap.
2940       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2941 
2942       while (scan_idx < limit_idx) {
2943         assert(task_card_bm->at(scan_idx) == true, "should be");
2944         _cm_card_bm->set_bit(scan_idx);
2945         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2946 
2947         // BitMap::get_next_one_offset() can handle the case when
2948         // its left_offset parameter is greater than its right_offset
2949         // parameter. It does, however, have an early exit if
2950         // left_offset == right_offset. So let's limit the value
2951         // passed in for left offset here.
2952         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2953         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2954       }
2955     }
2956 
2957     // Update the marked bytes for this region.
2958     hr->add_to_marked_bytes(marked_bytes);
2959 
2960     // Next heap region
2961     return false;
2962   }
2963 };
2964 
2965 class G1AggregateCountDataTask: public AbstractGangTask {
2966 protected:
2967   G1CollectedHeap* _g1h;
2968   ConcurrentMark* _cm;
2969   BitMap* _cm_card_bm;
2970   uint _max_worker_id;
2971   int _active_workers;
2972 
2973 public:
2974   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2975                            ConcurrentMark* cm,
2976                            BitMap* cm_card_bm,
2977                            uint max_worker_id,
2978                            int n_workers) :
2979     AbstractGangTask("Count Aggregation"),
2980     _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2981     _max_worker_id(max_worker_id),
2982     _active_workers(n_workers) { }
2983 
2984   void work(uint worker_id) {
2985     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
2986 
2987     if (G1CollectedHeap::use_parallel_gc_threads()) {
2988       _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
2989                                             _active_workers,
2990                                             HeapRegion::AggregateCountClaimValue);
2991     } else {
2992       _g1h->heap_region_iterate(&cl);
2993     }
2994   }
2995 };
2996 
2997 
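     // Aggregate the liveness counting data that the workers built up
     // concurrently with marking: sum the per-worker marked-bytes totals
     // into each region and OR the per-worker card bitmaps into the global
     // card bitmap, using the worker gang when parallel GC threads are in
     // use and the calling thread otherwise.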
2998 void ConcurrentMark::aggregate_count_data() {
2999   int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3000                         _g1h->workers()->active_workers() :
3001                         1);
3002 
3003   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3004                                            _max_worker_id, n_workers);
3005 
3006   if (G1CollectedHeap::use_parallel_gc_threads()) {
3007     assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3008            "sanity check");
3009     _g1h->set_par_threads(n_workers);
3010     _g1h->workers()->run_task(&g1_par_agg_task);
3011     _g1h->set_par_threads(0);
3012 
3013     assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3014            "sanity check");
3015     _g1h->reset_heap_region_claim_values();
3016   } else {
3017     g1_par_agg_task.work(0);
3018   }
3019 }
3020 
3021 // Clear the per-worker arrays used to store the per-region counting data
3022 void ConcurrentMark::clear_all_count_data() {
3023   // Clear the global card bitmap - it will be filled during
3024   // liveness count aggregation (during remark) and the
3025   // final counting task.
3026   _card_bm.clear();
3027 
3028   // Clear the global region bitmap - it will be filled as part
3029   // of the final counting task.
3030   _region_bm.clear();
3031 
3032   uint max_regions = _g1h->max_regions();
3033   assert(_max_worker_id > 0, "uninitialized");
3034 
3035   for (uint i = 0; i < _max_worker_id; i += 1) {
3036     BitMap* task_card_bm = count_card_bitmap_for(i);
3037     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3038 
3039     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3040     assert(marked_bytes_array != NULL, "uninitialized");
3041 
3042     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3043     task_card_bm->clear();
3044   }
3045 }
3046 
3047 void ConcurrentMark::print_stats() {
3048   if (verbose_stats()) {
3049     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3050     for (size_t i = 0; i < _active_tasks; ++i) {
3051       _tasks[i]->print_stats();
3052       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3053     }
3054   }
3055 }
3056 
3057 // abandon current marking iteration due to a Full GC
3058 void ConcurrentMark::abort() {
3059   // Clear all marks to force marking thread to do nothing
3060   _nextMarkBitMap->clearAll();
3061   // Clear the liveness counting data
3062   clear_all_count_data();
3063   // Empty mark stack
3064   clear_marking_state();
3065   for (uint i = 0; i < _max_worker_id; ++i) {
3066     _tasks[i]->clear_region_fields();
3067   }
3068   _has_aborted = true;
3069 
3070   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3071   satb_mq_set.abandon_partial_marking();
3072   // This can be called either during or outside marking, we'll read
3073   // the expected_active value from the SATB queue set.
3074   satb_mq_set.set_active_all_threads(
3075                                  false, /* new active value */
3076                                  satb_mq_set.is_active() /* expected_active */);
3077 }
3078 
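     // Helper for print_summary_info(): prints the sample count, total and
     // average time for the given phase and, when at least one sample has
     // been recorded, the standard deviation and maximum as well.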
3079 static void print_ms_time_info(const char* prefix, const char* name,
3080                                NumberSeq& ns) {
3081   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3082                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3083   if (ns.num() > 0) {
3084     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3085                            prefix, ns.sd(), ns.maximum());
3086   }
3087 }
3088 
3089 void ConcurrentMark::print_summary_info() {
3090   gclog_or_tty->print_cr(" Concurrent marking:");
3091   print_ms_time_info("  ", "init marks", _init_times);
3092   print_ms_time_info("  ", "remarks", _remark_times);
3093   {
3094     print_ms_time_info("     ", "final marks", _remark_mark_times);
3095     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3096 
3097   }
3098   print_ms_time_info("  ", "cleanups", _cleanup_times);
3099   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3100                          _total_counting_time,
3101                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3102                           (double)_cleanup_times.num()
3103                          : 0.0));
3104   if (G1ScrubRemSets) {
3105     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3106                            _total_rs_scrub_time,
3107                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3108                             (double)_cleanup_times.num()
3109                            : 0.0));
3110   }
3111   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3112                          (_init_times.sum() + _remark_times.sum() +
3113                           _cleanup_times.sum())/1000.0);
3114   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3115                 "(%8.2f s marking).",
3116                 cmThread()->vtime_accum(),
3117                 cmThread()->vtime_mark_accum());
3118 }
3119 
3120 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3121   _parallel_workers->print_worker_threads_on(st);
3122 }
3123 
3124 // We take a break if someone is trying to stop the world.
3125 bool ConcurrentMark::do_yield_check(uint worker_id) {
3126   if (should_yield()) {
3127     if (worker_id == 0) {
3128       _g1h->g1_policy()->record_concurrent_pause();
3129     }
3130     cmThread()->yield();
3131     return true;
3132   } else {
3133     return false;
3134   }
3135 }
3136 
3137 bool ConcurrentMark::should_yield() {
3138   return cmThread()->should_yield();
3139 }
3140 
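     // Returns whether the card spanning the given address is set in the
     // global liveness card bitmap. The address is converted to a card
     // index by taking its byte offset from the start of the reserved heap
     // and shifting right by the card size shift.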
3141 bool ConcurrentMark::containing_card_is_marked(void* p) {
3142   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3143   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3144 }
3145 
3146 bool ConcurrentMark::containing_cards_are_marked(void* start,
3147                                                  void* last) {
3148   return containing_card_is_marked(start) &&
3149          containing_card_is_marked(last);
3150 }
3151 
3152 #ifndef PRODUCT
3153 // for debugging purposes
3154 void ConcurrentMark::print_finger() {
3155   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3156                          _heap_start, _heap_end, _finger);
3157   for (uint i = 0; i < _max_worker_id; ++i) {
3158     gclog_or_tty->print("   %u: "PTR_FORMAT, i, _tasks[i]->finger());
3159   }
3160   gclog_or_tty->print_cr("");
3161 }
3162 #endif
3163 
3164 void CMTask::scan_object(oop obj) {
3165   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3166 
3167   if (_cm->verbose_high()) {
3168     gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
3169                            _worker_id, (void*) obj);
3170   }
3171 
3172   size_t obj_size = obj->size();
3173   _words_scanned += obj_size;
3174 
3175   obj->oop_iterate(_cm_oop_closure);
3176   statsOnly( ++_objs_scanned );
3177   check_limits();
3178 }
3179 
3180 // Closure for iteration over bitmaps
3181 class CMBitMapClosure : public BitMapClosure {
3182 private:
3183   // the bitmap that is being iterated over
3184   CMBitMap*                   _nextMarkBitMap;
3185   ConcurrentMark*             _cm;
3186   CMTask*                     _task;
3187 
3188 public:
3189   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3190     _nextMarkBitMap(nextMarkBitMap), _cm(cm), _task(task) { }
3191 
3192   bool do_bit(size_t offset) {
3193     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3194     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3195     assert( addr < _cm->finger(), "invariant");
3196 
3197     statsOnly( _task->increase_objs_found_on_bitmap() );
3198     assert(addr >= _task->finger(), "invariant");
3199 
3200     // We move the task's local finger along.
3201     _task->move_finger_to(addr);
3202 
3203     _task->scan_object(oop(addr));
3204     // we only partially drain the local queue and global stack
3205     _task->drain_local_queue(true);
3206     _task->drain_global_stack(true);
3207 
3208     // if the has_aborted flag has been raised, we need to bail out of
3209     // the iteration
3210     return !_task->has_aborted();
3211   }
3212 };
3213 
3214 // Closure for iterating over objects, currently only used for
3215 // processing SATB buffers.
3216 class CMObjectClosure : public ObjectClosure {
3217 private:
3218   CMTask* _task;
3219 
3220 public:
3221   void do_object(oop obj) {
3222     _task->deal_with_reference(obj);
3223   }
3224 
3225   CMObjectClosure(CMTask* task) : _task(task) { }
3226 };
3227 
3228 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3229                                ConcurrentMark* cm,
3230                                CMTask* task)
3231   : _g1h(g1h), _cm(cm), _task(task) {
3232   assert(_ref_processor == NULL, "should be initialized to NULL");
3233 
3234   if (G1UseConcMarkReferenceProcessing) {
3235     _ref_processor = g1h->ref_processor_cm();
3236     assert(_ref_processor != NULL, "should not be NULL");
3237   }
3238 }
3239 
3240 void CMTask::setup_for_region(HeapRegion* hr) {
3241   // Separated the asserts so that we know which one fires.
3242   assert(hr != NULL,
3243         "claim_region() should have filtered out continues humongous regions");
3244   assert(!hr->continuesHumongous(),
3245         "claim_region() should have filtered out continues humongous regions");
3246 
3247   if (_cm->verbose_low()) {
3248     gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3249                            _worker_id, hr);
3250   }
3251 
3252   _curr_region  = hr;
3253   _finger       = hr->bottom();
3254   update_region_limit();
3255 }
3256 
3257 void CMTask::update_region_limit() {
3258   HeapRegion* hr            = _curr_region;
3259   HeapWord* bottom          = hr->bottom();
3260   HeapWord* limit           = hr->next_top_at_mark_start();
3261 
3262   if (limit == bottom) {
3263     if (_cm->verbose_low()) {
3264       gclog_or_tty->print_cr("[%u] found an empty region "
3265                              "["PTR_FORMAT", "PTR_FORMAT")",
3266                              _worker_id, bottom, limit);
3267     }
3268     // The region was collected underneath our feet.
3269     // We set the finger to bottom to ensure that the bitmap
3270     // iteration that will follow this will not do anything.
3271     // (this is not a condition that holds when we set the region up,
3272     // as the region is not supposed to be empty in the first place)
3273     _finger = bottom;
3274   } else if (limit >= _region_limit) {
3275     assert(limit >= _finger, "peace of mind");
3276   } else {
3277     assert(limit < _region_limit, "only way to get here");
3278     // This can happen under some pretty unusual circumstances.  An
3279     // evacuation pause empties the region underneath our feet (NTAMS
3280     // at bottom). We then do some allocation in the region (NTAMS
3281     // stays at bottom), followed by the region being used as a GC
3282     // alloc region (NTAMS will move to top() and the objects
3283     // originally below it will be grayed). All objects now marked in
3284     // the region are explicitly grayed, if below the global finger,
3285     // and in fact we do not need to scan anything else. So, we simply
3286     // set _finger to be limit to ensure that the bitmap iteration
3287     // doesn't do anything.
3288     _finger = limit;
3289   }
3290 
3291   _region_limit = limit;
3292 }
3293 
3294 void CMTask::giveup_current_region() {
3295   assert(_curr_region != NULL, "invariant");
3296   if (_cm->verbose_low()) {
3297     gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3298                            _worker_id, _curr_region);
3299   }
3300   clear_region_fields();
3301 }
3302 
3303 void CMTask::clear_region_fields() {
3304   // Set the three fields to values indicating that we're not
3305   // holding on to a region.
3306   _curr_region   = NULL;
3307   _finger        = NULL;
3308   _region_limit  = NULL;
3309 }
3310 
3311 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3312   if (cm_oop_closure == NULL) {
3313     assert(_cm_oop_closure != NULL, "invariant");
3314   } else {
3315     assert(_cm_oop_closure == NULL, "invariant");
3316   }
3317   _cm_oop_closure = cm_oop_closure;
3318 }
3319 
3320 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3321   guarantee(nextMarkBitMap != NULL, "invariant");
3322 
3323   if (_cm->verbose_low()) {
3324     gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3325   }
3326 
3327   _nextMarkBitMap                = nextMarkBitMap;
3328   clear_region_fields();
3329 
3330   _calls                         = 0;
3331   _elapsed_time_ms               = 0.0;
3332   _termination_time_ms           = 0.0;
3333   _termination_start_time_ms     = 0.0;
3334 
3335 #if _MARKING_STATS_
3336   _local_pushes                  = 0;
3337   _local_pops                    = 0;
3338   _local_max_size                = 0;
3339   _objs_scanned                  = 0;
3340   _global_pushes                 = 0;
3341   _global_pops                   = 0;
3342   _global_max_size               = 0;
3343   _global_transfers_to           = 0;
3344   _global_transfers_from         = 0;
3345   _regions_claimed               = 0;
3346   _objs_found_on_bitmap          = 0;
3347   _satb_buffers_processed        = 0;
3348   _steal_attempts                = 0;
3349   _steals                        = 0;
3350   _aborted                       = 0;
3351   _aborted_overflow              = 0;
3352   _aborted_cm_aborted            = 0;
3353   _aborted_yield                 = 0;
3354   _aborted_timed_out             = 0;
3355   _aborted_satb                  = 0;
3356   _aborted_termination           = 0;
3357 #endif // _MARKING_STATS_
3358 }
3359 
3360 bool CMTask::should_exit_termination() {
3361   regular_clock_call();
3362   // This is called when we are in the termination protocol. We should
3363   // quit if, for some reason, this task wants to abort or the global
3364   // stack is not empty (this means that we can get work from it).
3365   return !_cm->mark_stack_empty() || has_aborted();
3366 }
3367 
3368 void CMTask::reached_limit() {
3369   assert(_words_scanned >= _words_scanned_limit ||
3370          _refs_reached >= _refs_reached_limit ,
3371          "shouldn't have been called otherwise");
3372   regular_clock_call();
3373 }
3374 
3375 void CMTask::regular_clock_call() {
3376   if (has_aborted()) return;
3377 
3378   // First, we need to recalculate the words scanned and refs reached
3379   // limits for the next clock call.
3380   recalculate_limits();
3381 
3382   // During the regular clock call we do the following
3383 
3384   // (1) If an overflow has been flagged, then we abort.
3385   if (_cm->has_overflown()) {
3386     set_has_aborted();
3387     return;
3388   }
3389 
3390   // If we are not concurrent (i.e. we're doing remark) we don't need
3391   // to check anything else. The other steps are only needed during
3392   // the concurrent marking phase.
3393   if (!concurrent()) return;
3394 
3395   // (2) If marking has been aborted for Full GC, then we also abort.
3396   if (_cm->has_aborted()) {
3397     set_has_aborted();
3398     statsOnly( ++_aborted_cm_aborted );
3399     return;
3400   }
3401 
3402   double curr_time_ms = os::elapsedVTime() * 1000.0;
3403 
3404   // (3) If marking stats are enabled, then we update the clock call statistics.
3405 #if _MARKING_STATS_
3406   if (_words_scanned >= _words_scanned_limit) {
3407     ++_clock_due_to_scanning;
3408   }
3409   if (_refs_reached >= _refs_reached_limit) {
3410     ++_clock_due_to_marking;
3411   }
3412 
3413   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3414   _interval_start_time_ms = curr_time_ms;
3415   _all_clock_intervals_ms.add(last_interval_ms);
3416 
3417   if (_cm->verbose_medium()) {
3418       gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3419                         "scanned = %d%s, refs reached = %d%s",
3420                         _worker_id, last_interval_ms,
3421                         _words_scanned,
3422                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3423                         _refs_reached,
3424                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3425   }
3426 #endif // _MARKING_STATS_
3427 
3428   // (4) We check whether we should yield. If we have to, then we abort.
3429   if (_cm->should_yield()) {
3430     // We should yield. To do this we abort the task. The caller is
3431     // responsible for yielding.
3432     set_has_aborted();
3433     statsOnly( ++_aborted_yield );
3434     return;
3435   }
3436 
3437   // (5) We check whether we've reached our time quota. If we have,
3438   // then we abort.
3439   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3440   if (elapsed_time_ms > _time_target_ms) {
3441     set_has_aborted();
3442     _has_timed_out = true;
3443     statsOnly( ++_aborted_timed_out );
3444     return;
3445   }
3446 
3447   // (6) Finally, we check whether there are enough completed SATB
3448   // buffers available for processing. If there are, we abort.
3449   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3450   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3451     if (_cm->verbose_low()) {
3452       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3453                              _worker_id);
3454     }
3455     // We do need to process SATB buffers, so we'll abort and restart
3456     // the marking task to do so.
3457     set_has_aborted();
3458     statsOnly( ++_aborted_satb );
3459     return;
3460   }
3461 }
3462 
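     // Reset both limits to one full period beyond the current counts; the
     // regular clock will fire again once either the words-scanned or the
     // refs-reached count crosses its recalculated limit.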
3463 void CMTask::recalculate_limits() {
3464   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3465   _words_scanned_limit      = _real_words_scanned_limit;
3466 
3467   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3468   _refs_reached_limit       = _real_refs_reached_limit;
3469 }
3470 
3471 void CMTask::decrease_limits() {
3472   // This is called when we believe that we're going to do an infrequent
3473   // operation which will increase the per byte scanned cost (i.e. move
3474   // entries to/from the global stack). It basically tries to decrease the
3475   // scanning limit so that the clock is called earlier.
3476 
3477   if (_cm->verbose_medium()) {
3478     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3479   }
3480 
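       // Pull both limits back by three quarters of a period so that the
       // next regular clock call happens after at most a quarter of the
       // usual amount of additional work.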
3481   _words_scanned_limit = _real_words_scanned_limit -
3482     3 * words_scanned_period / 4;
3483   _refs_reached_limit  = _real_refs_reached_limit -
3484     3 * refs_reached_period / 4;
3485 }
3486 
3487 void CMTask::move_entries_to_global_stack() {
3488   // local array where we'll store the entries that will be popped
3489   // from the local queue
3490   oop buffer[global_stack_transfer_size];
3491 
3492   int n = 0;
3493   oop obj;
3494   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3495     buffer[n] = obj;
3496     ++n;
3497   }
3498 
3499   if (n > 0) {
3500     // we popped at least one entry from the local queue
3501 
3502     statsOnly( ++_global_transfers_to; _local_pops += n );
3503 
3504     if (!_cm->mark_stack_push(buffer, n)) {
3505       if (_cm->verbose_low()) {
3506         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3507                                _worker_id);
3508       }
3509       set_has_aborted();
3510     } else {
3511       // the transfer was successful
3512 
3513       if (_cm->verbose_medium()) {
3514         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3515                                _worker_id, n);
3516       }
3517       statsOnly( int tmp_size = _cm->mark_stack_size();
3518                  if (tmp_size > _global_max_size) {
3519                    _global_max_size = tmp_size;
3520                  }
3521                  _global_pushes += n );
3522     }
3523   }
3524 
3525   // this operation was quite expensive, so decrease the limits
3526   decrease_limits();
3527 }
3528 
3529 void CMTask::get_entries_from_global_stack() {
3530   // local array where we'll store the entries that will be popped
3531   // from the global stack.
3532   oop buffer[global_stack_transfer_size];
3533   int n;
3534   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3535   assert(n <= global_stack_transfer_size,
3536          "we should not pop more than the given limit");
3537   if (n > 0) {
3538     // yes, we did actually pop at least one entry
3539 
3540     statsOnly( ++_global_transfers_from; _global_pops += n );
3541     if (_cm->verbose_medium()) {
3542       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3543                              _worker_id, n);
3544     }
3545     for (int i = 0; i < n; ++i) {
3546       bool success = _task_queue->push(buffer[i]);
3547       // We only call this when the local queue is empty or under a
3548       // given target limit. So, we do not expect this push to fail.
3549       assert(success, "invariant");
3550     }
3551 
3552     statsOnly( int tmp_size = _task_queue->size();
3553                if (tmp_size > _local_max_size) {
3554                  _local_max_size = tmp_size;
3555                }
3556                _local_pushes += n );
3557   }
3558 
3559   // this operation was quite expensive, so decrease the limits
3560   decrease_limits();
3561 }
3562 
3563 void CMTask::drain_local_queue(bool partially) {
3564   if (has_aborted()) return;
3565 
3566   // Decide what the target size is, depending on whether we're going to
3567   // drain it partially (so that other tasks can steal if they run out
3568   // of things to do) or totally (at the very end).
3569   size_t target_size;
3570   if (partially) {
3571     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3572   } else {
3573     target_size = 0;
3574   }
3575 
3576   if (_task_queue->size() > target_size) {
3577     if (_cm->verbose_high()) {
3578       gclog_or_tty->print_cr("[%u] draining local queue, target size = %d",
3579                              _worker_id, target_size);
3580     }
3581 
3582     oop obj;
3583     bool ret = _task_queue->pop_local(obj);
3584     while (ret) {
3585       statsOnly( ++_local_pops );
3586 
3587       if (_cm->verbose_high()) {
3588         gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3589                                (void*) obj);
3590       }
3591 
3592       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3593       assert(!_g1h->is_on_master_free_list(
3594                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3595 
3596       scan_object(obj);
3597 
3598       if (_task_queue->size() <= target_size || has_aborted()) {
3599         ret = false;
3600       } else {
3601         ret = _task_queue->pop_local(obj);
3602       }
3603     }
3604 
3605     if (_cm->verbose_high()) {
3606       gclog_or_tty->print_cr("[%u] drained local queue, size = %d",
3607                              _worker_id, _task_queue->size());
3608     }
3609   }
3610 }
3611 
3612 void CMTask::drain_global_stack(bool partially) {
3613   if (has_aborted()) return;
3614 
3615   // We have a policy to drain the local queue before we attempt to
3616   // drain the global stack.
3617   assert(partially || _task_queue->size() == 0, "invariant");
3618 
3619   // Decide what the target size is, depending on whether we're going to
3620   // drain it partially (so that other tasks can steal if they run out
3621   // of things to do) or totally (at the very end).  Notice that,
3622   // because we move entries from the global stack in chunks or
3623   // because another task might be doing the same, we might in fact
3624   // drop below the target. But, this is not a problem.
3625   size_t target_size;
3626   if (partially) {
3627     target_size = _cm->partial_mark_stack_size_target();
3628   } else {
3629     target_size = 0;
3630   }
3631 
3632   if (_cm->mark_stack_size() > target_size) {
3633     if (_cm->verbose_low()) {
3634       gclog_or_tty->print_cr("[%u] draining global_stack, target size %d",
3635                              _worker_id, target_size);
3636     }
3637 
3638     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3639       get_entries_from_global_stack();
3640       drain_local_queue(partially);
3641     }
3642 
3643     if (_cm->verbose_low()) {
3644       gclog_or_tty->print_cr("[%u] drained global stack, size = %d",
3645                              _worker_id, _cm->mark_stack_size());
3646     }
3647   }
3648 }
3649 
3650 // The SATB queue set makes several assumptions about whether to call the
3651 // par or non-par versions of its methods. This is why some of the code is
3652 // replicated. We should really get rid of the single-threaded version
3653 // of the code to simplify things.
3654 void CMTask::drain_satb_buffers() {
3655   if (has_aborted()) return;
3656 
3657   // We set this so that the regular clock knows that we're in the
3658   // middle of draining buffers and doesn't set the abort flag when it
3659   // notices that SATB buffers are available for draining. It'd be
3660   // very counterproductive if it did that. :-)
3661   _draining_satb_buffers = true;
3662 
3663   CMObjectClosure oc(this);
3664   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3665   if (G1CollectedHeap::use_parallel_gc_threads()) {
3666     satb_mq_set.set_par_closure(_worker_id, &oc);
3667   } else {
3668     satb_mq_set.set_closure(&oc);
3669   }
3670 
3671   // This keeps claiming and applying the closure to completed buffers
3672   // until we run out of buffers or we need to abort.
3673   if (G1CollectedHeap::use_parallel_gc_threads()) {
3674     while (!has_aborted() &&
3675            satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
3676       if (_cm->verbose_medium()) {
3677         gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3678       }
3679       statsOnly( ++_satb_buffers_processed );
3680       regular_clock_call();
3681     }
3682   } else {
3683     while (!has_aborted() &&
3684            satb_mq_set.apply_closure_to_completed_buffer()) {
3685       if (_cm->verbose_medium()) {
3686         gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3687       }
3688       statsOnly( ++_satb_buffers_processed );
3689       regular_clock_call();
3690     }
3691   }
3692 
3693   if (!concurrent() && !has_aborted()) {
3694     // We should only do this during remark.
3695     if (G1CollectedHeap::use_parallel_gc_threads()) {
3696       satb_mq_set.par_iterate_closure_all_threads(_worker_id);
3697     } else {
3698       satb_mq_set.iterate_closure_all_threads();
3699     }
3700   }
3701 
3702   _draining_satb_buffers = false;
3703 
3704   assert(has_aborted() ||
3705          concurrent() ||
3706          satb_mq_set.completed_buffers_num() == 0, "invariant");
3707 
3708   if (G1CollectedHeap::use_parallel_gc_threads()) {
3709     satb_mq_set.set_par_closure(_worker_id, NULL);
3710   } else {
3711     satb_mq_set.set_closure(NULL);
3712   }
3713 
3714   // Again, this was a potentially expensive operation, so decrease the
3715   // limits to get the regular clock call earlier.
3716   decrease_limits();
3717 }
3718 
3719 void CMTask::print_stats() {
3720   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3721                          _worker_id, _calls);
3722   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3723                          _elapsed_time_ms, _termination_time_ms);
3724   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3725                          _step_times_ms.num(), _step_times_ms.avg(),
3726                          _step_times_ms.sd());
3727   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3728                          _step_times_ms.maximum(), _step_times_ms.sum());
3729 
3730 #if _MARKING_STATS_
3731   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3732                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3733                          _all_clock_intervals_ms.sd());
3734   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3735                          _all_clock_intervals_ms.maximum(),
3736                          _all_clock_intervals_ms.sum());
3737   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
3738                          _clock_due_to_scanning, _clock_due_to_marking);
3739   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
3740                          _objs_scanned, _objs_found_on_bitmap);
3741   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
3742                          _local_pushes, _local_pops, _local_max_size);
3743   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
3744                          _global_pushes, _global_pops, _global_max_size);
3745   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
3746                          _global_transfers_to,_global_transfers_from);
3747   gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);
3748   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
3749   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
3750                          _steal_attempts, _steals);
3751   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
3752   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
3753                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3754   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
3755                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3756 #endif // _MARKING_STATS_
3757 }
3758 
3759 /*****************************************************************************
3760 
3761     The do_marking_step(time_target_ms) method is the building block
3762     of the parallel marking framework. It can be called in parallel
3763     with other invocations of do_marking_step() on different tasks
3764     (but only one per task, obviously) and concurrently with the
3765     mutator threads, or during remark, hence it eliminates the need
3766     for two versions of the code. When called during remark, it will
3767     pick up from where the task left off during the concurrent marking
3768     phase. Interestingly, tasks are also claimable during evacuation
3769     pauses, since do_marking_step() ensures that it aborts before
3770     it needs to yield.
3771 
3772     The data structures that it uses to do marking work are the
3773     following:
3774 
3775       (1) Marking Bitmap. If there are gray objects that appear only
3776       on the bitmap (this happens either when dealing with an overflow
3777       or when the initial marking phase has simply marked the roots
3778       and didn't push them on the stack), then tasks claim heap
3779       regions whose bitmap they then scan to find gray objects. A
3780       global finger indicates where the end of the last claimed region
3781       is. A local finger indicates how far into the region a task has
3782       scanned. The two fingers are used to determine how to gray an
3783       object (i.e. whether simply marking it is OK, as it will be
3784       visited by a task in the future, or whether it also needs to be
3785       pushed on a stack).
3786 
3787       (2) Local Queue. The task's local queue, which the task can
3788       access reasonably efficiently. Other tasks can steal from
3789       it when they run out of work. Throughout the marking phase, a
3790       task attempts to keep its local queue short but not totally
3791       empty, so that entries are available for stealing by other
3792       tasks. Only when there is no more work will a task totally
3793       drain its local queue.
3794 
3795       (3) Global Mark Stack. This handles local queue overflow. During
3796       marking, only sets of entries are moved between it and the local
3797       queues, as access to it requires a mutex and more fine-grained
3798       interaction with it might cause contention. If it
3799       overflows, then the marking phase should restart and iterate
3800       over the bitmap to identify gray objects. Throughout the marking
3801       phase, tasks attempt to keep the global mark stack at a small
3802       length but not totally empty, so that entries are available for
3803       popping by other tasks. Only when there is no more work will
3804       tasks totally drain the global mark stack.
3805 
3806       (4) SATB Buffer Queue. This is where completed SATB buffers are
3807       made available. Buffers are regularly removed from this queue
3808       and scanned for roots, so that the queue doesn't get too
3809       long. During remark, all completed buffers are processed, as
3810       well as the filled in parts of any uncompleted buffers.
3811 
3812     The do_marking_step() method tries to abort when the time target
3813     has been reached. There are a few other cases when the
3814     do_marking_step() method also aborts:
3815 
3816       (1) When the marking phase has been aborted (after a Full GC).
3817 
3818       (2) When a global overflow (on the global stack) has been
3819       triggered. Before the task aborts, it will actually sync up with
3820       the other tasks to ensure that all the marking data structures
3821       (local queues, stacks, fingers etc.)  are re-initialised so that
3822       when do_marking_step() completes, the marking phase can
3823       immediately restart.
3824 
3825       (3) When enough completed SATB buffers are available. The
3826       do_marking_step() method only tries to drain SATB buffers right
3827       at the beginning. So, if enough buffers are available, the
3828       marking step aborts and the SATB buffers are processed at
3829       the beginning of the next invocation.
3830 
3831       (4) To yield. When we have to yield, we abort and yield
3832       right at the end of do_marking_step(). This saves us from a lot
3833       of hassle as, by yielding we might allow a Full GC. If this
3834       happens then objects will be compacted underneath our feet, the
3835       heap might shrink, etc. We avoid checking for this by just
3836       aborting and doing the yield right at the end.
3837 
3838     From the above it follows that the do_marking_step() method should
3839     be called in a loop (or, otherwise, regularly) until it completes.
3840 
3841     If a marking step completes without its has_aborted() flag being
3842     true, it means it has completed the current marking phase (and
3843     also all other marking tasks have done so and have all synced up).
3844 
3845     A method called regular_clock_call() is invoked "regularly" (in
3846     sub ms intervals) throughout marking. It is this clock method that
3847     checks all the abort conditions which were mentioned above and
3848     decides when the task should abort. A work-based scheme is used to
3849     trigger this clock method: when the number of object words the
3850     marking phase has scanned or the number of references the marking
3851     phase has visited reaches a given limit. Additional invocations of
3852     the clock method have been planted in a few other strategic places
3853     too. The initial reason for the clock method was to avoid calling
3854     vtime too regularly, as it is quite expensive. So, once it was in
3855     place, it was natural to piggy-back all the other conditions on it
3856     too and not constantly check them throughout the code.
3857 
3858  *****************************************************************************/
3859 
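     // A minimal sketch of the call-in-a-loop pattern described above. It is
     // purely illustrative; the names below are placeholders and not the
     // actual callers found elsewhere in this file:
     //
     //   ConcurrentMark* cm = ...;   // the ConcurrentMark instance
     //   CMTask* task = ...;         // the marking task claimed by this worker
     //   double target_ms = ...;     // time target per step, >= 1.0 ms
     //   do {
     //     task->do_marking_step(target_ms,
     //                           true /* do_stealing */,
     //                           true /* do_termination */);
     //     // yield to pending safepoints between steps as appropriate
     //   } while (task->has_aborted() && !cm->has_aborted());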
3860 void CMTask::do_marking_step(double time_target_ms,
3861                              bool do_stealing,
3862                              bool do_termination) {
3863   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3864   assert(concurrent() == _cm->concurrent(), "they should be the same");
3865 
3866   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3867   assert(_task_queues != NULL, "invariant");
3868   assert(_task_queue != NULL, "invariant");
3869   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3870 
3871   assert(!_claimed,
3872          "only one thread should claim this task at any one time");
3873 
3874   // OK, this doesn't safeguard against all possible scenarios, as it is
3875   // possible for two threads to set the _claimed flag at the same
3876   // time. But it is only for debugging purposes anyway and it will
3877   // catch most problems.
3878   _claimed = true;
3879 
3880   _start_time_ms = os::elapsedVTime() * 1000.0;
3881   statsOnly( _interval_start_time_ms = _start_time_ms );
3882 
3883   double diff_prediction_ms =
3884     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3885   _time_target_ms = time_target_ms - diff_prediction_ms;
3886 
3887   // set up the variables that are used in the work-based scheme to
3888   // call the regular clock method
3889   _words_scanned = 0;
3890   _refs_reached  = 0;
3891   recalculate_limits();
3892 
3893   // clear all flags
3894   clear_has_aborted();
3895   _has_timed_out = false;
3896   _draining_satb_buffers = false;
3897 
3898   ++_calls;
3899 
3900   if (_cm->verbose_low()) {
3901     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
3902                            "target = %1.2lfms >>>>>>>>>>",
3903                            _worker_id, _calls, _time_target_ms);
3904   }
3905 
3906   // Set up the bitmap and oop closures. Anything that uses them is
3907   // eventually called from this method, so it is OK to allocate these
3908   // statically.
3909   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3910   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3911   set_cm_oop_closure(&cm_oop_closure);
3912 
3913   if (_cm->has_overflown()) {
3914     // This can happen if the mark stack overflows during a GC pause
3915     // and this task, after a yield point, restarts. We have to abort
3916     // as we need to get into the overflow protocol which happens
3917     // right at the end of this task.
3918     set_has_aborted();
3919   }
3920 
3921   // First drain any available SATB buffers. After this, we will not
3922   // look at SATB buffers before the next invocation of this method.
3923   // If enough completed SATB buffers are queued up, the regular clock
3924   // will abort this task so that it restarts.
3925   drain_satb_buffers();
3926   // ...then partially drain the local queue and the global stack
3927   drain_local_queue(true);
3928   drain_global_stack(true);
3929 
3930   do {
3931     if (!has_aborted() && _curr_region != NULL) {
3932       // This means that we're already holding on to a region.
3933       assert(_finger != NULL, "if region is not NULL, then the finger "
3934              "should not be NULL either");
3935 
3936       // We might have restarted this task after an evacuation pause
3937       // which might have evacuated the region we're holding on to
3938       // underneath our feet. Let's read its limit again to make sure
3939       // that we do not iterate over a region of the heap that
3940       // contains garbage (update_region_limit() will also move
3941       // _finger to the start of the region if it is found empty).
3942       update_region_limit();
3943       // We will start from _finger not from the start of the region,
3944       // as we might be restarting this task after aborting half-way
3945       // through scanning this region. In this case, _finger points to
3946       // the address where we last found a marked object. If this is a
3947       // fresh region, _finger points to start().
3948       MemRegion mr = MemRegion(_finger, _region_limit);
3949 
3950       if (_cm->verbose_low()) {
3951         gclog_or_tty->print_cr("[%u] we're scanning part "
3952                                "["PTR_FORMAT", "PTR_FORMAT") "
3953                                "of region "PTR_FORMAT,
3954                                _worker_id, _finger, _region_limit, _curr_region);
3955       }
3956 
3957       // Let's iterate over the bitmap of the part of the
3958       // region that is left.
3959       if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3960         // We successfully completed iterating over the region. Now,
3961         // let's give up the region.
3962         giveup_current_region();
3963         regular_clock_call();
3964       } else {
3965         assert(has_aborted(), "currently the only way to do so");
3966         // The only way to abort the bitmap iteration is to return
3967         // false from the do_bit() method. However, inside the
3968         // do_bit() method we move the _finger to point to the
3969         // object currently being looked at. So, if we bail out, we
3970         // have definitely set _finger to something non-null.
3971         assert(_finger != NULL, "invariant");
3972 
3973         // Region iteration was actually aborted. So now _finger
3974         // points to the address of the object we last scanned. If we
3975         // leave it there, when we restart this task, we will rescan
3976         // the object. It is easy to avoid this. We move the finger by
3977         // enough to point to the next possible object header (the
3978         // bitmap knows by how much we need to move it as it knows its
3979         // granularity).
3980         assert(_finger < _region_limit, "invariant");
3981         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
3982         // Check if bitmap iteration was aborted while scanning the last object
3983         if (new_finger >= _region_limit) {
3984           giveup_current_region();
3985         } else {
3986           move_finger_to(new_finger);
3987         }
3988       }
3989     }
3990     // At this point we have either completed iterating over the
3991     // region we were holding on to, or we have aborted.
3992 
3993     // We then partially drain the local queue and the global stack.
3994     // (Do we really need this?)
3995     drain_local_queue(true);
3996     drain_global_stack(true);
3997 
3998     // Read the note on the claim_region() method on why it might
3999     // return NULL with potentially more regions available for
4000     // claiming and why we have to check out_of_regions() to determine
4001     // whether we're done or not.
4002     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4003       // We are going to try to claim a new region. We should have
4004       // given up on the previous one.
4005       // Separated the asserts so that we know which one fires.
4006       assert(_curr_region  == NULL, "invariant");
4007       assert(_finger       == NULL, "invariant");
4008       assert(_region_limit == NULL, "invariant");
4009       if (_cm->verbose_low()) {
4010         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4011       }
4012       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4013       if (claimed_region != NULL) {
4014         // Yes, we managed to claim one
4015         statsOnly( ++_regions_claimed );
4016 
4017         if (_cm->verbose_low()) {
4018           gclog_or_tty->print_cr("[%u] we successfully claimed "
4019                                  "region "PTR_FORMAT,
4020                                  _worker_id, claimed_region);
4021         }
4022 
4023         setup_for_region(claimed_region);
4024         assert(_curr_region == claimed_region, "invariant");
4025       }
4026       // It is important to call the regular clock here. It might take
4027       // a while to claim a region if, for example, we hit a large
4028       // block of empty regions. So we need to call the regular clock
4029       // method once round the loop to make sure it's called
4030       // frequently enough.
4031       regular_clock_call();
4032     }
4033 
4034     if (!has_aborted() && _curr_region == NULL) {
4035       assert(_cm->out_of_regions(),
4036              "at this point we should be out of regions");
4037     }
4038   } while ( _curr_region != NULL && !has_aborted());
4039 
4040   if (!has_aborted()) {
4041     // We cannot check whether the global stack is empty, since other
4042     // tasks might be pushing objects to it concurrently.
4043     assert(_cm->out_of_regions(),
4044            "at this point we should be out of regions");
4045 
4046     if (_cm->verbose_low()) {
4047       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4048     }
4049 
4050     // Try to reduce the number of available SATB buffers so that
4051     // remark has less work to do.
4052     drain_satb_buffers();
4053   }
4054 
4055   // Since we've done everything else, we can now totally drain the
4056   // local queue and global stack.
4057   drain_local_queue(false);
4058   drain_global_stack(false);
4059 
4060   // Attempt at work stealing from other tasks' queues.
4061   if (do_stealing && !has_aborted()) {
4062     // We have not aborted. This means that we have finished all that
4063     // we could. Let's try to do some stealing...
4064 
4065     // We cannot check whether the global stack is empty, since other
4066     // tasks might be pushing objects to it concurrently.
4067     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4068            "only way to reach here");
4069 
4070     if (_cm->verbose_low()) {
4071       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4072     }
4073 
4074     while (!has_aborted()) {
4075       oop obj;
4076       statsOnly( ++_steal_attempts );
4077 
4078       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4079         if (_cm->verbose_medium()) {
4080           gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4081                                  _worker_id, (void*) obj);
4082         }
4083 
4084         statsOnly( ++_steals );
4085 
4086         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4087                "any stolen object should be marked");
4088         scan_object(obj);
4089 
4090         // And since we're towards the end, let's totally drain the
4091         // local queue and global stack.
4092         drain_local_queue(false);
4093         drain_global_stack(false);
4094       } else {
4095         break;
4096       }
4097     }
4098   }
4099 
4100   // If we are about to wrap up and go into termination, check if we
4101   // should raise the overflow flag.
4102   if (do_termination && !has_aborted()) {
4103     if (_cm->force_overflow()->should_force()) {
4104       _cm->set_has_overflown();
4105       regular_clock_call();
4106     }
4107   }
4108 
4109   // We still haven't aborted. Now, let's try to get into the
4110   // termination protocol.
4111   if (do_termination && !has_aborted()) {
4112     // We cannot check whether the global stack is empty, since other
4113     // tasks might be concurrently pushing objects on it.
4114     // Separated the asserts so that we know which one fires.
4115     assert(_cm->out_of_regions(), "only way to reach here");
4116     assert(_task_queue->size() == 0, "only way to reach here");
4117 
4118     if (_cm->verbose_low()) {
4119       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4120     }
4121 
4122     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4123     // The CMTask class also extends the TerminatorTerminator class,
4124     // hence its should_exit_termination() method will also decide
4125     // whether to exit the termination protocol or not.
4126     bool finished = _cm->terminator()->offer_termination(this);
4127     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4128     _termination_time_ms +=
4129       termination_end_time_ms - _termination_start_time_ms;
4130 
4131     if (finished) {
4132       // We're all done.
4133 
4134       if (_worker_id == 0) {
4135         // let's allow task 0 to do this
4136         if (concurrent()) {
4137           assert(_cm->concurrent_marking_in_progress(), "invariant");
4138           // we need to set this to false before the next
4139           // safepoint. This way we ensure that the marking phase
4140           // doesn't observe any more heap expansions.
4141           _cm->clear_concurrent_marking_in_progress();
4142         }
4143       }
4144 
4145       // We can now guarantee that the global stack is empty, since
4146       // all other tasks have finished. We separated the guarantees so
4147       // that, if a condition is false, we can immediately find out
4148       // which one.
4149       guarantee(_cm->out_of_regions(), "only way to reach here");
4150       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4151       guarantee(_task_queue->size() == 0, "only way to reach here");
4152       guarantee(!_cm->has_overflown(), "only way to reach here");
4153       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4154 
4155       if (_cm->verbose_low()) {
4156         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4157       }
4158     } else {
4159       // Apparently there's more work to do. Let's abort this task. It
4160       // will be restarted and we can hopefully find more things to do.
4161 
4162       if (_cm->verbose_low()) {
4163         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4164                                _worker_id);
4165       }
4166 
4167       set_has_aborted();
4168       statsOnly( ++_aborted_termination );
4169     }
4170   }
4171 
4172   // Mainly for debugging purposes to make sure that a pointer to the
4173   // closure which was statically allocated in this frame doesn't
4174   // escape it by accident.
4175   set_cm_oop_closure(NULL);
4176   double end_time_ms = os::elapsedVTime() * 1000.0;
4177   double elapsed_time_ms = end_time_ms - _start_time_ms;
4178   // Update the step history.
4179   _step_times_ms.add(elapsed_time_ms);
4180 
4181   if (has_aborted()) {
4182     // The task was aborted for some reason.
4183 
4184     statsOnly( ++_aborted );
4185 
4186     if (_has_timed_out) {
4187       double diff_ms = elapsed_time_ms - _time_target_ms;
4188       // Keep statistics of how well we did with respect to hitting
4189       // our target only if we actually timed out (if we aborted for
4190       // other reasons, then the results might get skewed).
4191       _marking_step_diffs_ms.add(diff_ms);
4192     }
4193 
4194     if (_cm->has_overflown()) {
4195       // This is the interesting one. We aborted because a global
4196       // overflow was raised. This means we have to restart the
4197       // marking phase and start iterating over regions. However, in
4198       // order to do this we have to make sure that all tasks stop
4199       // what they are doing and re-initialise in a safe manner. We
4200       // will achieve this with the use of two barrier sync points.
4201 
4202       if (_cm->verbose_low()) {
4203         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4204       }
4205 
4206       _cm->enter_first_sync_barrier(_worker_id);
4207       // When we exit this sync barrier we know that all tasks have
4208       // stopped doing marking work. So, it's now safe to
4209       // re-initialise our data structures. At the end of this method,
4210       // task 0 will clear the global data structures.
4211 
4212       statsOnly( ++_aborted_overflow );
4213 
4214       // We clear the local state of this task...
4215       clear_region_fields();
4216 
4217       // ...and enter the second barrier.
4218       _cm->enter_second_sync_barrier(_worker_id);
4219       // At this point everything has been re-initialised and we're
4220       // ready to restart.
4221     }
4222 
4223     if (_cm->verbose_low()) {
4224       gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4225                              "elapsed = %1.2lfms <<<<<<<<<<",
4226                              _worker_id, _time_target_ms, elapsed_time_ms);
4227       if (_cm->has_aborted()) {
4228         gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4229                                _worker_id);
4230       }
4231     }
4232   } else {
4233     if (_cm->verbose_low()) {
4234       gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4235                              "elapsed = %1.2lfms <<<<<<<<<<",
4236                              _worker_id, _time_target_ms, elapsed_time_ms);
4237     }
4238   }
4239 
4240   _claimed = false;
4241 }
4242 
4243 CMTask::CMTask(uint worker_id,
4244                ConcurrentMark* cm,
4245                size_t* marked_bytes,
4246                BitMap* card_bm,
4247                CMTaskQueue* task_queue,
4248                CMTaskQueueSet* task_queues)
4249   : _g1h(G1CollectedHeap::heap()),
4250     _worker_id(worker_id), _cm(cm),
4251     _claimed(false),
4252     _nextMarkBitMap(NULL), _hash_seed(17),
4253     _task_queue(task_queue),
4254     _task_queues(task_queues),
4255     _cm_oop_closure(NULL),
4256     _marked_bytes_array(marked_bytes),
4257     _card_bm(card_bm) {
4258   guarantee(task_queue != NULL, "invariant");
4259   guarantee(task_queues != NULL, "invariant");
4260 
4261   statsOnly( _clock_due_to_scanning = 0;
4262              _clock_due_to_marking  = 0 );
4263 
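  // Seed the step-time difference statistics with a small non-zero
  // value so that the first prediction of how much a step tends to
  // overshoot its time target is sensible; 0.5ms is just an initial
  // estimate, not a measured value.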
4264   _marking_step_diffs_ms.add(0.5);
4265 }
4266 
4267 // These formatting macros are used below to ensure consistent
4268 // formatting. The *_H_* versions are used to format the header for a
4269 // particular value and should be kept consistent with the
4270 // corresponding value macro. Also note that most of the macros add
4271 // the necessary white space (as a prefix), which makes them a bit
4272 // easier to compose.
4273 
4274 // All the output lines are prefixed with this string to be able to
4275 // identify them easily in a large log file.
4276 #define G1PPRL_LINE_PREFIX            "###"
4277 
4278 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
4279 #ifdef _LP64
4280 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4281 #else // _LP64
4282 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4283 #endif // _LP64
4284 
4285 // For per-region info
4286 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4287 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4288 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
4289 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4290 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4291 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4292 
4293 // For summary info
4294 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
4295 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
4296 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
4297 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
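
// As an illustration only (exact widths depend on PTR_FORMAT and
// SIZE_FORMAT_W on the platform), a per-region line printed with the
// macros above looks roughly like:
//   ###   OLD  0x00000000eb200000-0x00000000eb300000     524288     524288     491520             0.0
// and a summary line roughly like:
//   ### SUMMARY  capacity: 256.00 MB  used: 120.50 MB / 47.07 %  ...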
4298 
4299 G1PrintRegionLivenessInfoClosure::
4300 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4301   : _out(out),
4302     _total_used_bytes(0), _total_capacity_bytes(0),
4303     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4304     _hum_used_bytes(0), _hum_capacity_bytes(0),
4305     _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4306   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4307   MemRegion g1_committed = g1h->g1_committed();
4308   MemRegion g1_reserved = g1h->g1_reserved();
4309   double now = os::elapsedTime();
4310 
4311   // Print the header of the output.
4312   _out->cr();
4313   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4314   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4315                  G1PPRL_SUM_ADDR_FORMAT("committed")
4316                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4317                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4318                  g1_committed.start(), g1_committed.end(),
4319                  g1_reserved.start(), g1_reserved.end(),
4320                  HeapRegion::GrainBytes);
4321   _out->print_cr(G1PPRL_LINE_PREFIX);
4322   _out->print_cr(G1PPRL_LINE_PREFIX
4323                  G1PPRL_TYPE_H_FORMAT
4324                  G1PPRL_ADDR_BASE_H_FORMAT
4325                  G1PPRL_BYTE_H_FORMAT
4326                  G1PPRL_BYTE_H_FORMAT
4327                  G1PPRL_BYTE_H_FORMAT
4328                  G1PPRL_DOUBLE_H_FORMAT,
4329                  "type", "address-range",
4330                  "used", "prev-live", "next-live", "gc-eff");
4331   _out->print_cr(G1PPRL_LINE_PREFIX
4332                  G1PPRL_TYPE_H_FORMAT
4333                  G1PPRL_ADDR_BASE_H_FORMAT
4334                  G1PPRL_BYTE_H_FORMAT
4335                  G1PPRL_BYTE_H_FORMAT
4336                  G1PPRL_BYTE_H_FORMAT
4337                  G1PPRL_DOUBLE_H_FORMAT,
4338                  "", "",
4339                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4340 }
4341 
4342 // Takes a pointer to one of the _hum_* fields, deduces the
4343 // corresponding value for a region in a humongous region series
4344 // (either the region size, or whatever is left if the _hum_* field
4345 // is < the region size), and updates the _hum_* field accordingly.
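// For example, with a 1 MB region size and *hum_bytes initially 2.5 MB,
// three successive calls return 1 MB, 1 MB and 0.5 MB and leave
// *hum_bytes at zero (sizes chosen purely for illustration).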
4346 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4347   size_t bytes = 0;
4348   // The > 0 check is to deal with the prev and next live bytes which
4349   // could be 0.
4350   if (*hum_bytes > 0) {
4351     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4352     *hum_bytes -= bytes;
4353   }
4354   return bytes;
4355 }
4356 
4357 // It deduces the values for a region in a humongous region series
4358 // from the _hum_* fields and updates those accordingly. It assumes
4359 // that the _hum_* fields have already been set up from the "starts
4360 // humongous" region and we visit the regions in address order.
4361 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4362                                                      size_t* capacity_bytes,
4363                                                      size_t* prev_live_bytes,
4364                                                      size_t* next_live_bytes) {
4365   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4366   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4367   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4368   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4369   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4370 }
4371 
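// Called for every region during a heap iteration: classifies the
// region, adds its numbers to the running totals and prints a single
// formatted line for it. Returning false keeps the iteration going.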
4372 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4373   const char* type = "";
4374   HeapWord* bottom       = r->bottom();
4375   HeapWord* end          = r->end();
4376   size_t capacity_bytes  = r->capacity();
4377   size_t used_bytes      = r->used();
4378   size_t prev_live_bytes = r->live_bytes();
4379   size_t next_live_bytes = r->next_live_bytes();
4380   double gc_eff          = r->gc_efficiency();
4381   if (r->used() == 0) {
4382     type = "FREE";
4383   } else if (r->is_survivor()) {
4384     type = "SURV";
4385   } else if (r->is_young()) {
4386     type = "EDEN";
4387   } else if (r->startsHumongous()) {
4388     type = "HUMS";
4389 
4390     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4391            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4392            "they should have been zeroed after the last time we used them");
4393     // Set up the _hum_* fields.
4394     _hum_capacity_bytes  = capacity_bytes;
4395     _hum_used_bytes      = used_bytes;
4396     _hum_prev_live_bytes = prev_live_bytes;
4397     _hum_next_live_bytes = next_live_bytes;
4398     get_hum_bytes(&used_bytes, &capacity_bytes,
4399                   &prev_live_bytes, &next_live_bytes);
4400     end = bottom + HeapRegion::GrainWords;
4401   } else if (r->continuesHumongous()) {
4402     type = "HUMC";
4403     get_hum_bytes(&used_bytes, &capacity_bytes,
4404                   &prev_live_bytes, &next_live_bytes);
4405     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4406   } else {
4407     type = "OLD";
4408   }
4409 
4410   _total_used_bytes      += used_bytes;
4411   _total_capacity_bytes  += capacity_bytes;
4412   _total_prev_live_bytes += prev_live_bytes;
4413   _total_next_live_bytes += next_live_bytes;
4414 
4415   // Print a line for this particular region.
4416   _out->print_cr(G1PPRL_LINE_PREFIX
4417                  G1PPRL_TYPE_FORMAT
4418                  G1PPRL_ADDR_BASE_FORMAT
4419                  G1PPRL_BYTE_FORMAT
4420                  G1PPRL_BYTE_FORMAT
4421                  G1PPRL_BYTE_FORMAT
4422                  G1PPRL_DOUBLE_FORMAT,
4423                  type, bottom, end,
4424                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4425 
4426   return false;
4427 }
4428 
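// The footer consists of a single summary line that reports the total
// capacity plus the used, prev-live and next-live totals, each given
// both in MB and as a percentage of the total capacity.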
4429 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4430   // Print the footer of the output.
4431   _out->print_cr(G1PPRL_LINE_PREFIX);
4432   _out->print_cr(G1PPRL_LINE_PREFIX
4433                  " SUMMARY"
4434                  G1PPRL_SUM_MB_FORMAT("capacity")
4435                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4436                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4437                  G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4438                  bytes_to_mb(_total_capacity_bytes),
4439                  bytes_to_mb(_total_used_bytes),
4440                  perc(_total_used_bytes, _total_capacity_bytes),
4441                  bytes_to_mb(_total_prev_live_bytes),
4442                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4443                  bytes_to_mb(_total_next_live_bytes),
4444                  perc(_total_next_live_bytes, _total_capacity_bytes));
4445   _out->cr();
4446 }