1 /*
   2  * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/symbolTable.hpp"
  27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
  28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
  32 #include "gc_implementation/g1/g1Log.hpp"
  33 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
  34 #include "gc_implementation/g1/g1RemSet.hpp"
  35 #include "gc_implementation/g1/heapRegion.inline.hpp"
  36 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
  38 #include "gc_implementation/shared/vmGCOperations.hpp"
  39 #include "memory/genOopClosures.inline.hpp"
  40 #include "memory/referencePolicy.hpp"
  41 #include "memory/resourceArea.hpp"
  42 #include "oops/oop.inline.hpp"
  43 #include "runtime/handles.inline.hpp"
  44 #include "runtime/java.hpp"
  45 #include "services/memTracker.hpp"
  46 
  47 // Concurrent marking bit map wrapper
  48 
  49 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  50   _bm((uintptr_t*)NULL,0),
  51   _shifter(shifter) {
  52   _bmStartWord = (HeapWord*)(rs.base());
  53   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
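  // The backing store needs one bit for every 2^_shifter heap words, so
  // shift the word size down by (_shifter + LogBitsPerByte) to get a byte
  // count, and add one byte to cover any remainder lost to truncation.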
  54   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
  55                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  56 
  57   MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
  58 
  59   guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  62   guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for concurrent marking bit map");
  64   assert(_virtual_space.committed_size() == brs.size(),
  65          "didn't reserve backing store for all of concurrent marking bit map?");
  66   _bm.set_map((uintptr_t*)_virtual_space.low());
  67   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
  68          _bmWordSize, "inconsistency in bit map sizing");
  69   _bm.set_size(_bmWordSize >> _shifter);
  70 }
  71 
  72 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  73                                                HeapWord* limit) const {
  74   // First we must round addr *up* to a possible object boundary.
  75   addr = (HeapWord*)align_size_up((intptr_t)addr,
  76                                   HeapWordSize << _shifter);
  77   size_t addrOffset = heapWordToOffset(addr);
  78   if (limit == NULL) {
  79     limit = _bmStartWord + _bmWordSize;
  80   }
  81   size_t limitOffset = heapWordToOffset(limit);
  82   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  83   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  84   assert(nextAddr >= addr, "get_next_one postcondition");
  85   assert(nextAddr == limit || isMarked(nextAddr),
  86          "get_next_one postcondition");
  87   return nextAddr;
  88 }
  89 
  90 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  91                                                  HeapWord* limit) const {
  92   size_t addrOffset = heapWordToOffset(addr);
  93   if (limit == NULL) {
  94     limit = _bmStartWord + _bmWordSize;
  95   }
  96   size_t limitOffset = heapWordToOffset(limit);
  97   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  98   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
 102   return nextAddr;
 103 }
 104 
 105 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 106   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 107   return (int) (diff >> _shifter);
 108 }
 109 
 110 #ifndef PRODUCT
 111 bool CMBitMapRO::covers(ReservedSpace rs) const {
 112   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 113   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
 114          "size inconsistency");
 115   return _bmStartWord == (HeapWord*)(rs.base()) &&
 116          _bmWordSize  == rs.size()>>LogHeapWordSize;
 117 }
 118 #endif
 119 
 120 void CMBitMap::clearAll() {
 121   _bm.clear();
 122   return;
 123 }
 124 
 125 void CMBitMap::markRange(MemRegion mr) {
 126   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 127   assert(!mr.is_empty(), "unexpected empty region");
 128   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 129           ((HeapWord *) mr.end())),
 130          "markRange memory region end is not card aligned");
 131   // convert address range into offset range
 132   _bm.at_put_range(heapWordToOffset(mr.start()),
 133                    heapWordToOffset(mr.end()), true);
 134 }
 135 
 136 void CMBitMap::clearRange(MemRegion mr) {
 137   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 138   assert(!mr.is_empty(), "unexpected empty region");
 139   // convert address range into offset range
 140   _bm.at_put_range(heapWordToOffset(mr.start()),
 141                    heapWordToOffset(mr.end()), false);
 142 }
 143 
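// Returns the first contiguous run of marked words starting at or after addr
// (clamped to end_addr) as a MemRegion, possibly empty, and clears the
// corresponding bits in the bitmap.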
 144 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 145                                             HeapWord* end_addr) {
 146   HeapWord* start = getNextMarkedWordAddress(addr);
 147   start = MIN2(start, end_addr);
 148   HeapWord* end   = getNextUnmarkedWordAddress(start);
 149   end = MIN2(end, end_addr);
 150   assert(start <= end, "Consistency check");
 151   MemRegion mr(start, end);
 152   if (!mr.is_empty()) {
 153     clearRange(mr);
 154   }
 155   return mr;
 156 }
 157 
 158 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 159   _base(NULL), _cm(cm)
 160 #ifdef ASSERT
 161   , _drain_in_progress(false)
 162   , _drain_in_progress_yields(false)
 163 #endif
 164 {}
 165 
 166 void CMMarkStack::allocate(size_t size) {
 167   _base = NEW_C_HEAP_ARRAY(oop, size, mtGC);
 168   if (_base == NULL) {
 169     vm_exit_during_initialization("Failed to allocate CM region mark stack");
 170   }
 171   _index = 0;
 172   _capacity = (jint) size;
 173   _saved_index = -1;
 174   NOT_PRODUCT(_max_depth = 0);
 175 }
 176 
 177 CMMarkStack::~CMMarkStack() {
 178   if (_base != NULL) {
 179     FREE_C_HEAP_ARRAY(oop, _base, mtGC);
 180   }
 181 }
 182 
 183 void CMMarkStack::par_push(oop ptr) {
 184   while (true) {
 185     if (isFull()) {
 186       _overflow = true;
 187       return;
 188     }
 189     // Otherwise...
 190     jint index = _index;
 191     jint next_index = index+1;
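    // Try to claim the slot at 'index' by atomically bumping _index. If the
    // CAS succeeds we own that slot and can safely store the oop; otherwise
    // another thread got there first and we retry from the top.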
 192     jint res = Atomic::cmpxchg(next_index, &_index, index);
 193     if (res == index) {
 194       _base[index] = ptr;
 195       // Note that we don't maintain this atomically.  We could, but it
 196       // doesn't seem necessary.
 197       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 198       return;
 199     }
 200     // Otherwise, we need to try again.
 201   }
 202 }
 203 
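// Lock-free bulk push: atomically claims n consecutive slots for the oops in
// ptr_arr, setting _overflow instead if the stack cannot hold all of them.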
 204 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 205   while (true) {
 206     if (isFull()) {
 207       _overflow = true;
 208       return;
 209     }
 210     // Otherwise...
 211     jint index = _index;
 212     jint next_index = index + n;
 213     if (next_index > _capacity) {
 214       _overflow = true;
 215       return;
 216     }
 217     jint res = Atomic::cmpxchg(next_index, &_index, index);
 218     if (res == index) {
 219       for (int i = 0; i < n; i++) {
 220         int ind = index + i;
 221         assert(ind < _capacity, "By overflow test above.");
 222         _base[ind] = ptr_arr[i];
 223       }
 224       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 225       return;
 226     }
 227     // Otherwise, we need to try again.
 228   }
 229 }
 230 
 231 
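// Locked bulk push: serializes concurrent pushers under ParGCRareEvent_lock
// rather than using a CAS loop, and sets _overflow if the n oops do not fit.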
 232 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 233   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 234   jint start = _index;
 235   jint next_index = start + n;
 236   if (next_index > _capacity) {
 237     _overflow = true;
 238     return;
 239   }
 240   // Otherwise.
 241   _index = next_index;
 242   for (int i = 0; i < n; i++) {
 243     int ind = start + i;
 244     assert(ind < _capacity, "By overflow test above.");
 245     _base[ind] = ptr_arr[i];
 246   }
 247 }
 248 
 249 
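// Pops up to 'max' oops into ptr_arr under ParGCRareEvent_lock, storing the
// number actually popped in *n; returns false if the stack was empty.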
 250 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 251   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 252   jint index = _index;
 253   if (index == 0) {
 254     *n = 0;
 255     return false;
 256   } else {
 257     int k = MIN2(max, index);
 258     jint new_ind = index - k;
 259     for (int j = 0; j < k; j++) {
 260       ptr_arr[j] = _base[new_ind + j];
 261     }
 262     _index = new_ind;
 263     *n = k;
 264     return true;
 265   }
 266 }
 267 
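// Repeatedly pops (grey) objects off the stack and applies the closure to
// each via oop_iterate(). If yield_after is true, offers to yield after each
// object and returns false if the drain was interrupted by a yield.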
 268 template<class OopClosureClass>
 269 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 270   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 271          || SafepointSynchronize::is_at_safepoint(),
 272          "Drain recursion must be yield-safe.");
 273   bool res = true;
 274   debug_only(_drain_in_progress = true);
 275   debug_only(_drain_in_progress_yields = yield_after);
 276   while (!isEmpty()) {
 277     oop newOop = pop();
 278     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 279     assert(newOop->is_oop(), "Expected an oop");
 280     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 281            "only grey objects on this stack");
 282     newOop->oop_iterate(cl);
 283     if (yield_after && _cm->do_yield_check()) {
 284       res = false;
 285       break;
 286     }
 287   }
 288   debug_only(_drain_in_progress = false);
 289   return res;
 290 }
 291 
 292 void CMMarkStack::note_start_of_gc() {
 293   assert(_saved_index == -1,
 294          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 295   _saved_index = _index;
 296 }
 297 
 298 void CMMarkStack::note_end_of_gc() {
 299   // This is intentionally a guarantee, instead of an assert. If we
 300   // accidentally add something to the mark stack during GC, it
  // will be a correctness issue, so it's better if we crash. We'll
 302   // only check this once per GC anyway, so it won't be a performance
 303   // issue in any way.
 304   guarantee(_saved_index == _index,
 305             err_msg("saved index: %d index: %d", _saved_index, _index));
 306   _saved_index = -1;
 307 }
 308 
 309 void CMMarkStack::oops_do(OopClosure* f) {
 310   assert(_saved_index == _index,
 311          err_msg("saved index: %d index: %d", _saved_index, _index));
 312   for (int i = 0; i < _index; i += 1) {
 313     f->do_oop(&_base[i]);
 314   }
 315 }
 316 
 317 bool ConcurrentMark::not_yet_marked(oop obj) const {
 318   return _g1h->is_obj_ill(obj);
 319 }
 320 
 321 CMRootRegions::CMRootRegions() :
 322   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 323   _should_abort(false),  _next_survivor(NULL) { }
 324 
 325 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 326   _young_list = g1h->young_list();
 327   _cm = cm;
 328 }
 329 
 330 void CMRootRegions::prepare_for_scan() {
 331   assert(!scan_in_progress(), "pre-condition");
 332 
 333   // Currently, only survivors can be root regions.
 334   assert(_next_survivor == NULL, "pre-condition");
 335   _next_survivor = _young_list->first_survivor_region();
 336   _scan_in_progress = (_next_survivor != NULL);
 337   _should_abort = false;
 338 }
 339 
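// Claims the next root region (currently always a survivor region) using a
// double-checked pattern: _next_survivor is read optimistically and then
// re-read under RootRegionScan_lock before being advanced. Returns NULL when
// nothing is left to claim or an abort has been requested.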
 340 HeapRegion* CMRootRegions::claim_next() {
 341   if (_should_abort) {
 342     // If someone has set the should_abort flag, we return NULL to
 343     // force the caller to bail out of their loop.
 344     return NULL;
 345   }
 346 
 347   // Currently, only survivors can be root regions.
 348   HeapRegion* res = _next_survivor;
 349   if (res != NULL) {
 350     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 351     // Read it again in case it changed while we were waiting for the lock.
 352     res = _next_survivor;
 353     if (res != NULL) {
 354       if (res == _young_list->last_survivor_region()) {
 355         // We just claimed the last survivor so store NULL to indicate
 356         // that we're done.
 357         _next_survivor = NULL;
 358       } else {
 359         _next_survivor = res->get_next_young_region();
 360       }
 361     } else {
 362       // Someone else claimed the last survivor while we were trying
 363       // to take the lock so nothing else to do.
 364     }
 365   }
 366   assert(res == NULL || res->is_survivor(), "post-condition");
 367 
 368   return res;
 369 }
 370 
 371 void CMRootRegions::scan_finished() {
 372   assert(scan_in_progress(), "pre-condition");
 373 
 374   // Currently, only survivors can be root regions.
 375   if (!_should_abort) {
 376     assert(_next_survivor == NULL, "we should have claimed all survivors");
 377   }
 378   _next_survivor = NULL;
 379 
 380   {
 381     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 382     _scan_in_progress = false;
 383     RootRegionScan_lock->notify_all();
 384   }
 385 }
 386 
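// Blocks the calling thread on RootRegionScan_lock until root region scanning
// has completed; returns false immediately if no scan was in progress.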
 387 bool CMRootRegions::wait_until_scan_finished() {
 388   if (!scan_in_progress()) return false;
 389 
 390   {
 391     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 392     while (scan_in_progress()) {
 393       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 394     }
 395   }
 396   return true;
 397 }
 398 
 399 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 400 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 401 #endif // _MSC_VER
 402 
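// Heuristic mapping from the number of parallel GC threads to the number of
// concurrent marking threads: (n + 2) / 4, with a minimum of one.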
 403 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 404   return MAX2((n_par_threads + 2) / 4, 1U);
 405 }
 406 
 407 ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
 408   _markBitMap1(rs, MinObjAlignment - 1),
 409   _markBitMap2(rs, MinObjAlignment - 1),
 410 
 411   _parallel_marking_threads(0),
 412   _max_parallel_marking_threads(0),
 413   _sleep_factor(0.0),
 414   _marking_task_overhead(1.0),
 415   _cleanup_sleep_factor(0.0),
 416   _cleanup_task_overhead(1.0),
 417   _cleanup_list("Cleanup List"),
 418   _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
 419   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
 420            CardTableModRefBS::card_shift,
 421            false /* in_resource_area*/),
 422 
 423   _prevMarkBitMap(&_markBitMap1),
 424   _nextMarkBitMap(&_markBitMap2),
 425 
 426   _markStack(this),
 427   // _finger set in set_non_marking_state
 428 
 429   _max_task_num(MAX2((uint)ParallelGCThreads, 1U)),
 430   // _active_tasks set in set_non_marking_state
 431   // _tasks set inside the constructor
 432   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
 433   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
 434 
 435   _has_overflown(false),
 436   _concurrent(false),
 437   _has_aborted(false),
 438   _restart_for_overflow(false),
 439   _concurrent_marking_in_progress(false),
 440 
 441   // _verbose_level set below
 442 
 443   _init_times(),
 444   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 445   _cleanup_times(),
 446   _total_counting_time(0.0),
 447   _total_rs_scrub_time(0.0),
 448 
 449   _parallel_workers(NULL),
 450 
 451   _count_card_bitmaps(NULL),
 452   _count_marked_bytes(NULL) {
 453   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 454   if (verbose_level < no_verbose) {
 455     verbose_level = no_verbose;
 456   }
 457   if (verbose_level > high_verbose) {
 458     verbose_level = high_verbose;
 459   }
 460   _verbose_level = verbose_level;
 461 
 462   if (verbose_low()) {
 463     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 464                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
 465   }
 466 
 467   _markStack.allocate(MarkStackSize);
 468 
 469   // Create & start a ConcurrentMark thread.
 470   _cmThread = new ConcurrentMarkThread(this);
 471   assert(cmThread() != NULL, "CM Thread should have been created");
 472   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 473 
 474   _g1h = G1CollectedHeap::heap();
 475   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 476   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
 477   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
 478 
 479   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 480   satb_qs.set_buffer_size(G1SATBBufferSize);
 481 
 482   _root_regions.init(_g1h, this);
 483 
 484   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num, mtGC);
 485   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num, mtGC);
 486 
 487   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_task_num, mtGC);
 488   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num, mtGC);
 489 
 490   BitMap::idx_t card_bm_size = _card_bm.size();
 491 
 492   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 493   _active_tasks = _max_task_num;
 494   for (int i = 0; i < (int) _max_task_num; ++i) {
 495     CMTaskQueue* task_queue = new CMTaskQueue();
 496     task_queue->initialize();
 497     _task_queues->register_queue(i, task_queue);
 498 
 499     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 500     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC);
 501 
 502     _tasks[i] = new CMTask(i, this,
 503                            _count_marked_bytes[i],
 504                            &_count_card_bitmaps[i],
 505                            task_queue, _task_queues);
 506 
 507     _accum_task_vtime[i] = 0.0;
 508   }
 509 
 510   // Calculate the card number for the bottom of the heap. Used
 511   // in biasing indexes into the accounting card bitmaps.
 512   _heap_bottom_card_num =
 513     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 514                                 CardTableModRefBS::card_shift);
 515 
 516   // Clear all the liveness counting data
 517   clear_all_count_data();
 518 
 519   if (ConcGCThreads > ParallelGCThreads) {
 520     vm_exit_during_initialization("Can't have more ConcGCThreads "
 521                                   "than ParallelGCThreads.");
 522   }
 523   if (ParallelGCThreads == 0) {
 524     // if we are not running with any parallel GC threads we will not
 525     // spawn any marking threads either
 526     _parallel_marking_threads =       0;
 527     _max_parallel_marking_threads =   0;
 528     _sleep_factor             =     0.0;
 529     _marking_task_overhead    =     1.0;
 530   } else {
 531     if (ConcGCThreads > 0) {
 532       // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
 533       // if both are set
 534 
 535       _parallel_marking_threads = (uint) ConcGCThreads;
 536       _max_parallel_marking_threads = _parallel_marking_threads;
 537       _sleep_factor             = 0.0;
 538       _marking_task_overhead    = 1.0;
 539     } else if (G1MarkingOverheadPercent > 0) {
 540       // we will calculate the number of parallel marking threads
 541       // based on a target overhead with respect to the soft real-time
 542       // goal
 543 
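      // Roughly: overall_cm_overhead is the fraction of CPU time budgeted for
      // marking (the pause-time budget scaled by the overhead percentage and
      // spread over the pause interval). Dividing it by one CPU's share
      // (1 / #processors) gives the marking thread count, and each thread's
      // duty cycle and matching sleep factor are derived from that below.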
 544       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 545       double overall_cm_overhead =
 546         (double) MaxGCPauseMillis * marking_overhead /
 547         (double) GCPauseIntervalMillis;
 548       double cpu_ratio = 1.0 / (double) os::processor_count();
 549       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 550       double marking_task_overhead =
 551         overall_cm_overhead / marking_thread_num *
 552                                                 (double) os::processor_count();
 553       double sleep_factor =
 554                          (1.0 - marking_task_overhead) / marking_task_overhead;
 555 
 556       _parallel_marking_threads = (uint) marking_thread_num;
 557       _max_parallel_marking_threads = _parallel_marking_threads;
 558       _sleep_factor             = sleep_factor;
 559       _marking_task_overhead    = marking_task_overhead;
 560     } else {
 561       _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
 562       _max_parallel_marking_threads = _parallel_marking_threads;
 563       _sleep_factor             = 0.0;
 564       _marking_task_overhead    = 1.0;
 565     }
 566 
 567     if (parallel_marking_threads() > 1) {
 568       _cleanup_task_overhead = 1.0;
 569     } else {
 570       _cleanup_task_overhead = marking_task_overhead();
 571     }
 572     _cleanup_sleep_factor =
 573                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 574 
 575 #if 0
 576     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 577     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 578     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 579     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 580     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 581 #endif
 582 
 583     guarantee(parallel_marking_threads() > 0, "peace of mind");
 584     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
 585          _max_parallel_marking_threads, false, true);
 586     if (_parallel_workers == NULL) {
 587       vm_exit_during_initialization("Failed necessary allocation.");
 588     } else {
 589       _parallel_workers->initialize_workers();
 590     }
 591   }
 592 
 593   // so that the call below can read a sensible value
 594   _heap_start = (HeapWord*) rs.base();
 595   set_non_marking_state();
 596 }
 597 
 598 void ConcurrentMark::update_g1_committed(bool force) {
 599   // If concurrent marking is not in progress, then we do not need to
 600   // update _heap_end.
 601   if (!concurrent_marking_in_progress() && !force) return;
 602 
 603   MemRegion committed = _g1h->g1_committed();
 604   assert(committed.start() == _heap_start, "start shouldn't change");
 605   HeapWord* new_end = committed.end();
 606   if (new_end > _heap_end) {
 607     // The heap has been expanded.
 608 
 609     _heap_end = new_end;
 610   }
 611   // Notice that the heap can also shrink. However, this only happens
 612   // during a Full GC (at least currently) and the entire marking
 613   // phase will bail out and the task will not be restarted. So, let's
 614   // do nothing.
 615 }
 616 
 617 void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
 622   MemRegion committed = _g1h->g1_committed();
 623   _heap_start = committed.start();
 624   _heap_end   = committed.end();
 625 
 626   // Separated the asserts so that we know which one fires.
 627   assert(_heap_start != NULL, "heap bounds should look ok");
 628   assert(_heap_end != NULL, "heap bounds should look ok");
 629   assert(_heap_start < _heap_end, "heap bounds should look ok");
 630 
 631   // reset all the marking data structures and any necessary flags
 632   clear_marking_state();
 633 
 634   if (verbose_low()) {
 635     gclog_or_tty->print_cr("[global] resetting");
 636   }
 637 
 638   // We do reset all of them, since different phases will use
 639   // different number of active threads. So, it's easiest to have all
 640   // of them ready.
 641   for (int i = 0; i < (int) _max_task_num; ++i) {
 642     _tasks[i]->reset(_nextMarkBitMap);
 643   }
 644 
  // We need this to make sure that the flag is on during the evacuation
  // pause that the initial mark is piggy-backed on.
 647   set_concurrent_marking_in_progress();
 648 }
 649 
 650 void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
 651   assert(active_tasks <= _max_task_num, "we should not have more");
 652 
 653   _active_tasks = active_tasks;
 654   // Need to update the three data structures below according to the
 655   // number of active threads for this phase.
 656   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 657   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 658   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 659 
 660   _concurrent = concurrent;
 661   // We propagate this to all tasks, not just the active ones.
 662   for (int i = 0; i < (int) _max_task_num; ++i)
 663     _tasks[i]->set_concurrent(concurrent);
 664 
 665   if (concurrent) {
 666     set_concurrent_marking_in_progress();
 667   } else {
 668     // We currently assume that the concurrent flag has been set to
 669     // false before we start remark. At this point we should also be
 670     // in a STW phase.
 671     assert(!concurrent_marking_in_progress(), "invariant");
 672     assert(_finger == _heap_end, "only way to get here");
 673     update_g1_committed(true);
 674   }
 675 }
 676 
 677 void ConcurrentMark::set_non_marking_state() {
 678   // We set the global marking state to some default values when we're
 679   // not doing marking.
 680   clear_marking_state();
 681   _active_tasks = 0;
 682   clear_concurrent_marking_in_progress();
 683 }
 684 
 685 ConcurrentMark::~ConcurrentMark() {
 686   // The ConcurrentMark instance is never freed.
 687   ShouldNotReachHere();
 688 }
 689 
 690 void ConcurrentMark::clearNextBitmap() {
 691   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 692   G1CollectorPolicy* g1p = g1h->g1_policy();
 693 
  // Make sure that the concurrent mark thread still appears to be in
 695   // the current cycle.
 696   guarantee(cmThread()->during_cycle(), "invariant");
 697 
 698   // We are finishing up the current cycle by clearing the next
 699   // marking bitmap and getting it ready for the next cycle. During
 700   // this time no other cycle can start. So, let's make sure that this
 701   // is the case.
 702   guarantee(!g1h->mark_in_progress(), "invariant");
 703 
 704   // clear the mark bitmap (no grey objects to start with).
 705   // We need to do this in chunks and offer to yield in between
 706   // each chunk.
 707   HeapWord* start  = _nextMarkBitMap->startWord();
 708   HeapWord* end    = _nextMarkBitMap->endWord();
 709   HeapWord* cur    = start;
 710   size_t chunkSize = M;
 711   while (cur < end) {
 712     HeapWord* next = cur + chunkSize;
 713     if (next > end) {
 714       next = end;
 715     }
 716     MemRegion mr(cur,next);
 717     _nextMarkBitMap->clearRange(mr);
 718     cur = next;
 719     do_yield_check();
 720 
 721     // Repeat the asserts from above. We'll do them as asserts here to
 722     // minimize their overhead on the product. However, we'll have
 723     // them as guarantees at the beginning / end of the bitmap
 724     // clearing to get some checking in the product.
 725     assert(cmThread()->during_cycle(), "invariant");
 726     assert(!g1h->mark_in_progress(), "invariant");
 727   }
 728 
 729   // Clear the liveness counting data
 730   clear_all_count_data();
 731 
 732   // Repeat the asserts from above.
 733   guarantee(cmThread()->during_cycle(), "invariant");
 734   guarantee(!g1h->mark_in_progress(), "invariant");
 735 }
 736 
 737 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 738 public:
 739   bool doHeapRegion(HeapRegion* r) {
 740     if (!r->continuesHumongous()) {
 741       r->note_start_of_marking();
 742     }
 743     return false;
 744   }
 745 };
 746 
 747 void ConcurrentMark::checkpointRootsInitialPre() {
 748   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 749   G1CollectorPolicy* g1p = g1h->g1_policy();
 750 
 751   _has_aborted = false;
 752 
 753 #ifndef PRODUCT
 754   if (G1PrintReachableAtInitialMark) {
 755     print_reachable("at-cycle-start",
 756                     VerifyOption_G1UsePrevMarking, true /* all */);
 757   }
 758 #endif
 759 
 760   // Initialise marking structures. This has to be done in a STW phase.
 761   reset();
 762 
 763   // For each region note start of marking.
 764   NoteStartOfMarkHRClosure startcl;
 765   g1h->heap_region_iterate(&startcl);
 766 }
 767 
 768 
 769 void ConcurrentMark::checkpointRootsInitialPost() {
 770   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 771 
 772   // If we force an overflow during remark, the remark operation will
 773   // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow count will decrease at
  // every remark and we'll eventually not need to cause one.
 778   force_overflow_stw()->init();
 779 
 780   // Start Concurrent Marking weak-reference discovery.
 781   ReferenceProcessor* rp = g1h->ref_processor_cm();
 782   // enable ("weak") refs discovery
 783   rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
 784   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 785 
 786   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
 789   satb_mq_set.set_active_all_threads(true, /* new active value */
 790                                      false /* expected_active */);
 791 
 792   _root_regions.prepare_for_scan();
 793 
 794   // update_g1_committed() will be called at the end of an evac pause
 795   // when marking is on. So, it's also called at the end of the
 796   // initial-mark pause to update the heap end, if the heap expands
 797   // during it. No need to call it here.
 798 }
 799 
 800 /*
 801  * Notice that in the next two methods, we actually leave the STS
 802  * during the barrier sync and join it immediately afterwards. If we
 803  * do not do this, the following deadlock can occur: one thread could
 804  * be in the barrier sync code, waiting for the other thread to also
 805  * sync up, whereas another one could be trying to yield, while also
 806  * waiting for the other threads to sync up too.
 807  *
 808  * Note, however, that this code is also used during remark and in
 809  * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 811  * (product). So we should only leave / enter the STS if we are
 812  * operating concurrently.
 813  *
 814  * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 817  * barrier is one of the last things do_marking_step() does, and it
 818  * doesn't manipulate any data structures afterwards.
 819  */
 820 
 821 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
 822   if (verbose_low()) {
 823     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
 824   }
 825 
 826   if (concurrent()) {
 827     ConcurrentGCThread::stsLeave();
 828   }
 829   _first_overflow_barrier_sync.enter();
 830   if (concurrent()) {
 831     ConcurrentGCThread::stsJoin();
 832   }
 833   // at this point everyone should have synced up and not be doing any
 834   // more work
 835 
 836   if (verbose_low()) {
 837     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
 838   }
 839 
 840   // let task 0 do this
 841   if (task_num == 0) {
 842     // task 0 is responsible for clearing the global data structures
 843     // We should be here because of an overflow. During STW we should
 844     // not clear the overflow flag since we rely on it being true when
    // we exit this method to abort the pause and restart concurrent
 846     // marking.
 847     clear_marking_state(concurrent() /* clear_overflow */);
 848     force_overflow()->update();
 849 
 850     if (G1Log::fine()) {
 851       gclog_or_tty->date_stamp(PrintGCDateStamps);
 852       gclog_or_tty->stamp(PrintGCTimeStamps);
 853       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 854     }
 855   }
 856 
  // after this, each task should reset its own data structures and
  // then go into the second barrier
 859 }
 860 
 861 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
 862   if (verbose_low()) {
 863     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
 864   }
 865 
 866   if (concurrent()) {
 867     ConcurrentGCThread::stsLeave();
 868   }
 869   _second_overflow_barrier_sync.enter();
 870   if (concurrent()) {
 871     ConcurrentGCThread::stsJoin();
 872   }
 873   // at this point everything should be re-initialised and ready to go
 874 
 875   if (verbose_low()) {
 876     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
 877   }
 878 }
 879 
 880 #ifndef PRODUCT
 881 void ForceOverflowSettings::init() {
 882   _num_remaining = G1ConcMarkForceOverflow;
 883   _force = false;
 884   update();
 885 }
 886 
 887 void ForceOverflowSettings::update() {
 888   if (_num_remaining > 0) {
 889     _num_remaining -= 1;
 890     _force = true;
 891   } else {
 892     _force = false;
 893   }
 894 }
 895 
 896 bool ForceOverflowSettings::should_force() {
 897   if (_force) {
 898     _force = false;
 899     return true;
 900   } else {
 901     return false;
 902   }
 903 }
 904 #endif // !PRODUCT
 905 
 906 class CMConcurrentMarkingTask: public AbstractGangTask {
 907 private:
 908   ConcurrentMark*       _cm;
 909   ConcurrentMarkThread* _cmt;
 910 
 911 public:
 912   void work(uint worker_id) {
 913     assert(Thread::current()->is_ConcurrentGC_thread(),
 914            "this should only be done by a conc GC thread");
 915     ResourceMark rm;
 916 
 917     double start_vtime = os::elapsedVTime();
 918 
 919     ConcurrentGCThread::stsJoin();
 920 
 921     assert(worker_id < _cm->active_tasks(), "invariant");
 922     CMTask* the_task = _cm->task(worker_id);
 923     the_task->record_start_time();
 924     if (!_cm->has_aborted()) {
 925       do {
 926         double start_vtime_sec = os::elapsedVTime();
 927         double start_time_sec = os::elapsedTime();
 928         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 929 
 930         the_task->do_marking_step(mark_step_duration_ms,
 931                                   true /* do_stealing    */,
 932                                   true /* do_termination */);
 933 
 934         double end_time_sec = os::elapsedTime();
 935         double end_vtime_sec = os::elapsedVTime();
 936         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
 937         double elapsed_time_sec = end_time_sec - start_time_sec;
 938         _cm->clear_has_overflown();
 939 
 940         bool ret = _cm->do_yield_check(worker_id);
 941 
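        // If this task aborted its marking step (e.g. its time budget ran
        // out) but marking overall has not aborted, throttle it: leave the
        // STS and sleep for a period proportional to the virtual time just
        // consumed, scaled by the sleep factor chosen in the constructor.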
 942         jlong sleep_time_ms;
 943         if (!_cm->has_aborted() && the_task->has_aborted()) {
 944           sleep_time_ms =
 945             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
 946           ConcurrentGCThread::stsLeave();
 947           os::sleep(Thread::current(), sleep_time_ms, false);
 948           ConcurrentGCThread::stsJoin();
 949         }
 950         double end_time2_sec = os::elapsedTime();
 951         double elapsed_time2_sec = end_time2_sec - start_time_sec;
 952 
 953 #if 0
 954           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
 955                                  "overhead %1.4lf",
 956                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
 957                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
 958           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
 959                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
 960 #endif
 961       } while (!_cm->has_aborted() && the_task->has_aborted());
 962     }
 963     the_task->record_end_time();
 964     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
 965 
 966     ConcurrentGCThread::stsLeave();
 967 
 968     double end_vtime = os::elapsedVTime();
 969     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 970   }
 971 
 972   CMConcurrentMarkingTask(ConcurrentMark* cm,
 973                           ConcurrentMarkThread* cmt) :
 974       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
 975 
 976   ~CMConcurrentMarkingTask() { }
 977 };
 978 
 979 // Calculates the number of active workers for a concurrent
 980 // phase.
 981 uint ConcurrentMark::calc_parallel_marking_threads() {
 982   if (G1CollectedHeap::use_parallel_gc_threads()) {
 983     uint n_conc_workers = 0;
 984     if (!UseDynamicNumberOfGCThreads ||
 985         (!FLAG_IS_DEFAULT(ConcGCThreads) &&
 986          !ForceDynamicNumberOfGCThreads)) {
 987       n_conc_workers = max_parallel_marking_threads();
 988     } else {
 989       n_conc_workers =
 990         AdaptiveSizePolicy::calc_default_active_workers(
 991                                      max_parallel_marking_threads(),
 992                                      1, /* Minimum workers */
 993                                      parallel_marking_threads(),
 994                                      Threads::number_of_non_daemon_threads());
 995       // Don't scale down "n_conc_workers" by scale_parallel_threads() because
 996       // that scaling has already gone into "_max_parallel_marking_threads".
 997     }
 998     assert(n_conc_workers > 0, "Always need at least 1");
 999     return n_conc_workers;
1000   }
1001   // If we are not running with any parallel GC threads we will not
1002   // have spawned any marking threads either. Hence the number of
1003   // concurrent workers should be 0.
1004   return 0;
1005 }
1006 
1007 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1008   // Currently, only survivors can be root regions.
1009   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1010   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1011 
1012   const uintx interval = PrefetchScanIntervalInBytes;
1013   HeapWord* curr = hr->bottom();
1014   const HeapWord* end = hr->top();
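  // Walk the objects in the region linearly from bottom() to top(),
  // prefetching ahead and applying the root region closure to each object;
  // oop_iterate() returns the object's size in words, which advances curr.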
1015   while (curr < end) {
1016     Prefetch::read(curr, interval);
1017     oop obj = oop(curr);
1018     int size = obj->oop_iterate(&cl);
1019     assert(size == obj->size(), "sanity");
1020     curr += size;
1021   }
1022 }
1023 
1024 class CMRootRegionScanTask : public AbstractGangTask {
1025 private:
1026   ConcurrentMark* _cm;
1027 
1028 public:
1029   CMRootRegionScanTask(ConcurrentMark* cm) :
1030     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1031 
1032   void work(uint worker_id) {
1033     assert(Thread::current()->is_ConcurrentGC_thread(),
1034            "this should only be done by a conc GC thread");
1035 
1036     CMRootRegions* root_regions = _cm->root_regions();
1037     HeapRegion* hr = root_regions->claim_next();
1038     while (hr != NULL) {
1039       _cm->scanRootRegion(hr, worker_id);
1040       hr = root_regions->claim_next();
1041     }
1042   }
1043 };
1044 
1045 void ConcurrentMark::scanRootRegions() {
1046   // scan_in_progress() will have been set to true only if there was
1047   // at least one root region to scan. So, if it's false, we
1048   // should not attempt to do any further work.
1049   if (root_regions()->scan_in_progress()) {
1050     _parallel_marking_threads = calc_parallel_marking_threads();
1051     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1052            "Maximum number of marking threads exceeded");
1053     uint active_workers = MAX2(1U, parallel_marking_threads());
1054 
1055     CMRootRegionScanTask task(this);
1056     if (parallel_marking_threads() > 0) {
1057       _parallel_workers->set_active_workers((int) active_workers);
1058       _parallel_workers->run_task(&task);
1059     } else {
1060       task.work(0);
1061     }
1062 
1063     // It's possible that has_aborted() is true here without actually
1064     // aborting the survivor scan earlier. This is OK as it's
1065     // mainly used for sanity checking.
1066     root_regions()->scan_finished();
1067   }
1068 }
1069 
1070 void ConcurrentMark::markFromRoots() {
1071   // we might be tempted to assert that:
1072   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1073   //        "inconsistent argument?");
1074   // However that wouldn't be right, because it's possible that
1075   // a safepoint is indeed in progress as a younger generation
1076   // stop-the-world GC happens even as we mark in this generation.
1077 
1078   _restart_for_overflow = false;
1079   force_overflow_conc()->init();
1080 
1081   // _g1h has _n_par_threads
1082   _parallel_marking_threads = calc_parallel_marking_threads();
1083   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1084     "Maximum number of marking threads exceeded");
1085 
1086   uint active_workers = MAX2(1U, parallel_marking_threads());
1087 
1088   // Parallel task terminator is set in "set_phase()"
1089   set_phase(active_workers, true /* concurrent */);
1090 
1091   CMConcurrentMarkingTask markingTask(this, cmThread());
1092   if (parallel_marking_threads() > 0) {
1093     _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
1096     assert(_parallel_workers->active_workers() > 0, "Should have been set");
1097     _parallel_workers->run_task(&markingTask);
1098   } else {
1099     markingTask.work(0);
1100   }
1101   print_stats();
1102 }
1103 
1104 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1105   // world is stopped at this checkpoint
1106   assert(SafepointSynchronize::is_at_safepoint(),
1107          "world should be stopped");
1108 
1109   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1110 
1111   // If a full collection has happened, we shouldn't do this.
1112   if (has_aborted()) {
1113     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1114     return;
1115   }
1116 
1117   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1118 
1119   if (VerifyDuringGC) {
1120     HandleMark hm;  // handle scope
1121     gclog_or_tty->print(" VerifyDuringGC:(before)");
1122     Universe::heap()->prepare_for_verify();
1123     Universe::verify(/* silent      */ false,
1124                      /* option      */ VerifyOption_G1UsePrevMarking);
1125   }
1126 
1127   G1CollectorPolicy* g1p = g1h->g1_policy();
1128   g1p->record_concurrent_mark_remark_start();
1129 
1130   double start = os::elapsedTime();
1131 
1132   checkpointRootsFinalWork();
1133 
1134   double mark_work_end = os::elapsedTime();
1135 
1136   weakRefsWork(clear_all_soft_refs);
1137 
1138   if (has_overflown()) {
1139     // Oops.  We overflowed.  Restart concurrent marking.
1140     _restart_for_overflow = true;
1141     // Clear the flag. We do not need it any more.
1142     clear_has_overflown();
1143     if (G1TraceMarkStackOverflow) {
1144       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1145     }
1146   } else {
1147     // Aggregate the per-task counting data that we have accumulated
1148     // while marking.
1149     aggregate_count_data();
1150 
1151     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1152     // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
1155     satb_mq_set.set_active_all_threads(false, /* new active value */
1156                                        true /* expected_active */);
1157 
1158     if (VerifyDuringGC) {
1159       HandleMark hm;  // handle scope
1160       gclog_or_tty->print(" VerifyDuringGC:(after)");
1161       Universe::heap()->prepare_for_verify();
1162       Universe::verify(/* silent      */ false,
1163                        /* option      */ VerifyOption_G1UseNextMarking);
1164     }
1165     assert(!restart_for_overflow(), "sanity");
1166   }
1167 
1168   // Reset the marking state if marking completed
1169   if (!restart_for_overflow()) {
1170     set_non_marking_state();
1171   }
1172 
1173 #if VERIFY_OBJS_PROCESSED
1174   _scan_obj_cl.objs_processed = 0;
1175   ThreadLocalObjQueue::objs_enqueued = 0;
1176 #endif
1177 
1178   // Statistics
1179   double now = os::elapsedTime();
1180   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1181   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1182   _remark_times.add((now - start) * 1000.0);
1183 
1184   g1p->record_concurrent_mark_remark_end();
1185 }
1186 
1187 // Base class of the closures that finalize and verify the
1188 // liveness counting data.
1189 class CMCountDataClosureBase: public HeapRegionClosure {
1190 protected:
1191   G1CollectedHeap* _g1h;
1192   ConcurrentMark* _cm;
1193   CardTableModRefBS* _ct_bs;
1194 
1195   BitMap* _region_bm;
1196   BitMap* _card_bm;
1197 
  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
1200   // bitmap to 1. If the region is "starts humongous" it will also set
1201   // to 1 the bits on the region bitmap that correspond to its
1202   // associated "continues humongous" regions.
1203   void set_bit_for_region(HeapRegion* hr) {
1204     assert(!hr->continuesHumongous(), "should have filtered those out");
1205 
1206     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1207     if (!hr->startsHumongous()) {
1208       // Normal (non-humongous) case: just set the bit.
1209       _region_bm->par_at_put(index, true);
1210     } else {
1211       // Starts humongous case: calculate how many regions are part of
1212       // this humongous region and then set the bit range.
1213       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1214       _region_bm->par_at_put_range(index, end_index, true);
1215     }
1216   }
1217 
1218 public:
1219   CMCountDataClosureBase(G1CollectedHeap* g1h,
1220                          BitMap* region_bm, BitMap* card_bm):
1221     _g1h(g1h), _cm(g1h->concurrent_mark()),
1222     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
1223     _region_bm(region_bm), _card_bm(card_bm) { }
1224 };
1225 
1226 // Closure that calculates the # live objects per region. Used
1227 // for verification purposes during the cleanup pause.
1228 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1229   CMBitMapRO* _bm;
1230   size_t _region_marked_bytes;
1231 
1232 public:
1233   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1234                          BitMap* region_bm, BitMap* card_bm) :
1235     CMCountDataClosureBase(g1h, region_bm, card_bm),
1236     _bm(bm), _region_marked_bytes(0) { }
1237 
1238   bool doHeapRegion(HeapRegion* hr) {
1239 
1240     if (hr->continuesHumongous()) {
1241       // We will ignore these here and process them when their
1242       // associated "starts humongous" region is processed (see
1243       // set_bit_for_heap_region()). Note that we cannot rely on their
1244       // associated "starts humongous" region to have their bit set to
1245       // 1 since, due to the region chunking in the parallel region
1246       // iteration, a "continues humongous" region might be visited
1247       // before its associated "starts humongous".
1248       return false;
1249     }
1250 
1251     HeapWord* ntams = hr->next_top_at_mark_start();
1252     HeapWord* start = hr->bottom();
1253 
1254     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1255            err_msg("Preconditions not met - "
1256                    "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1257                    start, ntams, hr->end()));
1258 
1259     // Find the first marked object at or after "start".
1260     start = _bm->getNextMarkedWordAddress(start, ntams);
1261 
1262     size_t marked_bytes = 0;
1263 
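    // Walk the marked objects in [bottom, ntams): for each one, set the bits
    // for the cards it spans in the card bitmap and add its size in bytes to
    // marked_bytes.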
1264     while (start < ntams) {
1265       oop obj = oop(start);
1266       int obj_sz = obj->size();
1267       HeapWord* obj_end = start + obj_sz;
1268 
1269       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1270       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1271       
      // Note: if we're looking at the last region in the heap, obj_end
      // could actually be outside the heap and end_idx could correspond
      // to a card that is also outside the heap.
1275       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1276         // end of object is not card aligned - increment to cover
1277         // all the cards spanned by the object
1278         end_idx += 1;
1279       }
1280 
1281       // Set the bits in the card BM for the cards spanned by this object.
1282       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1283 
1284       // Add the size of this object to the number of marked bytes.
1285       marked_bytes += (size_t)obj_sz * HeapWordSize;
1286 
1287       // Find the next marked object after this one.
1288       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1289     }
1290 
1291     // Mark the allocated-since-marking portion...
1292     HeapWord* top = hr->top();
1293     if (ntams < top) {
1294       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1295       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1296 
      // Note: if we're looking at the last region in the heap, top
      // could actually be outside the heap and end_idx could correspond
      // to a card that is also outside the heap.
1300       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1301         // end of object is not card aligned - increment to cover
1302         // all the cards spanned by the object
1303         end_idx += 1;
1304       }
1305       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1306 
1307       // This definitely means the region has live objects.
1308       set_bit_for_region(hr);
1309     }
1310 
1311     // Update the live region bitmap.
1312     if (marked_bytes > 0) {
1313       set_bit_for_region(hr);
1314     }
1315 
1316     // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
1318     _region_marked_bytes = marked_bytes;
1319 
1320     return false;
1321   }
1322 
1323   size_t region_marked_bytes() const { return _region_marked_bytes; }
1324 };
1325 
1326 // Heap region closure used for verifying the counting data
1327 // that was accumulated concurrently and aggregated during
1328 // the remark pause. This closure is applied to the heap
1329 // regions during the STW cleanup pause.
1330 
1331 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1332   G1CollectedHeap* _g1h;
1333   ConcurrentMark* _cm;
1334   CalcLiveObjectsClosure _calc_cl;
1335   BitMap* _region_bm;   // Region BM to be verified
1336   BitMap* _card_bm;     // Card BM to be verified
1337   bool _verbose;        // verbose output?
1338 
1339   BitMap* _exp_region_bm; // Expected Region BM values
1340   BitMap* _exp_card_bm;   // Expected card BM values
1341 
1342   int _failures;
1343 
1344 public:
1345   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1346                                 BitMap* region_bm,
1347                                 BitMap* card_bm,
1348                                 BitMap* exp_region_bm,
1349                                 BitMap* exp_card_bm,
1350                                 bool verbose) :
1351     _g1h(g1h), _cm(g1h->concurrent_mark()), 
1352     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1353     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1354     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1355     _failures(0) { }
1356 
1357   int failures() const { return _failures; }
1358 
1359   bool doHeapRegion(HeapRegion* hr) {
1360     if (hr->continuesHumongous()) {
1361       // We will ignore these here and process them when their
1362       // associated "starts humongous" region is processed (see
1363       // set_bit_for_heap_region()). Note that we cannot rely on their
1364       // associated "starts humongous" region to have their bit set to
1365       // 1 since, due to the region chunking in the parallel region
1366       // iteration, a "continues humongous" region might be visited
1367       // before its associated "starts humongous".
1368       return false;
1369     }
1370 
1371     int failures = 0;
1372 
1373     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1374     // this region and set the corresponding bits in the expected region
1375     // and card bitmaps.
1376     bool res = _calc_cl.doHeapRegion(hr);
1377     assert(res == false, "should be continuing");
1378 
1379     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1380                     Mutex::_no_safepoint_check_flag);
1381 
1382     // Verify the marked bytes for this region.
1383     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1384     size_t act_marked_bytes = hr->next_marked_bytes();
1385 
1386     // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting for some objects during the actual marking.
1388     if (exp_marked_bytes > act_marked_bytes) {
1389       if (_verbose) {
1390         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1391                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1392                                hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1393       }
1394       failures += 1;
1395     }
1396 
1397     // Verify the bit, for this region, in the actual and expected
1398     // (which was just calculated) region bit maps.
1399     // We're not OK if the bit in the calculated expected region
1400     // bitmap is set and the bit in the actual region bitmap is not.
1401     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1402 
1403     bool expected = _exp_region_bm->at(index);
1404     bool actual = _region_bm->at(index);
1405     if (expected && !actual) {
1406       if (_verbose) {
1407         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1408                                "expected: %s, actual: %s",
1409                                hr->hrs_index(),
1410                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1411       }
1412       failures += 1;
1413     }
1414 
1415     // Verify that the card bit maps for the cards spanned by the current
1416     // region match. We have an error if we have a set bit in the expected
1417     // bit map and the corresponding bit in the actual bitmap is not set.
1418 
1419     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1420     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1421 
1422     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1423       expected = _exp_card_bm->at(i);
1424       actual = _card_bm->at(i);
1425 
1426       if (expected && !actual) {
1427         if (_verbose) {
1428           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1429                                  "expected: %s, actual: %s",
1430                                  hr->hrs_index(), i,
1431                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1432         }
1433         failures += 1;
1434       }
1435     }
1436 
1437     if (failures > 0 && _verbose)  {
1438       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1439                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1440                              HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
1441                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1442     }
1443 
1444     _failures += failures;
1445 
1446     // We could stop iteration over the heap when we
1447     // find the first violating region by returning true.
1448     return false;
1449   }
1450 };
1451 
1452 
1453 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1454 protected:
1455   G1CollectedHeap* _g1h;
1456   ConcurrentMark* _cm;
1457   BitMap* _actual_region_bm;
1458   BitMap* _actual_card_bm;
1459 
1460   uint    _n_workers;
1461 
1462   BitMap* _expected_region_bm;
1463   BitMap* _expected_card_bm;
1464 
1465   int  _failures;
1466   bool _verbose;
1467 
1468 public:
1469   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1470                             BitMap* region_bm, BitMap* card_bm,
1471                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1472     : AbstractGangTask("G1 verify final counting"),
1473       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1474       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1475       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1476       _failures(0), _verbose(false),
1477       _n_workers(0) {
1478     assert(VerifyDuringGC, "don't call this otherwise");
1479 
1480     // Use the value already set as the number of active threads
1481     // in the call to run_task().
1482     if (G1CollectedHeap::use_parallel_gc_threads()) {
1483       assert( _g1h->workers()->active_workers() > 0,
1484         "Should have been previously set");
1485       _n_workers = _g1h->workers()->active_workers();
1486     } else {
1487       _n_workers = 1;
1488     }
1489 
1490     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1491     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1492 
1493     _verbose = _cm->verbose_medium();
1494   }
1495 
1496   void work(uint worker_id) {
1497     assert(worker_id < _n_workers, "invariant");
1498 
1499     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1500                                             _actual_region_bm, _actual_card_bm,
1501                                             _expected_region_bm,
1502                                             _expected_card_bm,
1503                                             _verbose);
1504 
1505     if (G1CollectedHeap::use_parallel_gc_threads()) {
1506       _g1h->heap_region_par_iterate_chunked(&verify_cl,
1507                                             worker_id,
1508                                             _n_workers,
1509                                             HeapRegion::VerifyCountClaimValue);
1510     } else {
1511       _g1h->heap_region_iterate(&verify_cl);
1512     }
1513 
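    // Accumulate this worker's failure count into the task-wide total;
    // several workers may update _failures concurrently, hence the atomic add.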
1514     Atomic::add(verify_cl.failures(), &_failures);
1515   }
1516 
1517   int failures() const { return _failures; }
1518 };
1519 
1520 // Closure that finalizes the liveness counting data.
1521 // Used during the cleanup pause.
1522 // Sets the bits corresponding to the interval [NTAMS, top]
1523 // (which contains the implicitly live objects) in the
1524 // card liveness bitmap. Also sets the bit for each region,
1525 // containing live data, in the region liveness bitmap.
1526 
1527 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1528  public:
1529   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1530                               BitMap* region_bm,
1531                               BitMap* card_bm) :
1532     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1533 
1534   bool doHeapRegion(HeapRegion* hr) {
1535 
1536     if (hr->continuesHumongous()) {
1537       // We will ignore these here and process them when their
1538       // associated "starts humongous" region is processed (see
1539       // set_bit_for_heap_region()). Note that we cannot rely on their
1540       // associated "starts humongous" region to have its bit set to
1541       // 1 since, due to the region chunking in the parallel region
1542       // iteration, a "continues humongous" region might be visited
1543       // before its associated "starts humongous".
1544       return false;
1545     }
1546 
1547     HeapWord* ntams = hr->next_top_at_mark_start();
1548     HeapWord* top   = hr->top();
1549 
1550     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1551 
1552     // Mark the allocated-since-marking portion...
1553     if (ntams < top) {
1554       // This definitely means the region has live objects.
1555       set_bit_for_region(hr);
1556
1557       // Now set the bits in the card bitmap for [ntams, top)
1558       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1559       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1560
1561       // Note: if we're looking at the last region in heap - top
1562       // could actually be outside the heap and end_idx correspond
1563       // to a card that is also outside the heap.
1564       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1565         // end of object is not card aligned - increment to cover
1566         // all the cards spanned by the object
1567         end_idx += 1;
1568       }
1569 
1570       assert(end_idx <= _card_bm->size(),
1571              err_msg("oob: end_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1572                      end_idx, _card_bm->size()));
1573       assert(start_idx < _card_bm->size(),
1574              err_msg("oob: start_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1575                      start_idx, _card_bm->size()));
1576
1577       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1578     }
1579 
1580     // Set the bit for the region if it contains live data
1581     if (hr->next_marked_bytes() > 0) {
1582       set_bit_for_region(hr);
1583     }
1584 
1585     return false;
1586   }
1587 };
1588 
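// Gang task that applies FinalCountDataUpdateClosure to every heap region,
// using chunked parallel iteration when parallel GC threads are in use.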
1589 class G1ParFinalCountTask: public AbstractGangTask {
1590 protected:
1591   G1CollectedHeap* _g1h;
1592   ConcurrentMark* _cm;
1593   BitMap* _actual_region_bm;
1594   BitMap* _actual_card_bm;
1595 
1596   uint    _n_workers;
1597 
1598 public:
1599   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1600     : AbstractGangTask("G1 final counting"),
1601       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1602       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1603       _n_workers(0) {
1604     // Use the value already set as the number of active threads
1605     // in the call to run_task().
1606     if (G1CollectedHeap::use_parallel_gc_threads()) {
1607       assert( _g1h->workers()->active_workers() > 0,
1608         "Should have been previously set");
1609       _n_workers = _g1h->workers()->active_workers();
1610     } else {
1611       _n_workers = 1;
1612     }
1613   }
1614 
1615   void work(uint worker_id) {
1616     assert(worker_id < _n_workers, "invariant");
1617 
1618     FinalCountDataUpdateClosure final_update_cl(_g1h,
1619                                                 _actual_region_bm,
1620                                                 _actual_card_bm);
1621 
1622     if (G1CollectedHeap::use_parallel_gc_threads()) {
1623       _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1624                                             worker_id,
1625                                             _n_workers,
1626                                             HeapRegion::FinalCountClaimValue);
1627     } else {
1628       _g1h->heap_region_iterate(&final_update_cl);
1629     }
1630   }
1631 };
1632 
1633 class G1ParNoteEndTask;
1634 
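// Closure applied to each heap region at the end of marking: it records the
// end-of-marking information for the region and frees the region (into the
// local cleanup list) if it turned out to contain no live data.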
1635 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1636   G1CollectedHeap* _g1;
1637   int _worker_num;
1638   size_t _max_live_bytes;
1639   uint _regions_claimed;
1640   size_t _freed_bytes;
1641   FreeRegionList* _local_cleanup_list;
1642   OldRegionSet* _old_proxy_set;
1643   HumongousRegionSet* _humongous_proxy_set;
1644   HRRSCleanupTask* _hrrs_cleanup_task;
1645   double _claimed_region_time;
1646   double _max_region_time;
1647 
1648 public:
1649   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1650                              int worker_num,
1651                              FreeRegionList* local_cleanup_list,
1652                              OldRegionSet* old_proxy_set,
1653                              HumongousRegionSet* humongous_proxy_set,
1654                              HRRSCleanupTask* hrrs_cleanup_task) :
1655     _g1(g1), _worker_num(worker_num),
1656     _max_live_bytes(0), _regions_claimed(0),
1657     _freed_bytes(0),
1658     _claimed_region_time(0.0), _max_region_time(0.0),
1659     _local_cleanup_list(local_cleanup_list),
1660     _old_proxy_set(old_proxy_set),
1661     _humongous_proxy_set(humongous_proxy_set),
1662     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1663 
1664   size_t freed_bytes() { return _freed_bytes; }
1665 
1666   bool doHeapRegion(HeapRegion *hr) {
1667     if (hr->continuesHumongous()) {
1668       return false;
1669     }
1670     // We use a claim value of zero here because all regions
1671     // were claimed with value 1 in the FinalCount task.
1672     _g1->reset_gc_time_stamps(hr);
1673     double start = os::elapsedTime();
1674     _regions_claimed++;
1675     hr->note_end_of_marking();
1676     _max_live_bytes += hr->max_live_bytes();
1677     _g1->free_region_if_empty(hr,
1678                               &_freed_bytes,
1679                               _local_cleanup_list,
1680                               _old_proxy_set,
1681                               _humongous_proxy_set,
1682                               _hrrs_cleanup_task,
1683                               true /* par */);
1684     double region_time = (os::elapsedTime() - start);
1685     _claimed_region_time += region_time;
1686     if (region_time > _max_region_time) {
1687       _max_region_time = region_time;
1688     }
1689     return false;
1690   }
1691 
1692   size_t max_live_bytes() { return _max_live_bytes; }
1693   uint regions_claimed() { return _regions_claimed; }
1694   double claimed_region_time_sec() { return _claimed_region_time; }
1695   double max_region_time_sec() { return _max_region_time; }
1696 };
1697 
1698 class G1ParNoteEndTask: public AbstractGangTask {
1699   friend class G1NoteEndOfConcMarkClosure;
1700 
1701 protected:
1702   G1CollectedHeap* _g1h;
1703   size_t _max_live_bytes;
1704   size_t _freed_bytes;
1705   FreeRegionList* _cleanup_list;
1706 
1707 public:
1708   G1ParNoteEndTask(G1CollectedHeap* g1h,
1709                    FreeRegionList* cleanup_list) :
1710     AbstractGangTask("G1 note end"), _g1h(g1h),
1711     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1712 
1713   void work(uint worker_id) {
1714     double start = os::elapsedTime();
1715     FreeRegionList local_cleanup_list("Local Cleanup List");
1716     OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
1717     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1718     HRRSCleanupTask hrrs_cleanup_task;
1719     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
1720                                            &old_proxy_set,
1721                                            &humongous_proxy_set,
1722                                            &hrrs_cleanup_task);
1723     if (G1CollectedHeap::use_parallel_gc_threads()) {
1724       _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1725                                             _g1h->workers()->active_workers(),
1726                                             HeapRegion::NoteEndClaimValue);
1727     } else {
1728       _g1h->heap_region_iterate(&g1_note_end);
1729     }
1730     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1731 
1732     // Now update the lists
1733     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1734                                             NULL /* free_list */,
1735                                             &old_proxy_set,
1736                                             &humongous_proxy_set,
1737                                             true /* par */);
1738     {
1739       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1740       _max_live_bytes += g1_note_end.max_live_bytes();
1741       _freed_bytes += g1_note_end.freed_bytes();
1742 
1743       // If we iterate over the global cleanup list at the end of
1744       // cleanup to do this printing we will not guarantee to only
1745       // generate output for the newly-reclaimed regions (the list
1746       // might not be empty at the beginning of cleanup; we might
1747       // still be working on its previous contents). So we do the
1748       // printing here, before we append the new regions to the global
1749       // cleanup list.
1750 
1751       G1HRPrinter* hr_printer = _g1h->hr_printer();
1752       if (hr_printer->is_active()) {
1753         HeapRegionLinkedListIterator iter(&local_cleanup_list);
1754         while (iter.more_available()) {
1755           HeapRegion* hr = iter.get_next();
1756           hr_printer->cleanup(hr);
1757         }
1758       }
1759 
1760       _cleanup_list->add_as_tail(&local_cleanup_list);
1761       assert(local_cleanup_list.is_empty(), "post-condition");
1762 
1763       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1764     }
1765   }
1766   size_t max_live_bytes() { return _max_live_bytes; }
1767   size_t freed_bytes() { return _freed_bytes; }
1768 };
1769 
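// Task that scrubs the remembered sets, in parallel where possible, using
// the region and card liveness bitmaps accumulated during marking (see
// G1RemSet::scrub() and scrub_par()).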
1770 class G1ParScrubRemSetTask: public AbstractGangTask {
1771 protected:
1772   G1RemSet* _g1rs;
1773   BitMap* _region_bm;
1774   BitMap* _card_bm;
1775 public:
1776   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1777                        BitMap* region_bm, BitMap* card_bm) :
1778     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1779     _region_bm(region_bm), _card_bm(card_bm) { }
1780 
1781   void work(uint worker_id) {
1782     if (G1CollectedHeap::use_parallel_gc_threads()) {
1783       _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1784                        HeapRegion::ScrubRemSetClaimValue);
1785     } else {
1786       _g1rs->scrub(_region_bm, _card_bm);
1787     }
1788   }
1789 
1790 };
1791 
1792 void ConcurrentMark::cleanup() {
1793   // world is stopped at this checkpoint
1794   assert(SafepointSynchronize::is_at_safepoint(),
1795          "world should be stopped");
1796   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1797 
1798   // If a full collection has happened, we shouldn't do this.
1799   if (has_aborted()) {
1800     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1801     return;
1802   }
1803 
1804   HRSPhaseSetter x(HRSPhaseCleanup);
1805   g1h->verify_region_sets_optional();
1806 
1807   if (VerifyDuringGC) {
1808     HandleMark hm;  // handle scope
1809     gclog_or_tty->print(" VerifyDuringGC:(before)");
1810     Universe::heap()->prepare_for_verify();
1811     Universe::verify(/* silent      */ false,
1812                      /* option      */ VerifyOption_G1UsePrevMarking);
1813   }
1814 
1815   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1816   g1p->record_concurrent_mark_cleanup_start();
1817 
1818   double start = os::elapsedTime();
1819 
1820   HeapRegionRemSet::reset_for_cleanup_tasks();
1821 
1822   uint n_workers;
1823 
1824   // Do counting once more with the world stopped for good measure.
1825   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1826 
1827   if (G1CollectedHeap::use_parallel_gc_threads()) {
1828     assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
1829            "sanity check");
1830 
1831     g1h->set_par_threads();
1832     n_workers = g1h->n_par_threads();
1833     assert(g1h->n_par_threads() == n_workers,
1834            "Should not have been reset");
1835     g1h->workers()->run_task(&g1_par_count_task);
1836     // Done with the parallel phase so reset to 0.
1837     g1h->set_par_threads(0);
1838 
1839     assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
1840            "sanity check");
1841   } else {
1842     n_workers = 1;
1843     g1_par_count_task.work(0);
1844   }
1845 
1846   if (VerifyDuringGC) {
1847     // Verify that the counting data accumulated during marking matches
1848     // that calculated by walking the marking bitmap.
1849 
1850     // Bitmaps to hold expected values
1851     BitMap expected_region_bm(_region_bm.size(), false);
1852     BitMap expected_card_bm(_card_bm.size(), false);
1853 
1854     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1855                                                  &_region_bm,
1856                                                  &_card_bm,
1857                                                  &expected_region_bm,
1858                                                  &expected_card_bm);
1859 
1860     if (G1CollectedHeap::use_parallel_gc_threads()) {
1861       g1h->set_par_threads((int)n_workers);
1862       g1h->workers()->run_task(&g1_par_verify_task);
1863       // Done with the parallel phase so reset to 0.
1864       g1h->set_par_threads(0);
1865 
1866       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
1867              "sanity check");
1868     } else {
1869       g1_par_verify_task.work(0);
1870     }
1871 
1872     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1873   }
1874 
1875   size_t start_used_bytes = g1h->used();
1876   g1h->set_marking_complete();
1877 
1878   double count_end = os::elapsedTime();
1879   double this_final_counting_time = (count_end - start);
1880   _total_counting_time += this_final_counting_time;
1881 
1882   if (G1PrintRegionLivenessInfo) {
1883     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1884     _g1h->heap_region_iterate(&cl);
1885   }
1886 
1887   // Install newly created mark bitMap as "prev".
1888   swapMarkBitMaps();
1889 
1890   g1h->reset_gc_time_stamp();
1891 
1892   // Note end of marking in all heap regions.
1893   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1894   if (G1CollectedHeap::use_parallel_gc_threads()) {
1895     g1h->set_par_threads((int)n_workers);
1896     g1h->workers()->run_task(&g1_par_note_end_task);
1897     g1h->set_par_threads(0);
1898 
1899     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1900            "sanity check");
1901   } else {
1902     g1_par_note_end_task.work(0);
1903   }
1904   g1h->check_gc_time_stamps();
1905 
1906   if (!cleanup_list_is_empty()) {
1907     // The cleanup list is not empty, so we'll have to process it
1908     // concurrently. Notify anyone else that might be wanting free
1909     // regions that there will be more free regions coming soon.
1910     g1h->set_free_regions_coming();
1911   }
1912 
1913   // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
1914   // call below, since scrubbing affects the metric by which we sort the heap regions.
1915   if (G1ScrubRemSets) {
1916     double rs_scrub_start = os::elapsedTime();
1917     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1918     if (G1CollectedHeap::use_parallel_gc_threads()) {
1919       g1h->set_par_threads((int)n_workers);
1920       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1921       g1h->set_par_threads(0);
1922 
1923       assert(g1h->check_heap_region_claim_values(
1924                                             HeapRegion::ScrubRemSetClaimValue),
1925              "sanity check");
1926     } else {
1927       g1_par_scrub_rs_task.work(0);
1928     }
1929 
1930     double rs_scrub_end = os::elapsedTime();
1931     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1932     _total_rs_scrub_time += this_rs_scrub_time;
1933   }
1934 
1935   // this will also free any regions totally full of garbage objects,
1936   // and sort the regions.
1937   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
1938 
1939   // Statistics.
1940   double end = os::elapsedTime();
1941   _cleanup_times.add((end - start) * 1000.0);
1942 
1943   if (G1Log::fine()) {
1944     g1h->print_size_transition(gclog_or_tty,
1945                                start_used_bytes,
1946                                g1h->used(),
1947                                g1h->capacity());
1948   }
1949 
1950   // Clean up will have freed any regions completely full of garbage.
1951   // Update the soft reference policy with the new heap occupancy.
1952   Universe::update_heap_info_at_gc();
1953 
1954   // We need to make this be a "collection" so any collection pause that
1955   // races with it goes around and waits for completeCleanup to finish.
1956   g1h->increment_total_collections();
1957 
1958   // We reclaimed old regions so we should calculate the sizes to make
1959   // sure we update the old gen/space data.
1960   g1h->g1mm()->update_sizes();
1961 
1962   if (VerifyDuringGC) {
1963     HandleMark hm;  // handle scope
1964     gclog_or_tty->print(" VerifyDuringGC:(after)");
1965     Universe::heap()->prepare_for_verify();
1966     Universe::verify(/* silent      */ false,
1967                      /* option      */ VerifyOption_G1UsePrevMarking);
1968   }
1969 
1970   g1h->verify_region_sets_optional();
1971 }
1972 
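// Called concurrently after the cleanup pause: clears the regions left on
// the _cleanup_list and hands them over to the heap's secondary free list
// in batches of G1SecondaryFreeListAppendLength regions.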
1973 void ConcurrentMark::completeCleanup() {
1974   if (has_aborted()) return;
1975 
1976   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1977 
1978   _cleanup_list.verify_optional();
1979   FreeRegionList tmp_free_list("Tmp Free List");
1980 
1981   if (G1ConcRegionFreeingVerbose) {
1982     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1983                            "cleanup list has %u entries",
1984                            _cleanup_list.length());
1985   }
1986 
1987   // No one else should be accessing the _cleanup_list at this point,
1988   // so it's not necessary to take any locks.
1989   while (!_cleanup_list.is_empty()) {
1990     HeapRegion* hr = _cleanup_list.remove_head();
1991     assert(hr != NULL, "the list was not empty");
1992     hr->par_clear();
1993     tmp_free_list.add_as_tail(hr);
1994 
1995     // Instead of adding one region at a time to the secondary_free_list,
1996     // we accumulate them in the local list and move them a few at a
1997     // time. This also cuts down on the number of notify_all() calls
1998     // we do during this process. We'll also append the local list when
1999     // _cleanup_list is empty (which means we just removed the last
2000     // region from the _cleanup_list).
2001     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2002         _cleanup_list.is_empty()) {
2003       if (G1ConcRegionFreeingVerbose) {
2004         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2005                                "appending %u entries to the secondary_free_list, "
2006                                "cleanup list still has %u entries",
2007                                tmp_free_list.length(),
2008                                _cleanup_list.length());
2009       }
2010 
2011       {
2012         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2013         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
2014         SecondaryFreeList_lock->notify_all();
2015       }
2016 
2017       if (G1StressConcRegionFreeing) {
2018         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2019           os::sleep(Thread::current(), (jlong) 1, false);
2020         }
2021       }
2022     }
2023   }
2024   assert(tmp_free_list.is_empty(), "post-condition");
2025 }
2026 
2027 // Support closures for reference processing in G1
2028 
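// An object is considered live if it lies outside the G1 reserved heap or
// is not "ill" according to the current marking information (see
// G1CollectedHeap::is_obj_ill()).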
2029 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2030   HeapWord* addr = (HeapWord*)obj;
2031   return addr != NULL &&
2032          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2033 }
2034 
2035 class G1CMKeepAliveClosure: public ExtendedOopClosure {
2036   G1CollectedHeap* _g1;
2037   ConcurrentMark*  _cm;
2038  public:
2039   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
2040     _g1(g1), _cm(cm) {
2041     assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
2042   }
2043 
2044   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2045   virtual void do_oop(      oop* p) { do_oop_work(p); }
2046 
2047   template <class T> void do_oop_work(T* p) {
2048     oop obj = oopDesc::load_decode_heap_oop(p);
2049     HeapWord* addr = (HeapWord*)obj;
2050 
2051     if (_cm->verbose_high()) {
2052       gclog_or_tty->print_cr("\t[0] we're looking at location "
2053                              "*"PTR_FORMAT" = "PTR_FORMAT,
2054                              p, (void*) obj);
2055     }
2056 
2057     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2058       _cm->mark_and_count(obj);
2059       _cm->mark_stack_push(obj);
2060     }
2061   }
2062 };
2063 
2064 class G1CMDrainMarkingStackClosure: public VoidClosure {
2065   ConcurrentMark*               _cm;
2066   CMMarkStack*                  _markStack;
2067   G1CMKeepAliveClosure*         _oopClosure;
2068  public:
2069   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
2070                                G1CMKeepAliveClosure* oopClosure) :
2071     _cm(cm),
2072     _markStack(markStack),
2073     _oopClosure(oopClosure) { }
2074 
2075   void do_void() {
2076     _markStack->drain(_oopClosure, _cm->nextMarkBitMap(), false);
2077   }
2078 };
2079 
2080 // 'Keep Alive' closure used by parallel reference processing.
2081 // An instance of this closure is used in the parallel reference processing
2082 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2083 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are
2084 // placed on to discovered ref lists once so we can mark and push with no
2085 // need to check whether the object has already been marked. Using the
2086 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2087 // operating on the global mark stack. This means that an individual
2088 // worker would be doing lock-free pushes while it processes its own
2089 // discovered ref list followed by drain call. If the discovered ref lists
2090 // are unbalanced then this could cause interference with the other
2091 // workers. Using a CMTask (and its embedded local data structures)
2092 // avoids that potential interference.
2093 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2094   ConcurrentMark*  _cm;
2095   CMTask*          _task;
2096   int              _ref_counter_limit;
2097   int              _ref_counter;
2098  public:
2099   G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
2100     _cm(cm), _task(task),
2101     _ref_counter_limit(G1RefProcDrainInterval) {
2102     assert(_ref_counter_limit > 0, "sanity");
2103     _ref_counter = _ref_counter_limit;
2104   }
2105 
2106   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2107   virtual void do_oop(      oop* p) { do_oop_work(p); }
2108 
2109   template <class T> void do_oop_work(T* p) {
2110     if (!_cm->has_overflown()) {
2111       oop obj = oopDesc::load_decode_heap_oop(p);
2112       if (_cm->verbose_high()) {
2113         gclog_or_tty->print_cr("\t[%d] we're looking at location "
2114                                "*"PTR_FORMAT" = "PTR_FORMAT,
2115                                _task->task_id(), p, (void*) obj);
2116       }
2117 
2118       _task->deal_with_reference(obj);
2119       _ref_counter--;
2120 
2121       if (_ref_counter == 0) {
2122         // We have dealt with _ref_counter_limit references, pushing them and objects
2123         // reachable from them on to the local stack (and possibly the global stack).
2124         // Call do_marking_step() to process these entries. We call the routine in a
2125         // loop, which we'll exit if there's nothing more to do (i.e. we're done
2126         // with the entries that we've pushed as a result of the deal_with_reference
2127         // calls above) or we overflow.
2128         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2129         // while there may still be some work to do. (See the comment at the
2130         // beginning of CMTask::do_marking_step() for those conditions - one of which
2131         // is reaching the specified time target.) It is only when
2132         // CMTask::do_marking_step() returns without setting the has_aborted() flag
2133         // that the marking has completed.
2134         do {
2135           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2136           _task->do_marking_step(mark_step_duration_ms,
2137                                  false /* do_stealing    */,
2138                                  false /* do_termination */);
2139         } while (_task->has_aborted() && !_cm->has_overflown());
2140         _ref_counter = _ref_counter_limit;
2141       }
2142     } else {
2143       if (_cm->verbose_high()) {
2144          gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2145       }
2146     }
2147   }
2148 };
2149 
2150 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2151   ConcurrentMark* _cm;
2152   CMTask* _task;
2153  public:
2154   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2155     _cm(cm), _task(task) { }
2156 
2157   void do_void() {
2158     do {
2159       if (_cm->verbose_high()) {
2160         gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2161                                _task->task_id());
2162       }
2163 
2164       // We call CMTask::do_marking_step() to completely drain the local and
2165       // global marking stacks. The routine is called in a loop, which we'll
2166       // exit if there's nothing more to do (i.e. we've completely drained the
2167       // entries that were pushed as a result of applying the
2168       // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2169       // lists above) or we overflow the global marking stack.
2170       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2171       // while there may still be some work to do. (See the comment at the
2172       // beginning of CMTask::do_marking_step() for those conditions - one of which
2173       // is reaching the specified time target.) It is only when
2174       // CMTask::do_marking_step() returns without setting the has_aborted() flag
2175       // that the marking has completed.
2176 
2177       _task->do_marking_step(1000000000.0 /* something very large */,
2178                              true /* do_stealing    */,
2179                              true /* do_termination */);
2180     } while (_task->has_aborted() && !_cm->has_overflown());
2181   }
2182 };
2183 
2184 // Implementation of AbstractRefProcTaskExecutor for parallel
2185 // reference processing at the end of G1 concurrent marking
2186 
2187 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2188 private:
2189   G1CollectedHeap* _g1h;
2190   ConcurrentMark*  _cm;
2191   WorkGang*        _workers;
2192   int              _active_workers;
2193 
2194 public:
2195   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2196                         ConcurrentMark* cm,
2197                         WorkGang* workers,
2198                         int n_workers) :
2199     _g1h(g1h), _cm(cm),
2200     _workers(workers), _active_workers(n_workers) { }
2201 
2202   // Executes the given task using concurrent marking worker threads.
2203   virtual void execute(ProcessTask& task);
2204   virtual void execute(EnqueueTask& task);
2205 };
2206 
2207 class G1CMRefProcTaskProxy: public AbstractGangTask {
2208   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2209   ProcessTask&     _proc_task;
2210   G1CollectedHeap* _g1h;
2211   ConcurrentMark*  _cm;
2212 
2213 public:
2214   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2215                      G1CollectedHeap* g1h,
2216                      ConcurrentMark* cm) :
2217     AbstractGangTask("Process reference objects in parallel"),
2218     _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
2219 
2220   virtual void work(uint worker_id) {
2221     CMTask* marking_task = _cm->task(worker_id);
2222     G1CMIsAliveClosure g1_is_alive(_g1h);
2223     G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
2224     G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2225 
2226     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2227   }
2228 };
2229 
2230 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2231   assert(_workers != NULL, "Need parallel worker threads.");
2232 
2233   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2234 
2235   // We need to reset the phase for each task execution so that
2236   // the termination protocol of CMTask::do_marking_step works.
2237   _cm->set_phase(_active_workers, false /* concurrent */);
2238   _g1h->set_par_threads(_active_workers);
2239   _workers->run_task(&proc_task_proxy);
2240   _g1h->set_par_threads(0);
2241 }
2242 
2243 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2244   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2245   EnqueueTask& _enq_task;
2246 
2247 public:
2248   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2249     AbstractGangTask("Enqueue reference objects in parallel"),
2250     _enq_task(enq_task) { }
2251 
2252   virtual void work(uint worker_id) {
2253     _enq_task.work(worker_id);
2254   }
2255 };
2256 
2257 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2258   assert(_workers != NULL, "Need parallel worker threads.");
2259 
2260   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2261 
2262   _g1h->set_par_threads(_active_workers);
2263   _workers->run_task(&enq_task_proxy);
2264   _g1h->set_par_threads(0);
2265 }
2266 
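// Processes the references discovered during concurrent marking: weak
// references are processed and enqueued using the closures above, and then
// stale entries are removed from the string and symbol tables.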
2267 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2268   ResourceMark rm;
2269   HandleMark   hm;
2270 
2271   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2272 
2273   // Is alive closure.
2274   G1CMIsAliveClosure g1_is_alive(g1h);
2275 
2276   // Inner scope to exclude the cleaning of the string and symbol
2277   // tables from the displayed time.
2278   {
2279     if (G1Log::finer()) {
2280       gclog_or_tty->put(' ');
2281     }
2282     TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty);
2283 
2284     ReferenceProcessor* rp = g1h->ref_processor_cm();
2285 
2286     // See the comment in G1CollectedHeap::ref_processing_init()
2287     // about how reference processing currently works in G1.
2288 
2289     // Process weak references.
2290     rp->setup_policy(clear_all_soft_refs);
2291     assert(_markStack.isEmpty(), "mark stack should be empty");
2292 
2293     G1CMKeepAliveClosure g1_keep_alive(g1h, this);
2294     G1CMDrainMarkingStackClosure
2295       g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
2296 
2297     // We use the work gang from the G1CollectedHeap and we utilize all
2298     // the worker threads.
2299     uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
2300     active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U);
2301 
2302     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2303                                               g1h->workers(), active_workers);
2304 
2305     if (rp->processing_is_mt()) {
2306       // Set the degree of MT here.  If the discovery is done MT, there
2307       // may have been a different number of threads doing the discovery
2308       // and a different number of discovered lists may have Ref objects.
2309       // That is OK as long as the Reference lists are balanced (see
2310       // balance_all_queues() and balance_queues()).
2311       rp->set_active_mt_degree(active_workers);
2312 
2313       rp->process_discovered_references(&g1_is_alive,
2314                                       &g1_keep_alive,
2315                                       &g1_drain_mark_stack,
2316                                       &par_task_executor);
2317 
2318       // The work routines of the parallel keep_alive and drain_marking_stack
2319       // will set the has_overflown flag if we overflow the global marking
2320       // stack.
2321     } else {
2322       rp->process_discovered_references(&g1_is_alive,
2323                                         &g1_keep_alive,
2324                                         &g1_drain_mark_stack,
2325                                         NULL);
2326     }
2327 
2328     assert(_markStack.overflow() || _markStack.isEmpty(),
2329             "mark stack should be empty (unless it overflowed)");
2330     if (_markStack.overflow()) {
2331       // Should have been done already when we tried to push an
2332       // entry on to the global mark stack. But let's do it again.
2333       set_has_overflown();
2334     }
2335 
2336     if (rp->processing_is_mt()) {
2337       assert(rp->num_q() == active_workers, "why not");
2338       rp->enqueue_discovered_references(&par_task_executor);
2339     } else {
2340       rp->enqueue_discovered_references();
2341     }
2342 
2343     rp->verify_no_references_recorded();
2344     assert(!rp->discovery_enabled(), "Post condition");
2345   }
2346 
2347   // Now clean up stale oops in StringTable
2348   StringTable::unlink(&g1_is_alive);
2349   // Clean up unreferenced symbols in symbol table.
2350   SymbolTable::unlink();
2351 }
2352 
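// Swap the roles of the two marking bitmaps: the "next" bitmap, which now
// holds the completed marking information, becomes the new "prev" bitmap,
// and the old "prev" bitmap will be reused as "next" for the following cycle.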
2353 void ConcurrentMark::swapMarkBitMaps() {
2354   CMBitMapRO* temp = _prevMarkBitMap;
2355   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2356   _nextMarkBitMap  = (CMBitMap*)  temp;
2357 }
2358 
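// Gang task used during the remark pause: each active worker repeatedly
// calls CMTask::do_marking_step() (with stealing and termination enabled)
// until its task completes or aborts because the mark stack has overflown.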
2359 class CMRemarkTask: public AbstractGangTask {
2360 private:
2361   ConcurrentMark *_cm;
2362 
2363 public:
2364   void work(uint worker_id) {
2365     // Since all available tasks are actually started, we should
2366     // only proceed if we're supposed to be active.
2367     if (worker_id < _cm->active_tasks()) {
2368       CMTask* task = _cm->task(worker_id);
2369       task->record_start_time();
2370       do {
2371         task->do_marking_step(1000000000.0 /* something very large */,
2372                               true /* do_stealing    */,
2373                               true /* do_termination */);
2374       } while (task->has_aborted() && !_cm->has_overflown());
2375       // If we overflow, then we do not want to restart. We instead
2376       // want to abort remark and do concurrent marking again.
2377       task->record_end_time();
2378     }
2379   }
2380 
2381   CMRemarkTask(ConcurrentMark* cm, int active_workers) :
2382     AbstractGangTask("Par Remark"), _cm(cm) {
2383     _cm->terminator()->reset_for_reuse(active_workers);
2384   }
2385 };
2386 
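// The final (remark) marking work, performed at a safepoint: finishes
// marking by running CMRemarkTask with all active workers (or serially).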
2387 void ConcurrentMark::checkpointRootsFinalWork() {
2388   ResourceMark rm;
2389   HandleMark   hm;
2390   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2391 
2392   g1h->ensure_parsability(false);
2393 
2394   if (G1CollectedHeap::use_parallel_gc_threads()) {
2395     G1CollectedHeap::StrongRootsScope srs(g1h);
2396     // this is remark, so we'll use up all active threads
2397     uint active_workers = g1h->workers()->active_workers();
2398     if (active_workers == 0) {
2399       assert(active_workers > 0, "Should have been set earlier");
2400       active_workers = (uint) ParallelGCThreads;
2401       g1h->workers()->set_active_workers(active_workers);
2402     }
2403     set_phase(active_workers, false /* concurrent */);
2404     // Leave _parallel_marking_threads at its
2405     // value originally calculated in the ConcurrentMark
2406     // constructor and pass values of the active workers
2407     // through the gang in the task.
2408 
2409     CMRemarkTask remarkTask(this, active_workers);
2410     g1h->set_par_threads(active_workers);
2411     g1h->workers()->run_task(&remarkTask);
2412     g1h->set_par_threads(0);
2413   } else {
2414     G1CollectedHeap::StrongRootsScope srs(g1h);
2415     // this is remark, so we'll use up all available threads
2416     uint active_workers = 1;
2417     set_phase(active_workers, false /* concurrent */);
2418 
2419     CMRemarkTask remarkTask(this, active_workers);
2420     // We will start all available threads, even if we decide that the
2421     // active_workers will be fewer. The extra ones will just bail out
2422     // immediately.
2423     remarkTask.work(0);
2424   }
2425   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2426   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2427 
2428   print_stats();
2429 
2430 #if VERIFY_OBJS_PROCESSED
2431   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2432     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2433                            _scan_obj_cl.objs_processed,
2434                            ThreadLocalObjQueue::objs_enqueued);
2435     guarantee(_scan_obj_cl.objs_processed ==
2436               ThreadLocalObjQueue::objs_enqueued,
2437               "Different number of objs processed and enqueued.");
2438   }
2439 #endif
2440 }
2441 
2442 #ifndef PRODUCT
2443 
2444 class PrintReachableOopClosure: public OopClosure {
2445 private:
2446   G1CollectedHeap* _g1h;
2447   outputStream*    _out;
2448   VerifyOption     _vo;
2449   bool             _all;
2450 
2451 public:
2452   PrintReachableOopClosure(outputStream* out,
2453                            VerifyOption  vo,
2454                            bool          all) :
2455     _g1h(G1CollectedHeap::heap()),
2456     _out(out), _vo(vo), _all(all) { }
2457 
2458   void do_oop(narrowOop* p) { do_oop_work(p); }
2459   void do_oop(      oop* p) { do_oop_work(p); }
2460 
2461   template <class T> void do_oop_work(T* p) {
2462     oop         obj = oopDesc::load_decode_heap_oop(p);
2463     const char* str = NULL;
2464     const char* str2 = "";
2465 
2466     if (obj == NULL) {
2467       str = "";
2468     } else if (!_g1h->is_in_g1_reserved(obj)) {
2469       str = " O";
2470     } else {
2471       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2472       guarantee(hr != NULL, "invariant");
2473       bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2474       bool marked = _g1h->is_marked(obj, _vo);
2475 
2476       if (over_tams) {
2477         str = " >";
2478         if (marked) {
2479           str2 = " AND MARKED";
2480         }
2481       } else if (marked) {
2482         str = " M";
2483       } else {
2484         str = " NOT";
2485       }
2486     }
2487 
2488     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2489                    p, (void*) obj, str, str2);
2490   }
2491 };
2492 
2493 class PrintReachableObjectClosure : public ObjectClosure {
2494 private:
2495   G1CollectedHeap* _g1h;
2496   outputStream*    _out;
2497   VerifyOption     _vo;
2498   bool             _all;
2499   HeapRegion*      _hr;
2500 
2501 public:
2502   PrintReachableObjectClosure(outputStream* out,
2503                               VerifyOption  vo,
2504                               bool          all,
2505                               HeapRegion*   hr) :
2506     _g1h(G1CollectedHeap::heap()),
2507     _out(out), _vo(vo), _all(all), _hr(hr) { }
2508 
2509   void do_object(oop o) {
2510     bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2511     bool marked = _g1h->is_marked(o, _vo);
2512     bool print_it = _all || over_tams || marked;
2513 
2514     if (print_it) {
2515       _out->print_cr(" "PTR_FORMAT"%s",
2516                      o, (over_tams) ? " >" : (marked) ? " M" : "");
2517       PrintReachableOopClosure oopCl(_out, _vo, _all);
2518       o->oop_iterate_no_header(&oopCl);
2519     }
2520   }
2521 };
2522 
2523 class PrintReachableRegionClosure : public HeapRegionClosure {
2524 private:
2525   G1CollectedHeap* _g1h;
2526   outputStream*    _out;
2527   VerifyOption     _vo;
2528   bool             _all;
2529 
2530 public:
2531   bool doHeapRegion(HeapRegion* hr) {
2532     HeapWord* b = hr->bottom();
2533     HeapWord* e = hr->end();
2534     HeapWord* t = hr->top();
2535     HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2536     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2537                    "TAMS: "PTR_FORMAT, b, e, t, p);
2538     _out->cr();
2539 
2540     HeapWord* from = b;
2541     HeapWord* to   = t;
2542 
2543     if (to > from) {
2544       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2545       _out->cr();
2546       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2547       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2548       _out->cr();
2549     }
2550 
2551     return false;
2552   }
2553 
2554   PrintReachableRegionClosure(outputStream* out,
2555                               VerifyOption  vo,
2556                               bool          all) :
2557     _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2558 };
2559 
2560 void ConcurrentMark::print_reachable(const char* str,
2561                                      VerifyOption vo,
2562                                      bool all) {
2563   gclog_or_tty->cr();
2564   gclog_or_tty->print_cr("== Doing heap dump... ");
2565 
2566   if (G1PrintReachableBaseFile == NULL) {
2567     gclog_or_tty->print_cr("  #### error: no base file defined");
2568     return;
2569   }
2570 
2571   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2572       (JVM_MAXPATHLEN - 1)) {
2573     gclog_or_tty->print_cr("  #### error: file name too long");
2574     return;
2575   }
2576 
2577   char file_name[JVM_MAXPATHLEN];
2578   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2579   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2580 
2581   fileStream fout(file_name);
2582   if (!fout.is_open()) {
2583     gclog_or_tty->print_cr("  #### error: could not open file");
2584     return;
2585   }
2586 
2587   outputStream* out = &fout;
2588   out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2589   out->cr();
2590 
2591   out->print_cr("--- ITERATING OVER REGIONS");
2592   out->cr();
2593   PrintReachableRegionClosure rcl(out, vo, all);
2594   _g1h->heap_region_iterate(&rcl);
2595   out->cr();
2596 
2597   gclog_or_tty->print_cr("  done");
2598   gclog_or_tty->flush();
2599 }
2600 
2601 #endif // PRODUCT
2602 
2603 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2604   // Note we are overriding the read-only view of the prev map here, via
2605   // the cast.
2606   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2607 }
2608 
2609 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2610   _nextMarkBitMap->clearRange(mr);
2611 }
2612 
2613 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
2614   clearRangePrevBitmap(mr);
2615   clearRangeNextBitmap(mr);
2616 }
2617 
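// Claims the next region to be scanned by atomically moving the global
// finger past it. Returns the claimed region if it has something to scan,
// or NULL either when the claimed region turned out to be empty (in which
// case the caller should simply retry) or when the finger has reached the
// end of the heap.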
2618 HeapRegion*
2619 ConcurrentMark::claim_region(int task_num) {
2620   // "checkpoint" the finger
2621   HeapWord* finger = _finger;
2622 
2623   // _heap_end will not change underneath our feet; it only changes at
2624   // yield points.
2625   while (finger < _heap_end) {
2626     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2627 
2628     // Note on how this code handles humongous regions. In the
2629     // normal case the finger will reach the start of a "starts
2630     // humongous" (SH) region. Its end will either be the end of the
2631     // last "continues humongous" (CH) region in the sequence, or the
2632     // standard end of the SH region (if the SH is the only region in
2633     // the sequence). That way claim_region() will skip over the CH
2634     // regions. However, there is a subtle race between a CM thread
2635     // executing this method and a mutator thread doing a humongous
2636     // object allocation. The two are not mutually exclusive as the CM
2637     // thread does not need to hold the Heap_lock when it gets
2638     // here. So there is a chance that claim_region() will come across
2639     // a free region that's in the process of becoming a SH or a CH
2640     // region. In the former case, it will either
2641     //   a) Miss the update to the region's end, in which case it will
2642     //      visit every subsequent CH region, will find their bitmaps
2643     //      empty, and do nothing, or
2644     //   b) Will observe the update of the region's end (in which case
2645     //      it will skip the subsequent CH regions).
2646     // If it comes across a region that suddenly becomes CH, the
2647     // scenario will be similar to b). So, the race between
2648     // claim_region() and a humongous object allocation might force us
2649     // to do a bit of unnecessary work (due to some unnecessary bitmap
2650     // iterations) but it should not introduce any correctness issues.
2651     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
2652     HeapWord*   bottom        = curr_region->bottom();
2653     HeapWord*   end           = curr_region->end();
2654     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2655 
2656     if (verbose_low()) {
2657       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2658                              "["PTR_FORMAT", "PTR_FORMAT"), "
2659                              "limit = "PTR_FORMAT,
2660                              task_num, curr_region, bottom, end, limit);
2661     }
2662 
2663     // Is the gap between reading the finger and doing the CAS too long?
2664     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2665     if (res == finger) {
2666       // we succeeded
2667 
2668       // notice that _finger == end cannot be guaranteed here since
2669       // someone else might have moved the finger even further.
2670       assert(_finger >= end, "the finger should have moved forward");
2671 
2672       if (verbose_low()) {
2673         gclog_or_tty->print_cr("[%d] we were successful with region = "
2674                                PTR_FORMAT, task_num, curr_region);
2675       }
2676 
2677       if (limit > bottom) {
2678         if (verbose_low()) {
2679           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2680                                  "returning it ", task_num, curr_region);
2681         }
2682         return curr_region;
2683       } else {
2684         assert(limit == bottom,
2685                "the region limit should be at bottom");
2686         if (verbose_low()) {
2687           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2688                                  "returning NULL", task_num, curr_region);
2689         }
2690         // we return NULL and the caller should try calling
2691         // claim_region() again.
2692         return NULL;
2693       }
2694     } else {
2695       assert(_finger > finger, "the finger should have moved forward");
2696       if (verbose_low()) {
2697         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2698                                "global finger = "PTR_FORMAT", "
2699                                "our finger = "PTR_FORMAT,
2700                                task_num, _finger, finger);
2701       }
2702 
2703       // read it again
2704       finger = _finger;
2705     }
2706   }
2707 
2708   return NULL;
2709 }
2710 
2711 #ifndef PRODUCT
2712 enum VerifyNoCSetOopsPhase {
2713   VerifyNoCSetOopsStack,
2714   VerifyNoCSetOopsQueues,
2715   VerifyNoCSetOopsSATBCompleted,
2716   VerifyNoCSetOopsSATBThread
2717 };
2718 
2719 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
2720 private:
2721   G1CollectedHeap* _g1h;
2722   VerifyNoCSetOopsPhase _phase;
2723   int _info;
2724 
2725   const char* phase_str() {
2726     switch (_phase) {
2727     case VerifyNoCSetOopsStack:         return "Stack";
2728     case VerifyNoCSetOopsQueues:        return "Queue";
2729     case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
2730     case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
2731     default:                            ShouldNotReachHere();
2732     }
2733     return NULL;
2734   }
2735 
2736   void do_object_work(oop obj) {
2737     guarantee(!_g1h->obj_in_cs(obj),
2738               err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2739                       (void*) obj, phase_str(), _info));
2740   }
2741 
2742 public:
2743   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2744 
2745   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2746     _phase = phase;
2747     _info = info;
2748   }
2749 
2750   virtual void do_oop(oop* p) {
2751     oop obj = oopDesc::load_decode_heap_oop(p);
2752     do_object_work(obj);
2753   }
2754 
2755   virtual void do_oop(narrowOop* p) {
2756     // We should not come across narrow oops while scanning marking
2757     // stacks and SATB buffers.
2758     ShouldNotReachHere();
2759   }
2760 
2761   virtual void do_object(oop obj) {
2762     do_object_work(obj);
2763   }
2764 };
2765 
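// Verifies that none of the oops on the global mark stack, the per-task
// queues, the SATB buffers, or referenced by the global/per-task fingers
// point into the collection set. Only compiled into non-product builds and
// only meaningful at a safepoint while marking is in progress.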
2766 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
2767                                          bool verify_enqueued_buffers,
2768                                          bool verify_thread_buffers,
2769                                          bool verify_fingers) {
2770   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2771   if (!G1CollectedHeap::heap()->mark_in_progress()) {
2772     return;
2773   }
2774 
2775   VerifyNoCSetOopsClosure cl;
2776 
2777   if (verify_stacks) {
2778     // Verify entries on the global mark stack
2779     cl.set_phase(VerifyNoCSetOopsStack);
2780     _markStack.oops_do(&cl);
2781 
2782     // Verify entries on the task queues
2783     for (int i = 0; i < (int) _max_task_num; i += 1) {
2784       cl.set_phase(VerifyNoCSetOopsQueues, i);
2785       OopTaskQueue* queue = _task_queues->queue(i);
2786       queue->oops_do(&cl);
2787     }
2788   }
2789 
2790   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
2791 
2792   // Verify entries on the enqueued SATB buffers
2793   if (verify_enqueued_buffers) {
2794     cl.set_phase(VerifyNoCSetOopsSATBCompleted);
2795     satb_qs.iterate_completed_buffers_read_only(&cl);
2796   }
2797 
2798   // Verify entries on the per-thread SATB buffers
2799   if (verify_thread_buffers) {
2800     cl.set_phase(VerifyNoCSetOopsSATBThread);
2801     satb_qs.iterate_thread_buffers_read_only(&cl);
2802   }
2803 
2804   if (verify_fingers) {
2805     // Verify the global finger
2806     HeapWord* global_finger = finger();
2807     if (global_finger != NULL && global_finger < _heap_end) {
2808       // The global finger always points to a heap region boundary. We
2809       // use heap_region_containing_raw() to get the containing region
2810       // given that the global finger could be pointing to a free region
2811       // which subsequently becomes a continues humongous region. If that
2812       // happens, heap_region_containing() will return the bottom of the
2813       // corresponding starts humongous region and the check below will
2814       // not hold any more.
2815       HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2816       guarantee(global_finger == global_hr->bottom(),
2817                 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
2818                         global_finger, HR_FORMAT_PARAMS(global_hr)));
2819     }
2820 
2821     // Verify the task fingers
2822     assert(parallel_marking_threads() <= _max_task_num, "sanity");
2823     for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
2824       CMTask* task = _tasks[i];
2825       HeapWord* task_finger = task->finger();
2826       if (task_finger != NULL && task_finger < _heap_end) {
2827         // See above note on the global finger verification.
2828         HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2829         guarantee(task_finger == task_hr->bottom() ||
2830                   !task_hr->in_collection_set(),
2831                   err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
2832                           task_finger, HR_FORMAT_PARAMS(task_hr)));
2833       }
2834     }
2835   }
2836 }
2837 #endif // PRODUCT
2838 
2839 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2840   _markStack.setEmpty();
2841   _markStack.clear_overflow();
2842   if (clear_overflow) {
2843     clear_has_overflown();
2844   } else {
2845     assert(has_overflown(), "pre-condition");
2846   }
2847   _finger = _heap_start;
2848 
2849   for (int i = 0; i < (int)_max_task_num; ++i) {
2850     OopTaskQueue* queue = _task_queues->queue(i);
2851     queue->set_empty();
2852   }
2853 }
2854 
2855 // Aggregate the counting data that was constructed concurrently
2856 // with marking.
2857 class AggregateCountDataHRClosure: public HeapRegionClosure {
2858   G1CollectedHeap* _g1h;
2859   ConcurrentMark* _cm;
2860   CardTableModRefBS* _ct_bs;
2861   BitMap* _cm_card_bm;
2862   size_t _max_task_num;
2863 
2864  public:
2865   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2866                               BitMap* cm_card_bm,
2867                               size_t max_task_num) :
2868     _g1h(g1h), _cm(g1h->concurrent_mark()),
2869     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
2870     _cm_card_bm(cm_card_bm), _max_task_num(max_task_num) { }
2871 
2872   bool doHeapRegion(HeapRegion* hr) {
2873     if (hr->continuesHumongous()) {
2874       // We will ignore these here and process them when their
2875       // associated "starts humongous" region is processed.
2876       // Note that we cannot rely on their associated
2877       // "starts humongous" region to have their bit set to 1
2878       // since, due to the region chunking in the parallel region
2879       // iteration, a "continues humongous" region might be visited
2880       // before its associated "starts humongous".
2881       return false;
2882     }
2883 
2884     HeapWord* start = hr->bottom();
2885     HeapWord* limit = hr->next_top_at_mark_start();
2886     HeapWord* end = hr->end();
2887 
2888     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2889            err_msg("Preconditions not met - "
2890                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
2891                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
2892                    start, limit, hr->top(), hr->end()));
2893 
2894     assert(hr->next_marked_bytes() == 0, "Precondition");
2895 
2896     if (start == limit) {
2897       // NTAMS of this region has not been set so nothing to do.
2898       return false;
2899     }
2900 
2901     // 'start' should be in the heap.
2902     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2903     // 'end' *may* be outside the heap (if hr is the last region)
2904     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2905 
2906     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2907     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2908     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2909 
2910     // If ntams is not card aligned then we bump card bitmap index
2911     // for limit so that we get all the cards spanned by
2912     // the object ending at ntams.
2913     // Note: if this is the last region in the heap then ntams
2914     // *may* be outside the heap and limit_idx will correspond
2915     // to a card that is also outside the heap.
2916     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2917       limit_idx += 1;
2918     }
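    // Worked example (illustrative assumptions only, not derived from this
    // particular heap): with the usual 512-byte cards, suppose NTAMS ('limit')
    // sits 40 bytes past a card boundary. card_bitmap_index_for(limit) returns
    // the index of the card containing NTAMS, so without the bump above the
    // half-open range [start_idx, limit_idx) would exclude that partially
    // covered card.
    //
    //   e.g. start_idx == 100, limit 40 bytes into card 107
    //        => limit_idx == 107, bumped to 108, so cards [100, 108) are united.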
2919 
2920     assert(limit_idx <= end_idx, "or else use atomics");
2921 
2922     // Aggregate the "stripe" in the count data associated with hr.
2923     uint hrs_index = hr->hrs_index();
2924     size_t marked_bytes = 0;
2925 
2926     for (int i = 0; (size_t)i < _max_task_num; i += 1) {
2927       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2928       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2929 
2930       // Fetch the marked_bytes in this region for task i and
2931       // add it to the running total for this region.
2932       marked_bytes += marked_bytes_array[hrs_index];
2933 
2934       // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
2935       // into the global card bitmap.
2936       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2937 
2938       while (scan_idx < limit_idx) {
2939         assert(task_card_bm->at(scan_idx) == true, "should be");
2940         _cm_card_bm->set_bit(scan_idx);
2941         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2942 
2943         // BitMap::get_next_one_offset() can handle the case when
2944         // its left_offset parameter is greater than its right_offset
2945         // parameter. It does, however, have an early exit if
2946         // left_offset == right_offset. So let's limit the value
2947         // passed in for left offset here.
2948         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2949         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2950       }
2951     }
2952 
2953     // Update the marked bytes for this region.
2954     hr->add_to_marked_bytes(marked_bytes);
2955 
2956     // Next heap region
2957     return false;
2958   }
2959 };
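// Informal summary of the invariant established by the closure above (a
// sketch for illustration; the notation is ad hoc, not code): once every
// region has been visited,
//
//   _cm_card_bm[c]          == OR over tasks t of count_card_bitmap_for(t)[c]
//                              for each card c in a region's [bottom, NTAMS)
//   hr->next_marked_bytes() == SUM over tasks t of
//                              count_marked_bytes_array_for(t)[hr->hrs_index()]
//
// i.e. the liveness data accumulated per worker during marking is folded
// into the single global card bitmap and each region's marked-bytes total.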
2960 
2961 class G1AggregateCountDataTask: public AbstractGangTask {
2962 protected:
2963   G1CollectedHeap* _g1h;
2964   ConcurrentMark* _cm;
2965   BitMap* _cm_card_bm;
2966   size_t _max_task_num;
2967   int _active_workers;
2968 
2969 public:
2970   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2971                            ConcurrentMark* cm,
2972                            BitMap* cm_card_bm,
2973                            size_t max_task_num,
2974                            int n_workers) :
2975     AbstractGangTask("Count Aggregation"),
2976     _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2977     _max_task_num(max_task_num),
2978     _active_workers(n_workers) { }
2979 
2980   void work(uint worker_id) {
2981     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_task_num);
2982 
2983     if (G1CollectedHeap::use_parallel_gc_threads()) {
2984       _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
2985                                             _active_workers,
2986                                             HeapRegion::AggregateCountClaimValue);
2987     } else {
2988       _g1h->heap_region_iterate(&cl);
2989     }
2990   }
2991 };
2992 
2993 
2994 void ConcurrentMark::aggregate_count_data() {
2995   int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
2996                         _g1h->workers()->active_workers() :
2997                         1);
2998 
2999   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3000                                            _max_task_num, n_workers);
3001 
3002   if (G1CollectedHeap::use_parallel_gc_threads()) {
3003     assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3004            "sanity check");
3005     _g1h->set_par_threads(n_workers);
3006     _g1h->workers()->run_task(&g1_par_agg_task);
3007     _g1h->set_par_threads(0);
3008 
3009     assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3010            "sanity check");
3011     _g1h->reset_heap_region_claim_values();
3012   } else {
3013     g1_par_agg_task.work(0);
3014   }
3015 }
3016 
3017 // Clear the per-worker arrays used to store the per-region counting data
3018 void ConcurrentMark::clear_all_count_data() {
3019   // Clear the global card bitmap - it will be filled during
3020   // liveness count aggregation (during remark) and the
3021   // final counting task.
3022   _card_bm.clear();
3023 
3024   // Clear the global region bitmap - it will be filled as part
3025   // of the final counting task.
3026   _region_bm.clear();
3027 
3028   uint max_regions = _g1h->max_regions();
3029   assert(_max_task_num != 0, "uninitialized");
3030 
3031   for (int i = 0; (size_t) i < _max_task_num; i += 1) {
3032     BitMap* task_card_bm = count_card_bitmap_for(i);
3033     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3034 
3035     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3036     assert(marked_bytes_array != NULL, "uninitialized");
3037 
3038     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3039     task_card_bm->clear();
3040   }
3041 }
3042 
3043 void ConcurrentMark::print_stats() {
3044   if (verbose_stats()) {
3045     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3046     for (size_t i = 0; i < _active_tasks; ++i) {
3047       _tasks[i]->print_stats();
3048       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3049     }
3050   }
3051 }
3052 
3053 // abandon current marking iteration due to a Full GC
3054 void ConcurrentMark::abort() {
3055   // Clear all marks to force the marking thread to do nothing
3056   _nextMarkBitMap->clearAll();
3057   // Clear the liveness counting data
3058   clear_all_count_data();
3059   // Empty mark stack
3060   clear_marking_state();
3061   for (int i = 0; i < (int)_max_task_num; ++i) {
3062     _tasks[i]->clear_region_fields();
3063   }
3064   _has_aborted = true;
3065 
3066   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3067   satb_mq_set.abandon_partial_marking();
3068   // This can be called either during or outside marking; we'll read
3069   // the expected_active value from the SATB queue set.
3070   satb_mq_set.set_active_all_threads(
3071                                  false, /* new active value */
3072                                  satb_mq_set.is_active() /* expected_active */);
3073 }
3074 
3075 static void print_ms_time_info(const char* prefix, const char* name,
3076                                NumberSeq& ns) {
3077   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3078                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3079   if (ns.num() > 0) {
3080     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3081                            prefix, ns.sd(), ns.maximum());
3082   }
3083 }
3084 
3085 void ConcurrentMark::print_summary_info() {
3086   gclog_or_tty->print_cr(" Concurrent marking:");
3087   print_ms_time_info("  ", "init marks", _init_times);
3088   print_ms_time_info("  ", "remarks", _remark_times);
3089   {
3090     print_ms_time_info("     ", "final marks", _remark_mark_times);
3091     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3092 
3093   }
3094   print_ms_time_info("  ", "cleanups", _cleanup_times);
3095   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3096                          _total_counting_time,
3097                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3098                           (double)_cleanup_times.num()
3099                          : 0.0));
3100   if (G1ScrubRemSets) {
3101     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3102                            _total_rs_scrub_time,
3103                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3104                             (double)_cleanup_times.num()
3105                            : 0.0));
3106   }
3107   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3108                          (_init_times.sum() + _remark_times.sum() +
3109                           _cleanup_times.sum())/1000.0);
3110   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3111                 "(%8.2f s marking).",
3112                 cmThread()->vtime_accum(),
3113                 cmThread()->vtime_mark_accum());
3114 }
3115 
3116 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3117   _parallel_workers->print_worker_threads_on(st);
3118 }
3119 
3120 // We take a break if someone is trying to stop the world.
3121 bool ConcurrentMark::do_yield_check(uint worker_id) {
3122   if (should_yield()) {
3123     if (worker_id == 0) {
3124       _g1h->g1_policy()->record_concurrent_pause();
3125     }
3126     cmThread()->yield();
3127     return true;
3128   } else {
3129     return false;
3130   }
3131 }
3132 
3133 bool ConcurrentMark::should_yield() {
3134   return cmThread()->should_yield();
3135 }
3136 
3137 bool ConcurrentMark::containing_card_is_marked(void* p) {
3138   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3139   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3140 }
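// Illustrative note (a sketch with assumed values, not extra product code):
// the card index above is just the byte offset from the start of the
// reserved heap shifted by CardTableModRefBS::card_shift. With the usual
// 512-byte cards (card_shift == 9), an address 4096 bytes into the heap
// maps to card 8:
//
//   size_t offset = pointer_delta(p, reserved_start, 1);       // 4096
//   size_t card   = offset >> CardTableModRefBS::card_shift;   // 4096 >> 9 == 8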
3141 
3142 bool ConcurrentMark::containing_cards_are_marked(void* start,
3143                                                  void* last) {
3144   return containing_card_is_marked(start) &&
3145          containing_card_is_marked(last);
3146 }
3147 
3148 #ifndef PRODUCT
3149 // for debugging purposes
3150 void ConcurrentMark::print_finger() {
3151   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3152                          _heap_start, _heap_end, _finger);
3153   for (int i = 0; i < (int) _max_task_num; ++i) {
3154     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
3155   }
3156   gclog_or_tty->print_cr("");
3157 }
3158 #endif
3159 
3160 void CMTask::scan_object(oop obj) {
3161   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3162 
3163   if (_cm->verbose_high()) {
3164     gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
3165                            _task_id, (void*) obj);
3166   }
3167 
3168   size_t obj_size = obj->size();
3169   _words_scanned += obj_size;
3170 
3171   obj->oop_iterate(_cm_oop_closure);
3172   statsOnly( ++_objs_scanned );
3173   check_limits();
3174 }
3175 
3176 // Closure for iteration over bitmaps
3177 class CMBitMapClosure : public BitMapClosure {
3178 private:
3179   // the bitmap that is being iterated over
3180   CMBitMap*                   _nextMarkBitMap;
3181   ConcurrentMark*             _cm;
3182   CMTask*                     _task;
3183 
3184 public:
3185   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3186     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3187 
3188   bool do_bit(size_t offset) {
3189     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3190     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3191     assert( addr < _cm->finger(), "invariant");
3192 
3193     statsOnly( _task->increase_objs_found_on_bitmap() );
3194     assert(addr >= _task->finger(), "invariant");
3195 
3196     // We move that task's local finger along.
3197     _task->move_finger_to(addr);
3198 
3199     _task->scan_object(oop(addr));
3200     // we only partially drain the local queue and global stack
3201     _task->drain_local_queue(true);
3202     _task->drain_global_stack(true);
3203 
3204     // if the has_aborted flag has been raised, we need to bail out of
3205     // the iteration
3206     return !_task->has_aborted();
3207   }
3208 };
3209 
3210 // Closure for iterating over objects, currently only used for
3211 // processing SATB buffers.
3212 class CMObjectClosure : public ObjectClosure {
3213 private:
3214   CMTask* _task;
3215 
3216 public:
3217   void do_object(oop obj) {
3218     _task->deal_with_reference(obj);
3219   }
3220 
3221   CMObjectClosure(CMTask* task) : _task(task) { }
3222 };
3223 
3224 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3225                                ConcurrentMark* cm,
3226                                CMTask* task)
3227   : _g1h(g1h), _cm(cm), _task(task) {
3228   assert(_ref_processor == NULL, "should be initialized to NULL");
3229 
3230   if (G1UseConcMarkReferenceProcessing) {
3231     _ref_processor = g1h->ref_processor_cm();
3232     assert(_ref_processor != NULL, "should not be NULL");
3233   }
3234 }
3235 
3236 void CMTask::setup_for_region(HeapRegion* hr) {
3237   // Separated the asserts so that we know which one fires.
3238   assert(hr != NULL,
3239         "claim_region() should have filtered out continues humongous regions");
3240   assert(!hr->continuesHumongous(),
3241         "claim_region() should have filtered out continues humongous regions");
3242 
3243   if (_cm->verbose_low()) {
3244     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3245                            _task_id, hr);
3246   }
3247 
3248   _curr_region  = hr;
3249   _finger       = hr->bottom();
3250   update_region_limit();
3251 }
3252 
3253 void CMTask::update_region_limit() {
3254   HeapRegion* hr            = _curr_region;
3255   HeapWord* bottom          = hr->bottom();
3256   HeapWord* limit           = hr->next_top_at_mark_start();
3257 
3258   if (limit == bottom) {
3259     if (_cm->verbose_low()) {
3260       gclog_or_tty->print_cr("[%d] found an empty region "
3261                              "["PTR_FORMAT", "PTR_FORMAT")",
3262                              _task_id, bottom, limit);
3263     }
3264     // The region was collected underneath our feet.
3265     // We set the finger to bottom to ensure that the bitmap
3266     // iteration that will follow this will not do anything.
3267     // (this is not a condition that holds when we set the region up,
3268     // as the region is not supposed to be empty in the first place)
3269     _finger = bottom;
3270   } else if (limit >= _region_limit) {
3271     assert(limit >= _finger, "peace of mind");
3272   } else {
3273     assert(limit < _region_limit, "only way to get here");
3274     // This can happen under some pretty unusual circumstances.  An
3275     // evacuation pause empties the region underneath our feet (NTAMS
3276     // at bottom). We then do some allocation in the region (NTAMS
3277     // stays at bottom), followed by the region being used as a GC
3278     // alloc region (NTAMS will move to top() and the objects
3279     // originally below it will be grayed). All objects now marked in
3280     // the region are explicitly grayed, if below the global finger,
3281     // and we do not need in fact to scan anything else. So, we simply
3282     // set _finger to be limit to ensure that the bitmap iteration
3283     // doesn't do anything.
3284     _finger = limit;
3285   }
3286 
3287   _region_limit = limit;
3288 }
3289 
3290 void CMTask::giveup_current_region() {
3291   assert(_curr_region != NULL, "invariant");
3292   if (_cm->verbose_low()) {
3293     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3294                            _task_id, _curr_region);
3295   }
3296   clear_region_fields();
3297 }
3298 
3299 void CMTask::clear_region_fields() {
3300   // Set the values of these three fields to indicate that we're not
3301   // holding on to a region.
3302   _curr_region   = NULL;
3303   _finger        = NULL;
3304   _region_limit  = NULL;
3305 }
3306 
3307 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3308   if (cm_oop_closure == NULL) {
3309     assert(_cm_oop_closure != NULL, "invariant");
3310   } else {
3311     assert(_cm_oop_closure == NULL, "invariant");
3312   }
3313   _cm_oop_closure = cm_oop_closure;
3314 }
3315 
3316 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3317   guarantee(nextMarkBitMap != NULL, "invariant");
3318 
3319   if (_cm->verbose_low()) {
3320     gclog_or_tty->print_cr("[%d] resetting", _task_id);
3321   }
3322 
3323   _nextMarkBitMap                = nextMarkBitMap;
3324   clear_region_fields();
3325 
3326   _calls                         = 0;
3327   _elapsed_time_ms               = 0.0;
3328   _termination_time_ms           = 0.0;
3329   _termination_start_time_ms     = 0.0;
3330 
3331 #if _MARKING_STATS_
3332   _local_pushes                  = 0;
3333   _local_pops                    = 0;
3334   _local_max_size                = 0;
3335   _objs_scanned                  = 0;
3336   _global_pushes                 = 0;
3337   _global_pops                   = 0;
3338   _global_max_size               = 0;
3339   _global_transfers_to           = 0;
3340   _global_transfers_from         = 0;
3341   _regions_claimed               = 0;
3342   _objs_found_on_bitmap          = 0;
3343   _satb_buffers_processed        = 0;
3344   _steal_attempts                = 0;
3345   _steals                        = 0;
3346   _aborted                       = 0;
3347   _aborted_overflow              = 0;
3348   _aborted_cm_aborted            = 0;
3349   _aborted_yield                 = 0;
3350   _aborted_timed_out             = 0;
3351   _aborted_satb                  = 0;
3352   _aborted_termination           = 0;
3353 #endif // _MARKING_STATS_
3354 }
3355 
3356 bool CMTask::should_exit_termination() {
3357   regular_clock_call();
3358   // This is called when we are in the termination protocol. We should
3359   // quit if, for some reason, this task wants to abort or the global
3360   // stack is not empty (this means that we can get work from it).
3361   return !_cm->mark_stack_empty() || has_aborted();
3362 }
3363 
3364 void CMTask::reached_limit() {
3365   assert(_words_scanned >= _words_scanned_limit ||
3366          _refs_reached >= _refs_reached_limit ,
3367          "shouldn't have been called otherwise");
3368   regular_clock_call();
3369 }
3370 
3371 void CMTask::regular_clock_call() {
3372   if (has_aborted()) return;
3373 
3374   // First, we need to recalculate the words scanned and refs reached
3375   // limits for the next clock call.
3376   recalculate_limits();
3377 
3378   // During the regular clock call we do the following
3379 
3380   // (1) If an overflow has been flagged, then we abort.
3381   if (_cm->has_overflown()) {
3382     set_has_aborted();
3383     return;
3384   }
3385 
3386   // If we are not concurrent (i.e. we're doing remark) we don't need
3387   // to check anything else. The other steps are only needed during
3388   // the concurrent marking phase.
3389   if (!concurrent()) return;
3390 
3391   // (2) If marking has been aborted for Full GC, then we also abort.
3392   if (_cm->has_aborted()) {
3393     set_has_aborted();
3394     statsOnly( ++_aborted_cm_aborted );
3395     return;
3396   }
3397 
3398   double curr_time_ms = os::elapsedVTime() * 1000.0;
3399 
3400   // (3) If marking stats are enabled, then we update the step history.
3401 #if _MARKING_STATS_
3402   if (_words_scanned >= _words_scanned_limit) {
3403     ++_clock_due_to_scanning;
3404   }
3405   if (_refs_reached >= _refs_reached_limit) {
3406     ++_clock_due_to_marking;
3407   }
3408 
3409   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3410   _interval_start_time_ms = curr_time_ms;
3411   _all_clock_intervals_ms.add(last_interval_ms);
3412 
3413   if (_cm->verbose_medium()) {
3414       gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3415                         "scanned = %d%s, refs reached = %d%s",
3416                         _task_id, last_interval_ms,
3417                         _words_scanned,
3418                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3419                         _refs_reached,
3420                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3421   }
3422 #endif // _MARKING_STATS_
3423 
3424   // (4) We check whether we should yield. If we have to, then we abort.
3425   if (_cm->should_yield()) {
3426     // We should yield. To do this we abort the task. The caller is
3427     // responsible for yielding.
3428     set_has_aborted();
3429     statsOnly( ++_aborted_yield );
3430     return;
3431   }
3432 
3433   // (5) We check whether we've reached our time quota. If we have,
3434   // then we abort.
3435   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3436   if (elapsed_time_ms > _time_target_ms) {
3437     set_has_aborted();
3438     _has_timed_out = true;
3439     statsOnly( ++_aborted_timed_out );
3440     return;
3441   }
3442 
3443   // (6) Finally, we check whether there are enough completed SATB
3444   // buffers available for processing. If there are, we abort.
3445   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3446   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3447     if (_cm->verbose_low()) {
3448       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3449                              _task_id);
3450     }
3451     // we do need to process SATB buffers, we'll abort and restart
3452     // the marking task to do so
3453     set_has_aborted();
3454     statsOnly( ++_aborted_satb );
3455     return;
3456   }
3457 }
3458 
3459 void CMTask::recalculate_limits() {
3460   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3461   _words_scanned_limit      = _real_words_scanned_limit;
3462 
3463   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3464   _refs_reached_limit       = _real_refs_reached_limit;
3465 }
3466 
3467 void CMTask::decrease_limits() {
3468   // This is called when we believe that we're going to do an infrequent
3469   // operation which will increase the per byte scanned cost (i.e. move
3470   // entries to/from the global stack). It basically tries to decrease the
3471   // scanning limit so that the clock is called earlier.
3472 
3473   if (_cm->verbose_medium()) {
3474     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3475   }
3476 
3477   _words_scanned_limit = _real_words_scanned_limit -
3478     3 * words_scanned_period / 4;
3479   _refs_reached_limit  = _real_refs_reached_limit -
3480     3 * refs_reached_period / 4;
3481 }
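// Worked example of the effect of decrease_limits() (the period value below
// is an assumption for illustration, not the actual constant): if
// words_scanned_period were 12288 words, then after the call
//
//   _words_scanned_limit = _real_words_scanned_limit - 3 * 12288 / 4
//                        = _real_words_scanned_limit - 9216
//
// so the regular clock fires after roughly a quarter of a normal scanning
// period instead of a full one; the refs-reached limit shrinks the same way.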
3482 
3483 void CMTask::move_entries_to_global_stack() {
3484   // local array where we'll store the entries that will be popped
3485   // from the local queue
3486   oop buffer[global_stack_transfer_size];
3487 
3488   int n = 0;
3489   oop obj;
3490   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3491     buffer[n] = obj;
3492     ++n;
3493   }
3494 
3495   if (n > 0) {
3496     // we popped at least one entry from the local queue
3497 
3498     statsOnly( ++_global_transfers_to; _local_pops += n );
3499 
3500     if (!_cm->mark_stack_push(buffer, n)) {
3501       if (_cm->verbose_low()) {
3502         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
3503                                _task_id);
3504       }
3505       set_has_aborted();
3506     } else {
3507       // the transfer was successful
3508 
3509       if (_cm->verbose_medium()) {
3510         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3511                                _task_id, n);
3512       }
3513       statsOnly( int tmp_size = _cm->mark_stack_size();
3514                  if (tmp_size > _global_max_size) {
3515                    _global_max_size = tmp_size;
3516                  }
3517                  _global_pushes += n );
3518     }
3519   }
3520 
3521   // this operation was quite expensive, so decrease the limits
3522   decrease_limits();
3523 }
3524 
3525 void CMTask::get_entries_from_global_stack() {
3526   // local array where we'll store the entries that will be popped
3527   // from the global stack.
3528   oop buffer[global_stack_transfer_size];
3529   int n;
3530   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3531   assert(n <= global_stack_transfer_size,
3532          "we should not pop more than the given limit");
3533   if (n > 0) {
3534     // yes, we did actually pop at least one entry
3535 
3536     statsOnly( ++_global_transfers_from; _global_pops += n );
3537     if (_cm->verbose_medium()) {
3538       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3539                              _task_id, n);
3540     }
3541     for (int i = 0; i < n; ++i) {
3542       bool success = _task_queue->push(buffer[i]);
3543       // We only call this when the local queue is empty or under a
3544       // given target limit. So, we do not expect this push to fail.
3545       assert(success, "invariant");
3546     }
3547 
3548     statsOnly( int tmp_size = _task_queue->size();
3549                if (tmp_size > _local_max_size) {
3550                  _local_max_size = tmp_size;
3551                }
3552                _local_pushes += n );
3553   }
3554 
3555   // this operation was quite expensive, so decrease the limits
3556   decrease_limits();
3557 }
3558 
3559 void CMTask::drain_local_queue(bool partially) {
3560   if (has_aborted()) return;
3561 
3562   // Decide what the target size is, depending on whether we're going to
3563   // drain it partially (so that other tasks can steal if they run out
3564   // of things to do) or totally (at the very end).
3565   size_t target_size;
3566   if (partially) {
3567     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3568   } else {
3569     target_size = 0;
3570   }
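  // For illustration (the concrete numbers are assumptions): with a task
  // queue capacity of 16384 entries and GCDrainStackTargetSize at 64, a
  // partial drain stops once the queue holds MIN2(16384 / 3, 64) == 64
  // entries, deliberately leaving work available for other tasks to steal,
  // whereas a full drain (target_size == 0) empties the queue completely.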
3571 
3572   if (_task_queue->size() > target_size) {
3573     if (_cm->verbose_high()) {
3574       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3575                              _task_id, target_size);
3576     }
3577 
3578     oop obj;
3579     bool ret = _task_queue->pop_local(obj);
3580     while (ret) {
3581       statsOnly( ++_local_pops );
3582 
3583       if (_cm->verbose_high()) {
3584         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3585                                (void*) obj);
3586       }
3587 
3588       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3589       assert(!_g1h->is_on_master_free_list(
3590                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3591 
3592       scan_object(obj);
3593 
3594       if (_task_queue->size() <= target_size || has_aborted()) {
3595         ret = false;
3596       } else {
3597         ret = _task_queue->pop_local(obj);
3598       }
3599     }
3600 
3601     if (_cm->verbose_high()) {
3602       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3603                              _task_id, _task_queue->size());
3604     }
3605   }
3606 }
3607 
3608 void CMTask::drain_global_stack(bool partially) {
3609   if (has_aborted()) return;
3610 
3611   // We have a policy to drain the local queue before we attempt to
3612   // drain the global stack.
3613   assert(partially || _task_queue->size() == 0, "invariant");
3614 
3615   // Decide what the target size is, depending on whether we're going to
3616   // drain it partially (so that other tasks can steal if they run out
3617   // of things to do) or totally (at the very end).  Notice that,
3618   // because we move entries from the global stack in chunks or
3619   // because another task might be doing the same, we might in fact
3620   // drop below the target. But, this is not a problem.
3621   size_t target_size;
3622   if (partially) {
3623     target_size = _cm->partial_mark_stack_size_target();
3624   } else {
3625     target_size = 0;
3626   }
3627 
3628   if (_cm->mark_stack_size() > target_size) {
3629     if (_cm->verbose_low()) {
3630       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3631                              _task_id, target_size);
3632     }
3633 
3634     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3635       get_entries_from_global_stack();
3636       drain_local_queue(partially);
3637     }
3638 
3639     if (_cm->verbose_low()) {
3640       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3641                              _task_id, _cm->mark_stack_size());
3642     }
3643   }
3644 }
3645 
3646 // SATB Queue has several assumptions on whether to call the par or
3647 // non-par versions of the methods. This is why some of the code is
3648 // replicated. We should really get rid of the single-threaded version
3649 // of the code to simplify things.
3650 void CMTask::drain_satb_buffers() {
3651   if (has_aborted()) return;
3652 
3653   // We set this so that the regular clock knows that we're in the
3654   // middle of draining buffers and doesn't set the abort flag when it
3655   // notices that SATB buffers are available for draining. It'd be
3656   // very counterproductive if it did that. :-)
3657   _draining_satb_buffers = true;
3658 
3659   CMObjectClosure oc(this);
3660   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3661   if (G1CollectedHeap::use_parallel_gc_threads()) {
3662     satb_mq_set.set_par_closure(_task_id, &oc);
3663   } else {
3664     satb_mq_set.set_closure(&oc);
3665   }
3666 
3667   // This keeps claiming and applying the closure to completed buffers
3668   // until we run out of buffers or we need to abort.
3669   if (G1CollectedHeap::use_parallel_gc_threads()) {
3670     while (!has_aborted() &&
3671            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3672       if (_cm->verbose_medium()) {
3673         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3674       }
3675       statsOnly( ++_satb_buffers_processed );
3676       regular_clock_call();
3677     }
3678   } else {
3679     while (!has_aborted() &&
3680            satb_mq_set.apply_closure_to_completed_buffer()) {
3681       if (_cm->verbose_medium()) {
3682         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3683       }
3684       statsOnly( ++_satb_buffers_processed );
3685       regular_clock_call();
3686     }
3687   }
3688 
3689   if (!concurrent() && !has_aborted()) {
3690     // We should only do this during remark.
3691     if (G1CollectedHeap::use_parallel_gc_threads()) {
3692       satb_mq_set.par_iterate_closure_all_threads(_task_id);
3693     } else {
3694       satb_mq_set.iterate_closure_all_threads();
3695     }
3696   }
3697 
3698   _draining_satb_buffers = false;
3699 
3700   assert(has_aborted() ||
3701          concurrent() ||
3702          satb_mq_set.completed_buffers_num() == 0, "invariant");
3703 
3704   if (G1CollectedHeap::use_parallel_gc_threads()) {
3705     satb_mq_set.set_par_closure(_task_id, NULL);
3706   } else {
3707     satb_mq_set.set_closure(NULL);
3708   }
3709 
3710   // again, this was a potentially expensive operation, decrease the
3711   // limits to get the regular clock call early
3712   decrease_limits();
3713 }
3714 
3715 void CMTask::print_stats() {
3716   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3717                          _task_id, _calls);
3718   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3719                          _elapsed_time_ms, _termination_time_ms);
3720   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3721                          _step_times_ms.num(), _step_times_ms.avg(),
3722                          _step_times_ms.sd());
3723   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3724                          _step_times_ms.maximum(), _step_times_ms.sum());
3725 
3726 #if _MARKING_STATS_
3727   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3728                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3729                          _all_clock_intervals_ms.sd());
3730   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3731                          _all_clock_intervals_ms.maximum(),
3732                          _all_clock_intervals_ms.sum());
3733   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
3734                          _clock_due_to_scanning, _clock_due_to_marking);
3735   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
3736                          _objs_scanned, _objs_found_on_bitmap);
3737   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
3738                          _local_pushes, _local_pops, _local_max_size);
3739   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
3740                          _global_pushes, _global_pops, _global_max_size);
3741   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
3742                          _global_transfers_to,_global_transfers_from);
3743   gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);
3744   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
3745   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
3746                          _steal_attempts, _steals);
3747   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
3748   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
3749                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3750   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
3751                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3752 #endif // _MARKING_STATS_
3753 }
3754 
3755 /*****************************************************************************
3756 
3757     The do_marking_step(time_target_ms) method is the building block
3758     of the parallel marking framework. It can be called in parallel
3759     with other invocations of do_marking_step() on different tasks
3760     (but only one per task, obviously) and concurrently with the
3761     mutator threads, or during remark, hence it eliminates the need
3762     for two versions of the code. When called during remark, it will
3763     pick up from where the task left off during the concurrent marking
3764     phase. Interestingly, tasks are also claimable during evacuation
3765     pauses, since do_marking_step() ensures that it aborts before
3766     it needs to yield.
3767 
3768     The data structures that it uses to do marking work are the
3769     following:
3770 
3771       (1) Marking Bitmap. If there are gray objects that appear only
3772       on the bitmap (this happens either when dealing with an overflow
3773       or when the initial marking phase has simply marked the roots
3774       and didn't push them on the stack), then tasks claim heap
3775       regions whose bitmap they then scan to find gray objects. A
3776       global finger indicates where the end of the last claimed region
3777       is. A local finger indicates how far into the region a task has
3778       scanned. The two fingers are used to determine how to gray an
3779       object (i.e. whether simply marking it is OK, as it will be
3780       visited by a task in the future, or whether it needs to be also
3781       pushed on a stack).
3782 
3783       (2) Local Queue. The local queue of the task which is accessed
3784       reasonably efficiently by the task. Other tasks can steal from
3785       it when they run out of work. Throughout the marking phase, a
3786       task attempts to keep its local queue short but not totally
3787       empty, so that entries are available for stealing by other
3788       tasks. Only when there is no more work will a task totally
3789       drain its local queue.
3790 
3791       (3) Global Mark Stack. This handles local queue overflow. During
3792       marking only sets of entries are moved between it and the local
3793       queues, as access to it requires a mutex and more fine-grained
3794       interaction with it might cause contention. If it
3795       overflows, then the marking phase should restart and iterate
3796       over the bitmap to identify gray objects. Throughout the marking
3797       phase, tasks attempt to keep the global mark stack at a small
3798       length but not totally empty, so that entries are available for
3799       popping by other tasks. Only when there is no more work will tasks
3800       totally drain the global mark stack.
3801 
3802       (4) SATB Buffer Queue. This is where completed SATB buffers are
3803       made available. Buffers are regularly removed from this queue
3804       and scanned for roots, so that the queue doesn't get too
3805       long. During remark, all completed buffers are processed, as
3806       well as the filled in parts of any uncompleted buffers.
3807 
3808     The do_marking_step() method tries to abort when the time target
3809     has been reached. There are a few other cases when the
3810     do_marking_step() method also aborts:
3811 
3812       (1) When the marking phase has been aborted (after a Full GC).
3813 
3814       (2) When a global overflow (on the global stack) has been
3815       triggered. Before the task aborts, it will actually sync up with
3816       the other tasks to ensure that all the marking data structures
3817       (local queues, stacks, fingers etc.)  are re-initialised so that
3818       when do_marking_step() completes, the marking phase can
3819       immediately restart.
3820 
3821       (3) When enough completed SATB buffers are available. The
3822       do_marking_step() method only tries to drain SATB buffers right
3823       at the beginning. So, if enough buffers are available, the
3824       marking step aborts and the SATB buffers are processed at
3825       the beginning of the next invocation.
3826 
3827       (4) To yield. When we have to yield, we abort and yield
3828       right at the end of do_marking_step(). This saves us from a lot
3829       of hassle as, by yielding, we might allow a Full GC. If this
3830       happens then objects will be compacted underneath our feet, the
3831       heap might shrink, etc. We avoid checking for this by just
3832       aborting and doing the yield right at the end.
3833 
3834     From the above it follows that the do_marking_step() method should
3835     be called in a loop (or, otherwise, regularly) until it completes.
3836 
3837     If a marking step completes without its has_aborted() flag being
3838     true, it means it has completed the current marking phase (and
3839     also all other marking tasks have done so and have all synced up).
3840 
3841     A method called regular_clock_call() is invoked "regularly" (in
3842     sub ms intervals) throughout marking. It is this clock method that
3843     checks all the abort conditions which were mentioned above and
3844     decides when the task should abort. A work-based scheme is used to
3845     trigger this clock method: when the number of object words the
3846     marking phase has scanned or the number of references the marking
3847     phase has visited reach a given limit. Additional invocations of
3848     the clock method have been planted in a few other strategic places
3849     too. The initial reason for the clock method was to avoid calling
3850     vtime too regularly, as it is quite expensive. So, once it was in
3851     place, it was natural to piggy-back all the other conditions on it
3852     too and not constantly check them throughout the code.
3853 
3854  *****************************************************************************/
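// A minimal sketch of the calling pattern described above. This is
// illustrative only: the names, the 10 ms time slice and the surrounding
// loop are assumptions, not the actual driver code (the real callers are
// the marking tasks defined elsewhere in this file).
//
//   CMTask* task = ...;  // the task this worker thread has claimed
//   do {
//     task->do_marking_step(10.0 /* assumed time target in ms */,
//                           true /* do_stealing */,
//                           true /* do_termination */);
//     // A step that aborted (overflow, yield request, SATB backlog or
//     // time-out) is simply retried after yielding as required; a return
//     // with !task->has_aborted() means this marking phase is finished.
//   } while (task->has_aborted());
//
// Real callers also bail out of the loop if marking as a whole has been
// aborted for a Full GC.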
3855 
3856 void CMTask::do_marking_step(double time_target_ms,
3857                              bool do_stealing,
3858                              bool do_termination) {
3859   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3860   assert(concurrent() == _cm->concurrent(), "they should be the same");
3861 
3862   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3863   assert(_task_queues != NULL, "invariant");
3864   assert(_task_queue != NULL, "invariant");
3865   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
3866 
3867   assert(!_claimed,
3868          "only one thread should claim this task at any one time");
3869 
3870   // OK, this doesn't safeguard against all possible scenarios, as it is
3871   // possible for two threads to set the _claimed flag at the same
3872   // time. But it is only for debugging purposes anyway and it will
3873   // catch most problems.
3874   _claimed = true;
3875 
3876   _start_time_ms = os::elapsedVTime() * 1000.0;
3877   statsOnly( _interval_start_time_ms = _start_time_ms );
3878 
3879   double diff_prediction_ms =
3880     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3881   _time_target_ms = time_target_ms - diff_prediction_ms;
3882 
3883   // set up the variables that are used in the work-based scheme to
3884   // call the regular clock method
3885   _words_scanned = 0;
3886   _refs_reached  = 0;
3887   recalculate_limits();
3888 
3889   // clear all flags
3890   clear_has_aborted();
3891   _has_timed_out = false;
3892   _draining_satb_buffers = false;
3893 
3894   ++_calls;
3895 
3896   if (_cm->verbose_low()) {
3897     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3898                            "target = %1.2lfms >>>>>>>>>>",
3899                            _task_id, _calls, _time_target_ms);
3900   }
3901 
3902   // Set up the bitmap and oop closures. Anything that uses them is
3903   // eventually called from this method, so it is OK to allocate these
3904   // statically.
3905   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3906   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3907   set_cm_oop_closure(&cm_oop_closure);
3908 
3909   if (_cm->has_overflown()) {
3910     // This can happen if the mark stack overflows during a GC pause
3911     // and this task, after a yield point, restarts. We have to abort
3912     // as we need to get into the overflow protocol which happens
3913     // right at the end of this task.
3914     set_has_aborted();
3915   }
3916 
3917   // First drain any available SATB buffers. After this, we will not
3918   // look at SATB buffers before the next invocation of this method.
3919   // If enough completed SATB buffers are queued up, the regular clock
3920   // will abort this task so that it restarts.
3921   drain_satb_buffers();
3922   // ...then partially drain the local queue and the global stack
3923   drain_local_queue(true);
3924   drain_global_stack(true);
3925 
3926   do {
3927     if (!has_aborted() && _curr_region != NULL) {
3928       // This means that we're already holding on to a region.
3929       assert(_finger != NULL, "if region is not NULL, then the finger "
3930              "should not be NULL either");
3931 
3932       // We might have restarted this task after an evacuation pause
3933       // which might have evacuated the region we're holding on to
3934       // underneath our feet. Let's read its limit again to make sure
3935       // that we do not iterate over a region of the heap that
3936       // contains garbage (update_region_limit() will also move
3937       // _finger to the start of the region if it is found empty).
3938       update_region_limit();
3939       // We will start from _finger not from the start of the region,
3940       // as we might be restarting this task after aborting half-way
3941       // through scanning this region. In this case, _finger points to
3942       // the address where we last found a marked object. If this is a
3943       // fresh region, _finger points to start().
3944       MemRegion mr = MemRegion(_finger, _region_limit);
3945 
3946       if (_cm->verbose_low()) {
3947         gclog_or_tty->print_cr("[%d] we're scanning part "
3948                                "["PTR_FORMAT", "PTR_FORMAT") "
3949                                "of region "PTR_FORMAT,
3950                                _task_id, _finger, _region_limit, _curr_region);
3951       }
3952 
3953       // Let's iterate over the bitmap of the part of the
3954       // region that is left.
3955       if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3956         // We successfully completed iterating over the region. Now,
3957         // let's give up the region.
3958         giveup_current_region();
3959         regular_clock_call();
3960       } else {
3961         assert(has_aborted(), "currently the only way to do so");
3962         // The only way to abort the bitmap iteration is to return
3963         // false from the do_bit() method. However, inside the
3964         // do_bit() method we move the _finger to point to the
3965         // object currently being looked at. So, if we bail out, we
3966         // have definitely set _finger to something non-null.
3967         assert(_finger != NULL, "invariant");
3968 
3969         // Region iteration was actually aborted. So now _finger
3970         // points to the address of the object we last scanned. If we
3971         // leave it there, when we restart this task, we will rescan
3972         // the object. It is easy to avoid this. We move the finger by
3973         // enough to point to the next possible object header (the
3974         // bitmap knows by how much we need to move it as it knows its
3975         // granularity).
3976         assert(_finger < _region_limit, "invariant");
3977         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
3978         // Check if bitmap iteration was aborted while scanning the last object
3979         if (new_finger >= _region_limit) {
3980           giveup_current_region();
3981         } else {
3982           move_finger_to(new_finger);
3983         }
3984       }
3985     }
3986     // At this point we have either completed iterating over the
3987     // region we were holding on to, or we have aborted.
3988 
3989     // We then partially drain the local queue and the global stack.
3990     // (Do we really need this?)
3991     drain_local_queue(true);
3992     drain_global_stack(true);
3993 
3994     // Read the note on the claim_region() method on why it might
3995     // return NULL with potentially more regions available for
3996     // claiming and why we have to check out_of_regions() to determine
3997     // whether we're done or not.
3998     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3999       // We are going to try to claim a new region. We should have
4000       // given up on the previous one.
4001       // Separated the asserts so that we know which one fires.
4002       assert(_curr_region  == NULL, "invariant");
4003       assert(_finger       == NULL, "invariant");
4004       assert(_region_limit == NULL, "invariant");
4005       if (_cm->verbose_low()) {
4006         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
4007       }
4008       HeapRegion* claimed_region = _cm->claim_region(_task_id);
4009       if (claimed_region != NULL) {
4010         // Yes, we managed to claim one
4011         statsOnly( ++_regions_claimed );
4012 
4013         if (_cm->verbose_low()) {
4014           gclog_or_tty->print_cr("[%d] we successfully claimed "
4015                                  "region "PTR_FORMAT,
4016                                  _task_id, claimed_region);
4017         }
4018 
4019         setup_for_region(claimed_region);
4020         assert(_curr_region == claimed_region, "invariant");
4021       }
4022       // It is important to call the regular clock here. It might take
4023       // a while to claim a region if, for example, we hit a large
4024       // block of empty regions. So we need to call the regular clock
4025       // method once round the loop to make sure it's called
4026       // frequently enough.
4027       regular_clock_call();
4028     }
4029 
4030     if (!has_aborted() && _curr_region == NULL) {
4031       assert(_cm->out_of_regions(),
4032              "at this point we should be out of regions");
4033     }
4034   } while ( _curr_region != NULL && !has_aborted());
4035 
4036   if (!has_aborted()) {
4037     // We cannot check whether the global stack is empty, since other
4038     // tasks might be pushing objects to it concurrently.
4039     assert(_cm->out_of_regions(),
4040            "at this point we should be out of regions");
4041 
4042     if (_cm->verbose_low()) {
4043       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4044     }
4045 
4046     // Try to reduce the number of available SATB buffers so that
4047     // remark has less work to do.
4048     drain_satb_buffers();
4049   }
4050 
4051   // Since we've done everything else, we can now totally drain the
4052   // local queue and global stack.
4053   drain_local_queue(false);
4054   drain_global_stack(false);
4055 
4056   // Attempt at work stealing from other tasks' queues.
4057   if (do_stealing && !has_aborted()) {
4058     // We have not aborted. This means that we have finished all that
4059     // we could. Let's try to do some stealing...
4060 
4061     // We cannot check whether the global stack is empty, since other
4062     // tasks might be pushing objects to it concurrently.
4063     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4064            "only way to reach here");
4065 
4066     if (_cm->verbose_low()) {
4067       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4068     }
4069 
4070     while (!has_aborted()) {
4071       oop obj;
4072       statsOnly( ++_steal_attempts );
4073 
4074       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4075         if (_cm->verbose_medium()) {
4076           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4077                                  _task_id, (void*) obj);
4078         }
4079 
4080         statsOnly( ++_steals );
4081 
4082         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4083                "any stolen object should be marked");
4084         scan_object(obj);
4085 
4086         // And since we're towards the end, let's totally drain the
4087         // local queue and global stack.
4088         drain_local_queue(false);
4089         drain_global_stack(false);
4090       } else {
4091         break;
4092       }
4093     }
4094   }
4095 
4096   // If we are about to wrap up and go into termination, check if we
4097   // should raise the overflow flag.
4098   if (do_termination && !has_aborted()) {
4099     if (_cm->force_overflow()->should_force()) {
4100       _cm->set_has_overflown();
4101       regular_clock_call();
4102     }
4103   }
4104 
4105   // We still haven't aborted. Now, let's try to get into the
4106   // termination protocol.
4107   if (do_termination && !has_aborted()) {
4108     // We cannot check whether the global stack is empty, since other
4109     // tasks might be concurrently pushing objects on it.
4110     // Separated the asserts so that we know which one fires.
4111     assert(_cm->out_of_regions(), "only way to reach here");
4112     assert(_task_queue->size() == 0, "only way to reach here");
4113 
4114     if (_cm->verbose_low()) {
4115       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4116     }
4117 
4118     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4119     // The CMTask class also extends the TerminatorTerminator class,
4120     // hence its should_exit_termination() method will also decide
4121     // whether to exit the termination protocol or not.
4122     bool finished = _cm->terminator()->offer_termination(this);
4123     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4124     _termination_time_ms +=
4125       termination_end_time_ms - _termination_start_time_ms;
4126 
4127     if (finished) {
4128       // We're all done.
4129 
4130       if (_task_id == 0) {
4131         // let's allow task 0 to do this
4132         if (concurrent()) {
4133           assert(_cm->concurrent_marking_in_progress(), "invariant");
4134           // we need to set this to false before the next
4135           // safepoint. This way we ensure that the marking phase
4136           // doesn't observe any more heap expansions.
4137           _cm->clear_concurrent_marking_in_progress();
4138         }
4139       }
4140 
4141       // We can now guarantee that the global stack is empty, since
4142       // all other tasks have finished. We separated the guarantees so
4143       // that, if a condition is false, we can immediately find out
4144       // which one.
4145       guarantee(_cm->out_of_regions(), "only way to reach here");
4146       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4147       guarantee(_task_queue->size() == 0, "only way to reach here");
4148       guarantee(!_cm->has_overflown(), "only way to reach here");
4149       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4150 
4151       if (_cm->verbose_low()) {
4152         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4153       }
4154     } else {
4155       // Apparently there's more work to do. Let's abort this task. The
4156       // caller will restart it and we can hopefully find more things to do.
4157 
4158       if (_cm->verbose_low()) {
4159         gclog_or_tty->print_cr("[%d] apparently there is more work to do",
4160                                _task_id);
4161       }
4162 
4163       set_has_aborted();
4164       statsOnly( ++_aborted_termination );
4165     }
4166   }
4167 
4168   // Mainly for debugging purposes to make sure that a pointer to the
4169   // closure which was allocated on the stack in this frame doesn't
4170   // escape it by accident.
4171   set_cm_oop_closure(NULL);
4172   double end_time_ms = os::elapsedVTime() * 1000.0;
4173   double elapsed_time_ms = end_time_ms - _start_time_ms;
4174   // Update the step history.
4175   _step_times_ms.add(elapsed_time_ms);
4176 
4177   if (has_aborted()) {
4178     // The task was aborted for some reason.
4179 
4180     statsOnly( ++_aborted );
4181 
4182     if (_has_timed_out) {
4183       double diff_ms = elapsed_time_ms - _time_target_ms;
4184       // Keep statistics of how well we did with respect to hitting
4185       // our target only if we actually timed out (if we aborted for
4186       // other reasons, then the results might get skewed).
4187       _marking_step_diffs_ms.add(diff_ms);
4188     }
4189 
4190     if (_cm->has_overflown()) {
4191       // This is the interesting one. We aborted because a global
4192       // overflow was raised. This means we have to restart the
4193       // marking phase and start iterating over regions. However, in
4194       // order to do this we have to make sure that all tasks stop
4195       // what they are doing and re-initialise in a safe manner. We
4196       // will achieve this with the use of two barrier sync points.
4197 
4198       if (_cm->verbose_low()) {
4199         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4200       }
4201 
4202       _cm->enter_first_sync_barrier(_task_id);
4203       // When we exit this sync barrier we know that all tasks have
4204       // stopped doing marking work. So, it's now safe to
4205       // re-initialise our data structures. At the end of this method,
4206       // task 0 will clear the global data structures.
4207 
4208       statsOnly( ++_aborted_overflow );
4209 
4210       // We clear the local state of this task...
4211       clear_region_fields();
4212 
4213       // ...and enter the second barrier.
4214       _cm->enter_second_sync_barrier(_task_id);
4215       // At this point everything has been re-initialised and we're
4216       // ready to restart.
4217     }
4218 
4219     if (_cm->verbose_low()) {
4220       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4221                              "elapsed = %1.2lfms <<<<<<<<<<",
4222                              _task_id, _time_target_ms, elapsed_time_ms);
4223       if (_cm->has_aborted()) {
4224         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4225                                _task_id);
4226       }
4227     }
4228   } else {
4229     if (_cm->verbose_low()) {
4230       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4231                              "elapsed = %1.2lfms <<<<<<<<<<",
4232                              _task_id, _time_target_ms, elapsed_time_ms);
4233     }
4234   }
4235 
4236   _claimed = false;
4237 }
4238 
4239 CMTask::CMTask(int task_id,
4240                ConcurrentMark* cm,
4241                size_t* marked_bytes,
4242                BitMap* card_bm,
4243                CMTaskQueue* task_queue,
4244                CMTaskQueueSet* task_queues)
4245   : _g1h(G1CollectedHeap::heap()),
4246     _task_id(task_id), _cm(cm),
4247     _claimed(false),
4248     _nextMarkBitMap(NULL), _hash_seed(17),
4249     _task_queue(task_queue),
4250     _task_queues(task_queues),
4251     _cm_oop_closure(NULL),
4252     _marked_bytes_array(marked_bytes),
4253     _card_bm(card_bm) {
4254   guarantee(task_queue != NULL, "invariant");
4255   guarantee(task_queues != NULL, "invariant");
4256 
4257   statsOnly( _clock_due_to_scanning = 0;
4258              _clock_due_to_marking  = 0 );
4259 
4260   _marking_step_diffs_ms.add(0.5);
4261 }
4262 
4263 // These are formatting macros that are used below to ensure
4264 // consistent formatting. The *_H_* versions are used to format the
4265 // header for a particular value and they should be kept consistent
4266 // with the corresponding macro. Also note that most of the macros add
4267 // the necessary white space (as a prefix) which makes them a bit
4268 // easier to compose.
4269 
4270 // All the output lines are prefixed with this string to be able to
4271 // identify them easily in a large log file.
4272 #define G1PPRL_LINE_PREFIX            "###"
4273 
4274 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
4275 #ifdef _LP64
4276 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4277 #else // _LP64
4278 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4279 #endif // _LP64
4280 
4281 // For per-region info
4282 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4283 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4284 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
4285 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4286 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4287 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4288 
4289 // For summary info
4290 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
4291 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
4292 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
4293 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
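     // As an illustrative example (hypothetical addresses and sizes), a
     // per-region line built from the formats above would look roughly like:
     //   ### OLD  0x00000000f0000000-0x00000000f0100000    1048576     524288     262144           12.3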
4294 
4295 G1PrintRegionLivenessInfoClosure::
4296 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4297   : _out(out),
4298     _total_used_bytes(0), _total_capacity_bytes(0),
4299     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4300     _hum_used_bytes(0), _hum_capacity_bytes(0),
4301     _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4302   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4303   MemRegion g1_committed = g1h->g1_committed();
4304   MemRegion g1_reserved = g1h->g1_reserved();
4305   double now = os::elapsedTime();
4306 
4307   // Print the header of the output.
4308   _out->cr();
4309   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4310   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4311                  G1PPRL_SUM_ADDR_FORMAT("committed")
4312                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4313                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4314                  g1_committed.start(), g1_committed.end(),
4315                  g1_reserved.start(), g1_reserved.end(),
4316                  HeapRegion::GrainBytes);
4317   _out->print_cr(G1PPRL_LINE_PREFIX);
4318   _out->print_cr(G1PPRL_LINE_PREFIX
4319                  G1PPRL_TYPE_H_FORMAT
4320                  G1PPRL_ADDR_BASE_H_FORMAT
4321                  G1PPRL_BYTE_H_FORMAT
4322                  G1PPRL_BYTE_H_FORMAT
4323                  G1PPRL_BYTE_H_FORMAT
4324                  G1PPRL_DOUBLE_H_FORMAT,
4325                  "type", "address-range",
4326                  "used", "prev-live", "next-live", "gc-eff");
4327   _out->print_cr(G1PPRL_LINE_PREFIX
4328                  G1PPRL_TYPE_H_FORMAT
4329                  G1PPRL_ADDR_BASE_H_FORMAT
4330                  G1PPRL_BYTE_H_FORMAT
4331                  G1PPRL_BYTE_H_FORMAT
4332                  G1PPRL_BYTE_H_FORMAT
4333                  G1PPRL_DOUBLE_H_FORMAT,
4334                  "", "",
4335                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4336 }
4337 
4338 // It takes as a parameter a pointer to one of the _hum_* fields,
4339 // deduces the corresponding value for a region in a humongous region
4340 // series (either the region size, or what's left if the _hum_* field
4341 // is < the region size), and updates the _hum_* field accordingly.
4342 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4343   size_t bytes = 0;
4344   // The > 0 check is to deal with the prev and next live bytes which
4345   // could be 0.
4346   if (*hum_bytes > 0) {
4347     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4348     *hum_bytes -= bytes;
4349   }
4350   return bytes;
4351 }
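     // Illustrative example (hypothetical sizes): with a region size of 1M
     // and a humongous series whose total used bytes of 2.5M are stored in a
     // _hum_* field, three successive calls return 1M, 1M and 0.5M, leaving
     // the field at zero once doHeapRegion() below has walked the series.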
4352 
4353 // It deduces the values for a region in a humongous region series
4354 // from the _hum_* fields and updates those accordingly. It assumes
4355 // that the _hum_* fields have already been set up from the "starts
4356 // humongous" region and that we visit the regions in address order.
4357 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4358                                                      size_t* capacity_bytes,
4359                                                      size_t* prev_live_bytes,
4360                                                      size_t* next_live_bytes) {
4361   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4362   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4363   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4364   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4365   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4366 }
4367 
4368 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4369   const char* type = "";
4370   HeapWord* bottom       = r->bottom();
4371   HeapWord* end          = r->end();
4372   size_t capacity_bytes  = r->capacity();
4373   size_t used_bytes      = r->used();
4374   size_t prev_live_bytes = r->live_bytes();
4375   size_t next_live_bytes = r->next_live_bytes();
4376   double gc_eff          = r->gc_efficiency();
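       // Note: survivor regions are also young regions, so the is_survivor()
       // check below has to precede the is_young() check for the EDEN tag to
       // be accurate.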
4377   if (r->used() == 0) {
4378     type = "FREE";
4379   } else if (r->is_survivor()) {
4380     type = "SURV";
4381   } else if (r->is_young()) {
4382     type = "EDEN";
4383   } else if (r->startsHumongous()) {
4384     type = "HUMS";
4385 
4386     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4387            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4388            "they should have been zeroed after the last time we used them");
4389     // Set up the _hum_* fields.
4390     _hum_capacity_bytes  = capacity_bytes;
4391     _hum_used_bytes      = used_bytes;
4392     _hum_prev_live_bytes = prev_live_bytes;
4393     _hum_next_live_bytes = next_live_bytes;
4394     get_hum_bytes(&used_bytes, &capacity_bytes,
4395                   &prev_live_bytes, &next_live_bytes);
4396     end = bottom + HeapRegion::GrainWords;
4397   } else if (r->continuesHumongous()) {
4398     type = "HUMC";
4399     get_hum_bytes(&used_bytes, &capacity_bytes,
4400                   &prev_live_bytes, &next_live_bytes);
4401     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4402   } else {
4403     type = "OLD";
4404   }
4405 
4406   _total_used_bytes      += used_bytes;
4407   _total_capacity_bytes  += capacity_bytes;
4408   _total_prev_live_bytes += prev_live_bytes;
4409   _total_next_live_bytes += next_live_bytes;
4410 
4411   // Print a line for this particular region.
4412   _out->print_cr(G1PPRL_LINE_PREFIX
4413                  G1PPRL_TYPE_FORMAT
4414                  G1PPRL_ADDR_BASE_FORMAT
4415                  G1PPRL_BYTE_FORMAT
4416                  G1PPRL_BYTE_FORMAT
4417                  G1PPRL_BYTE_FORMAT
4418                  G1PPRL_DOUBLE_FORMAT,
4419                  type, bottom, end,
4420                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4421 
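       // Returning false keeps the heap region iteration going, so that
       // every region gets a line in the table.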
4422   return false;
4423 }
4424 
4425 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4426   // Print the footer of the output.
4427   _out->print_cr(G1PPRL_LINE_PREFIX);
4428   _out->print_cr(G1PPRL_LINE_PREFIX
4429                  " SUMMARY"
4430                  G1PPRL_SUM_MB_FORMAT("capacity")
4431                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4432                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4433                  G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4434                  bytes_to_mb(_total_capacity_bytes),
4435                  bytes_to_mb(_total_used_bytes),
4436                  perc(_total_used_bytes, _total_capacity_bytes),
4437                  bytes_to_mb(_total_prev_live_bytes),
4438                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4439                  bytes_to_mb(_total_next_live_bytes),
4440                  perc(_total_next_live_bytes, _total_capacity_bytes));
4441   _out->cr();
4442 }