1 /*
   2  * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/symbolTable.hpp"
  27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
  28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
  32 #include "gc_implementation/g1/g1Log.hpp"
  33 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
  34 #include "gc_implementation/g1/g1RemSet.hpp"
  35 #include "gc_implementation/g1/heapRegion.inline.hpp"
  36 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
  38 #include "gc_implementation/shared/vmGCOperations.hpp"
  39 #include "memory/genOopClosures.inline.hpp"
  40 #include "memory/referencePolicy.hpp"
  41 #include "memory/resourceArea.hpp"
  42 #include "oops/oop.inline.hpp"
  43 #include "runtime/handles.inline.hpp"
  44 #include "runtime/java.hpp"
  45 #include "services/memTracker.hpp"
  46 
  47 // Concurrent marking bit map wrapper
  48 
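// The bit map covers the heap words of the underlying ReservedSpace at a
// granularity of one bit per (1 << _shifter) heap words; the backing store
// below is therefore sized to (_bmWordSize >> (_shifter + LogBitsPerByte))
// bytes, rounded up to the allocation alignment, and committed up front.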
  49 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  50   _bm((uintptr_t*)NULL,0),
  51   _shifter(shifter) {
  52   _bmStartWord = (HeapWord*)(rs.base());
  53   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  54   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
  55                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  56 
  57   MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
  58 
  59   guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
  60  // For now we'll just commit all of the bit map up front.
  61  // Later on we'll try to be more parsimonious with swap.
  62   guarantee(_virtual_space.initialize(brs, brs.size()),
  63             "couldn't reserve backing store for concurrent marking bit map");
  64   assert(_virtual_space.committed_size() == brs.size(),
  65          "didn't reserve backing store for all of concurrent marking bit map?");
  66   _bm.set_map((uintptr_t*)_virtual_space.low());
  67   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
  68          _bmWordSize, "inconsistency in bit map sizing");
  69   _bm.set_size(_bmWordSize >> _shifter);
  70 }
  71 
  72 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  73                                                HeapWord* limit) const {
  74   // First we must round addr *up* to a possible object boundary.
  75   addr = (HeapWord*)align_size_up((intptr_t)addr,
  76                                   HeapWordSize << _shifter);
  77   size_t addrOffset = heapWordToOffset(addr);
  78   if (limit == NULL) {
  79     limit = _bmStartWord + _bmWordSize;
  80   }
  81   size_t limitOffset = heapWordToOffset(limit);
  82   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  83   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  84   assert(nextAddr >= addr, "get_next_one postcondition");
  85   assert(nextAddr == limit || isMarked(nextAddr),
  86          "get_next_one postcondition");
  87   return nextAddr;
  88 }
  89 
  90 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  91                                                  HeapWord* limit) const {
  92   size_t addrOffset = heapWordToOffset(addr);
  93   if (limit == NULL) {
  94     limit = _bmStartWord + _bmWordSize;
  95   }
  96   size_t limitOffset = heapWordToOffset(limit);
  97   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  98   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  99   assert(nextAddr >= addr, "get_next_zero postcondition");
 100   assert(nextAddr == limit || !isMarked(nextAddr),
 101          "get_next_zero postcondition");
 102   return nextAddr;
 103 }
 104 
 105 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 106   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 107   return (int) (diff >> _shifter);
 108 }
 109 
 110 #ifndef PRODUCT
 111 bool CMBitMapRO::covers(ReservedSpace rs) const {
 112   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 113   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
 114          "size inconsistency");
 115   return _bmStartWord == (HeapWord*)(rs.base()) &&
 116          _bmWordSize  == rs.size()>>LogHeapWordSize;
 117 }
 118 #endif
 119 
 120 void CMBitMap::clearAll() {
 121   _bm.clear();
 122   return;
 123 }
 124 
 125 void CMBitMap::markRange(MemRegion mr) {
 126   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 127   assert(!mr.is_empty(), "unexpected empty region");
 128   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 129           ((HeapWord *) mr.end())),
 130          "markRange memory region end is not card aligned");
 131   // convert address range into offset range
 132   _bm.at_put_range(heapWordToOffset(mr.start()),
 133                    heapWordToOffset(mr.end()), true);
 134 }
 135 
 136 void CMBitMap::clearRange(MemRegion mr) {
 137   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 138   assert(!mr.is_empty(), "unexpected empty region");
 139   // convert address range into offset range
 140   _bm.at_put_range(heapWordToOffset(mr.start()),
 141                    heapWordToOffset(mr.end()), false);
 142 }
 143 
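// Returns the first contiguous run of marked bits in [addr, end_addr) as a
// MemRegion (possibly empty) and clears those bits in the bit map.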
 144 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 145                                             HeapWord* end_addr) {
 146   HeapWord* start = getNextMarkedWordAddress(addr);
 147   start = MIN2(start, end_addr);
 148   HeapWord* end   = getNextUnmarkedWordAddress(start);
 149   end = MIN2(end, end_addr);
 150   assert(start <= end, "Consistency check");
 151   MemRegion mr(start, end);
 152   if (!mr.is_empty()) {
 153     clearRange(mr);
 154   }
 155   return mr;
 156 }
 157 
 158 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 159   _base(NULL), _cm(cm)
 160 #ifdef ASSERT
 161   , _drain_in_progress(false)
 162   , _drain_in_progress_yields(false)
 163 #endif
 164 {}
 165 
 166 void CMMarkStack::allocate(size_t size) {
 167   _base = NEW_C_HEAP_ARRAY(oop, size, mtGC);
 168   if (_base == NULL) {
 169     vm_exit_during_initialization("Failed to allocate CM region mark stack");
 170   }
 171   _index = 0;
 172   _capacity = (jint) size;
 173   _saved_index = -1;
 174   NOT_PRODUCT(_max_depth = 0);
 175 }
 176 
 177 CMMarkStack::~CMMarkStack() {
 178   if (_base != NULL) {
 179     FREE_C_HEAP_ARRAY(oop, _base, mtGC);
 180   }
 181 }
 182 
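// Lock-free push: claim a slot by advancing _index with a CAS and only then
// store the oop into it. If the CAS loses a race we retry; if the stack is
// full we record the overflow and return without pushing.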
 183 void CMMarkStack::par_push(oop ptr) {
 184   while (true) {
 185     if (isFull()) {
 186       _overflow = true;
 187       return;
 188     }
 189     // Otherwise...
 190     jint index = _index;
 191     jint next_index = index+1;
 192     jint res = Atomic::cmpxchg(next_index, &_index, index);
 193     if (res == index) {
 194       _base[index] = ptr;
 195       // Note that we don't maintain this atomically.  We could, but it
 196       // doesn't seem necessary.
 197       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 198       return;
 199     }
 200     // Otherwise, we need to try again.
 201   }
 202 }
 203 
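// Bulk version of par_push(): claims n consecutive slots with a single CAS
// on _index and then copies the oops in. As above, filling up the stack is
// reported through _overflow rather than by blocking.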
 204 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 205   while (true) {
 206     if (isFull()) {
 207       _overflow = true;
 208       return;
 209     }
 210     // Otherwise...
 211     jint index = _index;
 212     jint next_index = index + n;
 213     if (next_index > _capacity) {
 214       _overflow = true;
 215       return;
 216     }
 217     jint res = Atomic::cmpxchg(next_index, &_index, index);
 218     if (res == index) {
 219       for (int i = 0; i < n; i++) {
 220         int ind = index + i;
 221         assert(ind < _capacity, "By overflow test above.");
 222         _base[ind] = ptr_arr[i];
 223       }
 224       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 225       return;
 226     }
 227     // Otherwise, we need to try again.
 228   }
 229 }
 230 
 231 
 232 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 233   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 234   jint start = _index;
 235   jint next_index = start + n;
 236   if (next_index > _capacity) {
 237     _overflow = true;
 238     return;
 239   }
 240   // Otherwise.
 241   _index = next_index;
 242   for (int i = 0; i < n; i++) {
 243     int ind = start + i;
 244     assert(ind < _capacity, "By overflow test above.");
 245     _base[ind] = ptr_arr[i];
 246   }
 247 }
 248 
 249 
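// Pops up to max entries into ptr_arr under ParGCRareEvent_lock, storing the
// actual number popped in *n. Returns false if the stack was empty.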
 250 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 251   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 252   jint index = _index;
 253   if (index == 0) {
 254     *n = 0;
 255     return false;
 256   } else {
 257     int k = MIN2(max, index);
 258     jint new_ind = index - k;
 259     for (int j = 0; j < k; j++) {
 260       ptr_arr[j] = _base[new_ind + j];
 261     }
 262     _index = new_ind;
 263     *n = k;
 264     return true;
 265   }
 266 }
 267 
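// Repeatedly pops entries off the stack and applies the closure to each one.
// Entries are expected to be grey (marked on the supplied bit map, if any).
// If yield_after is true we offer to yield after each entry and return false
// if the drain was interrupted by the yield check.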
 268 template<class OopClosureClass>
 269 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 270   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 271          || SafepointSynchronize::is_at_safepoint(),
 272          "Drain recursion must be yield-safe.");
 273   bool res = true;
 274   debug_only(_drain_in_progress = true);
 275   debug_only(_drain_in_progress_yields = yield_after);
 276   while (!isEmpty()) {
 277     oop newOop = pop();
 278     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 279     assert(newOop->is_oop(), "Expected an oop");
 280     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 281            "only grey objects on this stack");
 282     newOop->oop_iterate(cl);
 283     if (yield_after && _cm->do_yield_check()) {
 284       res = false;
 285       break;
 286     }
 287   }
 288   debug_only(_drain_in_progress = false);
 289   return res;
 290 }
 291 
 292 void CMMarkStack::note_start_of_gc() {
 293   assert(_saved_index == -1,
 294          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 295   _saved_index = _index;
 296 }
 297 
 298 void CMMarkStack::note_end_of_gc() {
 299   // This is intentionally a guarantee, instead of an assert. If we
 300   // accidentally add something to the mark stack during GC, it
 301   // will be a correctness issue so it's better if we crash. We'll
 302   // only check this once per GC anyway, so it won't be a performance
 303   // issue in any way.
 304   guarantee(_saved_index == _index,
 305             err_msg("saved index: %d index: %d", _saved_index, _index));
 306   _saved_index = -1;
 307 }
 308 
 309 void CMMarkStack::oops_do(OopClosure* f) {
 310   assert(_saved_index == _index,
 311          err_msg("saved index: %d index: %d", _saved_index, _index));
 312   for (int i = 0; i < _index; i += 1) {
 313     f->do_oop(&_base[i]);
 314   }
 315 }
 316 
 317 bool ConcurrentMark::not_yet_marked(oop obj) const {
 318   return _g1h->is_obj_ill(obj);
 319 }
 320 
 321 CMRootRegions::CMRootRegions() :
 322   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 323   _should_abort(false),  _next_survivor(NULL) { }
 324 
 325 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 326   _young_list = g1h->young_list();
 327   _cm = cm;
 328 }
 329 
 330 void CMRootRegions::prepare_for_scan() {
 331   assert(!scan_in_progress(), "pre-condition");
 332 
 333   // Currently, only survivors can be root regions.
 334   assert(_next_survivor == NULL, "pre-condition");
 335   _next_survivor = _young_list->first_survivor_region();
 336   _scan_in_progress = (_next_survivor != NULL);
 337   _should_abort = false;
 338 }
 339 
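// Claims the next root region to scan, or returns NULL if there are none
// left (or if we have been asked to abort). The unsynchronized read of
// _next_survivor is just a fast-path check; the claim itself is re-done
// under RootRegionScan_lock.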
 340 HeapRegion* CMRootRegions::claim_next() {
 341   if (_should_abort) {
 342     // If someone has set the should_abort flag, we return NULL to
 343     // force the caller to bail out of their loop.
 344     return NULL;
 345   }
 346 
 347   // Currently, only survivors can be root regions.
 348   HeapRegion* res = _next_survivor;
 349   if (res != NULL) {
 350     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 351     // Read it again in case it changed while we were waiting for the lock.
 352     res = _next_survivor;
 353     if (res != NULL) {
 354       if (res == _young_list->last_survivor_region()) {
 355         // We just claimed the last survivor so store NULL to indicate
 356         // that we're done.
 357         _next_survivor = NULL;
 358       } else {
 359         _next_survivor = res->get_next_young_region();
 360       }
 361     } else {
 362       // Someone else claimed the last survivor while we were trying
 363       // to take the lock so nothing else to do.
 364     }
 365   }
 366   assert(res == NULL || res->is_survivor(), "post-condition");
 367 
 368   return res;
 369 }
 370 
 371 void CMRootRegions::scan_finished() {
 372   assert(scan_in_progress(), "pre-condition");
 373 
 374   // Currently, only survivors can be root regions.
 375   if (!_should_abort) {
 376     assert(_next_survivor == NULL, "we should have claimed all survivors");
 377   }
 378   _next_survivor = NULL;
 379 
 380   {
 381     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 382     _scan_in_progress = false;
 383     RootRegionScan_lock->notify_all();
 384   }
 385 }
 386 
 387 bool CMRootRegions::wait_until_scan_finished() {
 388   if (!scan_in_progress()) return false;
 389 
 390   {
 391     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 392     while (scan_in_progress()) {
 393       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 394     }
 395   }
 396   return true;
 397 }
 398 
 399 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 400 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 401 #endif // _MSC_VER
 402 
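// Roughly one concurrent marking thread for every four parallel GC threads,
// with a minimum of one.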
 403 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 404   return MAX2((n_par_threads + 2) / 4, 1U);
 405 }
 406 
 407 ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
 408   _markBitMap1(rs, MinObjAlignment - 1),
 409   _markBitMap2(rs, MinObjAlignment - 1),
 410 
 411   _parallel_marking_threads(0),
 412   _max_parallel_marking_threads(0),
 413   _sleep_factor(0.0),
 414   _marking_task_overhead(1.0),
 415   _cleanup_sleep_factor(0.0),
 416   _cleanup_task_overhead(1.0),
 417   _cleanup_list("Cleanup List"),
 418   _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
 419   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
 420            CardTableModRefBS::card_shift,
 421            false /* in_resource_area*/),
 422 
 423   _prevMarkBitMap(&_markBitMap1),
 424   _nextMarkBitMap(&_markBitMap2),
 425 
 426   _markStack(this),
 427   // _finger set in set_non_marking_state
 428 
 429   _max_task_num(MAX2((uint)ParallelGCThreads, 1U)),
 430   // _active_tasks set in set_non_marking_state
 431   // _tasks set inside the constructor
 432   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
 433   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
 434 
 435   _has_overflown(false),
 436   _concurrent(false),
 437   _has_aborted(false),
 438   _restart_for_overflow(false),
 439   _concurrent_marking_in_progress(false),
 440 
 441   // _verbose_level set below
 442 
 443   _init_times(),
 444   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 445   _cleanup_times(),
 446   _total_counting_time(0.0),
 447   _total_rs_scrub_time(0.0),
 448 
 449   _parallel_workers(NULL),
 450 
 451   _count_card_bitmaps(NULL),
 452   _count_marked_bytes(NULL) {
 453   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 454   if (verbose_level < no_verbose) {
 455     verbose_level = no_verbose;
 456   }
 457   if (verbose_level > high_verbose) {
 458     verbose_level = high_verbose;
 459   }
 460   _verbose_level = verbose_level;
 461 
 462   if (verbose_low()) {
 463     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 464                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
 465   }
 466 
 467   _markStack.allocate(MarkStackSize);
 468 
 469   // Create & start a ConcurrentMark thread.
 470   _cmThread = new ConcurrentMarkThread(this);
 471   assert(cmThread() != NULL, "CM Thread should have been created");
 472   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 473 
 474   _g1h = G1CollectedHeap::heap();
 475   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 476   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
 477   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
 478 
 479   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 480   satb_qs.set_buffer_size(G1SATBBufferSize);
 481 
 482   _root_regions.init(_g1h, this);
 483 
 484   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num, mtGC);
 485   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num, mtGC);
 486 
 487   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_task_num, mtGC);
 488   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num, mtGC);
 489 
 490   BitMap::idx_t card_bm_size = _card_bm.size();
 491 
 492   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 493   _active_tasks = _max_task_num;
 494   for (int i = 0; i < (int) _max_task_num; ++i) {
 495     CMTaskQueue* task_queue = new CMTaskQueue();
 496     task_queue->initialize();
 497     _task_queues->register_queue(i, task_queue);
 498 
 499     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 500     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC);
 501 
 502     _tasks[i] = new CMTask(i, this,
 503                            _count_marked_bytes[i],
 504                            &_count_card_bitmaps[i],
 505                            task_queue, _task_queues);
 506 
 507     _accum_task_vtime[i] = 0.0;
 508   }
 509 
 510   // Calculate the card number for the bottom of the heap. Used
 511   // in biasing indexes into the accounting card bitmaps.
 512   _heap_bottom_card_num =
 513     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 514                                 CardTableModRefBS::card_shift);
 515 
 516   // Clear all the liveness counting data
 517   clear_all_count_data();
 518 
 519   if (ConcGCThreads > ParallelGCThreads) {
 520     vm_exit_during_initialization("Can't have more ConcGCThreads "
 521                                   "than ParallelGCThreads.");
 522   }
 523   if (ParallelGCThreads == 0) {
 524     // if we are not running with any parallel GC threads we will not
 525     // spawn any marking threads either
 526     _parallel_marking_threads =       0;
 527     _max_parallel_marking_threads =   0;
 528     _sleep_factor             =     0.0;
 529     _marking_task_overhead    =     1.0;
 530   } else {
 531     if (ConcGCThreads > 0) {
 532       // notice that ConcGCThreads overrides G1MarkingOverheadPercent
 533       // if both are set
 534 
 535       _parallel_marking_threads = (uint) ConcGCThreads;
 536       _max_parallel_marking_threads = _parallel_marking_threads;
 537       _sleep_factor             = 0.0;
 538       _marking_task_overhead    = 1.0;
 539     } else if (G1MarkingOverheadPercent > 0) {
 540       // we will calculate the number of parallel marking threads
 541       // based on a target overhead with respect to the soft real-time
 542       // goal
 543 
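      // A worked example with hypothetical values (not the defaults of any
      // particular build): MaxGCPauseMillis = 200, GCPauseIntervalMillis = 1000,
      // G1MarkingOverheadPercent = 10 and 8 processors give
      // overall_cm_overhead = 200 * 0.10 / 1000 = 0.02, cpu_ratio = 0.125,
      // marking_thread_num = ceil(0.02 / 0.125) = 1, marking_task_overhead =
      // 0.02 * 8 / 1 = 0.16 and sleep_factor = (1 - 0.16) / 0.16 = 5.25, i.e.
      // one marking thread that sleeps 5.25x as long as it runs.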
 544       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 545       double overall_cm_overhead =
 546         (double) MaxGCPauseMillis * marking_overhead /
 547         (double) GCPauseIntervalMillis;
 548       double cpu_ratio = 1.0 / (double) os::processor_count();
 549       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 550       double marking_task_overhead =
 551         overall_cm_overhead / marking_thread_num *
 552                                                 (double) os::processor_count();
 553       double sleep_factor =
 554                          (1.0 - marking_task_overhead) / marking_task_overhead;
 555 
 556       _parallel_marking_threads = (uint) marking_thread_num;
 557       _max_parallel_marking_threads = _parallel_marking_threads;
 558       _sleep_factor             = sleep_factor;
 559       _marking_task_overhead    = marking_task_overhead;
 560     } else {
 561       _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
 562       _max_parallel_marking_threads = _parallel_marking_threads;
 563       _sleep_factor             = 0.0;
 564       _marking_task_overhead    = 1.0;
 565     }
 566 
 567     if (parallel_marking_threads() > 1) {
 568       _cleanup_task_overhead = 1.0;
 569     } else {
 570       _cleanup_task_overhead = marking_task_overhead();
 571     }
 572     _cleanup_sleep_factor =
 573                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 574 
 575 #if 0
 576     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 577     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 578     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 579     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 580     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 581 #endif
 582 
 583     guarantee(parallel_marking_threads() > 0, "peace of mind");
 584     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
 585          _max_parallel_marking_threads, false, true);
 586     if (_parallel_workers == NULL) {
 587       vm_exit_during_initialization("Failed necessary allocation.");
 588     } else {
 589       _parallel_workers->initialize_workers();
 590     }
 591   }
 592 
 593   // so that the call below can read a sensible value
 594   _heap_start = (HeapWord*) rs.base();
 595   set_non_marking_state();
 596 }
 597 
 598 void ConcurrentMark::update_g1_committed(bool force) {
 599   // If concurrent marking is not in progress, then we do not need to
 600   // update _heap_end.
 601   if (!concurrent_marking_in_progress() && !force) return;
 602 
 603   MemRegion committed = _g1h->g1_committed();
 604   assert(committed.start() == _heap_start, "start shouldn't change");
 605   HeapWord* new_end = committed.end();
 606   if (new_end > _heap_end) {
 607     // The heap has been expanded.
 608 
 609     _heap_end = new_end;
 610   }
 611   // Notice that the heap can also shrink. However, this only happens
 612   // during a Full GC (at least currently) and the entire marking
 613   // phase will bail out and the task will not be restarted. So, let's
 614   // do nothing.
 615 }
 616 
 617 void ConcurrentMark::reset() {
 618   // Starting values for these two. This should be called in a STW
 619   // phase. CM will be notified of any future g1_committed expansions;
 620   // these notifications happen at the end of evacuation pauses, when
 621   // tasks are inactive.
 622   MemRegion committed = _g1h->g1_committed();
 623   _heap_start = committed.start();
 624   _heap_end   = committed.end();
 625 
 626   // Separated the asserts so that we know which one fires.
 627   assert(_heap_start != NULL, "heap bounds should look ok");
 628   assert(_heap_end != NULL, "heap bounds should look ok");
 629   assert(_heap_start < _heap_end, "heap bounds should look ok");
 630 
 631   // reset all the marking data structures and any necessary flags
 632   clear_marking_state();
 633 
 634   if (verbose_low()) {
 635     gclog_or_tty->print_cr("[global] resetting");
 636   }
 637 
 638   // We do reset all of them, since different phases will use a
 639   // different number of active threads. So, it's easiest to have all
 640   // of them ready.
 641   for (int i = 0; i < (int) _max_task_num; ++i) {
 642     _tasks[i]->reset(_nextMarkBitMap);
 643   }
 644 
 645   // we need this to make sure that the flag is on during the evac
 646   // pause with initial mark piggy-backed
 647   set_concurrent_marking_in_progress();
 648 }
 649 
 650 void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
 651   assert(active_tasks <= _max_task_num, "we should not have more");
 652 
 653   _active_tasks = active_tasks;
 654   // Need to update the three data structures below according to the
 655   // number of active threads for this phase.
 656   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 657   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 658   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 659 
 660   _concurrent = concurrent;
 661   // We propagate this to all tasks, not just the active ones.
 662   for (int i = 0; i < (int) _max_task_num; ++i)
 663     _tasks[i]->set_concurrent(concurrent);
 664 
 665   if (concurrent) {
 666     set_concurrent_marking_in_progress();
 667   } else {
 668     // We currently assume that the concurrent flag has been set to
 669     // false before we start remark. At this point we should also be
 670     // in a STW phase.
 671     assert(!concurrent_marking_in_progress(), "invariant");
 672     assert(_finger == _heap_end, "only way to get here");
 673     update_g1_committed(true);
 674   }
 675 }
 676 
 677 void ConcurrentMark::set_non_marking_state() {
 678   // We set the global marking state to some default values when we're
 679   // not doing marking.
 680   clear_marking_state();
 681   _active_tasks = 0;
 682   clear_concurrent_marking_in_progress();
 683 }
 684 
 685 ConcurrentMark::~ConcurrentMark() {
 686   // The ConcurrentMark instance is never freed.
 687   ShouldNotReachHere();
 688 }
 689 
 690 void ConcurrentMark::clearNextBitmap() {
 691   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 692   G1CollectorPolicy* g1p = g1h->g1_policy();
 693 
 694   // Make sure that the concurrent mark thread appears to still be in
 695   // the current cycle.
 696   guarantee(cmThread()->during_cycle(), "invariant");
 697 
 698   // We are finishing up the current cycle by clearing the next
 699   // marking bitmap and getting it ready for the next cycle. During
 700   // this time no other cycle can start. So, let's make sure that this
 701   // is the case.
 702   guarantee(!g1h->mark_in_progress(), "invariant");
 703 
 704   // clear the mark bitmap (no grey objects to start with).
 705   // We need to do this in chunks and offer to yield in between
 706   // each chunk.
 707   HeapWord* start  = _nextMarkBitMap->startWord();
 708   HeapWord* end    = _nextMarkBitMap->endWord();
 709   HeapWord* cur    = start;
 710   size_t chunkSize = M;
 711   while (cur < end) {
 712     HeapWord* next = cur + chunkSize;
 713     if (next > end) {
 714       next = end;
 715     }
 716     MemRegion mr(cur,next);
 717     _nextMarkBitMap->clearRange(mr);
 718     cur = next;
 719     do_yield_check();
 720 
 721     // Repeat the asserts from above. We'll do them as asserts here to
 722     // minimize their overhead on the product. However, we'll have
 723     // them as guarantees at the beginning / end of the bitmap
 724     // clearing to get some checking in the product.
 725     assert(cmThread()->during_cycle(), "invariant");
 726     assert(!g1h->mark_in_progress(), "invariant");
 727   }
 728 
 729   // Clear the liveness counting data
 730   clear_all_count_data();
 731 
 732   // Repeat the asserts from above.
 733   guarantee(cmThread()->during_cycle(), "invariant");
 734   guarantee(!g1h->mark_in_progress(), "invariant");
 735 }
 736 
 737 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 738 public:
 739   bool doHeapRegion(HeapRegion* r) {
 740     if (!r->continuesHumongous()) {
 741       r->note_start_of_marking();
 742     }
 743     return false;
 744   }
 745 };
 746 
 747 void ConcurrentMark::checkpointRootsInitialPre() {
 748   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 749   G1CollectorPolicy* g1p = g1h->g1_policy();
 750 
 751   _has_aborted = false;
 752 
 753 #ifndef PRODUCT
 754   if (G1PrintReachableAtInitialMark) {
 755     print_reachable("at-cycle-start",
 756                     VerifyOption_G1UsePrevMarking, true /* all */);
 757   }
 758 #endif
 759 
 760   // Initialise marking structures. This has to be done in a STW phase.
 761   reset();
 762 
 763   // For each region note start of marking.
 764   NoteStartOfMarkHRClosure startcl;
 765   g1h->heap_region_iterate(&startcl);
 766 }
 767 
 768 
 769 void ConcurrentMark::checkpointRootsInitialPost() {
 770   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 771 
 772   // If we force an overflow during remark, the remark operation will
 773   // actually abort and we'll restart concurrent marking. If we always
 774   // force an overflow during remark we'll never actually complete the
 775   // marking phase. So, we initialize this here, at the start of the
 776   // cycle, so that the remaining overflow number will decrease at
 777   // every remark and we'll eventually not need to cause one.
 778   force_overflow_stw()->init();
 779 
 780   // Start Concurrent Marking weak-reference discovery.
 781   ReferenceProcessor* rp = g1h->ref_processor_cm();
 782   // enable ("weak") refs discovery
 783   rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
 784   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 785 
 786   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 787   // This is the start of the marking cycle; we expect all
 788   // threads to have SATB queues with active set to false.
 789   satb_mq_set.set_active_all_threads(true, /* new active value */
 790                                      false /* expected_active */);
 791 
 792   _root_regions.prepare_for_scan();
 793 
 794   // update_g1_committed() will be called at the end of an evac pause
 795   // when marking is on. So, it's also called at the end of the
 796   // initial-mark pause to update the heap end, if the heap expands
 797   // during it. No need to call it here.
 798 }
 799 
 800 /*
 801  * Notice that in the next two methods, we actually leave the STS
 802  * during the barrier sync and join it immediately afterwards. If we
 803  * do not do this, the following deadlock can occur: one thread could
 804  * be in the barrier sync code, waiting for the other thread to also
 805  * sync up, whereas another one could be trying to yield, while also
 806  * waiting for the other threads to sync up too.
 807  *
 808  * Note, however, that this code is also used during remark and in
 809  * this case we should not attempt to leave / enter the STS, otherwise
 810  * we'll either hit an assert (debug / fastdebug) or deadlock
 811  * (product). So we should only leave / enter the STS if we are
 812  * operating concurrently.
 813  *
 814  * Because the thread that does the sync barrier has left the STS, it
 815  * is possible for it to be suspended while a Full GC or an evacuation
 816  * pause occurs. This is actually safe, since entering the sync
 817  * barrier is one of the last things do_marking_step() does, and it
 818  * doesn't manipulate any data structures afterwards.
 819  */
 820 
 821 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
 822   if (verbose_low()) {
 823     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
 824   }
 825 
 826   if (concurrent()) {
 827     ConcurrentGCThread::stsLeave();
 828   }
 829   _first_overflow_barrier_sync.enter();
 830   if (concurrent()) {
 831     ConcurrentGCThread::stsJoin();
 832   }
 833   // at this point everyone should have synced up and not be doing any
 834   // more work
 835 
 836   if (verbose_low()) {
 837     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
 838   }
 839 
 840   // let task 0 do this
 841   if (task_num == 0) {
 842     // task 0 is responsible for clearing the global data structures
 843     // We should be here because of an overflow. During STW we should
 844     // not clear the overflow flag since we rely on it being true when
 845     // we exit this method to abort the pause and restart concurrent
 846     // marking.
 847     clear_marking_state(concurrent() /* clear_overflow */);
 848     force_overflow()->update();
 849 
 850     if (G1Log::fine()) {
 851       gclog_or_tty->date_stamp(PrintGCDateStamps);
 852       gclog_or_tty->stamp(PrintGCTimeStamps);
 853       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 854     }
 855   }
 856 
 857   // After this, each task should reset its own data structures and
 858   // then go into the second barrier.
 859 }
 860 
 861 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
 862   if (verbose_low()) {
 863     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
 864   }
 865 
 866   if (concurrent()) {
 867     ConcurrentGCThread::stsLeave();
 868   }
 869   _second_overflow_barrier_sync.enter();
 870   if (concurrent()) {
 871     ConcurrentGCThread::stsJoin();
 872   }
 873   // at this point everything should be re-initialised and ready to go
 874 
 875   if (verbose_low()) {
 876     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
 877   }
 878 }
 879 
 880 #ifndef PRODUCT
 881 void ForceOverflowSettings::init() {
 882   _num_remaining = G1ConcMarkForceOverflow;
 883   _force = false;
 884   update();
 885 }
 886 
 887 void ForceOverflowSettings::update() {
 888   if (_num_remaining > 0) {
 889     _num_remaining -= 1;
 890     _force = true;
 891   } else {
 892     _force = false;
 893   }
 894 }
 895 
 896 bool ForceOverflowSettings::should_force() {
 897   if (_force) {
 898     _force = false;
 899     return true;
 900   } else {
 901     return false;
 902   }
 903 }
 904 #endif // !PRODUCT
 905 
 906 class CMConcurrentMarkingTask: public AbstractGangTask {
 907 private:
 908   ConcurrentMark*       _cm;
 909   ConcurrentMarkThread* _cmt;
 910 
 911 public:
 912   void work(uint worker_id) {
 913     assert(Thread::current()->is_ConcurrentGC_thread(),
 914            "this should only be done by a conc GC thread");
 915     ResourceMark rm;
 916 
 917     double start_vtime = os::elapsedVTime();
 918 
 919     ConcurrentGCThread::stsJoin();
 920 
 921     assert(worker_id < _cm->active_tasks(), "invariant");
 922     CMTask* the_task = _cm->task(worker_id);
 923     the_task->record_start_time();
 924     if (!_cm->has_aborted()) {
 925       do {
 926         double start_vtime_sec = os::elapsedVTime();
 927         double start_time_sec = os::elapsedTime();
 928         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 929 
 930         the_task->do_marking_step(mark_step_duration_ms,
 931                                   true /* do_stealing    */,
 932                                   true /* do_termination */);
 933 
 934         double end_time_sec = os::elapsedTime();
 935         double end_vtime_sec = os::elapsedVTime();
 936         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
 937         double elapsed_time_sec = end_time_sec - start_time_sec;
 938         _cm->clear_has_overflown();
 939 
 940         bool ret = _cm->do_yield_check(worker_id);
 941 
 942         jlong sleep_time_ms;
 943         if (!_cm->has_aborted() && the_task->has_aborted()) {
 944           sleep_time_ms =
 945             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
 946           ConcurrentGCThread::stsLeave();
 947           os::sleep(Thread::current(), sleep_time_ms, false);
 948           ConcurrentGCThread::stsJoin();
 949         }
 950         double end_time2_sec = os::elapsedTime();
 951         double elapsed_time2_sec = end_time2_sec - start_time_sec;
 952 
 953 #if 0
 954           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
 955                                  "overhead %1.4lf",
 956                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
 957                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
 958           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
 959                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
 960 #endif
 961       } while (!_cm->has_aborted() && the_task->has_aborted());
 962     }
 963     the_task->record_end_time();
 964     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
 965 
 966     ConcurrentGCThread::stsLeave();
 967 
 968     double end_vtime = os::elapsedVTime();
 969     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 970   }
 971 
 972   CMConcurrentMarkingTask(ConcurrentMark* cm,
 973                           ConcurrentMarkThread* cmt) :
 974       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
 975 
 976   ~CMConcurrentMarkingTask() { }
 977 };
 978 
 979 // Calculates the number of active workers for a concurrent
 980 // phase.
 981 uint ConcurrentMark::calc_parallel_marking_threads() {
 982   if (G1CollectedHeap::use_parallel_gc_threads()) {
 983     uint n_conc_workers = 0;
 984     if (!UseDynamicNumberOfGCThreads ||
 985         (!FLAG_IS_DEFAULT(ConcGCThreads) &&
 986          !ForceDynamicNumberOfGCThreads)) {
 987       n_conc_workers = max_parallel_marking_threads();
 988     } else {
 989       n_conc_workers =
 990         AdaptiveSizePolicy::calc_default_active_workers(
 991                                      max_parallel_marking_threads(),
 992                                      1, /* Minimum workers */
 993                                      parallel_marking_threads(),
 994                                      Threads::number_of_non_daemon_threads());
 995       // Don't scale down "n_conc_workers" by scale_parallel_threads() because
 996       // that scaling has already gone into "_max_parallel_marking_threads".
 997     }
 998     assert(n_conc_workers > 0, "Always need at least 1");
 999     return n_conc_workers;
1000   }
1001   // If we are not running with any parallel GC threads we will not
1002   // have spawned any marking threads either. Hence the number of
1003   // concurrent workers should be 0.
1004   return 0;
1005 }
1006 
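// Scans a single root region: iterate every object in [bottom, top) and let
// the closure mark whatever those objects reference.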
1007 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1008   // Currently, only survivors can be root regions.
1009   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1010   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1011 
1012   const uintx interval = PrefetchScanIntervalInBytes;
1013   HeapWord* curr = hr->bottom();
1014   const HeapWord* end = hr->top();
1015   while (curr < end) {
1016     Prefetch::read(curr, interval);
1017     oop obj = oop(curr);
1018     int size = obj->oop_iterate(&cl);
1019     assert(size == obj->size(), "sanity");
1020     curr += size;
1021   }
1022 }
1023 
1024 class CMRootRegionScanTask : public AbstractGangTask {
1025 private:
1026   ConcurrentMark* _cm;
1027 
1028 public:
1029   CMRootRegionScanTask(ConcurrentMark* cm) :
1030     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1031 
1032   void work(uint worker_id) {
1033     assert(Thread::current()->is_ConcurrentGC_thread(),
1034            "this should only be done by a conc GC thread");
1035 
1036     CMRootRegions* root_regions = _cm->root_regions();
1037     HeapRegion* hr = root_regions->claim_next();
1038     while (hr != NULL) {
1039       _cm->scanRootRegion(hr, worker_id);
1040       hr = root_regions->claim_next();
1041     }
1042   }
1043 };
1044 
1045 void ConcurrentMark::scanRootRegions() {
1046   // scan_in_progress() will have been set to true only if there was
1047   // at least one root region to scan. So, if it's false, we
1048   // should not attempt to do any further work.
1049   if (root_regions()->scan_in_progress()) {
1050     _parallel_marking_threads = calc_parallel_marking_threads();
1051     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1052            "Maximum number of marking threads exceeded");
1053     uint active_workers = MAX2(1U, parallel_marking_threads());
1054 
1055     CMRootRegionScanTask task(this);
1056     if (parallel_marking_threads() > 0) {
1057       _parallel_workers->set_active_workers((int) active_workers);
1058       _parallel_workers->run_task(&task);
1059     } else {
1060       task.work(0);
1061     }
1062 
1063     // It's possible that has_aborted() is true here without actually
1064     // aborting the survivor scan earlier. This is OK as it's
1065     // mainly used for sanity checking.
1066     root_regions()->scan_finished();
1067   }
1068 }
1069 
1070 void ConcurrentMark::markFromRoots() {
1071   // we might be tempted to assert that:
1072   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1073   //        "inconsistent argument?");
1074   // However that wouldn't be right, because it's possible that
1075   // a safepoint is indeed in progress as a younger generation
1076   // stop-the-world GC happens even as we mark in this generation.
1077 
1078   _restart_for_overflow = false;
1079   force_overflow_conc()->init();
1080 
1081   // _g1h has _n_par_threads
1082   _parallel_marking_threads = calc_parallel_marking_threads();
1083   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1084     "Maximum number of marking threads exceeded");
1085 
1086   uint active_workers = MAX2(1U, parallel_marking_threads());
1087 
1088   // Parallel task terminator is set in "set_phase()"
1089   set_phase(active_workers, true /* concurrent */);
1090 
1091   CMConcurrentMarkingTask markingTask(this, cmThread());
1092   if (parallel_marking_threads() > 0) {
1093     _parallel_workers->set_active_workers((int)active_workers);
1094     // Don't set _n_par_threads because it affects MT in process_strong_roots()
1095     // and the decisions on that MT processing are made elsewhere.
1096     assert(_parallel_workers->active_workers() > 0, "Should have been set");
1097     _parallel_workers->run_task(&markingTask);
1098   } else {
1099     markingTask.work(0);
1100   }
1101   print_stats();
1102 }
1103 
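// The remark pause: finishes up concurrent marking with the world stopped,
// processes weak references, and either completes the marking phase or, if
// the global mark stack overflowed, arranges for concurrent marking to be
// restarted.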
1104 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1105   // world is stopped at this checkpoint
1106   assert(SafepointSynchronize::is_at_safepoint(),
1107          "world should be stopped");
1108 
1109   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1110 
1111   // If a full collection has happened, we shouldn't do this.
1112   if (has_aborted()) {
1113     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1114     return;
1115   }
1116 
1117   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1118 
1119   if (VerifyDuringGC) {
1120     HandleMark hm;  // handle scope
1121     gclog_or_tty->print(" VerifyDuringGC:(before)");
1122     Universe::heap()->prepare_for_verify();
1123     Universe::verify(/* silent      */ false,
1124                      /* option      */ VerifyOption_G1UsePrevMarking);
1125   }
1126 
1127   G1CollectorPolicy* g1p = g1h->g1_policy();
1128   g1p->record_concurrent_mark_remark_start();
1129 
1130   double start = os::elapsedTime();
1131 
1132   checkpointRootsFinalWork();
1133 
1134   double mark_work_end = os::elapsedTime();
1135 
1136   weakRefsWork(clear_all_soft_refs);
1137 
1138   if (has_overflown()) {
1139     // Oops.  We overflowed.  Restart concurrent marking.
1140     _restart_for_overflow = true;
1141     // Clear the flag. We do not need it any more.
1142     clear_has_overflown();
1143     if (G1TraceMarkStackOverflow) {
1144       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1145     }
1146   } else {
1147     // Aggregate the per-task counting data that we have accumulated
1148     // while marking.
1149     aggregate_count_data();
1150 
1151     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1152     // We're done with marking.
1153     // This is the end of the marking cycle; we expect all
1154     // threads to have SATB queues with active set to true.
1155     satb_mq_set.set_active_all_threads(false, /* new active value */
1156                                        true /* expected_active */);
1157 
1158     if (VerifyDuringGC) {
1159       HandleMark hm;  // handle scope
1160       gclog_or_tty->print(" VerifyDuringGC:(after)");
1161       Universe::heap()->prepare_for_verify();
1162       Universe::verify(/* silent      */ false,
1163                        /* option      */ VerifyOption_G1UseNextMarking);
1164     }
1165     assert(!restart_for_overflow(), "sanity");
1166   }
1167 
1168   // Reset the marking state if marking completed
1169   if (!restart_for_overflow()) {
1170     set_non_marking_state();
1171   }
1172 
1173 #if VERIFY_OBJS_PROCESSED
1174   _scan_obj_cl.objs_processed = 0;
1175   ThreadLocalObjQueue::objs_enqueued = 0;
1176 #endif
1177 
1178   // Statistics
1179   double now = os::elapsedTime();
1180   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1181   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1182   _remark_times.add((now - start) * 1000.0);
1183 
1184   g1p->record_concurrent_mark_remark_end();
1185 }
1186 
1187 // Base class of the closures that finalize and verify the
1188 // liveness counting data.
1189 class CMCountDataClosureBase: public HeapRegionClosure {
1190 protected:
1191   ConcurrentMark* _cm;
1192   BitMap* _region_bm;
1193   BitMap* _card_bm;
1194 
1195   void set_card_bitmap_range(BitMap::idx_t start_idx, BitMap::idx_t last_idx) {
1196     assert(start_idx <= last_idx, "sanity");
1197 
1198     // Set the inclusive bit range [start_idx, last_idx].
1199     // For small ranges (up to 8 cards) use a simple loop; otherwise
1200     // use par_at_put_range.
1201     if ((last_idx - start_idx) < 8) {
1202       for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
1203         _card_bm->par_set_bit(i);
1204       }
1205     } else {
1206       assert(last_idx < _card_bm->size(), "sanity");
1207       // Note BitMap::par_at_put_range() is exclusive.
1208       BitMap::idx_t max_idx = MIN2(last_idx+1, _card_bm->size());
1209       _card_bm->par_at_put_range(start_idx, max_idx, true);
1210     }
1211   }
1212 
1213   // It takes a region that's not empty (i.e., it has at least one
1214   // live object in it) and sets its corresponding bit on the region
1215   // bitmap to 1. If the region is "starts humongous" it will also set
1216   // to 1 the bits on the region bitmap that correspond to its
1217   // associated "continues humongous" regions.
1218   void set_bit_for_region(HeapRegion* hr) {
1219     assert(!hr->continuesHumongous(), "should have filtered those out");
1220 
1221     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1222     if (!hr->startsHumongous()) {
1223       // Normal (non-humongous) case: just set the bit.
1224       _region_bm->par_at_put(index, true);
1225     } else {
1226       // Starts humongous case: calculate how many regions are part of
1227       // this humongous region and then set the bit range.
1228       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1229       _region_bm->par_at_put_range(index, end_index, true);
1230     }
1231   }
1232 
1233 public:
1234   CMCountDataClosureBase(ConcurrentMark *cm,
1235                          BitMap* region_bm, BitMap* card_bm):
1236     _cm(cm), _region_bm(region_bm), _card_bm(card_bm) { }
1237 };
1238 
1239 // Closure that calculates the # live objects per region. Used
1240 // for verification purposes during the cleanup pause.
1241 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1242   CMBitMapRO* _bm;
1243   size_t _region_marked_bytes;
1244 
1245 public:
1246   CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,
1247                          BitMap* region_bm, BitMap* card_bm) :
1248     CMCountDataClosureBase(cm, region_bm, card_bm),
1249     _bm(bm), _region_marked_bytes(0) { }
1250 
1251   bool doHeapRegion(HeapRegion* hr) {
1252 
1253     if (hr->continuesHumongous()) {
1254       // We will ignore these here and process them when their
1255       // associated "starts humongous" region is processed (see
1256       // set_bit_for_heap_region()). Note that we cannot rely on their
1257       // associated "starts humongous" region to have their bit set to
1258       // 1 since, due to the region chunking in the parallel region
1259       // iteration, a "continues humongous" region might be visited
1260       // before its associated "starts humongous".
1261       return false;
1262     }
1263 
1264     HeapWord* nextTop = hr->next_top_at_mark_start();
1265     HeapWord* start   = hr->bottom();
1266 
1267     assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(),
1268            err_msg("Preconditions not met - "
1269                    "start: "PTR_FORMAT", nextTop: "PTR_FORMAT", end: "PTR_FORMAT,
1270                    start, nextTop, hr->end()));
1271 
1272     // Find the first marked object at or after "start".
1273     start = _bm->getNextMarkedWordAddress(start, nextTop);
1274 
1275     size_t marked_bytes = 0;
1276 
1277     while (start < nextTop) {
1278       oop obj = oop(start);
1279       int obj_sz = obj->size();
1280       HeapWord* obj_last = start + obj_sz - 1;
1281 
1282       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1283       BitMap::idx_t last_idx = _cm->card_bitmap_index_for(obj_last);
1284 
1285       // Set the bits in the card BM for this object (inclusive).
1286       set_card_bitmap_range(start_idx, last_idx);
1287 
1288       // Add the size of this object to the number of marked bytes.
1289       marked_bytes += (size_t)obj_sz * HeapWordSize;
1290 
1291       // Find the next marked object after this one.
1292       start = _bm->getNextMarkedWordAddress(obj_last + 1, nextTop);
1293     }
1294 
1295     // Mark the allocated-since-marking portion...
1296     HeapWord* top = hr->top();
1297     if (nextTop < top) {
1298       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(nextTop);
1299       BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top - 1);
1300 
1301       set_card_bitmap_range(start_idx, last_idx);
1302 
1303       // This definitely means the region has live objects.
1304       set_bit_for_region(hr);
1305     }
1306 
1307     // Update the live region bitmap.
1308     if (marked_bytes > 0) {
1309       set_bit_for_region(hr);
1310     }
1311 
1312     // Set the marked bytes for the current region so that
1313     // it can be queried by a calling verification routine
1314     _region_marked_bytes = marked_bytes;
1315 
1316     return false;
1317   }
1318 
1319   size_t region_marked_bytes() const { return _region_marked_bytes; }
1320 };
1321 
1322 // Heap region closure used for verifying the counting data
1323 // that was accumulated concurrently and aggregated during
1324 // the remark pause. This closure is applied to the heap
1325 // regions during the STW cleanup pause.
1326 
1327 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1328   ConcurrentMark* _cm;
1329   CalcLiveObjectsClosure _calc_cl;
1330   BitMap* _region_bm;   // Region BM to be verified
1331   BitMap* _card_bm;     // Card BM to be verified
1332   bool _verbose;        // verbose output?
1333 
1334   BitMap* _exp_region_bm; // Expected Region BM values
1335   BitMap* _exp_card_bm;   // Expected card BM values
1336 
1337   int _failures;
1338 
1339 public:
1340   VerifyLiveObjectDataHRClosure(ConcurrentMark* cm,
1341                                 BitMap* region_bm,
1342                                 BitMap* card_bm,
1343                                 BitMap* exp_region_bm,
1344                                 BitMap* exp_card_bm,
1345                                 bool verbose) :
1346     _cm(cm),
1347     _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm),
1348     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1349     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1350     _failures(0) { }
1351 
1352   int failures() const { return _failures; }
1353 
1354   bool doHeapRegion(HeapRegion* hr) {
1355     if (hr->continuesHumongous()) {
1356       // We will ignore these here and process them when their
1357       // associated "starts humongous" region is processed (see
1358       // set_bit_for_heap_region()). Note that we cannot rely on their
1359       // associated "starts humongous" region to have their bit set to
1360       // 1 since, due to the region chunking in the parallel region
1361       // iteration, a "continues humongous" region might be visited
1362       // before its associated "starts humongous".
1363       return false;
1364     }
1365 
1366     int failures = 0;
1367 
1368     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1369     // this region and set the corresponding bits in the expected region
1370     // and card bitmaps.
1371     bool res = _calc_cl.doHeapRegion(hr);
1372     assert(res == false, "should be continuing");
1373 
1374     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1375                     Mutex::_no_safepoint_check_flag);
1376 
1377     // Verify the marked bytes for this region.
1378     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1379     size_t act_marked_bytes = hr->next_marked_bytes();
1380 
1381     // We're not OK if expected marked bytes > actual marked bytes. It means
1382     // we have missed accounting for some objects during the actual marking.
1383     if (exp_marked_bytes > act_marked_bytes) {
1384       if (_verbose) {
1385         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1386                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1387                                hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1388       }
1389       failures += 1;
1390     }
1391 
1392     // Verify the bit, for this region, in the actual and expected
1393     // (which was just calculated) region bit maps.
1394     // We're not OK if the bit in the calculated expected region
1395     // bitmap is set and the bit in the actual region bitmap is not.
1396     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1397 
1398     bool expected = _exp_region_bm->at(index);
1399     bool actual = _region_bm->at(index);
1400     if (expected && !actual) {
1401       if (_verbose) {
1402         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1403                                "expected: %s, actual: %s",
1404                                hr->hrs_index(),
1405                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1406       }
1407       failures += 1;
1408     }
1409 
1410     // Verify that the card bit maps for the cards spanned by the current
1411     // region match. We have an error if we have a set bit in the expected
1412     // bit map and the corresponding bit in the actual bitmap is not set.
1413 
1414     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1415     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1416 
1417     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1418       expected = _exp_card_bm->at(i);
1419       actual = _card_bm->at(i);
1420 
1421       if (expected && !actual) {
1422         if (_verbose) {
1423           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1424                                  "expected: %s, actual: %s",
1425                                  hr->hrs_index(), i,
1426                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1427         }
1428         failures += 1;
1429       }
1430     }
1431 
1432     if (failures > 0 && _verbose) {
1433       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1434                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1435                              HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
1436                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1437     }
1438 
1439     _failures += failures;
1440 
1441     // We could stop iteration over the heap when we
1442     // find the first violating region by returning true.
1443     return false;
1444   }
1445 };
1446 
1447 
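     // Parallel gang task, used only when VerifyDuringGC is set, that applies
     // VerifyLiveObjectDataHRClosure to every region and accumulates the
     // number of verification failures across all workers.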
1448 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1449 protected:
1450   G1CollectedHeap* _g1h;
1451   ConcurrentMark* _cm;
1452   BitMap* _actual_region_bm;
1453   BitMap* _actual_card_bm;
1454 
1455   uint    _n_workers;
1456 
1457   BitMap* _expected_region_bm;
1458   BitMap* _expected_card_bm;
1459 
1460   int  _failures;
1461   bool _verbose;
1462 
1463 public:
1464   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1465                             BitMap* region_bm, BitMap* card_bm,
1466                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1467     : AbstractGangTask("G1 verify final counting"),
1468       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1469       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1470       _n_workers(0),
1471       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1472       _failures(0), _verbose(false) {
1473     assert(VerifyDuringGC, "don't call this otherwise");
1474 
1475     // Use the value already set as the number of active threads
1476     // in the call to run_task().
1477     if (G1CollectedHeap::use_parallel_gc_threads()) {
1478       assert( _g1h->workers()->active_workers() > 0,
1479         "Should have been previously set");
1480       _n_workers = _g1h->workers()->active_workers();
1481     } else {
1482       _n_workers = 1;
1483     }
1484 
1485     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1486     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1487 
1488     _verbose = _cm->verbose_medium();
1489   }
1490 
1491   void work(uint worker_id) {
1492     assert(worker_id < _n_workers, "invariant");
1493 
1494     VerifyLiveObjectDataHRClosure verify_cl(_cm,
1495                                             _actual_region_bm, _actual_card_bm,
1496                                             _expected_region_bm,
1497                                             _expected_card_bm,
1498                                             _verbose);
1499 
1500     if (G1CollectedHeap::use_parallel_gc_threads()) {
1501       _g1h->heap_region_par_iterate_chunked(&verify_cl,
1502                                             worker_id,
1503                                             _n_workers,
1504                                             HeapRegion::VerifyCountClaimValue);
1505     } else {
1506       _g1h->heap_region_iterate(&verify_cl);
1507     }
1508 
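         // Accumulate this worker's failures into the task-wide total. Several
         // workers may finish at the same time, so the update has to be atomic.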
1509     Atomic::add(verify_cl.failures(), &_failures);
1510   }
1511 
1512   int failures() const { return _failures; }
1513 };
1514 
1515 // Closure that finalizes the liveness counting data.
1516 // Used during the cleanup pause.
1517 // Sets the bits corresponding to the interval [NTAMS, top]
1518 // (which contains the implicitly live objects) in the
1519 // card liveness bitmap. Also sets the bit for each region,
1520 // containing live data, in the region liveness bitmap.
1521 
1522 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1523  public:
1524   FinalCountDataUpdateClosure(ConcurrentMark* cm,
1525                               BitMap* region_bm,
1526                               BitMap* card_bm) :
1527     CMCountDataClosureBase(cm, region_bm, card_bm) { }
1528 
1529   bool doHeapRegion(HeapRegion* hr) {
1530 
1531     if (hr->continuesHumongous()) {
1532       // We will ignore these here and process them when their
1533       // associated "starts humongous" region is processed (see
1534       // set_bit_for_heap_region()). Note that we cannot rely on their
1535       // associated "starts humongous" region to have its bit set to
1536       // 1 since, due to the region chunking in the parallel region
1537       // iteration, a "continues humongous" region might be visited
1538       // before its associated "starts humongous".
1539       return false;
1540     }
1541 
1542     HeapWord* ntams = hr->next_top_at_mark_start();
1543     HeapWord* top   = hr->top();
1544 
1545     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1546 
1547     // Mark the allocated-since-marking portion...
1548     if (ntams < top) {
1549       // This definitely means the region has live objects.
1550       set_bit_for_region(hr);
1551     }
1552 
1553     // Now set the bits for [ntams, top]
1554     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1555     // set_card_bitmap_range() expects the last_idx to be within
1556     // the range of the bit map (see assertion in set_card_bitmap_range()),
1557     // so limit it to that range with this application of MIN2.
1558     BitMap::idx_t last_idx = MIN2(_cm->card_bitmap_index_for(top),
1559                                   _card_bm->size()-1);
1560     if (start_idx < _card_bm->size()) {
1561       set_card_bitmap_range(start_idx, last_idx);
1562     } else {
1563       // To reach here start_idx must be beyond the end of
1564       // the bit map and last_idx must have been limited by
1565       // the MIN2().
1566       assert(start_idx == last_idx + 1,
1567         err_msg("Not beyond end start_idx " SIZE_FORMAT " last_idx "
1568                 SIZE_FORMAT, start_idx, last_idx));
1569     }
1570 
1571     // Set the bit for the region if it contains live data
1572     if (hr->next_marked_bytes() > 0) {
1573       set_bit_for_region(hr);
1574     }
1575 
1576     return false;
1577   }
1578 };
1579 
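     // Parallel gang task that applies FinalCountDataUpdateClosure to every
     // region during the cleanup pause.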
1580 class G1ParFinalCountTask: public AbstractGangTask {
1581 protected:
1582   G1CollectedHeap* _g1h;
1583   ConcurrentMark* _cm;
1584   BitMap* _actual_region_bm;
1585   BitMap* _actual_card_bm;
1586 
1587   uint    _n_workers;
1588 
1589 public:
1590   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1591     : AbstractGangTask("G1 final counting"),
1592       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1593       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1594       _n_workers(0) {
1595     // Use the value already set as the number of active threads
1596     // in the call to run_task().
1597     if (G1CollectedHeap::use_parallel_gc_threads()) {
1598       assert( _g1h->workers()->active_workers() > 0,
1599         "Should have been previously set");
1600       _n_workers = _g1h->workers()->active_workers();
1601     } else {
1602       _n_workers = 1;
1603     }
1604   }
1605 
1606   void work(uint worker_id) {
1607     assert(worker_id < _n_workers, "invariant");
1608 
1609     FinalCountDataUpdateClosure final_update_cl(_cm,
1610                                                 _actual_region_bm,
1611                                                 _actual_card_bm);
1612 
1613     if (G1CollectedHeap::use_parallel_gc_threads()) {
1614       _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1615                                             worker_id,
1616                                             _n_workers,
1617                                             HeapRegion::FinalCountClaimValue);
1618     } else {
1619       _g1h->heap_region_iterate(&final_update_cl);
1620     }
1621   }
1622 };
1623 
1624 class G1ParNoteEndTask;
1625 
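     // Per-worker closure applied to each region at the end of marking: it
     // records end-of-marking information for the region, frees the region if
     // it turned out to be empty, and gathers per-worker timing statistics.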
1626 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1627   G1CollectedHeap* _g1;
1628   int _worker_num;
1629   size_t _max_live_bytes;
1630   uint _regions_claimed;
1631   size_t _freed_bytes;
1632   FreeRegionList* _local_cleanup_list;
1633   OldRegionSet* _old_proxy_set;
1634   HumongousRegionSet* _humongous_proxy_set;
1635   HRRSCleanupTask* _hrrs_cleanup_task;
1636   double _claimed_region_time;
1637   double _max_region_time;
1638 
1639 public:
1640   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1641                              int worker_num,
1642                              FreeRegionList* local_cleanup_list,
1643                              OldRegionSet* old_proxy_set,
1644                              HumongousRegionSet* humongous_proxy_set,
1645                              HRRSCleanupTask* hrrs_cleanup_task) :
1646     _g1(g1), _worker_num(worker_num),
1647     _max_live_bytes(0), _regions_claimed(0),
1648     _freed_bytes(0),
1649     _local_cleanup_list(local_cleanup_list),
1650     _old_proxy_set(old_proxy_set),
1651     _humongous_proxy_set(humongous_proxy_set),
1652     _hrrs_cleanup_task(hrrs_cleanup_task),
1653     _claimed_region_time(0.0), _max_region_time(0.0) { }
1654 
1655   size_t freed_bytes() { return _freed_bytes; }
1656 
1657   bool doHeapRegion(HeapRegion *hr) {
1658     if (hr->continuesHumongous()) {
1659       return false;
1660     }
1661     // We use a claim value of zero here because all regions
1662     // were claimed with value 1 in the FinalCount task.
1663     _g1->reset_gc_time_stamps(hr);
1664     double start = os::elapsedTime();
1665     _regions_claimed++;
1666     hr->note_end_of_marking();
1667     _max_live_bytes += hr->max_live_bytes();
1668     _g1->free_region_if_empty(hr,
1669                               &_freed_bytes,
1670                               _local_cleanup_list,
1671                               _old_proxy_set,
1672                               _humongous_proxy_set,
1673                               _hrrs_cleanup_task,
1674                               true /* par */);
1675     double region_time = (os::elapsedTime() - start);
1676     _claimed_region_time += region_time;
1677     if (region_time > _max_region_time) {
1678       _max_region_time = region_time;
1679     }
1680     return false;
1681   }
1682 
1683   size_t max_live_bytes() { return _max_live_bytes; }
1684   uint regions_claimed() { return _regions_claimed; }
1685   double claimed_region_time_sec() { return _claimed_region_time; }
1686   double max_region_time_sec() { return _max_region_time; }
1687 };
1688 
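     // Parallel gang task that runs G1NoteEndOfConcMarkClosure over the heap
     // and then merges each worker's local free list and statistics into the
     // global cleanup list and totals.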
1689 class G1ParNoteEndTask: public AbstractGangTask {
1690   friend class G1NoteEndOfConcMarkClosure;
1691 
1692 protected:
1693   G1CollectedHeap* _g1h;
1694   size_t _max_live_bytes;
1695   size_t _freed_bytes;
1696   FreeRegionList* _cleanup_list;
1697 
1698 public:
1699   G1ParNoteEndTask(G1CollectedHeap* g1h,
1700                    FreeRegionList* cleanup_list) :
1701     AbstractGangTask("G1 note end"), _g1h(g1h),
1702     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1703 
1704   void work(uint worker_id) {
1705     double start = os::elapsedTime();
1706     FreeRegionList local_cleanup_list("Local Cleanup List");
1707     OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
1708     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1709     HRRSCleanupTask hrrs_cleanup_task;
1710     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
1711                                            &old_proxy_set,
1712                                            &humongous_proxy_set,
1713                                            &hrrs_cleanup_task);
1714     if (G1CollectedHeap::use_parallel_gc_threads()) {
1715       _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1716                                             _g1h->workers()->active_workers(),
1717                                             HeapRegion::NoteEndClaimValue);
1718     } else {
1719       _g1h->heap_region_iterate(&g1_note_end);
1720     }
1721     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1722 
1723     // Now update the lists
1724     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1725                                             NULL /* free_list */,
1726                                             &old_proxy_set,
1727                                             &humongous_proxy_set,
1728                                             true /* par */);
1729     {
1730       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1731       _max_live_bytes += g1_note_end.max_live_bytes();
1732       _freed_bytes += g1_note_end.freed_bytes();
1733 
1734       // If we iterate over the global cleanup list at the end of
1735       // cleanup to do this printing we will not guarantee to only
1736       // generate output for the newly-reclaimed regions (the list
1737       // might not be empty at the beginning of cleanup; we might
1738       // still be working on its previous contents). So we do the
1739       // printing here, before we append the new regions to the global
1740       // cleanup list.
1741 
1742       G1HRPrinter* hr_printer = _g1h->hr_printer();
1743       if (hr_printer->is_active()) {
1744         HeapRegionLinkedListIterator iter(&local_cleanup_list);
1745         while (iter.more_available()) {
1746           HeapRegion* hr = iter.get_next();
1747           hr_printer->cleanup(hr);
1748         }
1749       }
1750 
1751       _cleanup_list->add_as_tail(&local_cleanup_list);
1752       assert(local_cleanup_list.is_empty(), "post-condition");
1753 
1754       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1755     }
1756   }
1757   size_t max_live_bytes() { return _max_live_bytes; }
1758   size_t freed_bytes() { return _freed_bytes; }
1759 };
1760 
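     // Parallel gang task that scrubs the remembered sets using the liveness
     // information recorded in the region and card bitmaps.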
1761 class G1ParScrubRemSetTask: public AbstractGangTask {
1762 protected:
1763   G1RemSet* _g1rs;
1764   BitMap* _region_bm;
1765   BitMap* _card_bm;
1766 public:
1767   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1768                        BitMap* region_bm, BitMap* card_bm) :
1769     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1770     _region_bm(region_bm), _card_bm(card_bm) { }
1771 
1772   void work(uint worker_id) {
1773     if (G1CollectedHeap::use_parallel_gc_threads()) {
1774       _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1775                        HeapRegion::ScrubRemSetClaimValue);
1776     } else {
1777       _g1rs->scrub(_region_bm, _card_bm);
1778     }
1779   }
1780 
1781 };
1782 
1783 void ConcurrentMark::cleanup() {
1784   // world is stopped at this checkpoint
1785   assert(SafepointSynchronize::is_at_safepoint(),
1786          "world should be stopped");
1787   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1788 
1789   // If a full collection has happened, we shouldn't do this.
1790   if (has_aborted()) {
1791     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1792     return;
1793   }
1794 
1795   HRSPhaseSetter x(HRSPhaseCleanup);
1796   g1h->verify_region_sets_optional();
1797 
1798   if (VerifyDuringGC) {
1799     HandleMark hm;  // handle scope
1800     gclog_or_tty->print(" VerifyDuringGC:(before)");
1801     Universe::heap()->prepare_for_verify();
1802     Universe::verify(/* silent      */ false,
1803                      /* option      */ VerifyOption_G1UsePrevMarking);
1804   }
1805 
1806   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1807   g1p->record_concurrent_mark_cleanup_start();
1808 
1809   double start = os::elapsedTime();
1810 
1811   HeapRegionRemSet::reset_for_cleanup_tasks();
1812 
1813   uint n_workers;
1814 
1815   // Do counting once more with the world stopped for good measure.
1816   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1817 
1818   if (G1CollectedHeap::use_parallel_gc_threads()) {
1819     assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
1820            "sanity check");
1821 
1822     g1h->set_par_threads();
1823     n_workers = g1h->n_par_threads();
1824     assert(g1h->n_par_threads() == n_workers,
1825            "Should not have been reset");
1826     g1h->workers()->run_task(&g1_par_count_task);
1827     // Done with the parallel phase so reset to 0.
1828     g1h->set_par_threads(0);
1829 
1830     assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
1831            "sanity check");
1832   } else {
1833     n_workers = 1;
1834     g1_par_count_task.work(0);
1835   }
1836 
1837   if (VerifyDuringGC) {
1838     // Verify that the counting data accumulated during marking matches
1839     // that calculated by walking the marking bitmap.
1840 
1841     // Bitmaps to hold expected values
1842     BitMap expected_region_bm(_region_bm.size(), false);
1843     BitMap expected_card_bm(_card_bm.size(), false);
1844 
1845     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1846                                                  &_region_bm,
1847                                                  &_card_bm,
1848                                                  &expected_region_bm,
1849                                                  &expected_card_bm);
1850 
1851     if (G1CollectedHeap::use_parallel_gc_threads()) {
1852       g1h->set_par_threads((int)n_workers);
1853       g1h->workers()->run_task(&g1_par_verify_task);
1854       // Done with the parallel phase so reset to 0.
1855       g1h->set_par_threads(0);
1856 
1857       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
1858              "sanity check");
1859     } else {
1860       g1_par_verify_task.work(0);
1861     }
1862 
1863     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1864   }
1865 
1866   size_t start_used_bytes = g1h->used();
1867   g1h->set_marking_complete();
1868 
1869   double count_end = os::elapsedTime();
1870   double this_final_counting_time = (count_end - start);
1871   _total_counting_time += this_final_counting_time;
1872 
1873   if (G1PrintRegionLivenessInfo) {
1874     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1875     _g1h->heap_region_iterate(&cl);
1876   }
1877 
1878   // Install newly created mark bitMap as "prev".
1879   swapMarkBitMaps();
1880 
1881   g1h->reset_gc_time_stamp();
1882 
1883   // Note end of marking in all heap regions.
1884   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1885   if (G1CollectedHeap::use_parallel_gc_threads()) {
1886     g1h->set_par_threads((int)n_workers);
1887     g1h->workers()->run_task(&g1_par_note_end_task);
1888     g1h->set_par_threads(0);
1889 
1890     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1891            "sanity check");
1892   } else {
1893     g1_par_note_end_task.work(0);
1894   }
1895   g1h->check_gc_time_stamps();
1896 
1897   if (!cleanup_list_is_empty()) {
1898     // The cleanup list is not empty, so we'll have to process it
1899     // concurrently. Notify anyone else that might be wanting free
1900     // regions that there will be more free regions coming soon.
1901     g1h->set_free_regions_coming();
1902   }
1903 
1904   // Do the remembered set scrubbing before the record_concurrent_mark_cleanup_end()
1905   // call below, since it affects the metric by which we sort the heap regions.
1906   if (G1ScrubRemSets) {
1907     double rs_scrub_start = os::elapsedTime();
1908     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1909     if (G1CollectedHeap::use_parallel_gc_threads()) {
1910       g1h->set_par_threads((int)n_workers);
1911       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1912       g1h->set_par_threads(0);
1913 
1914       assert(g1h->check_heap_region_claim_values(
1915                                             HeapRegion::ScrubRemSetClaimValue),
1916              "sanity check");
1917     } else {
1918       g1_par_scrub_rs_task.work(0);
1919     }
1920 
1921     double rs_scrub_end = os::elapsedTime();
1922     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1923     _total_rs_scrub_time += this_rs_scrub_time;
1924   }
1925 
1926   // this will also free any regions totally full of garbage objects,
1927   // and sort the regions.
1928   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
1929 
1930   // Statistics.
1931   double end = os::elapsedTime();
1932   _cleanup_times.add((end - start) * 1000.0);
1933 
1934   if (G1Log::fine()) {
1935     g1h->print_size_transition(gclog_or_tty,
1936                                start_used_bytes,
1937                                g1h->used(),
1938                                g1h->capacity());
1939   }
1940 
1941   // Clean up will have freed any regions completely full of garbage.
1942   // Update the soft reference policy with the new heap occupancy.
1943   Universe::update_heap_info_at_gc();
1944 
1945   // We need to make this be a "collection" so any collection pause that
1946   // races with it goes around and waits for completeCleanup to finish.
1947   g1h->increment_total_collections();
1948 
1949   // We reclaimed old regions so we should calculate the sizes to make
1950   // sure we update the old gen/space data.
1951   g1h->g1mm()->update_sizes();
1952 
1953   if (VerifyDuringGC) {
1954     HandleMark hm;  // handle scope
1955     gclog_or_tty->print(" VerifyDuringGC:(after)");
1956     Universe::heap()->prepare_for_verify();
1957     Universe::verify(/* silent      */ false,
1958                      /* option      */ VerifyOption_G1UsePrevMarking);
1959   }
1960 
1961   g1h->verify_region_sets_optional();
1962 }
1963 
1964 void ConcurrentMark::completeCleanup() {
1965   if (has_aborted()) return;
1966 
1967   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1968 
1969   _cleanup_list.verify_optional();
1970   FreeRegionList tmp_free_list("Tmp Free List");
1971 
1972   if (G1ConcRegionFreeingVerbose) {
1973     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1974                            "cleanup list has %u entries",
1975                            _cleanup_list.length());
1976   }
1977 
1978   // No one else should be accessing the _cleanup_list at this point,
1979   // so it's not necessary to take any locks.
1980   while (!_cleanup_list.is_empty()) {
1981     HeapRegion* hr = _cleanup_list.remove_head();
1982     assert(hr != NULL, "the list was not empty");
1983     hr->par_clear();
1984     tmp_free_list.add_as_tail(hr);
1985 
1986     // Instead of adding one region at a time to the secondary_free_list,
1987     // we accumulate them in the local list and move them a few at a
1988     // time. This also cuts down on the number of notify_all() calls
1989     // we do during this process. We'll also append the local list when
1990     // _cleanup_list is empty (which means we just removed the last
1991     // region from the _cleanup_list).
1992     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1993         _cleanup_list.is_empty()) {
1994       if (G1ConcRegionFreeingVerbose) {
1995         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1996                                "appending %u entries to the secondary_free_list, "
1997                                "cleanup list still has %u entries",
1998                                tmp_free_list.length(),
1999                                _cleanup_list.length());
2000       }
2001 
2002       {
2003         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2004         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
2005         SecondaryFreeList_lock->notify_all();
2006       }
2007 
2008       if (G1StressConcRegionFreeing) {
2009         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2010           os::sleep(Thread::current(), (jlong) 1, false);
2011         }
2012       }
2013     }
2014   }
2015   assert(tmp_free_list.is_empty(), "post-condition");
2016 }
2017 
2018 // Support closures for reference processing in G1
2019 
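     // Liveness predicate used during reference processing and string table
     // cleanup: oops outside the G1 reserved heap are always treated as alive;
     // for oops inside the heap we defer to the marking information via
     // is_obj_ill().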
2020 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2021   HeapWord* addr = (HeapWord*)obj;
2022   return addr != NULL &&
2023          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2024 }
2025 
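     // 'Keep Alive' closure used by serial (VM thread) reference processing:
     // any unmarked object in the G1 heap that it visits is marked, counted
     // and pushed on the global mark stack.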
2026 class G1CMKeepAliveClosure: public ExtendedOopClosure {
2027   G1CollectedHeap* _g1;
2028   ConcurrentMark*  _cm;
2029  public:
2030   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
2031     _g1(g1), _cm(cm) {
2032     assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
2033   }
2034 
2035   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2036   virtual void do_oop(      oop* p) { do_oop_work(p); }
2037 
2038   template <class T> void do_oop_work(T* p) {
2039     oop obj = oopDesc::load_decode_heap_oop(p);
2040     HeapWord* addr = (HeapWord*)obj;
2041 
2042     if (_cm->verbose_high()) {
2043       gclog_or_tty->print_cr("\t[0] we're looking at location "
2044                              "*"PTR_FORMAT" = "PTR_FORMAT,
2045                              p, (void*) obj);
2046     }
2047 
2048     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2049       _cm->mark_and_count(obj);
2050       _cm->mark_stack_push(obj);
2051     }
2052   }
2053 };
2054 
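     // Drains the global mark stack during serial reference processing, using
     // the keep-alive closure above.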
2055 class G1CMDrainMarkingStackClosure: public VoidClosure {
2056   ConcurrentMark*               _cm;
2057   CMMarkStack*                  _markStack;
2058   G1CMKeepAliveClosure*         _oopClosure;
2059  public:
2060   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
2061                                G1CMKeepAliveClosure* oopClosure) :
2062     _cm(cm),
2063     _markStack(markStack),
2064     _oopClosure(oopClosure) { }
2065 
2066   void do_void() {
2067     _markStack->drain(_oopClosure, _cm->nextMarkBitMap(), false);
2068   }
2069 };
2070 
2071 // 'Keep Alive' closure used by parallel reference processing.
2072 // An instance of this closure is used in the parallel reference processing
2073 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2074 // the G1CMKeepAliveClosure as it is MT-safe and, since reference objects are
2075 // only placed on the discovered ref lists once, we can mark and push with no
2076 // need to check whether the object has already been marked. Using the
2077 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2078 // operating on the global mark stack. This means that an individual
2079 // worker would be doing lock-free pushes while it processes its own
2080 // discovered ref list followed by drain call. If the discovered ref lists
2081 // are unbalanced then this could cause interference with the other
2082 // workers. Using a CMTask (and its embedded local data structures)
2083 // avoids that potential interference.
2084 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2085   ConcurrentMark*  _cm;
2086   CMTask*          _task;
2087   int              _ref_counter_limit;
2088   int              _ref_counter;
2089  public:
2090   G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
2091     _cm(cm), _task(task),
2092     _ref_counter_limit(G1RefProcDrainInterval) {
2093     assert(_ref_counter_limit > 0, "sanity");
2094     _ref_counter = _ref_counter_limit;
2095   }
2096 
2097   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2098   virtual void do_oop(      oop* p) { do_oop_work(p); }
2099 
2100   template <class T> void do_oop_work(T* p) {
2101     if (!_cm->has_overflown()) {
2102       oop obj = oopDesc::load_decode_heap_oop(p);
2103       if (_cm->verbose_high()) {
2104         gclog_or_tty->print_cr("\t[%d] we're looking at location "
2105                                "*"PTR_FORMAT" = "PTR_FORMAT,
2106                                _task->task_id(), p, (void*) obj);
2107       }
2108 
2109       _task->deal_with_reference(obj);
2110       _ref_counter--;
2111 
2112       if (_ref_counter == 0) {
2113         // We have dealt with _ref_counter_limit references, pushing them and objects
2114         // reachable from them on to the local stack (and possibly the global stack).
2115         // Call do_marking_step() to process these entries. We call the routine in a
2116         // loop, which we'll exit if there's nothing more to do (i.e. we're done
2117         // with the entries that we've pushed as a result of the deal_with_reference
2118         // calls above) or we overflow.
2119         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2120         // while there may still be some work to do. (See the comment at the
2121         // beginning of CMTask::do_marking_step() for those conditions - one of which
2122         // is reaching the specified time target.) It is only when
2123         // CMTask::do_marking_step() returns without setting the has_aborted() flag
2124         // that the marking has completed.
2125         do {
2126           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2127           _task->do_marking_step(mark_step_duration_ms,
2128                                  false /* do_stealing    */,
2129                                  false /* do_termination */);
2130         } while (_task->has_aborted() && !_cm->has_overflown());
2131         _ref_counter = _ref_counter_limit;
2132       }
2133     } else {
2134       if (_cm->verbose_high()) {
2135          gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2136       }
2137     }
2138   }
2139 };
2140 
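     // 'Drain' closure used by parallel reference processing: completely drains
     // the worker's local data structures and the global mark stack by calling
     // CMTask::do_marking_step() until it completes without aborting.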
2141 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2142   ConcurrentMark* _cm;
2143   CMTask* _task;
2144  public:
2145   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2146     _cm(cm), _task(task) { }
2147 
2148   void do_void() {
2149     do {
2150       if (_cm->verbose_high()) {
2151         gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2152                                _task->task_id());
2153       }
2154 
2155       // We call CMTask::do_marking_step() to completely drain the local and
2156       // global marking stacks. The routine is called in a loop, which we'll
2157       // exit if there's nothing more to do (i.e. we've completely drained the
2158       // entries that were pushed as a result of applying the
2159       // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2160       // lists above) or we overflow the global marking stack.
2161       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2162       // while there may still be some work to do. (See the comment at the
2163       // beginning of CMTask::do_marking_step() for those conditions - one of which
2164       // is reaching the specified time target.) It is only when
2165       // CMTask::do_marking_step() returns without setting the has_aborted() flag
2166       // that the marking has completed.
2167 
2168       _task->do_marking_step(1000000000.0 /* something very large */,
2169                              true /* do_stealing    */,
2170                              true /* do_termination */);
2171     } while (_task->has_aborted() && !_cm->has_overflown());
2172   }
2173 };
2174 
2175 // Implementation of AbstractRefProcTaskExecutor for parallel
2176 // reference processing at the end of G1 concurrent marking
2177 
2178 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2179 private:
2180   G1CollectedHeap* _g1h;
2181   ConcurrentMark*  _cm;
2182   WorkGang*        _workers;
2183   int              _active_workers;
2184 
2185 public:
2186   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2187                         ConcurrentMark* cm,
2188                         WorkGang* workers,
2189                         int n_workers) :
2190     _g1h(g1h), _cm(cm),
2191     _workers(workers), _active_workers(n_workers) { }
2192 
2193   // Executes the given task using concurrent marking worker threads.
2194   virtual void execute(ProcessTask& task);
2195   virtual void execute(EnqueueTask& task);
2196 };
2197 
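     // Gang task that adapts the reference processor's ProcessTask so that it
     // can run on the GC worker threads, giving each worker its own
     // CMTask-based keep-alive and drain closures.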
2198 class G1CMRefProcTaskProxy: public AbstractGangTask {
2199   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2200   ProcessTask&     _proc_task;
2201   G1CollectedHeap* _g1h;
2202   ConcurrentMark*  _cm;
2203 
2204 public:
2205   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2206                      G1CollectedHeap* g1h,
2207                      ConcurrentMark* cm) :
2208     AbstractGangTask("Process reference objects in parallel"),
2209     _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
2210 
2211   virtual void work(uint worker_id) {
2212     CMTask* marking_task = _cm->task(worker_id);
2213     G1CMIsAliveClosure g1_is_alive(_g1h);
2214     G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
2215     G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2216 
2217     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2218   }
2219 };
2220 
2221 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2222   assert(_workers != NULL, "Need parallel worker threads.");
2223 
2224   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2225 
2226   // We need to reset the phase for each task execution so that
2227   // the termination protocol of CMTask::do_marking_step works.
2228   _cm->set_phase(_active_workers, false /* concurrent */);
2229   _g1h->set_par_threads(_active_workers);
2230   _workers->run_task(&proc_task_proxy);
2231   _g1h->set_par_threads(0);
2232 }
2233 
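     // Gang task that adapts the reference processor's EnqueueTask so that the
     // enqueue phase can also run on the GC worker threads.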
2234 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2235   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2236   EnqueueTask& _enq_task;
2237 
2238 public:
2239   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2240     AbstractGangTask("Enqueue reference objects in parallel"),
2241     _enq_task(enq_task) { }
2242 
2243   virtual void work(uint worker_id) {
2244     _enq_task.work(worker_id);
2245   }
2246 };
2247 
2248 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2249   assert(_workers != NULL, "Need parallel worker threads.");
2250 
2251   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2252 
2253   _g1h->set_par_threads(_active_workers);
2254   _workers->run_task(&enq_task_proxy);
2255   _g1h->set_par_threads(0);
2256 }
2257 
2258 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2259   ResourceMark rm;
2260   HandleMark   hm;
2261 
2262   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2263 
2264   // Is alive closure.
2265   G1CMIsAliveClosure g1_is_alive(g1h);
2266 
2267   // Inner scope to exclude the cleaning of the string and symbol
2268   // tables from the displayed time.
2269   {
2270     if (G1Log::finer()) {
2271       gclog_or_tty->put(' ');
2272     }
2273     TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty);
2274 
2275     ReferenceProcessor* rp = g1h->ref_processor_cm();
2276 
2277     // See the comment in G1CollectedHeap::ref_processing_init()
2278     // about how reference processing currently works in G1.
2279 
2280     // Process weak references.
2281     rp->setup_policy(clear_all_soft_refs);
2282     assert(_markStack.isEmpty(), "mark stack should be empty");
2283 
2284     G1CMKeepAliveClosure g1_keep_alive(g1h, this);
2285     G1CMDrainMarkingStackClosure
2286       g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
2287 
2288     // We use the work gang from the G1CollectedHeap and we utilize all
2289     // the worker threads.
2290     uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
2291     active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U);
2292 
2293     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2294                                               g1h->workers(), active_workers);
2295 
2296     if (rp->processing_is_mt()) {
2297       // Set the degree of MT here.  If the discovery is done MT, there
2298       // may have been a different number of threads doing the discovery
2299       // and a different number of discovered lists may have Ref objects.
2300       // That is OK as long as the Reference lists are balanced (see
2301       // balance_all_queues() and balance_queues()).
2302       rp->set_active_mt_degree(active_workers);
2303 
2304       rp->process_discovered_references(&g1_is_alive,
2305                                         &g1_keep_alive,
2306                                         &g1_drain_mark_stack,
2307                                         &par_task_executor);
2308 
2309       // The work routines of the parallel keep_alive and drain_marking_stack
2310       // will set the has_overflown flag if we overflow the global marking
2311       // stack.
2312     } else {
2313       rp->process_discovered_references(&g1_is_alive,
2314                                         &g1_keep_alive,
2315                                         &g1_drain_mark_stack,
2316                                         NULL);
2317     }
2318 
2319     assert(_markStack.overflow() || _markStack.isEmpty(),
2320             "mark stack should be empty (unless it overflowed)");
2321     if (_markStack.overflow()) {
2322       // Should have been done already when we tried to push an
2323       // entry on to the global mark stack. But let's do it again.
2324       set_has_overflown();
2325     }
2326 
2327     if (rp->processing_is_mt()) {
2328       assert(rp->num_q() == active_workers, "why not");
2329       rp->enqueue_discovered_references(&par_task_executor);
2330     } else {
2331       rp->enqueue_discovered_references();
2332     }
2333 
2334     rp->verify_no_references_recorded();
2335     assert(!rp->discovery_enabled(), "Post condition");
2336   }
2337 
2338   // Now clean up stale oops in StringTable
2339   StringTable::unlink(&g1_is_alive);
2340   // Clean up unreferenced symbols in symbol table.
2341   SymbolTable::unlink();
2342 }
2343 
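     // Swap the roles of the two marking bitmaps: the bitmap populated by the
     // marking that just completed becomes the (read-only) "prev" bitmap, and
     // the old "prev" bitmap becomes the "next" bitmap for the following cycle.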
2344 void ConcurrentMark::swapMarkBitMaps() {
2345   CMBitMapRO* temp = _prevMarkBitMap;
2346   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2347   _nextMarkBitMap  = (CMBitMap*)  temp;
2348 }
2349 
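     // Gang task used for the final remark: each active worker repeatedly calls
     // CMTask::do_marking_step() (with stealing and termination enabled) until
     // the step completes without aborting or the global mark stack overflows.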
2350 class CMRemarkTask: public AbstractGangTask {
2351 private:
2352   ConcurrentMark *_cm;
2353 
2354 public:
2355   void work(uint worker_id) {
2356     // Since all available tasks are actually started, we should
2357     // only proceed if we're supposed to be active.
2358     if (worker_id < _cm->active_tasks()) {
2359       CMTask* task = _cm->task(worker_id);
2360       task->record_start_time();
2361       do {
2362         task->do_marking_step(1000000000.0 /* something very large */,
2363                               true /* do_stealing    */,
2364                               true /* do_termination */);
2365       } while (task->has_aborted() && !_cm->has_overflown());
2366       // If we overflow, then we do not want to restart. We instead
2367       // want to abort remark and do concurrent marking again.
2368       task->record_end_time();
2369     }
2370   }
2371 
2372   CMRemarkTask(ConcurrentMark* cm, int active_workers) :
2373     AbstractGangTask("Par Remark"), _cm(cm) {
2374     _cm->terminator()->reset_for_reuse(active_workers);
2375   }
2376 };
2377 
2378 void ConcurrentMark::checkpointRootsFinalWork() {
2379   ResourceMark rm;
2380   HandleMark   hm;
2381   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2382 
2383   g1h->ensure_parsability(false);
2384 
2385   if (G1CollectedHeap::use_parallel_gc_threads()) {
2386     G1CollectedHeap::StrongRootsScope srs(g1h);
2387     // this is remark, so we'll use up all active threads
2388     uint active_workers = g1h->workers()->active_workers();
2389     if (active_workers == 0) {
2390       assert(active_workers > 0, "Should have been set earlier");
2391       active_workers = (uint) ParallelGCThreads;
2392       g1h->workers()->set_active_workers(active_workers);
2393     }
2394     set_phase(active_workers, false /* concurrent */);
2395     // Leave _parallel_marking_threads at its
2396     // value originally calculated in the ConcurrentMark
2397     // constructor and pass values of the active workers
2398     // through the gang in the task.
2399 
2400     CMRemarkTask remarkTask(this, active_workers);
2401     g1h->set_par_threads(active_workers);
2402     g1h->workers()->run_task(&remarkTask);
2403     g1h->set_par_threads(0);
2404   } else {
2405     G1CollectedHeap::StrongRootsScope srs(g1h);
2406     // this is remark, so we'll use up all available threads
2407     uint active_workers = 1;
2408     set_phase(active_workers, false /* concurrent */);
2409 
2410     CMRemarkTask remarkTask(this, active_workers);
2411     // We will start all available threads, even if we decide that the
2412     // active_workers will be fewer. The extra ones will just bail out
2413     // immediately.
2414     remarkTask.work(0);
2415   }
2416   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2417   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2418 
2419   print_stats();
2420 
2421 #if VERIFY_OBJS_PROCESSED
2422   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2423     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2424                            _scan_obj_cl.objs_processed,
2425                            ThreadLocalObjQueue::objs_enqueued);
2426     guarantee(_scan_obj_cl.objs_processed ==
2427               ThreadLocalObjQueue::objs_enqueued,
2428               "Different number of objs processed and enqueued.");
2429   }
2430 #endif
2431 }
2432 
2433 #ifndef PRODUCT
2434 
2435 class PrintReachableOopClosure: public OopClosure {
2436 private:
2437   G1CollectedHeap* _g1h;
2438   outputStream*    _out;
2439   VerifyOption     _vo;
2440   bool             _all;
2441 
2442 public:
2443   PrintReachableOopClosure(outputStream* out,
2444                            VerifyOption  vo,
2445                            bool          all) :
2446     _g1h(G1CollectedHeap::heap()),
2447     _out(out), _vo(vo), _all(all) { }
2448 
2449   void do_oop(narrowOop* p) { do_oop_work(p); }
2450   void do_oop(      oop* p) { do_oop_work(p); }
2451 
2452   template <class T> void do_oop_work(T* p) {
2453     oop         obj = oopDesc::load_decode_heap_oop(p);
2454     const char* str = NULL;
2455     const char* str2 = "";
2456 
2457     if (obj == NULL) {
2458       str = "";
2459     } else if (!_g1h->is_in_g1_reserved(obj)) {
2460       str = " O";
2461     } else {
2462       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2463       guarantee(hr != NULL, "invariant");
2464       bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2465       bool marked = _g1h->is_marked(obj, _vo);
2466 
2467       if (over_tams) {
2468         str = " >";
2469         if (marked) {
2470           str2 = " AND MARKED";
2471         }
2472       } else if (marked) {
2473         str = " M";
2474       } else {
2475         str = " NOT";
2476       }
2477     }
2478 
2479     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2480                    p, (void*) obj, str, str2);
2481   }
2482 };
2483 
2484 class PrintReachableObjectClosure : public ObjectClosure {
2485 private:
2486   G1CollectedHeap* _g1h;
2487   outputStream*    _out;
2488   VerifyOption     _vo;
2489   bool             _all;
2490   HeapRegion*      _hr;
2491 
2492 public:
2493   PrintReachableObjectClosure(outputStream* out,
2494                               VerifyOption  vo,
2495                               bool          all,
2496                               HeapRegion*   hr) :
2497     _g1h(G1CollectedHeap::heap()),
2498     _out(out), _vo(vo), _all(all), _hr(hr) { }
2499 
2500   void do_object(oop o) {
2501     bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2502     bool marked = _g1h->is_marked(o, _vo);
2503     bool print_it = _all || over_tams || marked;
2504 
2505     if (print_it) {
2506       _out->print_cr(" "PTR_FORMAT"%s",
2507                      o, (over_tams) ? " >" : (marked) ? " M" : "");
2508       PrintReachableOopClosure oopCl(_out, _vo, _all);
2509       o->oop_iterate_no_header(&oopCl);
2510     }
2511   }
2512 };
2513 
2514 class PrintReachableRegionClosure : public HeapRegionClosure {
2515 private:
2516   G1CollectedHeap* _g1h;
2517   outputStream*    _out;
2518   VerifyOption     _vo;
2519   bool             _all;
2520 
2521 public:
2522   bool doHeapRegion(HeapRegion* hr) {
2523     HeapWord* b = hr->bottom();
2524     HeapWord* e = hr->end();
2525     HeapWord* t = hr->top();
2526     HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2527     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2528                    "TAMS: "PTR_FORMAT, b, e, t, p);
2529     _out->cr();
2530 
2531     HeapWord* from = b;
2532     HeapWord* to   = t;
2533 
2534     if (to > from) {
2535       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2536       _out->cr();
2537       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2538       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2539       _out->cr();
2540     }
2541 
2542     return false;
2543   }
2544 
2545   PrintReachableRegionClosure(outputStream* out,
2546                               VerifyOption  vo,
2547                               bool          all) :
2548     _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2549 };
2550 
2551 void ConcurrentMark::print_reachable(const char* str,
2552                                      VerifyOption vo,
2553                                      bool all) {
2554   gclog_or_tty->cr();
2555   gclog_or_tty->print_cr("== Doing heap dump... ");
2556 
2557   if (G1PrintReachableBaseFile == NULL) {
2558     gclog_or_tty->print_cr("  #### error: no base file defined");
2559     return;
2560   }
2561 
2562   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2563       (JVM_MAXPATHLEN - 1)) {
2564     gclog_or_tty->print_cr("  #### error: file name too long");
2565     return;
2566   }
2567 
2568   char file_name[JVM_MAXPATHLEN];
2569   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2570   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2571 
2572   fileStream fout(file_name);
2573   if (!fout.is_open()) {
2574     gclog_or_tty->print_cr("  #### error: could not open file");
2575     return;
2576   }
2577 
2578   outputStream* out = &fout;
2579   out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2580   out->cr();
2581 
2582   out->print_cr("--- ITERATING OVER REGIONS");
2583   out->cr();
2584   PrintReachableRegionClosure rcl(out, vo, all);
2585   _g1h->heap_region_iterate(&rcl);
2586   out->cr();
2587 
2588   gclog_or_tty->print_cr("  done");
2589   gclog_or_tty->flush();
2590 }
2591 
2592 #endif // PRODUCT
2593 
2594 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2595   // Note we are overriding the read-only view of the prev map here, via
2596   // the cast.
2597   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2598 }
2599 
2600 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2601   _nextMarkBitMap->clearRange(mr);
2602 }
2603 
2604 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
2605   clearRangePrevBitmap(mr);
2606   clearRangeNextBitmap(mr);
2607 }
2608 
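     // Claims the next region to scan for the given task by atomically moving
     // the global finger past it (CAS on _finger). Returns the claimed region
     // if it has anything to scan (its NTAMS is above bottom); returns NULL if
     // the claimed region was empty, in which case the caller should call
     // claim_region() again, or if the finger has reached the end of the heap.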
2609 HeapRegion*
2610 ConcurrentMark::claim_region(int task_num) {
2611   // "checkpoint" the finger
2612   HeapWord* finger = _finger;
2613 
2614   // _heap_end will not change underneath our feet; it only changes at
2615   // yield points.
2616   while (finger < _heap_end) {
2617     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2618 
2619     // Note on how this code handles humongous regions. In the
2620     // normal case the finger will reach the start of a "starts
2621     // humongous" (SH) region. Its end will either be the end of the
2622     // last "continues humongous" (CH) region in the sequence, or the
2623     // standard end of the SH region (if the SH is the only region in
2624     // the sequence). That way claim_region() will skip over the CH
2625     // regions. However, there is a subtle race between a CM thread
2626     // executing this method and a mutator thread doing a humongous
2627     // object allocation. The two are not mutually exclusive as the CM
2628     // thread does not need to hold the Heap_lock when it gets
2629     // here. So there is a chance that claim_region() will come across
2630     // a free region that's in the process of becoming a SH or a CH
2631     // region. In the former case, it will either
2632     //   a) Miss the update to the region's end, in which case it will
2633     //      visit every subsequent CH region, will find their bitmaps
2634     //      empty, and do nothing, or
2635     //   b) Will observe the update of the region's end (in which case
2636     //      it will skip the subsequent CH regions).
2637     // If it comes across a region that suddenly becomes CH, the
2638     // scenario will be similar to b). So, the race between
2639     // claim_region() and a humongous object allocation might force us
2640     // to do a bit of unnecessary work (due to some unnecessary bitmap
2641     // iterations) but it should not introduce any correctness issues.
2642     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
2643     HeapWord*   bottom        = curr_region->bottom();
2644     HeapWord*   end           = curr_region->end();
2645     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2646 
2647     if (verbose_low()) {
2648       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2649                              "["PTR_FORMAT", "PTR_FORMAT"), "
2650                              "limit = "PTR_FORMAT,
2651                              task_num, curr_region, bottom, end, limit);
2652     }
2653 
2654     // Is the gap between reading the finger and doing the CAS too long?
2655     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2656     if (res == finger) {
2657       // we succeeded
2658 
2659       // notice that _finger == end cannot be guaranteed here since
2660       // someone else might have moved the finger even further
2661       assert(_finger >= end, "the finger should have moved forward");
2662 
2663       if (verbose_low()) {
2664         gclog_or_tty->print_cr("[%d] we were successful with region = "
2665                                PTR_FORMAT, task_num, curr_region);
2666       }
2667 
2668       if (limit > bottom) {
2669         if (verbose_low()) {
2670           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2671                                  "returning it ", task_num, curr_region);
2672         }
2673         return curr_region;
2674       } else {
2675         assert(limit == bottom,
2676                "the region limit should be at bottom");
2677         if (verbose_low()) {
2678           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2679                                  "returning NULL", task_num, curr_region);
2680         }
2681         // we return NULL and the caller should try calling
2682         // claim_region() again.
2683         return NULL;
2684       }
2685     } else {
2686       assert(_finger > finger, "the finger should have moved forward");
2687       if (verbose_low()) {
2688         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2689                                "global finger = "PTR_FORMAT", "
2690                                "our finger = "PTR_FORMAT,
2691                                task_num, _finger, finger);
2692       }
2693 
2694       // read it again
2695       finger = _finger;
2696     }
2697   }
2698 
2699   return NULL;
2700 }
2701 
2702 #ifndef PRODUCT
2703 enum VerifyNoCSetOopsPhase {
2704   VerifyNoCSetOopsStack,
2705   VerifyNoCSetOopsQueues,
2706   VerifyNoCSetOopsSATBCompleted,
2707   VerifyNoCSetOopsSATBThread
2708 };
2709 
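     // Closure that checks that none of the oops/objects it visits are in the
     // collection set; used by verify_no_cset_oops() below.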
2710 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
2711 private:
2712   G1CollectedHeap* _g1h;
2713   VerifyNoCSetOopsPhase _phase;
2714   int _info;
2715 
2716   const char* phase_str() {
2717     switch (_phase) {
2718     case VerifyNoCSetOopsStack:         return "Stack";
2719     case VerifyNoCSetOopsQueues:        return "Queue";
2720     case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
2721     case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
2722     default:                            ShouldNotReachHere();
2723     }
2724     return NULL;
2725   }
2726 
2727   void do_object_work(oop obj) {
2728     guarantee(!_g1h->obj_in_cs(obj),
2729               err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2730                       (void*) obj, phase_str(), _info));
2731   }
2732 
2733 public:
2734   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2735 
2736   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2737     _phase = phase;
2738     _info = info;
2739   }
2740 
2741   virtual void do_oop(oop* p) {
2742     oop obj = oopDesc::load_decode_heap_oop(p);
2743     do_object_work(obj);
2744   }
2745 
2746   virtual void do_oop(narrowOop* p) {
2747     // We should not come across narrow oops while scanning marking
2748     // stacks and SATB buffers.
2749     ShouldNotReachHere();
2750   }
2751 
2752   virtual void do_object(oop obj) {
2753     do_object_work(obj);
2754   }
2755 };
2756 
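     // Verifies (in non-product builds) that there are no references into the
     // collection set on the global mark stack, the per-task queues, the SATB
     // buffers, or at the global and per-task fingers.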
2757 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
2758                                          bool verify_enqueued_buffers,
2759                                          bool verify_thread_buffers,
2760                                          bool verify_fingers) {
2761   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2762   if (!G1CollectedHeap::heap()->mark_in_progress()) {
2763     return;
2764   }
2765 
2766   VerifyNoCSetOopsClosure cl;
2767 
2768   if (verify_stacks) {
2769     // Verify entries on the global mark stack
2770     cl.set_phase(VerifyNoCSetOopsStack);
2771     _markStack.oops_do(&cl);
2772 
2773     // Verify entries on the task queues
2774     for (int i = 0; i < (int) _max_task_num; i += 1) {
2775       cl.set_phase(VerifyNoCSetOopsQueues, i);
2776       OopTaskQueue* queue = _task_queues->queue(i);
2777       queue->oops_do(&cl);
2778     }
2779   }
2780 
2781   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
2782 
2783   // Verify entries on the enqueued SATB buffers
2784   if (verify_enqueued_buffers) {
2785     cl.set_phase(VerifyNoCSetOopsSATBCompleted);
2786     satb_qs.iterate_completed_buffers_read_only(&cl);
2787   }
2788 
2789   // Verify entries on the per-thread SATB buffers
2790   if (verify_thread_buffers) {
2791     cl.set_phase(VerifyNoCSetOopsSATBThread);
2792     satb_qs.iterate_thread_buffers_read_only(&cl);
2793   }
2794 
2795   if (verify_fingers) {
2796     // Verify the global finger
2797     HeapWord* global_finger = finger();
2798     if (global_finger != NULL && global_finger < _heap_end) {
2799       // The global finger always points to a heap region boundary. We
2800       // use heap_region_containing_raw() to get the containing region
2801       // given that the global finger could be pointing to a free region
2802       // which subsequently becomes a continues humongous region. If that
2803       // happens, heap_region_containing() will return the bottom of the
2804       // corresponding starts humongous region and the check below will
2805       // not hold any more.
2806       HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2807       guarantee(global_finger == global_hr->bottom(),
2808                 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
2809                         global_finger, HR_FORMAT_PARAMS(global_hr)));
2810     }
2811 
2812     // Verify the task fingers
2813     assert(parallel_marking_threads() <= _max_task_num, "sanity");
2814     for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
2815       CMTask* task = _tasks[i];
2816       HeapWord* task_finger = task->finger();
2817       if (task_finger != NULL && task_finger < _heap_end) {
2818         // See above note on the global finger verification.
2819         HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2820         guarantee(task_finger == task_hr->bottom() ||
2821                   !task_hr->in_collection_set(),
2822                   err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
2823                           task_finger, HR_FORMAT_PARAMS(task_hr)));
2824       }
2825     }
2826   }
2827 }
2828 #endif // PRODUCT
2829 
2830 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2831   _markStack.setEmpty();
2832   _markStack.clear_overflow();
2833   if (clear_overflow) {
2834     clear_has_overflown();
2835   } else {
2836     assert(has_overflown(), "pre-condition");
2837   }
2838   _finger = _heap_start;
2839 
2840   for (int i = 0; i < (int)_max_task_num; ++i) {
2841     OopTaskQueue* queue = _task_queues->queue(i);
2842     queue->set_empty();
2843   }
2844 }
2845 
2846 // Aggregate the counting data that was constructed concurrently
2847 // with marking.
2848 class AggregateCountDataHRClosure: public HeapRegionClosure {
2849   ConcurrentMark* _cm;
2850   BitMap* _cm_card_bm;
2851   size_t _max_task_num;
2852 
2853  public:
2854   AggregateCountDataHRClosure(ConcurrentMark *cm,
2855                               BitMap* cm_card_bm,
2856                               size_t max_task_num) :
2857     _cm(cm), _cm_card_bm(cm_card_bm),
2858     _max_task_num(max_task_num) { }
2859 
2860   bool is_card_aligned(HeapWord* p) {
2861     return ((uintptr_t(p) & (CardTableModRefBS::card_size - 1)) == 0);
2862   }
2863 
2864   bool doHeapRegion(HeapRegion* hr) {
2865     if (hr->continuesHumongous()) {
2866       // We will ignore these here and process them when their
2867       // associated "starts humongous" region is processed.
2868       // Note that we cannot rely on their associated
2869       // "starts humongous" region to have its bit set to 1
2870       // since, due to the region chunking in the parallel region
2871       // iteration, a "continues humongous" region might be visited
2872       // before its associated "starts humongous".
2873       return false;
2874     }
2875 
2876     HeapWord* start = hr->bottom();
2877     HeapWord* limit = hr->next_top_at_mark_start();
2878     HeapWord* end = hr->end();
2879 
2880     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2881            err_msg("Preconditions not met - "
2882                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
2883                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
2884                    start, limit, hr->top(), hr->end()));
2885 
2886     assert(hr->next_marked_bytes() == 0, "Precondition");
2887 
2888     if (start == limit) {
2889       // NTAMS of this region has not been set so nothing to do.
2890       return false;
2891     }
2892 
2893     assert(is_card_aligned(start), "sanity");
2894     assert(is_card_aligned(end), "sanity");
2895 
2896     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2897     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2898     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2899 
2900     // If ntams is not card aligned then we bump the index for
2901     // limit so that we get the card spanning ntams.
2902     if (!is_card_aligned(limit)) {
2903       limit_idx += 1;
2904     }
2905 
2906     assert(limit_idx <= end_idx, "or else use atomics");
2907 
2908     // Aggregate the "stripe" in the count data associated with hr.
2909     uint hrs_index = hr->hrs_index();
2910     size_t marked_bytes = 0;
2911 
2912     for (int i = 0; (size_t)i < _max_task_num; i += 1) {
2913       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2914       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2915 
2916       // Fetch the marked_bytes in this region for task i and
2917       // add it to the running total for this region.
2918       marked_bytes += marked_bytes_array[hrs_index];
2919 
2920       // Now union this task's card bitmap, over the range
2921       // [start_idx, limit_idx), into the global card bitmap.
2922       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2923 
2924       while (scan_idx < limit_idx) {
2925         assert(task_card_bm->at(scan_idx) == true, "should be");
2926         _cm_card_bm->set_bit(scan_idx);
2927         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2928 
2929         // BitMap::get_next_one_offset() can handle the case when
2930         // its left_offset parameter is greater than its right_offset
2931         // parameter. It does, however, have an early exit if
2932         // left_offset == right_offset. So let's limit the value
2933         // passed in for left offset here.
2934         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2935         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2936       }
2937     }
2938 
2939     // Update the marked bytes for this region.
2940     hr->add_to_marked_bytes(marked_bytes);
2941 
2942     // Next heap region
2943     return false;
2944   }
2945 };
2946 
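     // Gang task that applies AggregateCountDataHRClosure to the heap
     // regions, chunked across workers when parallel GC threads are in use.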
2947 class G1AggregateCountDataTask: public AbstractGangTask {
2948 protected:
2949   G1CollectedHeap* _g1h;
2950   ConcurrentMark* _cm;
2951   BitMap* _cm_card_bm;
2952   size_t _max_task_num;
2953   int _active_workers;
2954 
2955 public:
2956   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2957                            ConcurrentMark* cm,
2958                            BitMap* cm_card_bm,
2959                            size_t max_task_num,
2960                            int n_workers) :
2961     AbstractGangTask("Count Aggregation"),
2962     _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2963     _max_task_num(max_task_num),
2964     _active_workers(n_workers) { }
2965 
2966   void work(uint worker_id) {
2967     AggregateCountDataHRClosure cl(_cm, _cm_card_bm, _max_task_num);
2968 
2969     if (G1CollectedHeap::use_parallel_gc_threads()) {
2970       _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
2971                                             _active_workers,
2972                                             HeapRegion::AggregateCountClaimValue);
2973     } else {
2974       _g1h->heap_region_iterate(&cl);
2975     }
2976   }
2977 };
2978 
2979 
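     // Run the count aggregation task, on the work gang when parallel GC
     // threads are in use and directly on the calling thread otherwise.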
2980 void ConcurrentMark::aggregate_count_data() {
2981   int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
2982                         _g1h->workers()->active_workers() :
2983                         1);
2984 
2985   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
2986                                            _max_task_num, n_workers);
2987 
2988   if (G1CollectedHeap::use_parallel_gc_threads()) {
2989     assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
2990            "sanity check");
2991     _g1h->set_par_threads(n_workers);
2992     _g1h->workers()->run_task(&g1_par_agg_task);
2993     _g1h->set_par_threads(0);
2994 
2995     assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
2996            "sanity check");
2997     _g1h->reset_heap_region_claim_values();
2998   } else {
2999     g1_par_agg_task.work(0);
3000   }
3001 }
3002 
3003 // Clear the per-worker arrays used to store the per-region counting data
3004 void ConcurrentMark::clear_all_count_data() {
3005   // Clear the global card bitmap - it will be filled during
3006   // liveness count aggregation (during remark) and the
3007   // final counting task.
3008   _card_bm.clear();
3009 
3010   // Clear the global region bitmap - it will be filled as part
3011   // of the final counting task.
3012   _region_bm.clear();
3013 
3014   uint max_regions = _g1h->max_regions();
3015   assert(_max_task_num != 0, "uninitialized");
3016 
3017   for (int i = 0; (size_t) i < _max_task_num; i += 1) {
3018     BitMap* task_card_bm = count_card_bitmap_for(i);
3019     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3020 
3021     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3022     assert(marked_bytes_array != NULL, "uninitialized");
3023 
3024     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3025     task_card_bm->clear();
3026   }
3027 }
3028 
3029 void ConcurrentMark::print_stats() {
3030   if (verbose_stats()) {
3031     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3032     for (size_t i = 0; i < _active_tasks; ++i) {
3033       _tasks[i]->print_stats();
3034       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3035     }
3036   }
3037 }
3038 
3039 // abandon current marking iteration due to a Full GC
3040 void ConcurrentMark::abort() {
3041   // Clear all marks to force the marking thread to do nothing
3042   _nextMarkBitMap->clearAll();
3043   // Clear the liveness counting data
3044   clear_all_count_data();
3045   // Empty mark stack
3046   clear_marking_state();
3047   for (int i = 0; i < (int)_max_task_num; ++i) {
3048     _tasks[i]->clear_region_fields();
3049   }
3050   _has_aborted = true;
3051 
3052   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3053   satb_mq_set.abandon_partial_marking();
3054   // This can be called either during or outside marking; we'll read
3055   // the expected_active value from the SATB queue set.
3056   satb_mq_set.set_active_all_threads(
3057                                  false, /* new active value */
3058                                  satb_mq_set.is_active() /* expected_active */);
3059 }
3060 
3061 static void print_ms_time_info(const char* prefix, const char* name,
3062                                NumberSeq& ns) {
3063   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3064                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3065   if (ns.num() > 0) {
3066     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3067                            prefix, ns.sd(), ns.maximum());
3068   }
3069 }
3070 
3071 void ConcurrentMark::print_summary_info() {
3072   gclog_or_tty->print_cr(" Concurrent marking:");
3073   print_ms_time_info("  ", "init marks", _init_times);
3074   print_ms_time_info("  ", "remarks", _remark_times);
3075   {
3076     print_ms_time_info("     ", "final marks", _remark_mark_times);
3077     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3078 
3079   }
3080   print_ms_time_info("  ", "cleanups", _cleanup_times);
3081   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3082                          _total_counting_time,
3083                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3084                           (double)_cleanup_times.num()
3085                          : 0.0));
3086   if (G1ScrubRemSets) {
3087     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3088                            _total_rs_scrub_time,
3089                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3090                             (double)_cleanup_times.num()
3091                            : 0.0));
3092   }
3093   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3094                          (_init_times.sum() + _remark_times.sum() +
3095                           _cleanup_times.sum())/1000.0);
3096   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3097                 "(%8.2f s marking).",
3098                 cmThread()->vtime_accum(),
3099                 cmThread()->vtime_mark_accum());
3100 }
3101 
3102 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3103   _parallel_workers->print_worker_threads_on(st);
3104 }
3105 
3106 // We take a break if someone is trying to stop the world.
3107 bool ConcurrentMark::do_yield_check(uint worker_id) {
3108   if (should_yield()) {
3109     if (worker_id == 0) {
3110       _g1h->g1_policy()->record_concurrent_pause();
3111     }
3112     cmThread()->yield();
3113     return true;
3114   } else {
3115     return false;
3116   }
3117 }
3118 
3119 bool ConcurrentMark::should_yield() {
3120   return cmThread()->should_yield();
3121 }
3122 
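     // Returns whether the card spanning the given heap address is set in
     // the global card bitmap.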
3123 bool ConcurrentMark::containing_card_is_marked(void* p) {
3124   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3125   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3126 }
3127 
3128 bool ConcurrentMark::containing_cards_are_marked(void* start,
3129                                                  void* last) {
3130   return containing_card_is_marked(start) &&
3131          containing_card_is_marked(last);
3132 }
3133 
3134 #ifndef PRODUCT
3135 // for debugging purposes
3136 void ConcurrentMark::print_finger() {
3137   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3138                          _heap_start, _heap_end, _finger);
3139   for (int i = 0; i < (int) _max_task_num; ++i) {
3140     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
3141   }
3142   gclog_or_tty->print_cr("");
3143 }
3144 #endif
3145 
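     // Scan a marked object: apply the CM oop closure to its fields, add its
     // size to the words-scanned counter and check the scanning limits.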
3146 void CMTask::scan_object(oop obj) {
3147   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3148 
3149   if (_cm->verbose_high()) {
3150     gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
3151                            _task_id, (void*) obj);
3152   }
3153 
3154   size_t obj_size = obj->size();
3155   _words_scanned += obj_size;
3156 
3157   obj->oop_iterate(_cm_oop_closure);
3158   statsOnly( ++_objs_scanned );
3159   check_limits();
3160 }
3161 
3162 // Closure for iteration over bitmaps
3163 class CMBitMapClosure : public BitMapClosure {
3164 private:
3165   // the bitmap that is being iterated over
3166   CMBitMap*                   _nextMarkBitMap;
3167   ConcurrentMark*             _cm;
3168   CMTask*                     _task;
3169 
3170 public:
3171   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3172     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3173 
3174   bool do_bit(size_t offset) {
3175     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3176     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3177     assert( addr < _cm->finger(), "invariant");
3178 
3179     statsOnly( _task->increase_objs_found_on_bitmap() );
3180     assert(addr >= _task->finger(), "invariant");
3181 
3182     // We move that task's local finger along.
3183     _task->move_finger_to(addr);
3184 
3185     _task->scan_object(oop(addr));
3186     // we only partially drain the local queue and global stack
3187     _task->drain_local_queue(true);
3188     _task->drain_global_stack(true);
3189 
3190     // if the has_aborted flag has been raised, we need to bail out of
3191     // the iteration
3192     return !_task->has_aborted();
3193   }
3194 };
3195 
3196 // Closure for iterating over objects, currently only used for
3197 // processing SATB buffers.
3198 class CMObjectClosure : public ObjectClosure {
3199 private:
3200   CMTask* _task;
3201 
3202 public:
3203   void do_object(oop obj) {
3204     _task->deal_with_reference(obj);
3205   }
3206 
3207   CMObjectClosure(CMTask* task) : _task(task) { }
3208 };
3209 
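     // The G1CMOopClosure constructor: picks up the concurrent marking
     // reference processor when G1UseConcMarkReferenceProcessing is enabled.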
3210 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3211                                ConcurrentMark* cm,
3212                                CMTask* task)
3213   : _g1h(g1h), _cm(cm), _task(task) {
3214   assert(_ref_processor == NULL, "should be initialized to NULL");
3215 
3216   if (G1UseConcMarkReferenceProcessing) {
3217     _ref_processor = g1h->ref_processor_cm();
3218     assert(_ref_processor != NULL, "should not be NULL");
3219   }
3220 }
3221 
3222 void CMTask::setup_for_region(HeapRegion* hr) {
3223   // Separated the asserts so that we know which one fires.
3224   assert(hr != NULL,
3225         "claim_region() should have filtered out continues humongous regions");
3226   assert(!hr->continuesHumongous(),
3227         "claim_region() should have filtered out continues humongous regions");
3228 
3229   if (_cm->verbose_low()) {
3230     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3231                            _task_id, hr);
3232   }
3233 
3234   _curr_region  = hr;
3235   _finger       = hr->bottom();
3236   update_region_limit();
3237 }
3238 
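     // Re-read the current region's NTAMS and update _region_limit; if the
     // region has been emptied or reused underneath us, also move _finger so
     // that the upcoming bitmap iteration does no unnecessary work.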
3239 void CMTask::update_region_limit() {
3240   HeapRegion* hr            = _curr_region;
3241   HeapWord* bottom          = hr->bottom();
3242   HeapWord* limit           = hr->next_top_at_mark_start();
3243 
3244   if (limit == bottom) {
3245     if (_cm->verbose_low()) {
3246       gclog_or_tty->print_cr("[%d] found an empty region "
3247                              "["PTR_FORMAT", "PTR_FORMAT")",
3248                              _task_id, bottom, limit);
3249     }
3250     // The region was collected underneath our feet.
3251     // We set the finger to bottom to ensure that the bitmap
3252     // iteration that will follow this will not do anything.
3253     // (this is not a condition that holds when we set the region up,
3254     // as the region is not supposed to be empty in the first place)
3255     _finger = bottom;
3256   } else if (limit >= _region_limit) {
3257     assert(limit >= _finger, "peace of mind");
3258   } else {
3259     assert(limit < _region_limit, "only way to get here");
3260     // This can happen under some pretty unusual circumstances.  An
3261     // evacuation pause empties the region underneath our feet (NTAMS
3262     // at bottom). We then do some allocation in the region (NTAMS
3263     // stays at bottom), followed by the region being used as a GC
3264     // alloc region (NTAMS will move to top() and the objects
3265     // originally below it will be grayed). All objects now marked in
3266     // the region are explicitly grayed, if below the global finger,
3267     // and in fact we do not need to scan anything else. So, we simply
3268     // set _finger to be limit to ensure that the bitmap iteration
3269     // doesn't do anything.
3270     _finger = limit;
3271   }
3272 
3273   _region_limit = limit;
3274 }
3275 
3276 void CMTask::giveup_current_region() {
3277   assert(_curr_region != NULL, "invariant");
3278   if (_cm->verbose_low()) {
3279     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3280                            _task_id, _curr_region);
3281   }
3282   clear_region_fields();
3283 }
3284 
3285 void CMTask::clear_region_fields() {
3286   // Set these three fields to values that indicate that we're not
3287   // holding on to a region.
3288   _curr_region   = NULL;
3289   _finger        = NULL;
3290   _region_limit  = NULL;
3291 }
3292 
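     // Install or clear the oop closure used while scanning objects. The
     // asserts check that we only ever flip between NULL and non-NULL.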
3293 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3294   if (cm_oop_closure == NULL) {
3295     assert(_cm_oop_closure != NULL, "invariant");
3296   } else {
3297     assert(_cm_oop_closure == NULL, "invariant");
3298   }
3299   _cm_oop_closure = cm_oop_closure;
3300 }
3301 
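     // Reset the task's per-cycle state (bitmap, region fields, timings and,
     // when enabled, the marking statistics) ahead of a new marking cycle.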
3302 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3303   guarantee(nextMarkBitMap != NULL, "invariant");
3304 
3305   if (_cm->verbose_low()) {
3306     gclog_or_tty->print_cr("[%d] resetting", _task_id);
3307   }
3308 
3309   _nextMarkBitMap                = nextMarkBitMap;
3310   clear_region_fields();
3311 
3312   _calls                         = 0;
3313   _elapsed_time_ms               = 0.0;
3314   _termination_time_ms           = 0.0;
3315   _termination_start_time_ms     = 0.0;
3316 
3317 #if _MARKING_STATS_
3318   _local_pushes                  = 0;
3319   _local_pops                    = 0;
3320   _local_max_size                = 0;
3321   _objs_scanned                  = 0;
3322   _global_pushes                 = 0;
3323   _global_pops                   = 0;
3324   _global_max_size               = 0;
3325   _global_transfers_to           = 0;
3326   _global_transfers_from         = 0;
3327   _regions_claimed               = 0;
3328   _objs_found_on_bitmap          = 0;
3329   _satb_buffers_processed        = 0;
3330   _steal_attempts                = 0;
3331   _steals                        = 0;
3332   _aborted                       = 0;
3333   _aborted_overflow              = 0;
3334   _aborted_cm_aborted            = 0;
3335   _aborted_yield                 = 0;
3336   _aborted_timed_out             = 0;
3337   _aborted_satb                  = 0;
3338   _aborted_termination           = 0;
3339 #endif // _MARKING_STATS_
3340 }
3341 
3342 bool CMTask::should_exit_termination() {
3343   regular_clock_call();
3344   // This is called when we are in the termination protocol. We should
3345   // quit if, for some reason, this task wants to abort or the global
3346   // stack is not empty (this means that we can get work from it).
3347   return !_cm->mark_stack_empty() || has_aborted();
3348 }
3349 
3350 void CMTask::reached_limit() {
3351   assert(_words_scanned >= _words_scanned_limit ||
3352          _refs_reached >= _refs_reached_limit ,
3353          "shouldn't have been called otherwise");
3354   regular_clock_call();
3355 }
3356 
3357 void CMTask::regular_clock_call() {
3358   if (has_aborted()) return;
3359 
3360   // First, we need to recalculate the words scanned and refs reached
3361   // limits for the next clock call.
3362   recalculate_limits();
3363 
3364   // During the regular clock call we do the following
3365 
3366   // (1) If an overflow has been flagged, then we abort.
3367   if (_cm->has_overflown()) {
3368     set_has_aborted();
3369     return;
3370   }
3371 
3372   // If we are not concurrent (i.e. we're doing remark) we don't need
3373   // to check anything else. The other steps are only needed during
3374   // the concurrent marking phase.
3375   if (!concurrent()) return;
3376 
3377   // (2) If marking has been aborted for Full GC, then we also abort.
3378   if (_cm->has_aborted()) {
3379     set_has_aborted();
3380     statsOnly( ++_aborted_cm_aborted );
3381     return;
3382   }
3383 
3384   double curr_time_ms = os::elapsedVTime() * 1000.0;
3385 
3386   // (3) If marking stats are enabled, then we update the step history.
3387 #if _MARKING_STATS_
3388   if (_words_scanned >= _words_scanned_limit) {
3389     ++_clock_due_to_scanning;
3390   }
3391   if (_refs_reached >= _refs_reached_limit) {
3392     ++_clock_due_to_marking;
3393   }
3394 
3395   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3396   _interval_start_time_ms = curr_time_ms;
3397   _all_clock_intervals_ms.add(last_interval_ms);
3398 
3399   if (_cm->verbose_medium()) {
3400       gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3401                         "scanned = %d%s, refs reached = %d%s",
3402                         _task_id, last_interval_ms,
3403                         _words_scanned,
3404                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3405                         _refs_reached,
3406                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3407   }
3408 #endif // _MARKING_STATS_
3409 
3410   // (4) We check whether we should yield. If we have to, then we abort.
3411   if (_cm->should_yield()) {
3412     // We should yield. To do this we abort the task. The caller is
3413     // responsible for yielding.
3414     set_has_aborted();
3415     statsOnly( ++_aborted_yield );
3416     return;
3417   }
3418 
3419   // (5) We check whether we've reached our time quota. If we have,
3420   // then we abort.
3421   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3422   if (elapsed_time_ms > _time_target_ms) {
3423     set_has_aborted();
3424     _has_timed_out = true;
3425     statsOnly( ++_aborted_timed_out );
3426     return;
3427   }
3428 
3429   // (6) Finally, we check whether there are enough completed SATB
3430   // buffers available for processing. If there are, we abort.
3431   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3432   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3433     if (_cm->verbose_low()) {
3434       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3435                              _task_id);
3436     }
3437     // We do need to process SATB buffers, so we'll abort and restart
3438     // the marking task to do so.
3439     set_has_aborted();
3440     statsOnly( ++_aborted_satb );
3441     return;
3442   }
3443 }
3444 
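     // Set the next words-scanned and refs-reached thresholds at which
     // regular_clock_call() should be triggered again.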
3445 void CMTask::recalculate_limits() {
3446   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3447   _words_scanned_limit      = _real_words_scanned_limit;
3448 
3449   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3450   _refs_reached_limit       = _real_refs_reached_limit;
3451 }
3452 
3453 void CMTask::decrease_limits() {
3454   // This is called when we believe that we're going to do an infrequent
3455   // operation which will increase the per-byte scanning cost (i.e. move
3456   // entries to/from the global stack). It basically tries to decrease the
3457   // scanning limit so that the clock is called earlier.
3458 
3459   if (_cm->verbose_medium()) {
3460     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3461   }
3462 
3463   _words_scanned_limit = _real_words_scanned_limit -
3464     3 * words_scanned_period / 4;
3465   _refs_reached_limit  = _real_refs_reached_limit -
3466     3 * refs_reached_period / 4;
3467 }
3468 
3469 void CMTask::move_entries_to_global_stack() {
3470   // local array where we'll store the entries that will be popped
3471   // from the local queue
3472   oop buffer[global_stack_transfer_size];
3473 
3474   int n = 0;
3475   oop obj;
3476   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3477     buffer[n] = obj;
3478     ++n;
3479   }
3480 
3481   if (n > 0) {
3482     // we popped at least one entry from the local queue
3483 
3484     statsOnly( ++_global_transfers_to; _local_pops += n );
3485 
3486     if (!_cm->mark_stack_push(buffer, n)) {
3487       if (_cm->verbose_low()) {
3488         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
3489                                _task_id);
3490       }
3491       set_has_aborted();
3492     } else {
3493       // the transfer was successful
3494 
3495       if (_cm->verbose_medium()) {
3496         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3497                                _task_id, n);
3498       }
3499       statsOnly( int tmp_size = _cm->mark_stack_size();
3500                  if (tmp_size > _global_max_size) {
3501                    _global_max_size = tmp_size;
3502                  }
3503                  _global_pushes += n );
3504     }
3505   }
3506 
3507   // this operation was quite expensive, so decrease the limits
3508   decrease_limits();
3509 }
3510 
3511 void CMTask::get_entries_from_global_stack() {
3512   // local array where we'll store the entries that will be popped
3513   // from the global stack.
3514   oop buffer[global_stack_transfer_size];
3515   int n;
3516   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3517   assert(n <= global_stack_transfer_size,
3518          "we should not pop more than the given limit");
3519   if (n > 0) {
3520     // yes, we did actually pop at least one entry
3521 
3522     statsOnly( ++_global_transfers_from; _global_pops += n );
3523     if (_cm->verbose_medium()) {
3524       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3525                              _task_id, n);
3526     }
3527     for (int i = 0; i < n; ++i) {
3528       bool success = _task_queue->push(buffer[i]);
3529       // We only call this when the local queue is empty or under a
3530       // given target limit. So, we do not expect this push to fail.
3531       assert(success, "invariant");
3532     }
3533 
3534     statsOnly( int tmp_size = _task_queue->size();
3535                if (tmp_size > _local_max_size) {
3536                  _local_max_size = tmp_size;
3537                }
3538                _local_pushes += n );
3539   }
3540 
3541   // this operation was quite expensive, so decrease the limits
3542   decrease_limits();
3543 }
3544 
3545 void CMTask::drain_local_queue(bool partially) {
3546   if (has_aborted()) return;
3547 
3548   // Decide what the target size is, depending on whether we're going to
3549   // drain it partially (so that other tasks can steal if they run out
3550   // of things to do) or totally (at the very end).
3551   size_t target_size;
3552   if (partially) {
3553     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3554   } else {
3555     target_size = 0;
3556   }
3557 
3558   if (_task_queue->size() > target_size) {
3559     if (_cm->verbose_high()) {
3560       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3561                              _task_id, target_size);
3562     }
3563 
3564     oop obj;
3565     bool ret = _task_queue->pop_local(obj);
3566     while (ret) {
3567       statsOnly( ++_local_pops );
3568 
3569       if (_cm->verbose_high()) {
3570         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3571                                (void*) obj);
3572       }
3573 
3574       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3575       assert(!_g1h->is_on_master_free_list(
3576                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3577 
3578       scan_object(obj);
3579 
3580       if (_task_queue->size() <= target_size || has_aborted()) {
3581         ret = false;
3582       } else {
3583         ret = _task_queue->pop_local(obj);
3584       }
3585     }
3586 
3587     if (_cm->verbose_high()) {
3588       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3589                              _task_id, _task_queue->size());
3590     }
3591   }
3592 }
3593 
3594 void CMTask::drain_global_stack(bool partially) {
3595   if (has_aborted()) return;
3596 
3597   // We have a policy to drain the local queue before we attempt to
3598   // drain the global stack.
3599   assert(partially || _task_queue->size() == 0, "invariant");
3600 
3601   // Decide what the target size is, depending on whether we're going to
3602   // drain it partially (so that other tasks can steal if they run out
3603   // of things to do) or totally (at the very end).  Notice that,
3604   // because we move entries from the global stack in chunks or
3605   // because another task might be doing the same, we might in fact
3606   // drop below the target. But, this is not a problem.
3607   size_t target_size;
3608   if (partially) {
3609     target_size = _cm->partial_mark_stack_size_target();
3610   } else {
3611     target_size = 0;
3612   }
3613 
3614   if (_cm->mark_stack_size() > target_size) {
3615     if (_cm->verbose_low()) {
3616       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3617                              _task_id, target_size);
3618     }
3619 
3620     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3621       get_entries_from_global_stack();
3622       drain_local_queue(partially);
3623     }
3624 
3625     if (_cm->verbose_low()) {
3626       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3627                              _task_id, _cm->mark_stack_size());
3628     }
3629   }
3630 }
3631 
3632 // The SATB queue set makes several assumptions about whether to call the
3633 // par or non-par versions of its methods. This is why some of the code is
3634 // replicated. We should really get rid of the single-threaded version
3635 // of the code to simplify things.
3636 void CMTask::drain_satb_buffers() {
3637   if (has_aborted()) return;
3638 
3639   // We set this so that the regular clock knows that we're in the
3640   // middle of draining buffers and doesn't set the abort flag when it
3641   // notices that SATB buffers are available for draining. It'd be
3642   // very counterproductive if it did that. :-)
3643   _draining_satb_buffers = true;
3644 
3645   CMObjectClosure oc(this);
3646   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3647   if (G1CollectedHeap::use_parallel_gc_threads()) {
3648     satb_mq_set.set_par_closure(_task_id, &oc);
3649   } else {
3650     satb_mq_set.set_closure(&oc);
3651   }
3652 
3653   // This keeps claiming and applying the closure to completed buffers
3654   // until we run out of buffers or we need to abort.
3655   if (G1CollectedHeap::use_parallel_gc_threads()) {
3656     while (!has_aborted() &&
3657            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3658       if (_cm->verbose_medium()) {
3659         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3660       }
3661       statsOnly( ++_satb_buffers_processed );
3662       regular_clock_call();
3663     }
3664   } else {
3665     while (!has_aborted() &&
3666            satb_mq_set.apply_closure_to_completed_buffer()) {
3667       if (_cm->verbose_medium()) {
3668         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3669       }
3670       statsOnly( ++_satb_buffers_processed );
3671       regular_clock_call();
3672     }
3673   }
3674 
3675   if (!concurrent() && !has_aborted()) {
3676     // We should only do this during remark.
3677     if (G1CollectedHeap::use_parallel_gc_threads()) {
3678       satb_mq_set.par_iterate_closure_all_threads(_task_id);
3679     } else {
3680       satb_mq_set.iterate_closure_all_threads();
3681     }
3682   }
3683 
3684   _draining_satb_buffers = false;
3685 
3686   assert(has_aborted() ||
3687          concurrent() ||
3688          satb_mq_set.completed_buffers_num() == 0, "invariant");
3689 
3690   if (G1CollectedHeap::use_parallel_gc_threads()) {
3691     satb_mq_set.set_par_closure(_task_id, NULL);
3692   } else {
3693     satb_mq_set.set_closure(NULL);
3694   }
3695 
3696   // Again, this was a potentially expensive operation, so decrease the
3697   // limits to get the regular clock call early.
3698   decrease_limits();
3699 }
3700 
3701 void CMTask::print_stats() {
3702   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3703                          _task_id, _calls);
3704   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3705                          _elapsed_time_ms, _termination_time_ms);
3706   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3707                          _step_times_ms.num(), _step_times_ms.avg(),
3708                          _step_times_ms.sd());
3709   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3710                          _step_times_ms.maximum(), _step_times_ms.sum());
3711 
3712 #if _MARKING_STATS_
3713   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3714                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3715                          _all_clock_intervals_ms.sd());
3716   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3717                          _all_clock_intervals_ms.maximum(),
3718                          _all_clock_intervals_ms.sum());
3719   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
3720                          _clock_due_to_scanning, _clock_due_to_marking);
3721   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
3722                          _objs_scanned, _objs_found_on_bitmap);
3723   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
3724                          _local_pushes, _local_pops, _local_max_size);
3725   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
3726                          _global_pushes, _global_pops, _global_max_size);
3727   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
3728                          _global_transfers_to,_global_transfers_from);
3729   gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);
3730   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
3731   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
3732                          _steal_attempts, _steals);
3733   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
3734   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
3735                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3736   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
3737                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3738 #endif // _MARKING_STATS_
3739 }
3740 
3741 /*****************************************************************************
3742 
3743     The do_marking_step(time_target_ms) method is the building block
3744     of the parallel marking framework. It can be called in parallel
3745     with other invocations of do_marking_step() on different tasks
3746     (but only one per task, obviously) and concurrently with the
3747     mutator threads, or during remark, hence it eliminates the need
3748     for two versions of the code. When called during remark, it will
3749     pick up from where the task left off during the concurrent marking
3750     phase. Interestingly, tasks are also claimable during evacuation
3751     pauses, since do_marking_step() ensures that it aborts before
3752     it needs to yield.
3753 
3754     The data structures that it uses to do marking work are the
3755     following:
3756 
3757       (1) Marking Bitmap. If there are gray objects that appear only
3758       on the bitmap (this happens either when dealing with an overflow
3759       or when the initial marking phase has simply marked the roots
3760       and didn't push them on the stack), then tasks claim heap
3761       regions whose bitmap they then scan to find gray objects. A
3762       global finger indicates where the end of the last claimed region
3763       is. A local finger indicates how far into the region a task has
3764       scanned. The two fingers are used to determine how to gray an
3765       object (i.e. whether simply marking it is OK, as it will be
3766       visited by a task in the future, or whether it needs to be also
3767       pushed on a stack).
3768 
3769       (2) Local Queue. The local queue of the task which is accessed
3770       reasonably efficiently by the task. Other tasks can steal from
3771       it when they run out of work. Throughout the marking phase, a
3772       task attempts to keep its local queue short but not totally
3773       empty, so that entries are available for stealing by other
3774       tasks. Only when there is no more work, a task will totally
3775       drain its local queue.
3776 
3777       (3) Global Mark Stack. This handles local queue overflow. During
3778       marking only sets of entries are moved between it and the local
3779       queues, as access to it requires a mutex and more fine-grained
3780       interaction with it, which might cause contention. If it
3781       overflows, then the marking phase should restart and iterate
3782       over the bitmap to identify gray objects. Throughout the marking
3783       phase, tasks attempt to keep the global mark stack at a small
3784       length but not totally empty, so that entries are available for
3785       popping by other tasks. Only when there is no more work, tasks
3786       will totally drain the global mark stack.
3787 
3788       (4) SATB Buffer Queue. This is where completed SATB buffers are
3789       made available. Buffers are regularly removed from this queue
3790       and scanned for roots, so that the queue doesn't get too
3791       long. During remark, all completed buffers are processed, as
3792       well as the filled in parts of any uncompleted buffers.
3793 
3794     The do_marking_step() method tries to abort when the time target
3795     has been reached. There are a few other cases when the
3796     do_marking_step() method also aborts:
3797 
3798       (1) When the marking phase has been aborted (after a Full GC).
3799 
3800       (2) When a global overflow (on the global stack) has been
3801       triggered. Before the task aborts, it will actually sync up with
3802       the other tasks to ensure that all the marking data structures
3803       (local queues, stacks, fingers etc.)  are re-initialised so that
3804       when do_marking_step() completes, the marking phase can
3805       immediately restart.
3806 
3807       (3) When enough completed SATB buffers are available. The
3808       do_marking_step() method only tries to drain SATB buffers right
3809       at the beginning. So, if enough buffers are available, the
3810       marking step aborts and the SATB buffers are processed at
3811       the beginning of the next invocation.
3812 
3813       (4) To yield. When we have to yield, we abort and yield
3814       right at the end of do_marking_step(). This saves us from a lot
3815       of hassle as, by yielding, we might allow a Full GC. If this
3816       happens then objects will be compacted underneath our feet, the
3817       heap might shrink, etc. We save checking for this by just
3818       aborting and doing the yield right at the end.
3819 
3820     From the above it follows that the do_marking_step() method should
3821     be called in a loop (or, otherwise, regularly) until it completes.
3822 
3823     If a marking step completes without its has_aborted() flag being
3824     true, it means it has completed the current marking phase (and
3825     also all other marking tasks have done so and have all synced up).
3826 
3827     A method called regular_clock_call() is invoked "regularly" (in
3828     sub ms intervals) throughout marking. It is this clock method that
3829     checks all the abort conditions which were mentioned above and
3830     decides when the task should abort. A work-based scheme is used to
3831     trigger this clock method: when the number of object words the
3832     marking phase has scanned or the number of references the marking
3833     phase has visited reach a given limit. Additional invocations of
3834     the clock method have been planted in a few other strategic places
3835     too. The initial reason for the clock method was to avoid calling
3836     vtime too regularly, as it is quite expensive. So, once it was in
3837     place, it was natural to piggy-back all the other conditions on it
3838     too and not constantly check them throughout the code.
3839 
3840  *****************************************************************************/
3841 
3842 void CMTask::do_marking_step(double time_target_ms,
3843                              bool do_stealing,
3844                              bool do_termination) {
3845   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3846   assert(concurrent() == _cm->concurrent(), "they should be the same");
3847 
3848   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3849   assert(_task_queues != NULL, "invariant");
3850   assert(_task_queue != NULL, "invariant");
3851   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
3852 
3853   assert(!_claimed,
3854          "only one thread should claim this task at any one time");
3855 
3856   // OK, this doesn't safeguard against all possible scenarios, as it is
3857   // possible for two threads to set the _claimed flag at the same
3858   // time. But it is only for debugging purposes anyway and it will
3859   // catch most problems.
3860   _claimed = true;
3861 
3862   _start_time_ms = os::elapsedVTime() * 1000.0;
3863   statsOnly( _interval_start_time_ms = _start_time_ms );
3864 
3865   double diff_prediction_ms =
3866     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3867   _time_target_ms = time_target_ms - diff_prediction_ms;
3868 
3869   // set up the variables that are used in the work-based scheme to
3870   // call the regular clock method
3871   _words_scanned = 0;
3872   _refs_reached  = 0;
3873   recalculate_limits();
3874 
3875   // clear all flags
3876   clear_has_aborted();
3877   _has_timed_out = false;
3878   _draining_satb_buffers = false;
3879 
3880   ++_calls;
3881 
3882   if (_cm->verbose_low()) {
3883     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3884                            "target = %1.2lfms >>>>>>>>>>",
3885                            _task_id, _calls, _time_target_ms);
3886   }
3887 
3888   // Set up the bitmap and oop closures. Anything that uses them is
3889   // eventually called from this method, so it is OK to allocate these
3890   // statically.
3891   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3892   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3893   set_cm_oop_closure(&cm_oop_closure);
3894 
3895   if (_cm->has_overflown()) {
3896     // This can happen if the mark stack overflows during a GC pause
3897     // and this task, after a yield point, restarts. We have to abort
3898     // as we need to get into the overflow protocol which happens
3899     // right at the end of this task.
3900     set_has_aborted();
3901   }
3902 
3903   // First drain any available SATB buffers. After this, we will not
3904   // look at SATB buffers before the next invocation of this method.
3905   // If enough completed SATB buffers are queued up, the regular clock
3906   // will abort this task so that it restarts.
3907   drain_satb_buffers();
3908   // ...then partially drain the local queue and the global stack
3909   drain_local_queue(true);
3910   drain_global_stack(true);
3911 
3912   do {
3913     if (!has_aborted() && _curr_region != NULL) {
3914       // This means that we're already holding on to a region.
3915       assert(_finger != NULL, "if region is not NULL, then the finger "
3916              "should not be NULL either");
3917 
3918       // We might have restarted this task after an evacuation pause
3919       // which might have evacuated the region we're holding on to
3920       // underneath our feet. Let's read its limit again to make sure
3921       // that we do not iterate over a region of the heap that
3922       // contains garbage (update_region_limit() will also move
3923       // _finger to the start of the region if it is found empty).
3924       update_region_limit();
3925       // We will start from _finger not from the start of the region,
3926       // as we might be restarting this task after aborting half-way
3927       // through scanning this region. In this case, _finger points to
3928       // the address where we last found a marked object. If this is a
3929       // fresh region, _finger points to start().
3930       MemRegion mr = MemRegion(_finger, _region_limit);
3931 
3932       if (_cm->verbose_low()) {
3933         gclog_or_tty->print_cr("[%d] we're scanning part "
3934                                "["PTR_FORMAT", "PTR_FORMAT") "
3935                                "of region "PTR_FORMAT,
3936                                _task_id, _finger, _region_limit, _curr_region);
3937       }
3938 
3939       // Let's iterate over the bitmap of the part of the
3940       // region that is left.
3941       if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3942         // We successfully completed iterating over the region. Now,
3943         // let's give up the region.
3944         giveup_current_region();
3945         regular_clock_call();
3946       } else {
3947         assert(has_aborted(), "currently the only way to do so");
3948         // The only way to abort the bitmap iteration is to return
3949         // false from the do_bit() method. However, inside the
3950         // do_bit() method we move the _finger to point to the
3951         // object currently being looked at. So, if we bail out, we
3952         // have definitely set _finger to something non-null.
3953         assert(_finger != NULL, "invariant");
3954 
3955         // Region iteration was actually aborted. So now _finger
3956         // points to the address of the object we last scanned. If we
3957         // leave it there, when we restart this task, we will rescan
3958         // the object. It is easy to avoid this. We move the finger by
3959         // enough to point to the next possible object header (the
3960         // bitmap knows by how much we need to move it as it knows its
3961         // granularity).
3962         assert(_finger < _region_limit, "invariant");
3963         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
3964         // Check if bitmap iteration was aborted while scanning the last object
3965         if (new_finger >= _region_limit) {
3966           giveup_current_region();
3967         } else {
3968           move_finger_to(new_finger);
3969         }
3970       }
3971     }
3972     // At this point we have either completed iterating over the
3973     // region we were holding on to, or we have aborted.
3974 
3975     // We then partially drain the local queue and the global stack.
3976     // (Do we really need this?)
3977     drain_local_queue(true);
3978     drain_global_stack(true);
3979 
3980     // Read the note on the claim_region() method on why it might
3981     // return NULL with potentially more regions available for
3982     // claiming and why we have to check out_of_regions() to determine
3983     // whether we're done or not.
3984     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3985       // We are going to try to claim a new region. We should have
3986       // given up on the previous one.
3987       // Separated the asserts so that we know which one fires.
3988       assert(_curr_region  == NULL, "invariant");
3989       assert(_finger       == NULL, "invariant");
3990       assert(_region_limit == NULL, "invariant");
3991       if (_cm->verbose_low()) {
3992         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
3993       }
3994       HeapRegion* claimed_region = _cm->claim_region(_task_id);
3995       if (claimed_region != NULL) {
3996         // Yes, we managed to claim one
3997         statsOnly( ++_regions_claimed );
3998 
3999         if (_cm->verbose_low()) {
4000           gclog_or_tty->print_cr("[%d] we successfully claimed "
4001                                  "region "PTR_FORMAT,
4002                                  _task_id, claimed_region);
4003         }
4004 
4005         setup_for_region(claimed_region);
4006         assert(_curr_region == claimed_region, "invariant");
4007       }
4008       // It is important to call the regular clock here. It might take
4009       // a while to claim a region if, for example, we hit a large
4010       // block of empty regions. So we need to call the regular clock
4011       // method once round the loop to make sure it's called
4012       // frequently enough.
4013       regular_clock_call();
4014     }
4015 
4016     if (!has_aborted() && _curr_region == NULL) {
4017       assert(_cm->out_of_regions(),
4018              "at this point we should be out of regions");
4019     }
4020   } while ( _curr_region != NULL && !has_aborted());
4021 
4022   if (!has_aborted()) {
4023     // We cannot check whether the global stack is empty, since other
4024     // tasks might be pushing objects to it concurrently.
4025     assert(_cm->out_of_regions(),
4026            "at this point we should be out of regions");
4027 
4028     if (_cm->verbose_low()) {
4029       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4030     }
4031 
4032     // Try to reduce the number of available SATB buffers so that
4033     // remark has less work to do.
4034     drain_satb_buffers();
4035   }
4036 
4037   // Since we've done everything else, we can now totally drain the
4038   // local queue and global stack.
4039   drain_local_queue(false);
4040   drain_global_stack(false);
4041 
4042   // Attempt at work stealing from other tasks' queues.
4043   if (do_stealing && !has_aborted()) {
4044     // We have not aborted. This means that we have finished all that
4045     // we could. Let's try to do some stealing...
4046 
4047     // We cannot check whether the global stack is empty, since other
4048     // tasks might be pushing objects to it concurrently.
4049     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4050            "only way to reach here");
4051 
4052     if (_cm->verbose_low()) {
4053       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4054     }
4055 
4056     while (!has_aborted()) {
4057       oop obj;
4058       statsOnly( ++_steal_attempts );
4059 
4060       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4061         if (_cm->verbose_medium()) {
4062           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4063                                  _task_id, (void*) obj);
4064         }
4065 
4066         statsOnly( ++_steals );
4067 
4068         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4069                "any stolen object should be marked");
4070         scan_object(obj);
4071 
4072         // And since we're towards the end, let's totally drain the
4073         // local queue and global stack.
4074         drain_local_queue(false);
4075         drain_global_stack(false);
4076       } else {
4077         break;
4078       }
4079     }
4080   }
4081 
4082   // If we are about to wrap up and go into termination, check if we
4083   // should raise the overflow flag.
4084   if (do_termination && !has_aborted()) {
4085     if (_cm->force_overflow()->should_force()) {
4086       _cm->set_has_overflown();
4087       regular_clock_call();
4088     }
4089   }
4090 
4091   // We still haven't aborted. Now, let's try to get into the
4092   // termination protocol.
4093   if (do_termination && !has_aborted()) {
4094     // We cannot check whether the global stack is empty, since other
4095     // tasks might be concurrently pushing objects on it.
4096     // Separated the asserts so that we know which one fires.
4097     assert(_cm->out_of_regions(), "only way to reach here");
4098     assert(_task_queue->size() == 0, "only way to reach here");
4099 
4100     if (_cm->verbose_low()) {
4101       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4102     }
4103 
4104     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4105     // The CMTask class also extends the TerminatorTerminator class,
4106     // hence its should_exit_termination() method will also decide
4107     // whether to exit the termination protocol or not.
4108     bool finished = _cm->terminator()->offer_termination(this);
4109     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4110     _termination_time_ms +=
4111       termination_end_time_ms - _termination_start_time_ms;
4112 
4113     if (finished) {
4114       // We're all done.
4115 
4116       if (_task_id == 0) {
4117         // let's allow task 0 to do this
4118         if (concurrent()) {
4119           assert(_cm->concurrent_marking_in_progress(), "invariant");
4120           // we need to set this to false before the next
4121           // safepoint. This way we ensure that the marking phase
4122           // doesn't observe any more heap expansions.
4123           _cm->clear_concurrent_marking_in_progress();
4124         }
4125       }
4126 
4127       // We can now guarantee that the global stack is empty, since
4128       // all other tasks have finished. We separated the guarantees so
4129       // that, if a condition is false, we can immediately find out
4130       // which one.
4131       guarantee(_cm->out_of_regions(), "only way to reach here");
4132       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4133       guarantee(_task_queue->size() == 0, "only way to reach here");
4134       guarantee(!_cm->has_overflown(), "only way to reach here");
4135       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4136 
4137       if (_cm->verbose_low()) {
4138         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4139       }
4140     } else {
4141       // Apparently there's more work to do. Let's abort this task; its
4142       // caller will restart it and we can hopefully find more things to do.
4143 
4144       if (_cm->verbose_low()) {
4145         gclog_or_tty->print_cr("[%d] apparently there is more work to do",
4146                                _task_id);
4147       }
4148 
4149       set_has_aborted();
4150       statsOnly( ++_aborted_termination );
4151     }
4152   }
4153 
4154   // Mainly for debugging purposes to make sure that a pointer to the
4155   // closure which was statically allocated in this frame doesn't
4156   // escape it by accident.
4157   set_cm_oop_closure(NULL);
4158   double end_time_ms = os::elapsedVTime() * 1000.0;
4159   double elapsed_time_ms = end_time_ms - _start_time_ms;
4160   // Update the step history.
4161   _step_times_ms.add(elapsed_time_ms);
4162 
4163   if (has_aborted()) {
4164     // The task was aborted for some reason.
4165 
4166     statsOnly( ++_aborted );
4167 
4168     if (_has_timed_out) {
4169       double diff_ms = elapsed_time_ms - _time_target_ms;
4170       // Keep statistics of how well we did with respect to hitting
4171       // our target only if we actually timed out (if we aborted for
4172       // other reasons, then the results might get skewed).
4173       _marking_step_diffs_ms.add(diff_ms);
4174     }
4175 
4176     if (_cm->has_overflown()) {
4177       // This is the interesting one. We aborted because a global
4178       // overflow was raised. This means we have to restart the
4179       // marking phase and start iterating over regions. However, in
4180       // order to do this we have to make sure that all tasks stop
4181       // what they are doing and re-initialise in a safe manner. We
4182       // will achieve this with the use of two barrier sync points.
4183 
4184       if (_cm->verbose_low()) {
4185         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4186       }
4187 
4188       _cm->enter_first_sync_barrier(_task_id);
4189       // When we exit this sync barrier we know that all tasks have
4190       // stopped doing marking work. So, it's now safe to
4191       // re-initialise our data structures. At the end of this method,
4192       // task 0 will clear the global data structures.
4193 
4194       statsOnly( ++_aborted_overflow );
4195 
4196       // We clear the local state of this task...
4197       clear_region_fields();
4198 
4199       // ...and enter the second barrier.
4200       _cm->enter_second_sync_barrier(_task_id);
4201       // At this point everything has been re-initialised and we're
4202       // ready to restart.
4203     }
4204 
4205     if (_cm->verbose_low()) {
4206       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4207                              "elapsed = %1.2lfms <<<<<<<<<<",
4208                              _task_id, _time_target_ms, elapsed_time_ms);
4209       if (_cm->has_aborted()) {
4210         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4211                                _task_id);
4212       }
4213     }
4214   } else {
4215     if (_cm->verbose_low()) {
4216       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4217                              "elapsed = %1.2lfms <<<<<<<<<<",
4218                              _task_id, _time_target_ms, elapsed_time_ms);
4219     }
4220   }
4221 
4222   _claimed = false;
4223 }
4224 
4225 CMTask::CMTask(int task_id,
4226                ConcurrentMark* cm,
4227                size_t* marked_bytes,
4228                BitMap* card_bm,
4229                CMTaskQueue* task_queue,
4230                CMTaskQueueSet* task_queues)
4231   : _g1h(G1CollectedHeap::heap()),
4232     _task_id(task_id), _cm(cm),
4233     _claimed(false),
4234     _nextMarkBitMap(NULL), _hash_seed(17),
4235     _task_queue(task_queue),
4236     _task_queues(task_queues),
4237     _cm_oop_closure(NULL),
4238     _marked_bytes_array(marked_bytes),
4239     _card_bm(card_bm) {
4240   guarantee(task_queue != NULL, "invariant");
4241   guarantee(task_queues != NULL, "invariant");
4242 
4243   statsOnly( _clock_due_to_scanning = 0;
4244              _clock_due_to_marking  = 0 );
4245 
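       // Seed the history of (elapsed - target) step time diffs with a
       // single 0.5 ms entry, presumably so that predictions based on
       // this sequence never have to start from an empty sample set.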
4246   _marking_step_diffs_ms.add(0.5);
4247 }
4248 
4249 // These are formatting macros that are used below to ensure
4250 // consistent formatting. The *_H_* versions are used to format the
4251 // header for a particular value and they should be kept consistent
4252 // with the corresponding macro. Also note that most of the macros add
4253 // the necessary white space (as a prefix), which makes them a bit
4254 // easier to compose.
4255 
4256 // All the output lines are prefixed with this string to be able to
4257 // identify them easily in a large log file.
4258 #define G1PPRL_LINE_PREFIX            "###"
4259 
4260 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
4261 #ifdef _LP64
4262 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4263 #else // _LP64
4264 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4265 #endif // _LP64
4266 
4267 // For per-region info
4268 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4269 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4270 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
4271 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4272 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4273 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4274 
4275 // For summary info
4276 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
4277 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
4278 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
4279 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
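     // The macros above expand to adjacent string literals which the
     // compiler concatenates into a single format string; e.g.,
     // G1PPRL_SUM_MB_FORMAT("capacity") becomes "  capacity: %1.2f MB".
     // This is what allows them to be chained in the print_cr() calls below.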
4280 
4281 G1PrintRegionLivenessInfoClosure::
4282 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4283   : _out(out),
4284     _total_used_bytes(0), _total_capacity_bytes(0),
4285     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4286     _hum_used_bytes(0), _hum_capacity_bytes(0),
4287     _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4288   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4289   MemRegion g1_committed = g1h->g1_committed();
4290   MemRegion g1_reserved = g1h->g1_reserved();
4291   double now = os::elapsedTime();
4292 
4293   // Print the header of the output.
4294   _out->cr();
4295   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4296   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4297                  G1PPRL_SUM_ADDR_FORMAT("committed")
4298                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4299                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4300                  g1_committed.start(), g1_committed.end(),
4301                  g1_reserved.start(), g1_reserved.end(),
4302                  HeapRegion::GrainBytes);
4303   _out->print_cr(G1PPRL_LINE_PREFIX);
4304   _out->print_cr(G1PPRL_LINE_PREFIX
4305                  G1PPRL_TYPE_H_FORMAT
4306                  G1PPRL_ADDR_BASE_H_FORMAT
4307                  G1PPRL_BYTE_H_FORMAT
4308                  G1PPRL_BYTE_H_FORMAT
4309                  G1PPRL_BYTE_H_FORMAT
4310                  G1PPRL_DOUBLE_H_FORMAT,
4311                  "type", "address-range",
4312                  "used", "prev-live", "next-live", "gc-eff");
4313   _out->print_cr(G1PPRL_LINE_PREFIX
4314                  G1PPRL_TYPE_H_FORMAT
4315                  G1PPRL_ADDR_BASE_H_FORMAT
4316                  G1PPRL_BYTE_H_FORMAT
4317                  G1PPRL_BYTE_H_FORMAT
4318                  G1PPRL_BYTE_H_FORMAT
4319                  G1PPRL_DOUBLE_H_FORMAT,
4320                  "", "",
4321                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4322 }
4323 
4324 // It takes as a parameter a pointer to one of the _hum_* fields; it
4325 // deduces the corresponding value for a region in a humongous region
4326 // series (either the region size, or what's left if the _hum_* field
4327 // is < the region size), and updates the _hum_* field accordingly.
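     // For example, assuming a 1 MB region size, a starting value of
     // 2.5 MB in *hum_bytes yields 1 MB, 1 MB and 0.5 MB over three
     // successive calls, leaving *hum_bytes at 0.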
4328 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4329   size_t bytes = 0;
4330   // The > 0 check is to deal with the prev and next live bytes which
4331   // could be 0.
4332   if (*hum_bytes > 0) {
4333     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4334     *hum_bytes -= bytes;
4335   }
4336   return bytes;
4337 }
4338 
4339 // It deduces the values for a region in a humongous region series
4340 // from the _hum_* fields and updates those accordingly. It assumes
4341 // that the _hum_* fields have already been set up from the "starts
4342 // humongous" region and we visit the regions in address order.
4343 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4344                                                      size_t* capacity_bytes,
4345                                                      size_t* prev_live_bytes,
4346                                                      size_t* next_live_bytes) {
4347   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4348   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4349   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4350   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4351   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4352 }
4353 
4354 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4355   const char* type = "";
4356   HeapWord* bottom       = r->bottom();
4357   HeapWord* end          = r->end();
4358   size_t capacity_bytes  = r->capacity();
4359   size_t used_bytes      = r->used();
4360   size_t prev_live_bytes = r->live_bytes();
4361   size_t next_live_bytes = r->next_live_bytes();
4362   double gc_eff          = r->gc_efficiency();
4363   if (r->used() == 0) {
4364     type = "FREE";
4365   } else if (r->is_survivor()) {
4366     type = "SURV";
4367   } else if (r->is_young()) {
4368     type = "EDEN";
4369   } else if (r->startsHumongous()) {
4370     type = "HUMS";
4371 
4372     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4373            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4374            "they should have been zeroed after the last time we used them");
4375     // Set up the _hum_* fields.
4376     _hum_capacity_bytes  = capacity_bytes;
4377     _hum_used_bytes      = used_bytes;
4378     _hum_prev_live_bytes = prev_live_bytes;
4379     _hum_next_live_bytes = next_live_bytes;
4380     get_hum_bytes(&used_bytes, &capacity_bytes,
4381                   &prev_live_bytes, &next_live_bytes);
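         // A "starts humongous" region may report an end() that covers the
         // whole humongous series, so clamp the printed range to a single
         // region here.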
4382     end = bottom + HeapRegion::GrainWords;
4383   } else if (r->continuesHumongous()) {
4384     type = "HUMC";
4385     get_hum_bytes(&used_bytes, &capacity_bytes,
4386                   &prev_live_bytes, &next_live_bytes);
4387     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4388   } else {
4389     type = "OLD";
4390   }
4391 
4392   _total_used_bytes      += used_bytes;
4393   _total_capacity_bytes  += capacity_bytes;
4394   _total_prev_live_bytes += prev_live_bytes;
4395   _total_next_live_bytes += next_live_bytes;
4396 
4397   // Print a line for this particular region.
4398   _out->print_cr(G1PPRL_LINE_PREFIX
4399                  G1PPRL_TYPE_FORMAT
4400                  G1PPRL_ADDR_BASE_FORMAT
4401                  G1PPRL_BYTE_FORMAT
4402                  G1PPRL_BYTE_FORMAT
4403                  G1PPRL_BYTE_FORMAT
4404                  G1PPRL_DOUBLE_FORMAT,
4405                  type, bottom, end,
4406                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4407 
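       // Returning false tells the caller to continue iterating over the
       // remaining heap regions.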
4408   return false;
4409 }
4410 
4411 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4412   // Print the footer of the output.
4413   _out->print_cr(G1PPRL_LINE_PREFIX);
4414   _out->print_cr(G1PPRL_LINE_PREFIX
4415                  " SUMMARY"
4416                  G1PPRL_SUM_MB_FORMAT("capacity")
4417                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4418                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4419                  G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4420                  bytes_to_mb(_total_capacity_bytes),
4421                  bytes_to_mb(_total_used_bytes),
4422                  perc(_total_used_bytes, _total_capacity_bytes),
4423                  bytes_to_mb(_total_prev_live_bytes),
4424                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4425                  bytes_to_mb(_total_next_live_bytes),
4426                  perc(_total_next_live_bytes, _total_capacity_bytes));
4427   _out->cr();
4428 }