1 /*
   2  * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/symbolTable.hpp"
  27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
  28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
  32 #include "gc_implementation/g1/g1Log.hpp"
  33 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
  34 #include "gc_implementation/g1/g1RemSet.hpp"
  35 #include "gc_implementation/g1/heapRegion.inline.hpp"
  36 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
  38 #include "gc_implementation/shared/vmGCOperations.hpp"
  39 #include "memory/genOopClosures.inline.hpp"
  40 #include "memory/referencePolicy.hpp"
  41 #include "memory/resourceArea.hpp"
  42 #include "oops/oop.inline.hpp"
  43 #include "runtime/handles.inline.hpp"
  44 #include "runtime/java.hpp"
  45 
  46 // Concurrent marking bit map wrapper
  47 
  48 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  49   _bm((uintptr_t*)NULL,0),
  50   _shifter(shifter) {
  51   _bmStartWord = (HeapWord*)(rs.base());
  52   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  53   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
  54                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  55 
  56   guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
  57   // For now we'll just commit all of the bit map up fromt.
  58   // Later on we'll try to be more parsimonious with swap.
  59   guarantee(_virtual_space.initialize(brs, brs.size()),
  60             "couldn't reseve backing store for concurrent marking bit map");
  61   assert(_virtual_space.committed_size() == brs.size(),
  62          "didn't reserve backing store for all of concurrent marking bit map?");
  63   _bm.set_map((uintptr_t*)_virtual_space.low());
  64   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
  65          _bmWordSize, "inconsistency in bit map sizing");
  66   _bm.set_size(_bmWordSize >> _shifter);
  67 }
  68 
  69 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  70                                                HeapWord* limit) const {
  71   // First we must round addr *up* to a possible object boundary.
  72   addr = (HeapWord*)align_size_up((intptr_t)addr,
  73                                   HeapWordSize << _shifter);
  74   size_t addrOffset = heapWordToOffset(addr);
  75   if (limit == NULL) {
  76     limit = _bmStartWord + _bmWordSize;
  77   }
  78   size_t limitOffset = heapWordToOffset(limit);
  79   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  80   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  81   assert(nextAddr >= addr, "get_next_one postcondition");
  82   assert(nextAddr == limit || isMarked(nextAddr),
  83          "get_next_one postcondition");
  84   return nextAddr;
  85 }
  86 
  87 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  88                                                  HeapWord* limit) const {
  89   size_t addrOffset = heapWordToOffset(addr);
  90   if (limit == NULL) {
  91     limit = _bmStartWord + _bmWordSize;
  92   }
  93   size_t limitOffset = heapWordToOffset(limit);
  94   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  95   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  96   assert(nextAddr >= addr, "get_next_one postcondition");
  97   assert(nextAddr == limit || !isMarked(nextAddr),
  98          "get_next_one postcondition");
  99   return nextAddr;
 100 }
 101 
 102 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 103   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 104   return (int) (diff >> _shifter);
 105 }
 106 
 107 #ifndef PRODUCT
 108 bool CMBitMapRO::covers(ReservedSpace rs) const {
 109   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 110   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
 111          "size inconsistency");
 112   return _bmStartWord == (HeapWord*)(rs.base()) &&
 113          _bmWordSize  == rs.size()>>LogHeapWordSize;
 114 }
 115 #endif
 116 
 117 void CMBitMap::clearAll() {
 118   _bm.clear();
 119   return;
 120 }
 121 
 122 void CMBitMap::markRange(MemRegion mr) {
 123   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 124   assert(!mr.is_empty(), "unexpected empty region");
 125   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 126           ((HeapWord *) mr.end())),
 127          "markRange memory region end is not card aligned");
 128   // convert address range into offset range
 129   _bm.at_put_range(heapWordToOffset(mr.start()),
 130                    heapWordToOffset(mr.end()), true);
 131 }
 132 
 133 void CMBitMap::clearRange(MemRegion mr) {
 134   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 135   assert(!mr.is_empty(), "unexpected empty region");
 136   // convert address range into offset range
 137   _bm.at_put_range(heapWordToOffset(mr.start()),
 138                    heapWordToOffset(mr.end()), false);
 139 }
 140 
 141 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 142                                             HeapWord* end_addr) {
 143   HeapWord* start = getNextMarkedWordAddress(addr);
 144   start = MIN2(start, end_addr);
 145   HeapWord* end   = getNextUnmarkedWordAddress(start);
 146   end = MIN2(end, end_addr);
 147   assert(start <= end, "Consistency check");
 148   MemRegion mr(start, end);
 149   if (!mr.is_empty()) {
 150     clearRange(mr);
 151   }
 152   return mr;
 153 }
 154 
 155 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 156   _base(NULL), _cm(cm)
 157 #ifdef ASSERT
 158   , _drain_in_progress(false)
 159   , _drain_in_progress_yields(false)
 160 #endif
 161 {}
 162 
 163 void CMMarkStack::allocate(size_t size) {
 164   _base = NEW_C_HEAP_ARRAY(oop, size);
 165   if (_base == NULL) {
 166     vm_exit_during_initialization("Failed to allocate CM region mark stack");
 167   }
 168   _index = 0;
 169   _capacity = (jint) size;
 170   _saved_index = -1;
 171   NOT_PRODUCT(_max_depth = 0);
 172 }
 173 
 174 CMMarkStack::~CMMarkStack() {
 175   if (_base != NULL) {
 176     FREE_C_HEAP_ARRAY(oop, _base);
 177   }
 178 }
 179 
 180 void CMMarkStack::par_push(oop ptr) {
 181   while (true) {
 182     if (isFull()) {
 183       _overflow = true;
 184       return;
 185     }
 186     // Otherwise...
 187     jint index = _index;
 188     jint next_index = index+1;
 189     jint res = Atomic::cmpxchg(next_index, &_index, index);
 190     if (res == index) {
 191       _base[index] = ptr;
 192       // Note that we don't maintain this atomically.  We could, but it
 193       // doesn't seem necessary.
 194       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 195       return;
 196     }
 197     // Otherwise, we need to try again.
 198   }
 199 }
 200 
 201 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 202   while (true) {
 203     if (isFull()) {
 204       _overflow = true;
 205       return;
 206     }
 207     // Otherwise...
 208     jint index = _index;
 209     jint next_index = index + n;
 210     if (next_index > _capacity) {
 211       _overflow = true;
 212       return;
 213     }
 214     jint res = Atomic::cmpxchg(next_index, &_index, index);
 215     if (res == index) {
 216       for (int i = 0; i < n; i++) {
 217         int ind = index + i;
 218         assert(ind < _capacity, "By overflow test above.");
 219         _base[ind] = ptr_arr[i];
 220       }
 221       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 222       return;
 223     }
 224     // Otherwise, we need to try again.
 225   }
 226 }
 227 
 228 
 229 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 230   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 231   jint start = _index;
 232   jint next_index = start + n;
 233   if (next_index > _capacity) {
 234     _overflow = true;
 235     return;
 236   }
 237   // Otherwise.
 238   _index = next_index;
 239   for (int i = 0; i < n; i++) {
 240     int ind = start + i;
 241     assert(ind < _capacity, "By overflow test above.");
 242     _base[ind] = ptr_arr[i];
 243   }
 244 }
 245 
 246 
 247 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 248   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 249   jint index = _index;
 250   if (index == 0) {
 251     *n = 0;
 252     return false;
 253   } else {
 254     int k = MIN2(max, index);
 255     jint new_ind = index - k;
 256     for (int j = 0; j < k; j++) {
 257       ptr_arr[j] = _base[new_ind + j];
 258     }
 259     _index = new_ind;
 260     *n = k;
 261     return true;
 262   }
 263 }
 264 
 265 template<class OopClosureClass>
 266 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 267   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 268          || SafepointSynchronize::is_at_safepoint(),
 269          "Drain recursion must be yield-safe.");
 270   bool res = true;
 271   debug_only(_drain_in_progress = true);
 272   debug_only(_drain_in_progress_yields = yield_after);
 273   while (!isEmpty()) {
 274     oop newOop = pop();
 275     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 276     assert(newOop->is_oop(), "Expected an oop");
 277     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 278            "only grey objects on this stack");
 279     newOop->oop_iterate(cl);
 280     if (yield_after && _cm->do_yield_check()) {
 281       res = false;
 282       break;
 283     }
 284   }
 285   debug_only(_drain_in_progress = false);
 286   return res;
 287 }
 288 
 289 void CMMarkStack::note_start_of_gc() {
 290   assert(_saved_index == -1,
 291          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 292   _saved_index = _index;
 293 }
 294 
 295 void CMMarkStack::note_end_of_gc() {
 296   // This is intentionally a guarantee, instead of an assert. If we
 297   // accidentally add something to the mark stack during GC, it
 298   // will be a correctness issue so it's better if we crash. we'll
 299   // only check this once per GC anyway, so it won't be a performance
 300   // issue in any way.
 301   guarantee(_saved_index == _index,
 302             err_msg("saved index: %d index: %d", _saved_index, _index));
 303   _saved_index = -1;
 304 }
 305 
 306 void CMMarkStack::oops_do(OopClosure* f) {
 307   assert(_saved_index == _index,
 308          err_msg("saved index: %d index: %d", _saved_index, _index));
 309   for (int i = 0; i < _index; i += 1) {
 310     f->do_oop(&_base[i]);
 311   }
 312 }
 313 
 314 bool ConcurrentMark::not_yet_marked(oop obj) const {
 315   return (_g1h->is_obj_ill(obj)
 316           || (_g1h->is_in_permanent(obj)
 317               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
 318 }
 319 
 320 CMRootRegions::CMRootRegions() :
 321   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 322   _should_abort(false),  _next_survivor(NULL) { }
 323 
 324 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 325   _young_list = g1h->young_list();
 326   _cm = cm;
 327 }
 328 
 329 void CMRootRegions::prepare_for_scan() {
 330   assert(!scan_in_progress(), "pre-condition");
 331 
 332   // Currently, only survivors can be root regions.
 333   assert(_next_survivor == NULL, "pre-condition");
 334   _next_survivor = _young_list->first_survivor_region();
 335   _scan_in_progress = (_next_survivor != NULL);
 336   _should_abort = false;
 337 }
 338 
 339 HeapRegion* CMRootRegions::claim_next() {
 340   if (_should_abort) {
 341     // If someone has set the should_abort flag, we return NULL to
 342     // force the caller to bail out of their loop.
 343     return NULL;
 344   }
 345 
 346   // Currently, only survivors can be root regions.
 347   HeapRegion* res = _next_survivor;
 348   if (res != NULL) {
 349     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 350     // Read it again in case it changed while we were waiting for the lock.
 351     res = _next_survivor;
 352     if (res != NULL) {
 353       if (res == _young_list->last_survivor_region()) {
 354         // We just claimed the last survivor so store NULL to indicate
 355         // that we're done.
 356         _next_survivor = NULL;
 357       } else {
 358         _next_survivor = res->get_next_young_region();
 359       }
 360     } else {
 361       // Someone else claimed the last survivor while we were trying
 362       // to take the lock so nothing else to do.
 363     }
 364   }
 365   assert(res == NULL || res->is_survivor(), "post-condition");
 366 
 367   return res;
 368 }
 369 
 370 void CMRootRegions::scan_finished() {
 371   assert(scan_in_progress(), "pre-condition");
 372 
 373   // Currently, only survivors can be root regions.
 374   if (!_should_abort) {
 375     assert(_next_survivor == NULL, "we should have claimed all survivors");
 376   }
 377   _next_survivor = NULL;
 378 
 379   {
 380     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 381     _scan_in_progress = false;
 382     RootRegionScan_lock->notify_all();
 383   }
 384 }
 385 
 386 bool CMRootRegions::wait_until_scan_finished() {
 387   if (!scan_in_progress()) return false;
 388 
 389   {
 390     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 391     while (scan_in_progress()) {
 392       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 393     }
 394   }
 395   return true;
 396 }
 397 
 398 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 399 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 400 #endif // _MSC_VER
 401 
 402 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 403   return MAX2((n_par_threads + 2) / 4, 1U);
 404 }
 405 
 406 ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
 407   _markBitMap1(rs, MinObjAlignment - 1),
 408   _markBitMap2(rs, MinObjAlignment - 1),
 409 
 410   _parallel_marking_threads(0),
 411   _max_parallel_marking_threads(0),
 412   _sleep_factor(0.0),
 413   _marking_task_overhead(1.0),
 414   _cleanup_sleep_factor(0.0),
 415   _cleanup_task_overhead(1.0),
 416   _cleanup_list("Cleanup List"),
 417   _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
 418   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
 419            CardTableModRefBS::card_shift,
 420            false /* in_resource_area*/),
 421 
 422   _prevMarkBitMap(&_markBitMap1),
 423   _nextMarkBitMap(&_markBitMap2),
 424 
 425   _markStack(this),
 426   // _finger set in set_non_marking_state
 427 
 428   _max_task_num(MAX2((uint)ParallelGCThreads, 1U)),
 429   // _active_tasks set in set_non_marking_state
 430   // _tasks set inside the constructor
 431   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
 432   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
 433 
 434   _has_overflown(false),
 435   _concurrent(false),
 436   _has_aborted(false),
 437   _restart_for_overflow(false),
 438   _concurrent_marking_in_progress(false),
 439 
 440   // _verbose_level set below
 441 
 442   _init_times(),
 443   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 444   _cleanup_times(),
 445   _total_counting_time(0.0),
 446   _total_rs_scrub_time(0.0),
 447 
 448   _parallel_workers(NULL),
 449 
 450   _count_card_bitmaps(NULL),
 451   _count_marked_bytes(NULL) {
 452   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 453   if (verbose_level < no_verbose) {
 454     verbose_level = no_verbose;
 455   }
 456   if (verbose_level > high_verbose) {
 457     verbose_level = high_verbose;
 458   }
 459   _verbose_level = verbose_level;
 460 
 461   if (verbose_low()) {
 462     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 463                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
 464   }
 465 
 466   _markStack.allocate(MarkStackSize);
 467 
 468   // Create & start a ConcurrentMark thread.
 469   _cmThread = new ConcurrentMarkThread(this);
 470   assert(cmThread() != NULL, "CM Thread should have been created");
 471   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 472 
 473   _g1h = G1CollectedHeap::heap();
 474   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 475   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
 476   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
 477 
 478   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 479   satb_qs.set_buffer_size(G1SATBBufferSize);
 480 
 481   _root_regions.init(_g1h, this);
 482 
 483   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
 484   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
 485 
 486   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_task_num);
 487   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num);
 488 
 489   BitMap::idx_t card_bm_size = _card_bm.size();
 490 
 491   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 492   _active_tasks = _max_task_num;
 493   for (int i = 0; i < (int) _max_task_num; ++i) {
 494     CMTaskQueue* task_queue = new CMTaskQueue();
 495     task_queue->initialize();
 496     _task_queues->register_queue(i, task_queue);
 497 
 498     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 499     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions);
 500 
 501     _tasks[i] = new CMTask(i, this,
 502                            _count_marked_bytes[i],
 503                            &_count_card_bitmaps[i],
 504                            task_queue, _task_queues);
 505 
 506     _accum_task_vtime[i] = 0.0;
 507   }
 508 
 509   // Calculate the card number for the bottom of the heap. Used
 510   // in biasing indexes into the accounting card bitmaps.
 511   _heap_bottom_card_num =
 512     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 513                                 CardTableModRefBS::card_shift);
 514 
 515   // Clear all the liveness counting data
 516   clear_all_count_data();
 517 
 518   if (ConcGCThreads > ParallelGCThreads) {
 519     vm_exit_during_initialization("Can't have more ConcGCThreads "
 520                                   "than ParallelGCThreads.");
 521   }
 522   if (ParallelGCThreads == 0) {
 523     // if we are not running with any parallel GC threads we will not
 524     // spawn any marking threads either
 525     _parallel_marking_threads =       0;
 526     _max_parallel_marking_threads =   0;
 527     _sleep_factor             =     0.0;
 528     _marking_task_overhead    =     1.0;
 529   } else {
 530     if (ConcGCThreads > 0) {
 531       // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
 532       // if both are set
 533 
 534       _parallel_marking_threads = (uint) ConcGCThreads;
 535       _max_parallel_marking_threads = _parallel_marking_threads;
 536       _sleep_factor             = 0.0;
 537       _marking_task_overhead    = 1.0;
 538     } else if (G1MarkingOverheadPercent > 0) {
 539       // we will calculate the number of parallel marking threads
 540       // based on a target overhead with respect to the soft real-time
 541       // goal
 542 
 543       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 544       double overall_cm_overhead =
 545         (double) MaxGCPauseMillis * marking_overhead /
 546         (double) GCPauseIntervalMillis;
 547       double cpu_ratio = 1.0 / (double) os::processor_count();
 548       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 549       double marking_task_overhead =
 550         overall_cm_overhead / marking_thread_num *
 551                                                 (double) os::processor_count();
 552       double sleep_factor =
 553                          (1.0 - marking_task_overhead) / marking_task_overhead;
 554 
 555       _parallel_marking_threads = (uint) marking_thread_num;
 556       _max_parallel_marking_threads = _parallel_marking_threads;
 557       _sleep_factor             = sleep_factor;
 558       _marking_task_overhead    = marking_task_overhead;
 559     } else {
 560       _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
 561       _max_parallel_marking_threads = _parallel_marking_threads;
 562       _sleep_factor             = 0.0;
 563       _marking_task_overhead    = 1.0;
 564     }
 565 
 566     if (parallel_marking_threads() > 1) {
 567       _cleanup_task_overhead = 1.0;
 568     } else {
 569       _cleanup_task_overhead = marking_task_overhead();
 570     }
 571     _cleanup_sleep_factor =
 572                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 573 
 574 #if 0
 575     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 576     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 577     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 578     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 579     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 580 #endif
 581 
 582     guarantee(parallel_marking_threads() > 0, "peace of mind");
 583     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
 584          _max_parallel_marking_threads, false, true);
 585     if (_parallel_workers == NULL) {
 586       vm_exit_during_initialization("Failed necessary allocation.");
 587     } else {
 588       _parallel_workers->initialize_workers();
 589     }
 590   }
 591 
 592   // so that the call below can read a sensible value
 593   _heap_start = (HeapWord*) rs.base();
 594   set_non_marking_state();
 595 }
 596 
 597 void ConcurrentMark::update_g1_committed(bool force) {
 598   // If concurrent marking is not in progress, then we do not need to
 599   // update _heap_end.
 600   if (!concurrent_marking_in_progress() && !force) return;
 601 
 602   MemRegion committed = _g1h->g1_committed();
 603   assert(committed.start() == _heap_start, "start shouldn't change");
 604   HeapWord* new_end = committed.end();
 605   if (new_end > _heap_end) {
 606     // The heap has been expanded.
 607 
 608     _heap_end = new_end;
 609   }
 610   // Notice that the heap can also shrink. However, this only happens
 611   // during a Full GC (at least currently) and the entire marking
 612   // phase will bail out and the task will not be restarted. So, let's
 613   // do nothing.
 614 }
 615 
 616 void ConcurrentMark::reset() {
 617   // Starting values for these two. This should be called in a STW
 618   // phase. CM will be notified of any future g1_committed expansions
 619   // will be at the end of evacuation pauses, when tasks are
 620   // inactive.
 621   MemRegion committed = _g1h->g1_committed();
 622   _heap_start = committed.start();
 623   _heap_end   = committed.end();
 624 
 625   // Separated the asserts so that we know which one fires.
 626   assert(_heap_start != NULL, "heap bounds should look ok");
 627   assert(_heap_end != NULL, "heap bounds should look ok");
 628   assert(_heap_start < _heap_end, "heap bounds should look ok");
 629 
 630   // reset all the marking data structures and any necessary flags
 631   clear_marking_state();
 632 
 633   if (verbose_low()) {
 634     gclog_or_tty->print_cr("[global] resetting");
 635   }
 636 
 637   // We do reset all of them, since different phases will use
 638   // different number of active threads. So, it's easiest to have all
 639   // of them ready.
 640   for (int i = 0; i < (int) _max_task_num; ++i) {
 641     _tasks[i]->reset(_nextMarkBitMap);
 642   }
 643 
 644   // we need this to make sure that the flag is on during the evac
 645   // pause with initial mark piggy-backed
 646   set_concurrent_marking_in_progress();
 647 }
 648 
 649 void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
 650   assert(active_tasks <= _max_task_num, "we should not have more");
 651 
 652   _active_tasks = active_tasks;
 653   // Need to update the three data structures below according to the
 654   // number of active threads for this phase.
 655   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 656   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 657   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 658 
 659   _concurrent = concurrent;
 660   // We propagate this to all tasks, not just the active ones.
 661   for (int i = 0; i < (int) _max_task_num; ++i)
 662     _tasks[i]->set_concurrent(concurrent);
 663 
 664   if (concurrent) {
 665     set_concurrent_marking_in_progress();
 666   } else {
 667     // We currently assume that the concurrent flag has been set to
 668     // false before we start remark. At this point we should also be
 669     // in a STW phase.
 670     assert(!concurrent_marking_in_progress(), "invariant");
 671     assert(_finger == _heap_end, "only way to get here");
 672     update_g1_committed(true);
 673   }
 674 }
 675 
 676 void ConcurrentMark::set_non_marking_state() {
 677   // We set the global marking state to some default values when we're
 678   // not doing marking.
 679   clear_marking_state();
 680   _active_tasks = 0;
 681   clear_concurrent_marking_in_progress();
 682 }
 683 
 684 ConcurrentMark::~ConcurrentMark() {
 685   // The ConcurrentMark instance is never freed.
 686   ShouldNotReachHere();
 687 }
 688 
 689 void ConcurrentMark::clearNextBitmap() {
 690   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 691   G1CollectorPolicy* g1p = g1h->g1_policy();
 692 
 693   // Make sure that the concurrent mark thread looks to still be in
 694   // the current cycle.
 695   guarantee(cmThread()->during_cycle(), "invariant");
 696 
 697   // We are finishing up the current cycle by clearing the next
 698   // marking bitmap and getting it ready for the next cycle. During
 699   // this time no other cycle can start. So, let's make sure that this
 700   // is the case.
 701   guarantee(!g1h->mark_in_progress(), "invariant");
 702 
 703   // clear the mark bitmap (no grey objects to start with).
 704   // We need to do this in chunks and offer to yield in between
 705   // each chunk.
 706   HeapWord* start  = _nextMarkBitMap->startWord();
 707   HeapWord* end    = _nextMarkBitMap->endWord();
 708   HeapWord* cur    = start;
 709   size_t chunkSize = M;
 710   while (cur < end) {
 711     HeapWord* next = cur + chunkSize;
 712     if (next > end) {
 713       next = end;
 714     }
 715     MemRegion mr(cur,next);
 716     _nextMarkBitMap->clearRange(mr);
 717     cur = next;
 718     do_yield_check();
 719 
 720     // Repeat the asserts from above. We'll do them as asserts here to
 721     // minimize their overhead on the product. However, we'll have
 722     // them as guarantees at the beginning / end of the bitmap
 723     // clearing to get some checking in the product.
 724     assert(cmThread()->during_cycle(), "invariant");
 725     assert(!g1h->mark_in_progress(), "invariant");
 726   }
 727 
 728   // Clear the liveness counting data
 729   clear_all_count_data();
 730 
 731   // Repeat the asserts from above.
 732   guarantee(cmThread()->during_cycle(), "invariant");
 733   guarantee(!g1h->mark_in_progress(), "invariant");
 734 }
 735 
 736 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 737 public:
 738   bool doHeapRegion(HeapRegion* r) {
 739     if (!r->continuesHumongous()) {
 740       r->note_start_of_marking();
 741     }
 742     return false;
 743   }
 744 };
 745 
 746 void ConcurrentMark::checkpointRootsInitialPre() {
 747   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 748   G1CollectorPolicy* g1p = g1h->g1_policy();
 749 
 750   _has_aborted = false;
 751 
 752 #ifndef PRODUCT
 753   if (G1PrintReachableAtInitialMark) {
 754     print_reachable("at-cycle-start",
 755                     VerifyOption_G1UsePrevMarking, true /* all */);
 756   }
 757 #endif
 758 
 759   // Initialise marking structures. This has to be done in a STW phase.
 760   reset();
 761 
 762   // For each region note start of marking.
 763   NoteStartOfMarkHRClosure startcl;
 764   g1h->heap_region_iterate(&startcl);
 765 }
 766 
 767 
 768 void ConcurrentMark::checkpointRootsInitialPost() {
 769   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 770 
 771   // If we force an overflow during remark, the remark operation will
 772   // actually abort and we'll restart concurrent marking. If we always
 773   // force an oveflow during remark we'll never actually complete the
 774   // marking phase. So, we initilize this here, at the start of the
 775   // cycle, so that at the remaining overflow number will decrease at
 776   // every remark and we'll eventually not need to cause one.
 777   force_overflow_stw()->init();
 778 
 779   // Start Concurrent Marking weak-reference discovery.
 780   ReferenceProcessor* rp = g1h->ref_processor_cm();
 781   // enable ("weak") refs discovery
 782   rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
 783   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 784 
 785   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 786   // This is the start of  the marking cycle, we're expected all
 787   // threads to have SATB queues with active set to false.
 788   satb_mq_set.set_active_all_threads(true, /* new active value */
 789                                      false /* expected_active */);
 790 
 791   _root_regions.prepare_for_scan();
 792 
 793   // update_g1_committed() will be called at the end of an evac pause
 794   // when marking is on. So, it's also called at the end of the
 795   // initial-mark pause to update the heap end, if the heap expands
 796   // during it. No need to call it here.
 797 }
 798 
 799 /*
 800  * Notice that in the next two methods, we actually leave the STS
 801  * during the barrier sync and join it immediately afterwards. If we
 802  * do not do this, the following deadlock can occur: one thread could
 803  * be in the barrier sync code, waiting for the other thread to also
 804  * sync up, whereas another one could be trying to yield, while also
 805  * waiting for the other threads to sync up too.
 806  *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 812  *
 813  * Because the thread that does the sync barrier has left the STS, it
 814  * is possible to be suspended for a Full GC or an evacuation pause
 815  * could occur. This is actually safe, since the entering the sync
 816  * barrier is one of the last things do_marking_step() does, and it
 817  * doesn't manipulate any data structures afterwards.
 818  */
 819 
 820 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
 821   if (verbose_low()) {
 822     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
 823   }
 824 
 825   if (concurrent()) {
 826     ConcurrentGCThread::stsLeave();
 827   }
 828   _first_overflow_barrier_sync.enter();
 829   if (concurrent()) {
 830     ConcurrentGCThread::stsJoin();
 831   }
 832   // at this point everyone should have synced up and not be doing any
 833   // more work
 834 
 835   if (verbose_low()) {
 836     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
 837   }
 838 
 839   // let task 0 do this
 840   if (task_num == 0) {
 841     // task 0 is responsible for clearing the global data structures
 842     // We should be here because of an overflow. During STW we should
 843     // not clear the overflow flag since we rely on it being true when
 844     // we exit this method to abort the pause and restart concurent
 845     // marking.
 846     clear_marking_state(concurrent() /* clear_overflow */);
 847     force_overflow()->update();
 848 
 849     if (G1Log::fine()) {
 850       gclog_or_tty->date_stamp(PrintGCDateStamps);
 851       gclog_or_tty->stamp(PrintGCTimeStamps);
 852       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 853     }
 854   }
 855 
 856   // after this, each task should reset its own data structures then
 857   // then go into the second barrier
 858 }
 859 
 860 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
 861   if (verbose_low()) {
 862     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
 863   }
 864 
 865   if (concurrent()) {
 866     ConcurrentGCThread::stsLeave();
 867   }
 868   _second_overflow_barrier_sync.enter();
 869   if (concurrent()) {
 870     ConcurrentGCThread::stsJoin();
 871   }
 872   // at this point everything should be re-initialised and ready to go
 873 
 874   if (verbose_low()) {
 875     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
 876   }
 877 }
 878 
 879 #ifndef PRODUCT
 880 void ForceOverflowSettings::init() {
 881   _num_remaining = G1ConcMarkForceOverflow;
 882   _force = false;
 883   update();
 884 }
 885 
 886 void ForceOverflowSettings::update() {
 887   if (_num_remaining > 0) {
 888     _num_remaining -= 1;
 889     _force = true;
 890   } else {
 891     _force = false;
 892   }
 893 }
 894 
 895 bool ForceOverflowSettings::should_force() {
 896   if (_force) {
 897     _force = false;
 898     return true;
 899   } else {
 900     return false;
 901   }
 902 }
 903 #endif // !PRODUCT
 904 
 905 class CMConcurrentMarkingTask: public AbstractGangTask {
 906 private:
 907   ConcurrentMark*       _cm;
 908   ConcurrentMarkThread* _cmt;
 909 
 910 public:
 911   void work(uint worker_id) {
 912     assert(Thread::current()->is_ConcurrentGC_thread(),
 913            "this should only be done by a conc GC thread");
 914     ResourceMark rm;
 915 
 916     double start_vtime = os::elapsedVTime();
 917 
 918     ConcurrentGCThread::stsJoin();
 919 
 920     assert(worker_id < _cm->active_tasks(), "invariant");
 921     CMTask* the_task = _cm->task(worker_id);
 922     the_task->record_start_time();
 923     if (!_cm->has_aborted()) {
 924       do {
 925         double start_vtime_sec = os::elapsedVTime();
 926         double start_time_sec = os::elapsedTime();
 927         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 928 
 929         the_task->do_marking_step(mark_step_duration_ms,
 930                                   true /* do_stealing    */,
 931                                   true /* do_termination */);
 932 
 933         double end_time_sec = os::elapsedTime();
 934         double end_vtime_sec = os::elapsedVTime();
 935         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
 936         double elapsed_time_sec = end_time_sec - start_time_sec;
 937         _cm->clear_has_overflown();
 938 
 939         bool ret = _cm->do_yield_check(worker_id);
 940 
 941         jlong sleep_time_ms;
 942         if (!_cm->has_aborted() && the_task->has_aborted()) {
 943           sleep_time_ms =
 944             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
 945           ConcurrentGCThread::stsLeave();
 946           os::sleep(Thread::current(), sleep_time_ms, false);
 947           ConcurrentGCThread::stsJoin();
 948         }
 949         double end_time2_sec = os::elapsedTime();
 950         double elapsed_time2_sec = end_time2_sec - start_time_sec;
 951 
 952 #if 0
 953           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
 954                                  "overhead %1.4lf",
 955                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
 956                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
 957           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
 958                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
 959 #endif
 960       } while (!_cm->has_aborted() && the_task->has_aborted());
 961     }
 962     the_task->record_end_time();
 963     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
 964 
 965     ConcurrentGCThread::stsLeave();
 966 
 967     double end_vtime = os::elapsedVTime();
 968     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 969   }
 970 
 971   CMConcurrentMarkingTask(ConcurrentMark* cm,
 972                           ConcurrentMarkThread* cmt) :
 973       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
 974 
 975   ~CMConcurrentMarkingTask() { }
 976 };
 977 
 978 // Calculates the number of active workers for a concurrent
 979 // phase.
 980 uint ConcurrentMark::calc_parallel_marking_threads() {
 981   if (G1CollectedHeap::use_parallel_gc_threads()) {
 982     uint n_conc_workers = 0;
 983     if (!UseDynamicNumberOfGCThreads ||
 984         (!FLAG_IS_DEFAULT(ConcGCThreads) &&
 985          !ForceDynamicNumberOfGCThreads)) {
 986       n_conc_workers = max_parallel_marking_threads();
 987     } else {
 988       n_conc_workers =
 989         AdaptiveSizePolicy::calc_default_active_workers(
 990                                      max_parallel_marking_threads(),
 991                                      1, /* Minimum workers */
 992                                      parallel_marking_threads(),
 993                                      Threads::number_of_non_daemon_threads());
 994       // Don't scale down "n_conc_workers" by scale_parallel_threads() because
 995       // that scaling has already gone into "_max_parallel_marking_threads".
 996     }
 997     assert(n_conc_workers > 0, "Always need at least 1");
 998     return n_conc_workers;
 999   }
1000   // If we are not running with any parallel GC threads we will not
1001   // have spawned any marking threads either. Hence the number of
1002   // concurrent workers should be 0.
1003   return 0;
1004 }
1005 
1006 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1007   // Currently, only survivors can be root regions.
1008   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1009   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1010 
1011   const uintx interval = PrefetchScanIntervalInBytes;
1012   HeapWord* curr = hr->bottom();
1013   const HeapWord* end = hr->top();
1014   while (curr < end) {
1015     Prefetch::read(curr, interval);
1016     oop obj = oop(curr);
1017     int size = obj->oop_iterate(&cl);
1018     assert(size == obj->size(), "sanity");
1019     curr += size;
1020   }
1021 }
1022 
1023 class CMRootRegionScanTask : public AbstractGangTask {
1024 private:
1025   ConcurrentMark* _cm;
1026 
1027 public:
1028   CMRootRegionScanTask(ConcurrentMark* cm) :
1029     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1030 
1031   void work(uint worker_id) {
1032     assert(Thread::current()->is_ConcurrentGC_thread(),
1033            "this should only be done by a conc GC thread");
1034 
1035     CMRootRegions* root_regions = _cm->root_regions();
1036     HeapRegion* hr = root_regions->claim_next();
1037     while (hr != NULL) {
1038       _cm->scanRootRegion(hr, worker_id);
1039       hr = root_regions->claim_next();
1040     }
1041   }
1042 };
1043 
1044 void ConcurrentMark::scanRootRegions() {
1045   // scan_in_progress() will have been set to true only if there was
1046   // at least one root region to scan. So, if it's false, we
1047   // should not attempt to do any further work.
1048   if (root_regions()->scan_in_progress()) {
1049     _parallel_marking_threads = calc_parallel_marking_threads();
1050     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1051            "Maximum number of marking threads exceeded");
1052     uint active_workers = MAX2(1U, parallel_marking_threads());
1053 
1054     CMRootRegionScanTask task(this);
1055     if (parallel_marking_threads() > 0) {
1056       _parallel_workers->set_active_workers((int) active_workers);
1057       _parallel_workers->run_task(&task);
1058     } else {
1059       task.work(0);
1060     }
1061 
1062     // It's possible that has_aborted() is true here without actually
1063     // aborting the survivor scan earlier. This is OK as it's
1064     // mainly used for sanity checking.
1065     root_regions()->scan_finished();
1066   }
1067 }
1068 
1069 void ConcurrentMark::markFromRoots() {
1070   // we might be tempted to assert that:
1071   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1072   //        "inconsistent argument?");
1073   // However that wouldn't be right, because it's possible that
1074   // a safepoint is indeed in progress as a younger generation
1075   // stop-the-world GC happens even as we mark in this generation.
1076 
1077   _restart_for_overflow = false;
1078   force_overflow_conc()->init();
1079 
1080   // _g1h has _n_par_threads
1081   _parallel_marking_threads = calc_parallel_marking_threads();
1082   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1083     "Maximum number of marking threads exceeded");
1084 
1085   uint active_workers = MAX2(1U, parallel_marking_threads());
1086 
1087   // Parallel task terminator is set in "set_phase()"
1088   set_phase(active_workers, true /* concurrent */);
1089 
1090   CMConcurrentMarkingTask markingTask(this, cmThread());
1091   if (parallel_marking_threads() > 0) {
1092     _parallel_workers->set_active_workers((int)active_workers);
1093     // Don't set _n_par_threads because it affects MT in proceess_strong_roots()
1094     // and the decisions on that MT processing is made elsewhere.
1095     assert(_parallel_workers->active_workers() > 0, "Should have been set");
1096     _parallel_workers->run_task(&markingTask);
1097   } else {
1098     markingTask.work(0);
1099   }
1100   print_stats();
1101 }
1102 
1103 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1104   // world is stopped at this checkpoint
1105   assert(SafepointSynchronize::is_at_safepoint(),
1106          "world should be stopped");
1107 
1108   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1109 
1110   // If a full collection has happened, we shouldn't do this.
1111   if (has_aborted()) {
1112     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1113     return;
1114   }
1115 
1116   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1117 
1118   if (VerifyDuringGC) {
1119     HandleMark hm;  // handle scope
1120     gclog_or_tty->print(" VerifyDuringGC:(before)");
1121     Universe::heap()->prepare_for_verify();
1122     Universe::verify(/* silent      */ false,
1123                      /* option      */ VerifyOption_G1UsePrevMarking);
1124   }
1125 
1126   G1CollectorPolicy* g1p = g1h->g1_policy();
1127   g1p->record_concurrent_mark_remark_start();
1128 
1129   double start = os::elapsedTime();
1130 
1131   checkpointRootsFinalWork();
1132 
1133   double mark_work_end = os::elapsedTime();
1134 
1135   weakRefsWork(clear_all_soft_refs);
1136 
1137   if (has_overflown()) {
1138     // Oops.  We overflowed.  Restart concurrent marking.
1139     _restart_for_overflow = true;
1140     // Clear the flag. We do not need it any more.
1141     clear_has_overflown();
1142     if (G1TraceMarkStackOverflow) {
1143       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1144     }
1145   } else {
1146     // Aggregate the per-task counting data that we have accumulated
1147     // while marking.
1148     aggregate_count_data();
1149 
1150     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1151     // We're done with marking.
1152     // This is the end of  the marking cycle, we're expected all
1153     // threads to have SATB queues with active set to true.
1154     satb_mq_set.set_active_all_threads(false, /* new active value */
1155                                        true /* expected_active */);
1156 
1157     if (VerifyDuringGC) {
1158       HandleMark hm;  // handle scope
1159       gclog_or_tty->print(" VerifyDuringGC:(after)");
1160       Universe::heap()->prepare_for_verify();
1161       Universe::verify(/* silent      */ false,
1162                        /* option      */ VerifyOption_G1UseNextMarking);
1163     }
1164     assert(!restart_for_overflow(), "sanity");
1165   }
1166 
1167   // Reset the marking state if marking completed
1168   if (!restart_for_overflow()) {
1169     set_non_marking_state();
1170   }
1171 
1172 #if VERIFY_OBJS_PROCESSED
1173   _scan_obj_cl.objs_processed = 0;
1174   ThreadLocalObjQueue::objs_enqueued = 0;
1175 #endif
1176 
1177   // Statistics
1178   double now = os::elapsedTime();
1179   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1180   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1181   _remark_times.add((now - start) * 1000.0);
1182 
1183   g1p->record_concurrent_mark_remark_end();
1184 }
1185 
1186 // Base class of the closures that finalize and verify the
1187 // liveness counting data.
1188 class CMCountDataClosureBase: public HeapRegionClosure {
1189 protected:
1190   ConcurrentMark* _cm;
1191   BitMap* _region_bm;
1192   BitMap* _card_bm;
1193 
1194   void set_card_bitmap_range(BitMap::idx_t start_idx, BitMap::idx_t last_idx) {
1195     assert(start_idx <= last_idx, "sanity");
1196 
1197     // Set the inclusive bit range [start_idx, last_idx].
1198     // For small ranges (up to 8 cards) use a simple loop; otherwise
1199     // use par_at_put_range.
1200     if ((last_idx - start_idx) < 8) {
1201       for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
1202         _card_bm->par_set_bit(i);
1203       }
1204     } else {
1205       assert(last_idx < _card_bm->size(), "sanity");
1206       // Note BitMap::par_at_put_range() is exclusive.
1207       _card_bm->par_at_put_range(start_idx, last_idx+1, true);
1208     }
1209   }
1210 
1211   // It takes a region that's not empty (i.e., it has at least one
1212   // live object in it and sets its corresponding bit on the region
1213   // bitmap to 1. If the region is "starts humongous" it will also set
1214   // to 1 the bits on the region bitmap that correspond to its
1215   // associated "continues humongous" regions.
1216   void set_bit_for_region(HeapRegion* hr) {
1217     assert(!hr->continuesHumongous(), "should have filtered those out");
1218 
1219     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1220     if (!hr->startsHumongous()) {
1221       // Normal (non-humongous) case: just set the bit.
1222       _region_bm->par_at_put(index, true);
1223     } else {
1224       // Starts humongous case: calculate how many regions are part of
1225       // this humongous region and then set the bit range.
1226       G1CollectedHeap* g1h = G1CollectedHeap::heap();
1227       HeapRegion *last_hr = g1h->heap_region_containing_raw(hr->end() - 1);
1228       BitMap::idx_t end_index = (BitMap::idx_t) last_hr->hrs_index() + 1;
1229       _region_bm->par_at_put_range(index, end_index, true);
1230     }
1231   }
1232 
1233 public:
1234   CMCountDataClosureBase(ConcurrentMark *cm,
1235                          BitMap* region_bm, BitMap* card_bm):
1236     _cm(cm), _region_bm(region_bm), _card_bm(card_bm) { }
1237 };
1238 
1239 // Closure that calculates the # live objects per region. Used
1240 // for verification purposes during the cleanup pause.
1241 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1242   CMBitMapRO* _bm;
1243   size_t _region_marked_bytes;
1244 
1245 public:
1246   CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,
1247                          BitMap* region_bm, BitMap* card_bm) :
1248     CMCountDataClosureBase(cm, region_bm, card_bm),
1249     _bm(bm), _region_marked_bytes(0) { }
1250 
1251   bool doHeapRegion(HeapRegion* hr) {
1252 
1253     if (hr->continuesHumongous()) {
1254       // We will ignore these here and process them when their
1255       // associated "starts humongous" region is processed (see
1256       // set_bit_for_heap_region()). Note that we cannot rely on their
1257       // associated "starts humongous" region to have their bit set to
1258       // 1 since, due to the region chunking in the parallel region
1259       // iteration, a "continues humongous" region might be visited
1260       // before its associated "starts humongous".
1261       return false;
1262     }
1263 
1264     HeapWord* nextTop = hr->next_top_at_mark_start();
1265     HeapWord* start   = hr->bottom();
1266 
1267     assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(),
1268            err_msg("Preconditions not met - "
1269                    "start: "PTR_FORMAT", nextTop: "PTR_FORMAT", end: "PTR_FORMAT,
1270                    start, nextTop, hr->end()));
1271 
1272     // Find the first marked object at or after "start".
1273     start = _bm->getNextMarkedWordAddress(start, nextTop);
1274 
1275     size_t marked_bytes = 0;
1276 
1277     while (start < nextTop) {
1278       oop obj = oop(start);
1279       int obj_sz = obj->size();
1280       HeapWord* obj_last = start + obj_sz - 1;
1281 
1282       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1283       BitMap::idx_t last_idx = _cm->card_bitmap_index_for(obj_last);
1284 
1285       // Set the bits in the card BM for this object (inclusive).
1286       set_card_bitmap_range(start_idx, last_idx);
1287 
1288       // Add the size of this object to the number of marked bytes.
1289       marked_bytes += (size_t)obj_sz * HeapWordSize;
1290 
1291       // Find the next marked object after this one.
1292       start = _bm->getNextMarkedWordAddress(obj_last + 1, nextTop);
1293     }
1294 
1295     // Mark the allocated-since-marking portion...
1296     HeapWord* top = hr->top();
1297     if (nextTop < top) {
1298       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(nextTop);
1299       BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top - 1);
1300 
1301       set_card_bitmap_range(start_idx, last_idx);
1302 
1303       // This definitely means the region has live objects.
1304       set_bit_for_region(hr);
1305     }
1306 
1307     // Update the live region bitmap.
1308     if (marked_bytes > 0) {
1309       set_bit_for_region(hr);
1310     }
1311 
1312     // Set the marked bytes for the current region so that
1313     // it can be queried by a calling verificiation routine
1314     _region_marked_bytes = marked_bytes;
1315 
1316     return false;
1317   }
1318 
1319   size_t region_marked_bytes() const { return _region_marked_bytes; }
1320 };
1321 
1322 // Heap region closure used for verifying the counting data
1323 // that was accumulated concurrently and aggregated during
1324 // the remark pause. This closure is applied to the heap
1325 // regions during the STW cleanup pause.
1326 
1327 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1328   ConcurrentMark* _cm;
1329   CalcLiveObjectsClosure _calc_cl;
1330   BitMap* _region_bm;   // Region BM to be verified
1331   BitMap* _card_bm;     // Card BM to be verified
1332   bool _verbose;        // verbose output?
1333 
1334   BitMap* _exp_region_bm; // Expected Region BM values
1335   BitMap* _exp_card_bm;   // Expected card BM values
1336 
1337   int _failures;
1338 
1339 public:
1340   VerifyLiveObjectDataHRClosure(ConcurrentMark* cm,
1341                                 BitMap* region_bm,
1342                                 BitMap* card_bm,
1343                                 BitMap* exp_region_bm,
1344                                 BitMap* exp_card_bm,
1345                                 bool verbose) :
1346     _cm(cm),
1347     _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm),
1348     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1349     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1350     _failures(0) { }
1351 
1352   int failures() const { return _failures; }
1353 
1354   bool doHeapRegion(HeapRegion* hr) {
1355     if (hr->continuesHumongous()) {
1356       // We will ignore these here and process them when their
1357       // associated "starts humongous" region is processed (see
1358       // set_bit_for_heap_region()). Note that we cannot rely on their
1359       // associated "starts humongous" region to have their bit set to
1360       // 1 since, due to the region chunking in the parallel region
1361       // iteration, a "continues humongous" region might be visited
1362       // before its associated "starts humongous".
1363       return false;
1364     }
1365 
1366     int failures = 0;
1367 
1368     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1369     // this region and set the corresponding bits in the expected region
1370     // and card bitmaps.
1371     bool res = _calc_cl.doHeapRegion(hr);
1372     assert(res == false, "should be continuing");
1373 
1374     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1375                     Mutex::_no_safepoint_check_flag);
1376 
1377     // Verify the marked bytes for this region.
1378     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1379     size_t act_marked_bytes = hr->next_marked_bytes();
1380 
1381     // We're not OK if expected marked bytes > actual marked bytes. It means
1382     // we have missed accounting some objects during the actual marking.
1383     if (exp_marked_bytes > act_marked_bytes) {
1384       if (_verbose) {
1385         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1386                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1387                                hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1388       }
1389       failures += 1;
1390     }
1391 
1392     // Verify the bit, for this region, in the actual and expected
1393     // (which was just calculated) region bit maps.
1394     // We're not OK if the bit in the calculated expected region
1395     // bitmap is set and the bit in the actual region bitmap is not.
1396     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1397 
1398     bool expected = _exp_region_bm->at(index);
1399     bool actual = _region_bm->at(index);
1400     if (expected && !actual) {
1401       if (_verbose) {
1402         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1403                                "expected: %s, actual: %s",
1404                                hr->hrs_index(),
1405                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1406       }
1407       failures += 1;
1408     }
1409 
1410     // Verify that the card bit maps for the cards spanned by the current
1411     // region match. We have an error if we have a set bit in the expected
1412     // bit map and the corresponding bit in the actual bitmap is not set.
1413 
1414     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1415     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1416 
1417     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1418       expected = _exp_card_bm->at(i);
1419       actual = _card_bm->at(i);
1420 
1421       if (expected && !actual) {
1422         if (_verbose) {
1423           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1424                                  "expected: %s, actual: %s",
1425                                  hr->hrs_index(), i,
1426                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1427         }
1428         failures += 1;
1429       }
1430     }
1431 
1432     if (failures > 0 && _verbose)  {
1433       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1434                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1435                              HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
1436                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1437     }
1438 
1439     _failures += failures;
1440 
1441     // We could stop iteration over the heap when we
1442     // find the first violating region by returning true.
1443     return false;
1444   }
1445 };
1446 
1447 
1448 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1449 protected:
1450   G1CollectedHeap* _g1h;
1451   ConcurrentMark* _cm;
1452   BitMap* _actual_region_bm;
1453   BitMap* _actual_card_bm;
1454 
1455   uint    _n_workers;
1456 
1457   BitMap* _expected_region_bm;
1458   BitMap* _expected_card_bm;
1459 
1460   int  _failures;
1461   bool _verbose;
1462 
1463 public:
1464   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1465                             BitMap* region_bm, BitMap* card_bm,
1466                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1467     : AbstractGangTask("G1 verify final counting"),
1468       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1469       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1470       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1471       _failures(0), _verbose(false),
1472       _n_workers(0) {
1473     assert(VerifyDuringGC, "don't call this otherwise");
1474 
1475     // Use the value already set as the number of active threads
1476     // in the call to run_task().
1477     if (G1CollectedHeap::use_parallel_gc_threads()) {
1478       assert( _g1h->workers()->active_workers() > 0,
1479         "Should have been previously set");
1480       _n_workers = _g1h->workers()->active_workers();
1481     } else {
1482       _n_workers = 1;
1483     }
1484 
1485     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1486     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1487 
1488     _verbose = _cm->verbose_medium();
1489   }
1490 
1491   void work(uint worker_id) {
1492     assert(worker_id < _n_workers, "invariant");
1493 
1494     VerifyLiveObjectDataHRClosure verify_cl(_cm,
1495                                             _actual_region_bm, _actual_card_bm,
1496                                             _expected_region_bm,
1497                                             _expected_card_bm,
1498                                             _verbose);
1499 
1500     if (G1CollectedHeap::use_parallel_gc_threads()) {
1501       _g1h->heap_region_par_iterate_chunked(&verify_cl,
1502                                             worker_id,
1503                                             _n_workers,
1504                                             HeapRegion::VerifyCountClaimValue);
1505     } else {
1506       _g1h->heap_region_iterate(&verify_cl);
1507     }
1508 
1509     Atomic::add(verify_cl.failures(), &_failures);
1510   }
1511 
1512   int failures() const { return _failures; }
1513 };
1514 
1515 // Closure that finalizes the liveness counting data.
1516 // Used during the cleanup pause.
1517 // Sets the bits corresponding to the interval [NTAMS, top]
1518 // (which contains the implicitly live objects) in the
1519 // card liveness bitmap. Also sets the bit for each region,
1520 // containing live data, in the region liveness bitmap.
1521 
1522 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1523  public:
1524   FinalCountDataUpdateClosure(ConcurrentMark* cm,
1525                               BitMap* region_bm,
1526                               BitMap* card_bm) :
1527     CMCountDataClosureBase(cm, region_bm, card_bm) { }
1528 
1529   bool doHeapRegion(HeapRegion* hr) {
1530 
1531     if (hr->continuesHumongous()) {
1532       // We will ignore these here and process them when their
1533       // associated "starts humongous" region is processed (see
1534       // set_bit_for_heap_region()). Note that we cannot rely on their
1535       // associated "starts humongous" region to have their bit set to
1536       // 1 since, due to the region chunking in the parallel region
1537       // iteration, a "continues humongous" region might be visited
1538       // before its associated "starts humongous".
1539       return false;
1540     }
1541 
1542     HeapWord* ntams = hr->next_top_at_mark_start();
1543     HeapWord* top   = hr->top();
1544 
1545     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1546 
1547     // Mark the allocated-since-marking portion...
1548     if (ntams < top) {
1549       // This definitely means the region has live objects.
1550       set_bit_for_region(hr);
1551     }
1552 
1553     // Now set the bits for [ntams, top]
1554     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1555     BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top);
1556     set_card_bitmap_range(start_idx, last_idx);
1557 
1558     // Set the bit for the region if it contains live data
1559     if (hr->next_marked_bytes() > 0) {
1560       set_bit_for_region(hr);
1561     }
1562 
1563     return false;
1564   }
1565 };
1566 
1567 class G1ParFinalCountTask: public AbstractGangTask {
1568 protected:
1569   G1CollectedHeap* _g1h;
1570   ConcurrentMark* _cm;
1571   BitMap* _actual_region_bm;
1572   BitMap* _actual_card_bm;
1573 
1574   uint    _n_workers;
1575 
1576 public:
1577   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1578     : AbstractGangTask("G1 final counting"),
1579       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1580       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1581       _n_workers(0) {
1582     // Use the value already set as the number of active threads
1583     // in the call to run_task().
1584     if (G1CollectedHeap::use_parallel_gc_threads()) {
1585       assert( _g1h->workers()->active_workers() > 0,
1586         "Should have been previously set");
1587       _n_workers = _g1h->workers()->active_workers();
1588     } else {
1589       _n_workers = 1;
1590     }
1591   }
1592 
1593   void work(uint worker_id) {
1594     assert(worker_id < _n_workers, "invariant");
1595 
1596     FinalCountDataUpdateClosure final_update_cl(_cm,
1597                                                 _actual_region_bm,
1598                                                 _actual_card_bm);
1599 
1600     if (G1CollectedHeap::use_parallel_gc_threads()) {
1601       _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1602                                             worker_id,
1603                                             _n_workers,
1604                                             HeapRegion::FinalCountClaimValue);
1605     } else {
1606       _g1h->heap_region_iterate(&final_update_cl);
1607     }
1608   }
1609 };
1610 
1611 class G1ParNoteEndTask;
1612 
1613 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1614   G1CollectedHeap* _g1;
1615   int _worker_num;
1616   size_t _max_live_bytes;
1617   uint _regions_claimed;
1618   size_t _freed_bytes;
1619   FreeRegionList* _local_cleanup_list;
1620   OldRegionSet* _old_proxy_set;
1621   HumongousRegionSet* _humongous_proxy_set;
1622   HRRSCleanupTask* _hrrs_cleanup_task;
1623   double _claimed_region_time;
1624   double _max_region_time;
1625 
1626 public:
1627   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1628                              int worker_num,
1629                              FreeRegionList* local_cleanup_list,
1630                              OldRegionSet* old_proxy_set,
1631                              HumongousRegionSet* humongous_proxy_set,
1632                              HRRSCleanupTask* hrrs_cleanup_task) :
1633     _g1(g1), _worker_num(worker_num),
1634     _max_live_bytes(0), _regions_claimed(0),
1635     _freed_bytes(0),
1636     _claimed_region_time(0.0), _max_region_time(0.0),
1637     _local_cleanup_list(local_cleanup_list),
1638     _old_proxy_set(old_proxy_set),
1639     _humongous_proxy_set(humongous_proxy_set),
1640     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1641 
1642   size_t freed_bytes() { return _freed_bytes; }
1643 
1644   bool doHeapRegion(HeapRegion *hr) {
1645     // We use a claim value of zero here because all regions
1646     // were claimed with value 1 in the FinalCount task.
1647     hr->reset_gc_time_stamp();
1648     if (!hr->continuesHumongous()) {
1649       double start = os::elapsedTime();
1650       _regions_claimed++;
1651       hr->note_end_of_marking();
1652       _max_live_bytes += hr->max_live_bytes();
1653       _g1->free_region_if_empty(hr,
1654                                 &_freed_bytes,
1655                                 _local_cleanup_list,
1656                                 _old_proxy_set,
1657                                 _humongous_proxy_set,
1658                                 _hrrs_cleanup_task,
1659                                 true /* par */);
1660       double region_time = (os::elapsedTime() - start);
1661       _claimed_region_time += region_time;
1662       if (region_time > _max_region_time) {
1663         _max_region_time = region_time;
1664       }
1665     }
1666     return false;
1667   }
1668 
1669   size_t max_live_bytes() { return _max_live_bytes; }
1670   uint regions_claimed() { return _regions_claimed; }
1671   double claimed_region_time_sec() { return _claimed_region_time; }
1672   double max_region_time_sec() { return _max_region_time; }
1673 };
1674 
1675 class G1ParNoteEndTask: public AbstractGangTask {
1676   friend class G1NoteEndOfConcMarkClosure;
1677 
1678 protected:
1679   G1CollectedHeap* _g1h;
1680   size_t _max_live_bytes;
1681   size_t _freed_bytes;
1682   FreeRegionList* _cleanup_list;
1683 
1684 public:
1685   G1ParNoteEndTask(G1CollectedHeap* g1h,
1686                    FreeRegionList* cleanup_list) :
1687     AbstractGangTask("G1 note end"), _g1h(g1h),
1688     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1689 
1690   void work(uint worker_id) {
1691     double start = os::elapsedTime();
1692     FreeRegionList local_cleanup_list("Local Cleanup List");
1693     OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
1694     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1695     HRRSCleanupTask hrrs_cleanup_task;
1696     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
1697                                            &old_proxy_set,
1698                                            &humongous_proxy_set,
1699                                            &hrrs_cleanup_task);
1700     if (G1CollectedHeap::use_parallel_gc_threads()) {
1701       _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1702                                             _g1h->workers()->active_workers(),
1703                                             HeapRegion::NoteEndClaimValue);
1704     } else {
1705       _g1h->heap_region_iterate(&g1_note_end);
1706     }
1707     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1708 
1709     // Now update the lists
1710     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1711                                             NULL /* free_list */,
1712                                             &old_proxy_set,
1713                                             &humongous_proxy_set,
1714                                             true /* par */);
1715     {
1716       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1717       _max_live_bytes += g1_note_end.max_live_bytes();
1718       _freed_bytes += g1_note_end.freed_bytes();
1719 
1720       // If we iterate over the global cleanup list at the end of
1721       // cleanup to do this printing we will not guarantee to only
1722       // generate output for the newly-reclaimed regions (the list
1723       // might not be empty at the beginning of cleanup; we might
1724       // still be working on its previous contents). So we do the
1725       // printing here, before we append the new regions to the global
1726       // cleanup list.
1727 
1728       G1HRPrinter* hr_printer = _g1h->hr_printer();
1729       if (hr_printer->is_active()) {
1730         HeapRegionLinkedListIterator iter(&local_cleanup_list);
1731         while (iter.more_available()) {
1732           HeapRegion* hr = iter.get_next();
1733           hr_printer->cleanup(hr);
1734         }
1735       }
1736 
1737       _cleanup_list->add_as_tail(&local_cleanup_list);
1738       assert(local_cleanup_list.is_empty(), "post-condition");
1739 
1740       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1741     }
1742   }
1743   size_t max_live_bytes() { return _max_live_bytes; }
1744   size_t freed_bytes() { return _freed_bytes; }
1745 };
1746 
1747 class G1ParScrubRemSetTask: public AbstractGangTask {
1748 protected:
1749   G1RemSet* _g1rs;
1750   BitMap* _region_bm;
1751   BitMap* _card_bm;
1752 public:
1753   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1754                        BitMap* region_bm, BitMap* card_bm) :
1755     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1756     _region_bm(region_bm), _card_bm(card_bm) { }
1757 
1758   void work(uint worker_id) {
1759     if (G1CollectedHeap::use_parallel_gc_threads()) {
1760       _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1761                        HeapRegion::ScrubRemSetClaimValue);
1762     } else {
1763       _g1rs->scrub(_region_bm, _card_bm);
1764     }
1765   }
1766 
1767 };
1768 
     // Cleanup step of a marking cycle; must run at a safepoint.
     //
     // In order, it:
     //   1. bails out early (after set_marking_complete()) if marking aborted;
     //   2. runs the final counting task and, under VerifyDuringGC, a
     //      verification task that checks the counting data;
     //   3. marks marking as complete and swaps the prev/next mark bitmaps;
     //   4. runs the "note end" task over all regions, which fills
     //      _cleanup_list;
     //   5. optionally scrubs the remembered sets (G1ScrubRemSets);
     //   6. reports the end of cleanup to the policy and updates statistics,
     //      heap info, the collection count and the monitoring sizes.
1769 void ConcurrentMark::cleanup() {
1770   // world is stopped at this checkpoint
1771   assert(SafepointSynchronize::is_at_safepoint(),
1772          "world should be stopped");
1773   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1774 
1775   // If a full collection has happened, we shouldn't do this.
1776   if (has_aborted()) {
1777     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1778     return;
1779   }
1780 
1781   HRSPhaseSetter x(HRSPhaseCleanup);
1782   g1h->verify_region_sets_optional();
1783 
1784   if (VerifyDuringGC) {
1785     HandleMark hm;  // handle scope
1786     gclog_or_tty->print(" VerifyDuringGC:(before)");
1787     Universe::heap()->prepare_for_verify();
1788     Universe::verify(/* silent      */ false,
1789                      /* option      */ VerifyOption_G1UsePrevMarking);
1790   }
1791 
1792   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1793   g1p->record_concurrent_mark_cleanup_start();
1794 
       // 'start' is the baseline for both the final counting time and the
       // total cleanup time recorded further down.
1795   double start = os::elapsedTime();
1796 
1797   HeapRegionRemSet::reset_for_cleanup_tasks();
1798 
       // Worker count used by every parallel phase in this method; it is
       // assigned in the final-count step just below.
1799   uint n_workers;
1800 
1801   // Do counting once more with the world stopped for good measure.
1802   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1803 
1804   if (G1CollectedHeap::use_parallel_gc_threads()) {
1805    assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
1806            "sanity check");
1807 
1808     g1h->set_par_threads();
1809     n_workers = g1h->n_par_threads();
         // NOTE(review): n_workers was just read from n_par_threads(), so
         // this assert only fails if the value changes between the two calls.
1810     assert(g1h->n_par_threads() == n_workers,
1811            "Should not have been reset");
1812     g1h->workers()->run_task(&g1_par_count_task);
1813     // Done with the parallel phase so reset to 0.
1814     g1h->set_par_threads(0);
1815 
1816     assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
1817            "sanity check");
1818   } else {
1819     n_workers = 1;
1820     g1_par_count_task.work(0);
1821   }
1822 
1823   if (VerifyDuringGC) {
1824     // Verify that the counting data accumulated during marking matches
1825     // that calculated by walking the marking bitmap.
1826 
1827     // Bitmaps to hold expected values
1828     BitMap expected_region_bm(_region_bm.size(), false);
1829     BitMap expected_card_bm(_card_bm.size(), false);
1830 
1831     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1832                                                  &_region_bm,
1833                                                  &_card_bm,
1834                                                  &expected_region_bm,
1835                                                  &expected_card_bm);
1836 
1837     if (G1CollectedHeap::use_parallel_gc_threads()) {
1838       g1h->set_par_threads((int)n_workers);
1839       g1h->workers()->run_task(&g1_par_verify_task);
1840       // Done with the parallel phase so reset to 0.
1841       g1h->set_par_threads(0);
1842 
1843       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
1844              "sanity check");
1845     } else {
1846       g1_par_verify_task.work(0);
1847     }
1848 
         // A mismatch is fatal in all builds (guarantee, not assert).
1849     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1850   }
1851 
       // Remember the occupancy before any regions are freed so the size
       // transition can be printed at the end.
1852   size_t start_used_bytes = g1h->used();
1853   g1h->set_marking_complete();
1854 
1855   double count_end = os::elapsedTime();
1856   double this_final_counting_time = (count_end - start);
1857   _total_counting_time += this_final_counting_time;
1858 
1859   if (G1PrintRegionLivenessInfo) {
1860     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
         // NOTE(review): uses the _g1h member rather than the local g1h used
         // everywhere else in this method - presumably the same heap.
1861     _g1h->heap_region_iterate(&cl);
1862   }
1863 
1864   // Install newly created mark bitMap as "prev".
1865   swapMarkBitMaps();
1866 
1867   g1h->reset_gc_time_stamp();
1868 
1869   // Note end of marking in all heap regions.
1870   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1871   if (G1CollectedHeap::use_parallel_gc_threads()) {
1872     g1h->set_par_threads((int)n_workers);
1873     g1h->workers()->run_task(&g1_par_note_end_task);
1874     g1h->set_par_threads(0);
1875 
1876     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1877            "sanity check");
1878   } else {
1879     g1_par_note_end_task.work(0);
1880   }
1881 
1882   if (!cleanup_list_is_empty()) {
1883     // The cleanup list is not empty, so we'll have to process it
1884     // concurrently. Notify anyone else that might be wanting free
1885     // regions that there will be more free regions coming soon.
1886     g1h->set_free_regions_coming();
1887   }
1888 
       // Scrub the remembered sets before the
       // record_concurrent_mark_cleanup_end()
1889   // call below, since it affects the metric by which we sort the heap
1890   // regions.
1891   if (G1ScrubRemSets) {
1892     double rs_scrub_start = os::elapsedTime();
1893     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1894     if (G1CollectedHeap::use_parallel_gc_threads()) {
1895       g1h->set_par_threads((int)n_workers);
1896       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1897       g1h->set_par_threads(0);
1898 
1899       assert(g1h->check_heap_region_claim_values(
1900                                             HeapRegion::ScrubRemSetClaimValue),
1901              "sanity check");
1902     } else {
1903       g1_par_scrub_rs_task.work(0);
1904     }
1905 
1906     double rs_scrub_end = os::elapsedTime();
1907     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1908     _total_rs_scrub_time += this_rs_scrub_time;
1909   }
1910 
1911   // this will also free any regions totally full of garbage objects,
1912   // and sort the regions.
1913   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
1914 
1915   // Statistics.
1916   double end = os::elapsedTime();
       // elapsedTime() differences are scaled by 1000 here (seconds to
       // milliseconds, presumably).
1917   _cleanup_times.add((end - start) * 1000.0);
1918 
1919   if (G1Log::fine()) {
1920     g1h->print_size_transition(gclog_or_tty,
1921                                start_used_bytes,
1922                                g1h->used(),
1923                                g1h->capacity());
1924   }
1925 
1926   // Clean up will have freed any regions completely full of garbage.
1927   // Update the soft reference policy with the new heap occupancy.
1928   Universe::update_heap_info_at_gc();
1929 
1930   // We need to make this be a "collection" so any collection pause that
1931   // races with it goes around and waits for completeCleanup to finish.
1932   g1h->increment_total_collections();
1933 
1934   // We reclaimed old regions so we should calculate the sizes to make
1935   // sure we update the old gen/space data.
1936   g1h->g1mm()->update_sizes();
1937 
1938   if (VerifyDuringGC) {
1939     HandleMark hm;  // handle scope
1940     gclog_or_tty->print(" VerifyDuringGC:(after)");
1941     Universe::heap()->prepare_for_verify();
1942     Universe::verify(/* silent      */ false,
1943                      /* option      */ VerifyOption_G1UsePrevMarking);
1944   }
1945 
1946   g1h->verify_region_sets_optional();
1947 }
1948 
     // Drains _cleanup_list: each region is removed from the head of the
     // list, cleared with par_clear() and collected in a temporary free
     // list. The temporary list is handed to the heap's secondary free list
     // in batches, under SecondaryFreeList_lock, followed by a notify_all()
     // on that lock. Does nothing if marking has aborted.
1949 void ConcurrentMark::completeCleanup() {
1950   if (has_aborted()) return;
1951 
1952   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1953 
1954   _cleanup_list.verify_optional();
1955   FreeRegionList tmp_free_list("Tmp Free List");
1956 
1957   if (G1ConcRegionFreeingVerbose) {
1958     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1959                            "cleanup list has %u entries",
1960                            _cleanup_list.length());
1961   }
1962 
1963   // No one else should be accessing the _cleanup_list at this point,
1964   // so it's not necessary to take any locks
1965   while (!_cleanup_list.is_empty()) {
1966     HeapRegion* hr = _cleanup_list.remove_head();
1967     assert(hr != NULL, "the list was not empty");
1968     hr->par_clear();
1969     tmp_free_list.add_as_tail(hr);
1970 
1971     // Instead of adding one region at a time to the secondary_free_list,
1972     // we accumulate them in the local list and move them a few at a
1973     // time. This also cuts down on the number of notify_all() calls
1974     // we do during this process. We'll also append the local list when
1975     // _cleanup_list is empty (which means we just removed the last
1976     // region from the _cleanup_list).
1977     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1978         _cleanup_list.is_empty()) {
1979       if (G1ConcRegionFreeingVerbose) {
1980         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1981                                "appending %u entries to the secondary_free_list, "
1982                                "cleanup list still has %u entries",
1983                                tmp_free_list.length(),
1984                                _cleanup_list.length());
1985       }
1986 
           // Hold the lock only for the append and the notification.
           // Presumably the append empties tmp_free_list (see the
           // post-condition assert at the end of this method).
1987       {
1988         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1989         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
1990         SecondaryFreeList_lock->notify_all();
1991       }
1992 
           // Stress option: sleep 1 ms at a time,
           // G1StressConcRegionFreeingDelayMillis times, after each batch.
1993       if (G1StressConcRegionFreeing) {
1994         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1995           os::sleep(Thread::current(), (jlong) 1, false);
1996         }
1997       }
1998     }
1999   }
2000   assert(tmp_free_list.is_empty(), "post-condition");
2001 }
2002 
2003 // Support closures for reference processing in G1
2004 
2005 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2006   HeapWord* addr = (HeapWord*)obj;
2007   return addr != NULL &&
2008          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2009 }
2010 
2011 class G1CMKeepAliveClosure: public OopClosure {
2012   G1CollectedHeap* _g1;
2013   ConcurrentMark*  _cm;
2014  public:
2015   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
2016     _g1(g1), _cm(cm) {
2017     assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
2018   }
2019 
2020   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2021   virtual void do_oop(      oop* p) { do_oop_work(p); }
2022 
2023   template <class T> void do_oop_work(T* p) {
2024     oop obj = oopDesc::load_decode_heap_oop(p);
2025     HeapWord* addr = (HeapWord*)obj;
2026 
2027     if (_cm->verbose_high()) {
2028       gclog_or_tty->print_cr("\t[0] we're looking at location "
2029                              "*"PTR_FORMAT" = "PTR_FORMAT,
2030                              p, (void*) obj);
2031     }
2032 
2033     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2034       _cm->mark_and_count(obj);
2035       _cm->mark_stack_push(obj);
2036     }
2037   }
2038 };
2039 
2040 class G1CMDrainMarkingStackClosure: public VoidClosure {
2041   ConcurrentMark*               _cm;
2042   CMMarkStack*                  _markStack;
2043   G1CMKeepAliveClosure*         _oopClosure;
2044  public:
2045   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
2046                                G1CMKeepAliveClosure* oopClosure) :
2047     _cm(cm),
2048     _markStack(markStack),
2049     _oopClosure(oopClosure) { }
2050 
2051   void do_void() {
2052     _markStack->drain((OopClosure*)_oopClosure, _cm->nextMarkBitMap(), false);
2053   }
2054 };
2055 
2056 // 'Keep Alive' closure used by parallel reference processing.
2057 // An instance of this closure is used in the parallel reference processing
2058 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2059 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are
2060 // placed on to discovered ref lists once so we can mark and push with no
2061 // need to check whether the object has already been marked. Using the
2062 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2063 // operating on the global mark stack. This means that an individual
2064 // worker would be doing lock-free pushes while it processes its own
2065 // discovered ref list followed by drain call. If the discovered ref lists
2066 // are unbalanced then this could cause interference with the other
2067 // workers. Using a CMTask (and its embedded local data structures)
2068 // avoids that potential interference.
2069 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2070   ConcurrentMark*  _cm;
2071   CMTask*          _task;
2072   int              _ref_counter_limit;
2073   int              _ref_counter;
2074  public:
2075   G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
2076     _cm(cm), _task(task),
2077     _ref_counter_limit(G1RefProcDrainInterval) {
2078     assert(_ref_counter_limit > 0, "sanity");
2079     _ref_counter = _ref_counter_limit;
2080   }
2081 
2082   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2083   virtual void do_oop(      oop* p) { do_oop_work(p); }
2084 
2085   template <class T> void do_oop_work(T* p) {
2086     if (!_cm->has_overflown()) {
2087       oop obj = oopDesc::load_decode_heap_oop(p);
2088       if (_cm->verbose_high()) {
2089         gclog_or_tty->print_cr("\t[%d] we're looking at location "
2090                                "*"PTR_FORMAT" = "PTR_FORMAT,
2091                                _task->task_id(), p, (void*) obj);
2092       }
2093 
2094       _task->deal_with_reference(obj);
2095       _ref_counter--;
2096 
2097       if (_ref_counter == 0) {
2098         // We have dealt with _ref_counter_limit references, pushing them and objects
2099         // reachable from them on to the local stack (and possibly the global stack).
2100         // Call do_marking_step() to process these entries. We call the routine in a
2101         // loop, which we'll exit if there's nothing more to do (i.e. we're done
2102         // with the entries that we've pushed as a result of the deal_with_reference
2103         // calls above) or we overflow.
2104         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2105         // while there may still be some work to do. (See the comment at the
2106         // beginning of CMTask::do_marking_step() for those conditions - one of which
2107         // is reaching the specified time target.) It is only when
2108         // CMTask::do_marking_step() returns without setting the has_aborted() flag
2109         // that the marking has completed.
2110         do {
2111           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2112           _task->do_marking_step(mark_step_duration_ms,
2113                                  false /* do_stealing    */,
2114                                  false /* do_termination */);
2115         } while (_task->has_aborted() && !_cm->has_overflown());
2116         _ref_counter = _ref_counter_limit;
2117       }
2118     } else {
2119       if (_cm->verbose_high()) {
2120          gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2121       }
2122     }
2123   }
2124 };
2125 
2126 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2127   ConcurrentMark* _cm;
2128   CMTask* _task;
2129  public:
2130   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2131     _cm(cm), _task(task) { }
2132 
2133   void do_void() {
2134     do {
2135       if (_cm->verbose_high()) {
2136         gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2137                                _task->task_id());
2138       }
2139 
2140       // We call CMTask::do_marking_step() to completely drain the local and
2141       // global marking stacks. The routine is called in a loop, which we'll
2142       // exit if there's nothing more to do (i.e. we'completely drained the
2143       // entries that were pushed as a result of applying the
2144       // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2145       // lists above) or we overflow the global marking stack.
2146       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2147       // while there may still be some work to do. (See the comment at the
2148       // beginning of CMTask::do_marking_step() for those conditions - one of which
2149       // is reaching the specified time target.) It is only when
2150       // CMTask::do_marking_step() returns without setting the has_aborted() flag
2151       // that the marking has completed.
2152 
2153       _task->do_marking_step(1000000000.0 /* something very large */,
2154                              true /* do_stealing    */,
2155                              true /* do_termination */);
2156     } while (_task->has_aborted() && !_cm->has_overflown());
2157   }
2158 };
2159 
2160 // Implementation of AbstractRefProcTaskExecutor for parallel
2161 // reference processing at the end of G1 concurrent marking
2162 
2163 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2164 private:
2165   G1CollectedHeap* _g1h;
2166   ConcurrentMark*  _cm;
2167   WorkGang*        _workers;
2168   int              _active_workers;
2169 
2170 public:
2171   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2172                         ConcurrentMark* cm,
2173                         WorkGang* workers,
2174                         int n_workers) :
2175     _g1h(g1h), _cm(cm),
2176     _workers(workers), _active_workers(n_workers) { }
2177 
2178   // Executes the given task using concurrent marking worker threads.
2179   virtual void execute(ProcessTask& task);
2180   virtual void execute(EnqueueTask& task);
2181 };
2182 
2183 class G1CMRefProcTaskProxy: public AbstractGangTask {
2184   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2185   ProcessTask&     _proc_task;
2186   G1CollectedHeap* _g1h;
2187   ConcurrentMark*  _cm;
2188 
2189 public:
2190   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2191                      G1CollectedHeap* g1h,
2192                      ConcurrentMark* cm) :
2193     AbstractGangTask("Process reference objects in parallel"),
2194     _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
2195 
2196   virtual void work(uint worker_id) {
2197     CMTask* marking_task = _cm->task(worker_id);
2198     G1CMIsAliveClosure g1_is_alive(_g1h);
2199     G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
2200     G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2201 
2202     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2203   }
2204 };
2205 
2206 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2207   assert(_workers != NULL, "Need parallel worker threads.");
2208 
2209   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2210 
2211   // We need to reset the phase for each task execution so that
2212   // the termination protocol of CMTask::do_marking_step works.
2213   _cm->set_phase(_active_workers, false /* concurrent */);
2214   _g1h->set_par_threads(_active_workers);
2215   _workers->run_task(&proc_task_proxy);
2216   _g1h->set_par_threads(0);
2217 }
2218 
2219 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2220   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2221   EnqueueTask& _enq_task;
2222 
2223 public:
2224   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2225     AbstractGangTask("Enqueue reference objects in parallel"),
2226     _enq_task(enq_task) { }
2227 
2228   virtual void work(uint worker_id) {
2229     _enq_task.work(worker_id);
2230   }
2231 };
2232 
2233 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2234   assert(_workers != NULL, "Need parallel worker threads.");
2235 
2236   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2237 
2238   _g1h->set_par_threads(_active_workers);
2239   _workers->run_task(&enq_task_proxy);
2240   _g1h->set_par_threads(0);
2241 }
2242 
     // Processes the references discovered by the concurrent-marking
     // reference processor, enqueues them, and then unlinks dead entries
     // from the StringTable and unreferenced symbols from the SymbolTable.
     //
     // clear_all_soft_refs - forwarded to the reference processor's
     //                       setup_policy() call.
     //
     // If the global mark stack overflowed during processing, the
     // has_overflown flag is (re)set via set_has_overflown().
2243 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2244   ResourceMark rm;
2245   HandleMark   hm;
2246 
2247   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2248 
2249   // Is alive closure.
2250   G1CMIsAliveClosure g1_is_alive(g1h);
2251 
2252   // Inner scope to exclude the cleaning of the string and symbol
2253   // tables from the displayed time.
2254   {
2255     if (G1Log::finer()) {
2256       gclog_or_tty->put(' ');
2257     }
2258     TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty);
2259 
2260     ReferenceProcessor* rp = g1h->ref_processor_cm();
2261 
2262     // See the comment in G1CollectedHeap::ref_processing_init()
2263     // about how reference processing currently works in G1.
2264 
2265     // Process weak references.
2266     rp->setup_policy(clear_all_soft_refs);
2267     assert(_markStack.isEmpty(), "mark stack should be empty");
2268 
         // Keep-alive and drain closures that work on the global mark stack;
         // both are passed to process_discovered_references() in either
         // branch below.
2269     G1CMKeepAliveClosure g1_keep_alive(g1h, this);
2270     G1CMDrainMarkingStackClosure
2271       g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
2272 
2273     // We use the work gang from the G1CollectedHeap and we utilize all
2274     // the worker threads.
2275     uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
         // Clamp to the range [1, _max_task_num].
2276     active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U);
2277 
         // The executor is always constructed, but it is only handed to the
         // reference processor when processing is MT.
2278     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2279                                               g1h->workers(), active_workers);
2280 
2281     if (rp->processing_is_mt()) {
2282       // Set the degree of MT here.  If the discovery is done MT, there
2283       // may have been a different number of threads doing the discovery
2284       // and a different number of discovered lists may have Ref objects.
2285       // That is OK as long as the Reference lists are balanced (see
2286       // balance_all_queues() and balance_queues()).
2287       rp->set_active_mt_degree(active_workers);
2288 
2289       rp->process_discovered_references(&g1_is_alive,
2290                                       &g1_keep_alive,
2291                                       &g1_drain_mark_stack,
2292                                       &par_task_executor);
2293 
2294       // The work routines of the parallel keep_alive and drain_marking_stack
2295       // will set the has_overflown flag if we overflow the global marking
2296       // stack.
2297     } else {
2298       rp->process_discovered_references(&g1_is_alive,
2299                                         &g1_keep_alive,
2300                                         &g1_drain_mark_stack,
2301                                         NULL);
2302     }
2303 
2304     assert(_markStack.overflow() || _markStack.isEmpty(),
2305             "mark stack should be empty (unless it overflowed)");
2306     if (_markStack.overflow()) {
2307       // Should have been done already when we tried to push an
2308       // entry on to the global mark stack. But let's do it again.
2309       set_has_overflown();
2310     }
2311 
2312     if (rp->processing_is_mt()) {
2313       assert(rp->num_q() == active_workers, "why not");
2314       rp->enqueue_discovered_references(&par_task_executor);
2315     } else {
2316       rp->enqueue_discovered_references();
2317     }
2318 
2319     rp->verify_no_references_recorded();
2320     assert(!rp->discovery_enabled(), "Post condition");
2321   }
2322 
2323   // Now clean up stale oops in StringTable
2324   StringTable::unlink(&g1_is_alive);
2325   // Clean up unreferenced symbols in symbol table.
2326   SymbolTable::unlink();
2327 }
2328 
2329 void ConcurrentMark::swapMarkBitMaps() {
2330   CMBitMapRO* temp = _prevMarkBitMap;
2331   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2332   _nextMarkBitMap  = (CMBitMap*)  temp;
2333 }
2334 
2335 class CMRemarkTask: public AbstractGangTask {
2336 private:
2337   ConcurrentMark *_cm;
2338 
2339 public:
2340   void work(uint worker_id) {
2341     // Since all available tasks are actually started, we should
2342     // only proceed if we're supposed to be actived.
2343     if (worker_id < _cm->active_tasks()) {
2344       CMTask* task = _cm->task(worker_id);
2345       task->record_start_time();
2346       do {
2347         task->do_marking_step(1000000000.0 /* something very large */,
2348                               true /* do_stealing    */,
2349                               true /* do_termination */);
2350       } while (task->has_aborted() && !_cm->has_overflown());
2351       // If we overflow, then we do not want to restart. We instead
2352       // want to abort remark and do concurrent marking again.
2353       task->record_end_time();
2354     }
2355   }
2356 
2357   CMRemarkTask(ConcurrentMark* cm, int active_workers) :
2358     AbstractGangTask("Par Remark"), _cm(cm) {
2359     _cm->terminator()->reset_for_reuse(active_workers);
2360   }
2361 };
2362 
     // Does the marking work of the remark pause: runs a CMRemarkTask on
     // the heap's work gang (parallel case) or directly on the calling
     // thread (serial case), then checks that no completed SATB buffers are
     // left and prints statistics.
2363 void ConcurrentMark::checkpointRootsFinalWork() {
2364   ResourceMark rm;
2365   HandleMark   hm;
2366   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2367 
2368   g1h->ensure_parsability(false);
2369 
2370   if (G1CollectedHeap::use_parallel_gc_threads()) {
2371     G1CollectedHeap::StrongRootsScope srs(g1h);
2372     // this is remark, so we'll use up all active threads
2373     uint active_workers = g1h->workers()->active_workers();
2374     if (active_workers == 0) {
           // Debug builds stop at the assert; builds without asserts fall
           // back to ParallelGCThreads workers.
2375       assert(active_workers > 0, "Should have been set earlier");
2376       active_workers = (uint) ParallelGCThreads;
2377       g1h->workers()->set_active_workers(active_workers);
2378     }
2379     set_phase(active_workers, false /* concurrent */);
2380     // Leave _parallel_marking_threads at its
2381     // value originally calculated in the ConcurrentMark
2382     // constructor and pass values of the active workers
2383     // through the gang in the task.
2384 
2385     CMRemarkTask remarkTask(this, active_workers);
2386     g1h->set_par_threads(active_workers);
2387     g1h->workers()->run_task(&remarkTask);
2388     g1h->set_par_threads(0);
2389   } else {
2390     G1CollectedHeap::StrongRootsScope srs(g1h);
2391     // serial remark: the calling thread is the only worker
2392     uint active_workers = 1;
2393     set_phase(active_workers, false /* concurrent */);
2394 
2395     CMRemarkTask remarkTask(this, active_workers);
2396     // No work gang is involved here: the task's work() is invoked
2397     // directly on this thread as worker 0.
2398     //
2399     remarkTask.work(0);
2400   }
2401   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2402   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2403 
2404   print_stats();
2405 
       // Optional consistency check, compiled in only when
       // VERIFY_OBJS_PROCESSED is set.
2406 #if VERIFY_OBJS_PROCESSED
2407   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2408     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2409                            _scan_obj_cl.objs_processed,
2410                            ThreadLocalObjQueue::objs_enqueued);
2411     guarantee(_scan_obj_cl.objs_processed ==
2412               ThreadLocalObjQueue::objs_enqueued,
2413               "Different number of objs processed and enqueued.");
2414   }
2415 #endif
2416 }
2417 
2418 #ifndef PRODUCT
2419 
2420 class PrintReachableOopClosure: public OopClosure {
2421 private:
2422   G1CollectedHeap* _g1h;
2423   outputStream*    _out;
2424   VerifyOption     _vo;
2425   bool             _all;
2426 
2427 public:
2428   PrintReachableOopClosure(outputStream* out,
2429                            VerifyOption  vo,
2430                            bool          all) :
2431     _g1h(G1CollectedHeap::heap()),
2432     _out(out), _vo(vo), _all(all) { }
2433 
2434   void do_oop(narrowOop* p) { do_oop_work(p); }
2435   void do_oop(      oop* p) { do_oop_work(p); }
2436 
2437   template <class T> void do_oop_work(T* p) {
2438     oop         obj = oopDesc::load_decode_heap_oop(p);
2439     const char* str = NULL;
2440     const char* str2 = "";
2441 
2442     if (obj == NULL) {
2443       str = "";
2444     } else if (!_g1h->is_in_g1_reserved(obj)) {
2445       str = " O";
2446     } else {
2447       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2448       guarantee(hr != NULL, "invariant");
2449       bool over_tams = false;
2450       bool marked = false;
2451 
2452       switch (_vo) {
2453         case VerifyOption_G1UsePrevMarking:
2454           over_tams = hr->obj_allocated_since_prev_marking(obj);
2455           marked = _g1h->isMarkedPrev(obj);
2456           break;
2457         case VerifyOption_G1UseNextMarking:
2458           over_tams = hr->obj_allocated_since_next_marking(obj);
2459           marked = _g1h->isMarkedNext(obj);
2460           break;
2461         case VerifyOption_G1UseMarkWord:
2462           marked = obj->is_gc_marked();
2463           break;
2464         default:
2465           ShouldNotReachHere();
2466       }
2467 
2468       if (over_tams) {
2469         str = " >";
2470         if (marked) {
2471           str2 = " AND MARKED";
2472         }
2473       } else if (marked) {
2474         str = " M";
2475       } else {
2476         str = " NOT";
2477       }
2478     }
2479 
2480     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2481                    p, (void*) obj, str, str2);
2482   }
2483 };
2484 
2485 class PrintReachableObjectClosure : public ObjectClosure {
2486 private:
2487   G1CollectedHeap* _g1h;
2488   outputStream*    _out;
2489   VerifyOption     _vo;
2490   bool             _all;
2491   HeapRegion*      _hr;
2492 
2493 public:
2494   PrintReachableObjectClosure(outputStream* out,
2495                               VerifyOption  vo,
2496                               bool          all,
2497                               HeapRegion*   hr) :
2498     _g1h(G1CollectedHeap::heap()),
2499     _out(out), _vo(vo), _all(all), _hr(hr) { }
2500 
2501   void do_object(oop o) {
2502     bool over_tams = false;
2503     bool marked = false;
2504 
2505     switch (_vo) {
2506       case VerifyOption_G1UsePrevMarking:
2507         over_tams = _hr->obj_allocated_since_prev_marking(o);
2508         marked = _g1h->isMarkedPrev(o);
2509         break;
2510       case VerifyOption_G1UseNextMarking:
2511         over_tams = _hr->obj_allocated_since_next_marking(o);
2512         marked = _g1h->isMarkedNext(o);
2513         break;
2514       case VerifyOption_G1UseMarkWord:
2515         marked = o->is_gc_marked();
2516         break;
2517       default:
2518         ShouldNotReachHere();
2519     }
2520     bool print_it = _all || over_tams || marked;
2521 
2522     if (print_it) {
2523       _out->print_cr(" "PTR_FORMAT"%s",
2524                      o, (over_tams) ? " >" : (marked) ? " M" : "");
2525       PrintReachableOopClosure oopCl(_out, _vo, _all);
2526       o->oop_iterate(&oopCl);
2527     }
2528   }
2529 };
2530 
2531 class PrintReachableRegionClosure : public HeapRegionClosure {
2532 private:
2533   outputStream* _out;
2534   VerifyOption  _vo;
2535   bool          _all;
2536 
2537 public:
2538   bool doHeapRegion(HeapRegion* hr) {
2539     HeapWord* b = hr->bottom();
2540     HeapWord* e = hr->end();
2541     HeapWord* t = hr->top();
2542     HeapWord* p = NULL;
2543 
2544     switch (_vo) {
2545       case VerifyOption_G1UsePrevMarking:
2546         p = hr->prev_top_at_mark_start();
2547         break;
2548       case VerifyOption_G1UseNextMarking:
2549         p = hr->next_top_at_mark_start();
2550         break;
2551       case VerifyOption_G1UseMarkWord:
2552         // When we are verifying marking using the mark word
2553         // TAMS has no relevance.
2554         assert(p == NULL, "post-condition");
2555         break;
2556       default:
2557         ShouldNotReachHere();
2558     }
2559     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2560                    "TAMS: "PTR_FORMAT, b, e, t, p);
2561     _out->cr();
2562 
2563     HeapWord* from = b;
2564     HeapWord* to   = t;
2565 
2566     if (to > from) {
2567       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2568       _out->cr();
2569       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2570       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2571       _out->cr();
2572     }
2573 
2574     return false;
2575   }
2576 
2577   PrintReachableRegionClosure(outputStream* out,
2578                               VerifyOption  vo,
2579                               bool          all) :
2580     _out(out), _vo(vo), _all(all) { }
2581 };
2582 
2583 static const char* verify_option_to_tams(VerifyOption vo) {
2584   switch (vo) {
2585     case VerifyOption_G1UsePrevMarking:
2586       return "PTAMS";
2587     case VerifyOption_G1UseNextMarking:
2588       return "NTAMS";
2589     default:
2590       return "NONE";
2591   }
2592 }
2593 
2594 void ConcurrentMark::print_reachable(const char* str,
2595                                      VerifyOption vo,
2596                                      bool all) {
2597   gclog_or_tty->cr();
2598   gclog_or_tty->print_cr("== Doing heap dump... ");
2599 
2600   if (G1PrintReachableBaseFile == NULL) {
2601     gclog_or_tty->print_cr("  #### error: no base file defined");
2602     return;
2603   }
2604 
2605   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2606       (JVM_MAXPATHLEN - 1)) {
2607     gclog_or_tty->print_cr("  #### error: file name too long");
2608     return;
2609   }
2610 
2611   char file_name[JVM_MAXPATHLEN];
2612   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2613   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2614 
2615   fileStream fout(file_name);
2616   if (!fout.is_open()) {
2617     gclog_or_tty->print_cr("  #### error: could not open file");
2618     return;
2619   }
2620 
2621   outputStream* out = &fout;
2622   out->print_cr("-- USING %s", verify_option_to_tams(vo));
2623   out->cr();
2624 
2625   out->print_cr("--- ITERATING OVER REGIONS");
2626   out->cr();
2627   PrintReachableRegionClosure rcl(out, vo, all);
2628   _g1h->heap_region_iterate(&rcl);
2629   out->cr();
2630 
2631   gclog_or_tty->print_cr("  done");
2632   gclog_or_tty->flush();
2633 }
2634 
2635 #endif // PRODUCT
2636 
2637 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2638   // Note we are overriding the read-only view of the prev map here, via
2639   // the cast.
2640   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2641 }
2642 
2643 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2644   _nextMarkBitMap->clearRange(mr);
2645 }
2646 
2647 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
2648   clearRangePrevBitmap(mr);
2649   clearRangeNextBitmap(mr);
2650 }
2651 
2652 HeapRegion*
2653 ConcurrentMark::claim_region(int task_num) {
2654   // "checkpoint" the finger
2655   HeapWord* finger = _finger;
2656 
2657   // _heap_end will not change underneath our feet; it only changes at
2658   // yield points.
2659   while (finger < _heap_end) {
2660     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2661 
2662     // Note on how this code handles humongous regions. In the
2663     // normal case the finger will reach the start of a "starts
2664     // humongous" (SH) region. Its end will either be the end of the
2665     // last "continues humongous" (CH) region in the sequence, or the
2666     // standard end of the SH region (if the SH is the only region in
2667     // the sequence). That way claim_region() will skip over the CH
2668     // regions. However, there is a subtle race between a CM thread
2669     // executing this method and a mutator thread doing a humongous
2670     // object allocation. The two are not mutually exclusive as the CM
2671     // thread does not need to hold the Heap_lock when it gets
2672     // here. So there is a chance that claim_region() will come across
2673     // a free region that's in the progress of becoming a SH or a CH
2674     // region. In the former case, it will either
2675     //   a) Miss the update to the region's end, in which case it will
2676     //      visit every subsequent CH region, will find their bitmaps
2677     //      empty, and do nothing, or
2678     //   b) Will observe the update of the region's end (in which case
2679     //      it will skip the subsequent CH regions).
2680     // If it comes across a region that suddenly becomes CH, the
2681     // scenario will be similar to b). So, the race between
2682     // claim_region() and a humongous object allocation might force us
2683     // to do a bit of unnecessary work (due to some unnecessary bitmap
2684     // iterations) but it should not introduce and correctness issues.
2685     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
2686     HeapWord*   bottom        = curr_region->bottom();
2687     HeapWord*   end           = curr_region->end();
2688     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2689 
2690     if (verbose_low()) {
2691       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2692                              "["PTR_FORMAT", "PTR_FORMAT"), "
2693                              "limit = "PTR_FORMAT,
2694                              task_num, curr_region, bottom, end, limit);
2695     }
2696 
2697     // Is the gap between reading the finger and doing the CAS too long?
2698     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2699     if (res == finger) {
2700       // we succeeded
2701 
2702       // notice that _finger == end cannot be guaranteed here since,
2703       // someone else might have moved the finger even further
2704       assert(_finger >= end, "the finger should have moved forward");
2705 
2706       if (verbose_low()) {
2707         gclog_or_tty->print_cr("[%d] we were successful with region = "
2708                                PTR_FORMAT, task_num, curr_region);
2709       }
2710 
2711       if (limit > bottom) {
2712         if (verbose_low()) {
2713           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2714                                  "returning it ", task_num, curr_region);
2715         }
2716         return curr_region;
2717       } else {
2718         assert(limit == bottom,
2719                "the region limit should be at bottom");
2720         if (verbose_low()) {
2721           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2722                                  "returning NULL", task_num, curr_region);
2723         }
2724         // we return NULL and the caller should try calling
2725         // claim_region() again.
2726         return NULL;
2727       }
2728     } else {
2729       assert(_finger > finger, "the finger should have moved forward");
2730       if (verbose_low()) {
2731         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2732                                "global finger = "PTR_FORMAT", "
2733                                "our finger = "PTR_FORMAT,
2734                                task_num, _finger, finger);
2735       }
2736 
2737       // read it again
2738       finger = _finger;
2739     }
2740   }
2741 
2742   return NULL;
2743 }
2744 
2745 #ifndef PRODUCT
// Identifies which data structure VerifyNoCSetOopsClosure is currently
// being applied to; the closure uses it to label its failure message.
enum VerifyNoCSetOopsPhase {
  // Entries on the global mark stack.
  VerifyNoCSetOopsStack,
  // Entries on the task queues.
  VerifyNoCSetOopsQueues,
  // Entries on the enqueued (completed) SATB buffers.
  VerifyNoCSetOopsSATBCompleted,
  // Entries on the per-thread SATB buffers.
  VerifyNoCSetOopsSATBThread
};
2752 
2753 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
2754 private:
2755   G1CollectedHeap* _g1h;
2756   VerifyNoCSetOopsPhase _phase;
2757   int _info;
2758 
2759   const char* phase_str() {
2760     switch (_phase) {
2761     case VerifyNoCSetOopsStack:         return "Stack";
2762     case VerifyNoCSetOopsQueues:        return "Queue";
2763     case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
2764     case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
2765     default:                            ShouldNotReachHere();
2766     }
2767     return NULL;
2768   }
2769 
2770   void do_object_work(oop obj) {
2771     guarantee(!_g1h->obj_in_cs(obj),
2772               err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2773                       (void*) obj, phase_str(), _info));
2774   }
2775 
2776 public:
2777   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2778 
2779   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2780     _phase = phase;
2781     _info = info;
2782   }
2783 
2784   virtual void do_oop(oop* p) {
2785     oop obj = oopDesc::load_decode_heap_oop(p);
2786     do_object_work(obj);
2787   }
2788 
2789   virtual void do_oop(narrowOop* p) {
2790     // We should not come across narrow oops while scanning marking
2791     // stacks and SATB buffers.
2792     ShouldNotReachHere();
2793   }
2794 
2795   virtual void do_object(oop obj) {
2796     do_object_work(obj);
2797   }
2798 };
2799 
2800 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
2801                                          bool verify_enqueued_buffers,
2802                                          bool verify_thread_buffers,
2803                                          bool verify_fingers) {
2804   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2805   if (!G1CollectedHeap::heap()->mark_in_progress()) {
2806     return;
2807   }
2808 
2809   VerifyNoCSetOopsClosure cl;
2810 
2811   if (verify_stacks) {
2812     // Verify entries on the global mark stack
2813     cl.set_phase(VerifyNoCSetOopsStack);
2814     _markStack.oops_do(&cl);
2815 
2816     // Verify entries on the task queues
2817     for (int i = 0; i < (int) _max_task_num; i += 1) {
2818       cl.set_phase(VerifyNoCSetOopsQueues, i);
2819       OopTaskQueue* queue = _task_queues->queue(i);
2820       queue->oops_do(&cl);
2821     }
2822   }
2823 
2824   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
2825 
2826   // Verify entries on the enqueued SATB buffers
2827   if (verify_enqueued_buffers) {
2828     cl.set_phase(VerifyNoCSetOopsSATBCompleted);
2829     satb_qs.iterate_completed_buffers_read_only(&cl);
2830   }
2831 
2832   // Verify entries on the per-thread SATB buffers
2833   if (verify_thread_buffers) {
2834     cl.set_phase(VerifyNoCSetOopsSATBThread);
2835     satb_qs.iterate_thread_buffers_read_only(&cl);
2836   }
2837 
2838   if (verify_fingers) {
2839     // Verify the global finger
2840     HeapWord* global_finger = finger();
2841     if (global_finger != NULL && global_finger < _heap_end) {
2842       // The global finger always points to a heap region boundary. We
2843       // use heap_region_containing_raw() to get the containing region
2844       // given that the global finger could be pointing to a free region
2845       // which subsequently becomes continues humongous. If that
2846       // happens, heap_region_containing() will return the bottom of the
2847       // corresponding starts humongous region and the check below will
2848       // not hold any more.
2849       HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2850       guarantee(global_finger == global_hr->bottom(),
2851                 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
2852                         global_finger, HR_FORMAT_PARAMS(global_hr)));
2853     }
2854 
2855     // Verify the task fingers
2856     assert(parallel_marking_threads() <= _max_task_num, "sanity");
2857     for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
2858       CMTask* task = _tasks[i];
2859       HeapWord* task_finger = task->finger();
2860       if (task_finger != NULL && task_finger < _heap_end) {
2861         // See above note on the global finger verification.
2862         HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2863         guarantee(task_finger == task_hr->bottom() ||
2864                   !task_hr->in_collection_set(),
2865                   err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
2866                           task_finger, HR_FORMAT_PARAMS(task_hr)));
2867       }
2868     }
2869   }
2870 }
2871 #endif // PRODUCT
2872 
2873 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2874   _markStack.setEmpty();
2875   _markStack.clear_overflow();
2876   if (clear_overflow) {
2877     clear_has_overflown();
2878   } else {
2879     assert(has_overflown(), "pre-condition");
2880   }
2881   _finger = _heap_start;
2882 
2883   for (int i = 0; i < (int)_max_task_num; ++i) {
2884     OopTaskQueue* queue = _task_queues->queue(i);
2885     queue->set_empty();
2886   }
2887 }
2888 
2889 // Aggregate the counting data that was constructed concurrently
2890 // with marking.
2891 class AggregateCountDataHRClosure: public HeapRegionClosure {
2892   ConcurrentMark* _cm;
2893   BitMap* _cm_card_bm;
2894   size_t _max_task_num;
2895 
2896  public:
2897   AggregateCountDataHRClosure(ConcurrentMark *cm,
2898                               BitMap* cm_card_bm,
2899                               size_t max_task_num) :
2900     _cm(cm), _cm_card_bm(cm_card_bm),
2901     _max_task_num(max_task_num) { }
2902 
2903   bool is_card_aligned(HeapWord* p) {
2904     return ((uintptr_t(p) & (CardTableModRefBS::card_size - 1)) == 0);
2905   }
2906 
2907   bool doHeapRegion(HeapRegion* hr) {
2908     if (hr->continuesHumongous()) {
2909       // We will ignore these here and process them when their
2910       // associated "starts humongous" region is processed.
2911       // Note that we cannot rely on their associated
2912       // "starts humongous" region to have their bit set to 1
2913       // since, due to the region chunking in the parallel region
2914       // iteration, a "continues humongous" region might be visited
2915       // before its associated "starts humongous".
2916       return false;
2917     }
2918 
2919     HeapWord* start = hr->bottom();
2920     HeapWord* limit = hr->next_top_at_mark_start();
2921     HeapWord* end = hr->end();
2922 
2923     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2924            err_msg("Preconditions not met - "
2925                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
2926                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
2927                    start, limit, hr->top(), hr->end()));
2928 
2929     assert(hr->next_marked_bytes() == 0, "Precondition");
2930 
2931     if (start == limit) {
2932       // NTAMS of this region has not been set so nothing to do.
2933       return false;
2934     }
2935 
2936     assert(is_card_aligned(start), "sanity");
2937     assert(is_card_aligned(end), "sanity");
2938 
2939     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2940     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2941     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2942 
2943     // If ntams is not card aligned then we bump the index for
2944     // limit so that we get the card spanning ntams.
2945     if (!is_card_aligned(limit)) {
2946       limit_idx += 1;
2947     }
2948 
2949     assert(limit_idx <= end_idx, "or else use atomics");
2950 
2951     // Aggregate the "stripe" in the count data associated with hr.
2952     uint hrs_index = hr->hrs_index();
2953     size_t marked_bytes = 0;
2954 
2955     for (int i = 0; (size_t)i < _max_task_num; i += 1) {
2956       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2957       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2958 
2959       // Fetch the marked_bytes in this region for task i and
2960       // add it to the running total for this region.
2961       marked_bytes += marked_bytes_array[hrs_index];
2962 
2963       // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
2964       // into the global card bitmap.
2965       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2966 
2967       while (scan_idx < limit_idx) {
2968         assert(task_card_bm->at(scan_idx) == true, "should be");
2969         _cm_card_bm->set_bit(scan_idx);
2970         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2971 
2972         // BitMap::get_next_one_offset() can handle the case when
2973         // its left_offset parameter is greater than its right_offset
2974         // parameter. If does, however, have an early exit if
2975         // left_offset == right_offset. So let's limit the value
2976         // passed in for left offset here.
2977         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2978         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2979       }
2980     }
2981 
2982     // Update the marked bytes for this region.
2983     hr->add_to_marked_bytes(marked_bytes);
2984 
2985     // Next heap region
2986     return false;
2987   }
2988 };
2989 
2990 class G1AggregateCountDataTask: public AbstractGangTask {
2991 protected:
2992   G1CollectedHeap* _g1h;
2993   ConcurrentMark* _cm;
2994   BitMap* _cm_card_bm;
2995   size_t _max_task_num;
2996   int _active_workers;
2997 
2998 public:
2999   G1AggregateCountDataTask(G1CollectedHeap* g1h,
3000                            ConcurrentMark* cm,
3001                            BitMap* cm_card_bm,
3002                            size_t max_task_num,
3003                            int n_workers) :
3004     AbstractGangTask("Count Aggregation"),
3005     _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3006     _max_task_num(max_task_num),
3007     _active_workers(n_workers) { }
3008 
3009   void work(uint worker_id) {
3010     AggregateCountDataHRClosure cl(_cm, _cm_card_bm, _max_task_num);
3011 
3012     if (G1CollectedHeap::use_parallel_gc_threads()) {
3013       _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
3014                                             _active_workers,
3015                                             HeapRegion::AggregateCountClaimValue);
3016     } else {
3017       _g1h->heap_region_iterate(&cl);
3018     }
3019   }
3020 };
3021 
3022 
3023 void ConcurrentMark::aggregate_count_data() {
3024   int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3025                         _g1h->workers()->active_workers() :
3026                         1);
3027 
3028   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3029                                            _max_task_num, n_workers);
3030 
3031   if (G1CollectedHeap::use_parallel_gc_threads()) {
3032     assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3033            "sanity check");
3034     _g1h->set_par_threads(n_workers);
3035     _g1h->workers()->run_task(&g1_par_agg_task);
3036     _g1h->set_par_threads(0);
3037 
3038     assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3039            "sanity check");
3040     _g1h->reset_heap_region_claim_values();
3041   } else {
3042     g1_par_agg_task.work(0);
3043   }
3044 }
3045 
3046 // Clear the per-worker arrays used to store the per-region counting data
3047 void ConcurrentMark::clear_all_count_data() {
3048   // Clear the global card bitmap - it will be filled during
3049   // liveness count aggregation (during remark) and the
3050   // final counting task.
3051   _card_bm.clear();
3052 
3053   // Clear the global region bitmap - it will be filled as part
3054   // of the final counting task.
3055   _region_bm.clear();
3056 
3057   uint max_regions = _g1h->max_regions();
3058   assert(_max_task_num != 0, "unitialized");
3059 
3060   for (int i = 0; (size_t) i < _max_task_num; i += 1) {
3061     BitMap* task_card_bm = count_card_bitmap_for(i);
3062     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3063 
3064     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3065     assert(marked_bytes_array != NULL, "uninitialized");
3066 
3067     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3068     task_card_bm->clear();
3069   }
3070 }
3071 
3072 void ConcurrentMark::print_stats() {
3073   if (verbose_stats()) {
3074     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3075     for (size_t i = 0; i < _active_tasks; ++i) {
3076       _tasks[i]->print_stats();
3077       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3078     }
3079   }
3080 }
3081 
3082 // abandon current marking iteration due to a Full GC
3083 void ConcurrentMark::abort() {
3084   // Clear all marks to force marking thread to do nothing
3085   _nextMarkBitMap->clearAll();
3086   // Clear the liveness counting data
3087   clear_all_count_data();
3088   // Empty mark stack
3089   clear_marking_state();
3090   for (int i = 0; i < (int)_max_task_num; ++i) {
3091     _tasks[i]->clear_region_fields();
3092   }
3093   _has_aborted = true;
3094 
3095   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3096   satb_mq_set.abandon_partial_marking();
3097   // This can be called either during or outside marking, we'll read
3098   // the expected_active value from the SATB queue set.
3099   satb_mq_set.set_active_all_threads(
3100                                  false, /* new active value */
3101                                  satb_mq_set.is_active() /* expected_active */);
3102 }
3103 
3104 static void print_ms_time_info(const char* prefix, const char* name,
3105                                NumberSeq& ns) {
3106   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3107                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3108   if (ns.num() > 0) {
3109     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3110                            prefix, ns.sd(), ns.maximum());
3111   }
3112 }
3113 
3114 void ConcurrentMark::print_summary_info() {
3115   gclog_or_tty->print_cr(" Concurrent marking:");
3116   print_ms_time_info("  ", "init marks", _init_times);
3117   print_ms_time_info("  ", "remarks", _remark_times);
3118   {
3119     print_ms_time_info("     ", "final marks", _remark_mark_times);
3120     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3121 
3122   }
3123   print_ms_time_info("  ", "cleanups", _cleanup_times);
3124   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3125                          _total_counting_time,
3126                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3127                           (double)_cleanup_times.num()
3128                          : 0.0));
3129   if (G1ScrubRemSets) {
3130     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3131                            _total_rs_scrub_time,
3132                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3133                             (double)_cleanup_times.num()
3134                            : 0.0));
3135   }
3136   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3137                          (_init_times.sum() + _remark_times.sum() +
3138                           _cleanup_times.sum())/1000.0);
3139   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3140                 "(%8.2f s marking).",
3141                 cmThread()->vtime_accum(),
3142                 cmThread()->vtime_mark_accum());
3143 }
3144 
3145 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3146   _parallel_workers->print_worker_threads_on(st);
3147 }
3148 
3149 // We take a break if someone is trying to stop the world.
3150 bool ConcurrentMark::do_yield_check(uint worker_id) {
3151   if (should_yield()) {
3152     if (worker_id == 0) {
3153       _g1h->g1_policy()->record_concurrent_pause();
3154     }
3155     cmThread()->yield();
3156     if (worker_id == 0) {
3157       _g1h->g1_policy()->record_concurrent_pause_end();
3158     }
3159     return true;
3160   } else {
3161     return false;
3162   }
3163 }
3164 
3165 bool ConcurrentMark::should_yield() {
3166   return cmThread()->should_yield();
3167 }
3168 
3169 bool ConcurrentMark::containing_card_is_marked(void* p) {
3170   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3171   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3172 }
3173 
3174 bool ConcurrentMark::containing_cards_are_marked(void* start,
3175                                                  void* last) {
3176   return containing_card_is_marked(start) &&
3177          containing_card_is_marked(last);
3178 }
3179 
3180 #ifndef PRODUCT
3181 // for debugging purposes
3182 void ConcurrentMark::print_finger() {
3183   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3184                          _heap_start, _heap_end, _finger);
3185   for (int i = 0; i < (int) _max_task_num; ++i) {
3186     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
3187   }
3188   gclog_or_tty->print_cr("");
3189 }
3190 #endif
3191 
3192 void CMTask::scan_object(oop obj) {
3193   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3194 
3195   if (_cm->verbose_high()) {
3196     gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
3197                            _task_id, (void*) obj);
3198   }
3199 
3200   size_t obj_size = obj->size();
3201   _words_scanned += obj_size;
3202 
3203   obj->oop_iterate(_cm_oop_closure);
3204   statsOnly( ++_objs_scanned );
3205   check_limits();
3206 }
3207 
3208 // Closure for iteration over bitmaps
3209 class CMBitMapClosure : public BitMapClosure {
3210 private:
3211   // the bitmap that is being iterated over
3212   CMBitMap*                   _nextMarkBitMap;
3213   ConcurrentMark*             _cm;
3214   CMTask*                     _task;
3215 
3216 public:
3217   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3218     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3219 
3220   bool do_bit(size_t offset) {
3221     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3222     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3223     assert( addr < _cm->finger(), "invariant");
3224 
3225     statsOnly( _task->increase_objs_found_on_bitmap() );
3226     assert(addr >= _task->finger(), "invariant");
3227 
3228     // We move that task's local finger along.
3229     _task->move_finger_to(addr);
3230 
3231     _task->scan_object(oop(addr));
3232     // we only partially drain the local queue and global stack
3233     _task->drain_local_queue(true);
3234     _task->drain_global_stack(true);
3235 
3236     // if the has_aborted flag has been raised, we need to bail out of
3237     // the iteration
3238     return !_task->has_aborted();
3239   }
3240 };
3241 
3242 // Closure for iterating over objects, currently only used for
3243 // processing SATB buffers.
3244 class CMObjectClosure : public ObjectClosure {
3245 private:
3246   CMTask* _task;
3247 
3248 public:
3249   void do_object(oop obj) {
3250     _task->deal_with_reference(obj);
3251   }
3252 
3253   CMObjectClosure(CMTask* task) : _task(task) { }
3254 };
3255 
// Creates the oop closure used by the given marking task.
//   g1h  - the heap; also the source of the concurrent marking
//          reference processor
//   cm   - the concurrent marking instance
//   task - the task on whose behalf references are processed
// _ref_processor is expected to be NULL on entry. It is only set, to
// g1h->ref_processor_cm(), when G1UseConcMarkReferenceProcessing is
// enabled; otherwise it is left as NULL.
G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               ConcurrentMark* cm,
                               CMTask* task)
  : _g1h(g1h), _cm(cm), _task(task) {
  assert(_ref_processor == NULL, "should be initialized to NULL");

  if (G1UseConcMarkReferenceProcessing) {
    _ref_processor = g1h->ref_processor_cm();
    assert(_ref_processor != NULL, "should not be NULL");
  }
}
3267 
3268 void CMTask::setup_for_region(HeapRegion* hr) {
3269   // Separated the asserts so that we know which one fires.
3270   assert(hr != NULL,
3271         "claim_region() should have filtered out continues humongous regions");
3272   assert(!hr->continuesHumongous(),
3273         "claim_region() should have filtered out continues humongous regions");
3274 
3275   if (_cm->verbose_low()) {
3276     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3277                            _task_id, hr);
3278   }
3279 
3280   _curr_region  = hr;
3281   _finger       = hr->bottom();
3282   update_region_limit();
3283 }
3284 
3285 void CMTask::update_region_limit() {
3286   HeapRegion* hr            = _curr_region;
3287   HeapWord* bottom          = hr->bottom();
3288   HeapWord* limit           = hr->next_top_at_mark_start();
3289 
3290   if (limit == bottom) {
3291     if (_cm->verbose_low()) {
3292       gclog_or_tty->print_cr("[%d] found an empty region "
3293                              "["PTR_FORMAT", "PTR_FORMAT")",
3294                              _task_id, bottom, limit);
3295     }
3296     // The region was collected underneath our feet.
3297     // We set the finger to bottom to ensure that the bitmap
3298     // iteration that will follow this will not do anything.
3299     // (this is not a condition that holds when we set the region up,
3300     // as the region is not supposed to be empty in the first place)
3301     _finger = bottom;
3302   } else if (limit >= _region_limit) {
3303     assert(limit >= _finger, "peace of mind");
3304   } else {
3305     assert(limit < _region_limit, "only way to get here");
3306     // This can happen under some pretty unusual circumstances.  An
3307     // evacuation pause empties the region underneath our feet (NTAMS
3308     // at bottom). We then do some allocation in the region (NTAMS
3309     // stays at bottom), followed by the region being used as a GC
3310     // alloc region (NTAMS will move to top() and the objects
3311     // originally below it will be grayed). All objects now marked in
3312     // the region are explicitly grayed, if below the global finger,
3313     // and we do not need in fact to scan anything else. So, we simply
3314     // set _finger to be limit to ensure that the bitmap iteration
3315     // doesn't do anything.
3316     _finger = limit;
3317   }
3318 
3319   _region_limit = limit;
3320 }
3321 
// Drops the region this task is currently holding on to by clearing
// the per-region fields. The task must be holding a region when this
// is called.
void CMTask::giveup_current_region() {
  assert(_curr_region != NULL, "invariant");
  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
                           _task_id, _curr_region);
  }
  clear_region_fields();
}
3330 
// Resets the current region, the local finger and the region limit to
// NULL, which is how the task records that it holds no region.
void CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
  _curr_region   = NULL;
  _finger        = NULL;
  _region_limit  = NULL;
}
3338 
3339 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3340   if (cm_oop_closure == NULL) {
3341     assert(_cm_oop_closure != NULL, "invariant");
3342   } else {
3343     assert(_cm_oop_closure == NULL, "invariant");
3344   }
3345   _cm_oop_closure = cm_oop_closure;
3346 }
3347 
// Resets this task: records the bitmap to use, clears the per-region
// fields and zeroes the call counter and the timing fields. When
// _MARKING_STATS_ is enabled the statistics counters listed below are
// zeroed as well.
//   nextMarkBitMap - the next marking bitmap; must not be NULL (checked
//                    with guarantee())
void CMTask::reset(CMBitMap* nextMarkBitMap) {
  guarantee(nextMarkBitMap != NULL, "invariant");

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] resetting", _task_id);
  }

  _nextMarkBitMap                = nextMarkBitMap;
  clear_region_fields();

  _calls                         = 0;
  _elapsed_time_ms               = 0.0;
  _termination_time_ms           = 0.0;
  _termination_start_time_ms     = 0.0;

#if _MARKING_STATS_
  // NOTE(review): _clock_due_to_scanning and _clock_due_to_marking are
  // not reset here; print_stats() labels them "(cum)", so this looks
  // deliberate - confirm.
  _local_pushes                  = 0;
  _local_pops                    = 0;
  _local_max_size                = 0;
  _objs_scanned                  = 0;
  _global_pushes                 = 0;
  _global_pops                   = 0;
  _global_max_size               = 0;
  _global_transfers_to           = 0;
  _global_transfers_from         = 0;
  _regions_claimed               = 0;
  _objs_found_on_bitmap          = 0;
  _satb_buffers_processed        = 0;
  _steal_attempts                = 0;
  _steals                        = 0;
  _aborted                       = 0;
  _aborted_overflow              = 0;
  _aborted_cm_aborted            = 0;
  _aborted_yield                 = 0;
  _aborted_timed_out             = 0;
  _aborted_satb                  = 0;
  _aborted_termination           = 0;
#endif // _MARKING_STATS_
}
3387 
3388 bool CMTask::should_exit_termination() {
3389   regular_clock_call();
3390   // This is called when we are in the termination protocol. We should
3391   // quit if, for some reason, this task wants to abort or the global
3392   // stack is not empty (this means that we can get work from it).
3393   return !_cm->mark_stack_empty() || has_aborted();
3394 }
3395 
// Invoked once the words-scanned limit or the refs-reached limit has
// been hit (asserted below); all it does is run the regular clock.
void CMTask::reached_limit() {
  assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit ,
         "shouldn't have been called otherwise");
  regular_clock_call();
}
3402 
// The "clock" of the marking task. It recalculates the work-based
// limits and then checks, in order, the conditions under which the
// task has to abort; the first one that holds sets the has_aborted
// flag and returns. It does nothing if the task has already aborted.
void CMTask::regular_clock_call() {
  if (has_aborted()) return;

  // First, we need to recalculate the words scanned and refs reached
  // limits for the next clock call.
  recalculate_limits();

  // During the regular clock call we do the following

  // (1) If an overflow has been flagged, then we abort.
  if (_cm->has_overflown()) {
    set_has_aborted();
    return;
  }

  // If we are not concurrent (i.e. we're doing remark) we don't need
  // to check anything else. The other steps are only needed during
  // the concurrent marking phase.
  if (!concurrent()) return;

  // (2) If marking has been aborted for Full GC, then we also abort.
  if (_cm->has_aborted()) {
    set_has_aborted();
    statsOnly( ++_aborted_cm_aborted );
    return;
  }

  // Sampled once and used both by the stats in (3) and by the time
  // quota check in (5).
  double curr_time_ms = os::elapsedVTime() * 1000.0;

  // (3) If marking stats are enabled, then we update the step history.
#if _MARKING_STATS_
  if (_words_scanned >= _words_scanned_limit) {
    ++_clock_due_to_scanning;
  }
  if (_refs_reached >= _refs_reached_limit) {
    ++_clock_due_to_marking;
  }

  double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  _interval_start_time_ms = curr_time_ms;
  _all_clock_intervals_ms.add(last_interval_ms);

  // NOTE(review): _words_scanned and _refs_reached are printed with %d
  // below; if these fields are wider than int (e.g. size_t) the
  // specifiers do not match - confirm against the field declarations.
  if (_cm->verbose_medium()) {
      gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
                        "scanned = %d%s, refs reached = %d%s",
                        _task_id, last_interval_ms,
                        _words_scanned,
                        (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
                        _refs_reached,
                        (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  }
#endif // _MARKING_STATS_

  // (4) We check whether we should yield. If we have to, then we abort.
  if (_cm->should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    statsOnly( ++_aborted_yield );
    return;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_timed_out = true;
    statsOnly( ++_aborted_timed_out );
    return;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  // This check is skipped while this task is itself draining SATB
  // buffers (_draining_satb_buffers is set).
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
                             _task_id);
    }
    // we do need to process SATB buffers, we'll abort and restart
    // the marking task to do so
    set_has_aborted();
    statsOnly( ++_aborted_satb );
    return;
  }
}
3490 
3491 void CMTask::recalculate_limits() {
3492   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3493   _words_scanned_limit      = _real_words_scanned_limit;
3494 
3495   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3496   _refs_reached_limit       = _real_refs_reached_limit;
3497 }
3498 
// Pulls both working limits in to three quarters of a period below the
// corresponding "real" limits, so that the regular clock is triggered
// earlier than it otherwise would be.
void CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
  }

  // The new values are derived from the "real" limits, not from the
  // current working limits, so repeated calls do not compound.
  _words_scanned_limit = _real_words_scanned_limit -
    3 * words_scanned_period / 4;
  _refs_reached_limit  = _real_refs_reached_limit -
    3 * refs_reached_period / 4;
}
3514 
3515 void CMTask::move_entries_to_global_stack() {
3516   // local array where we'll store the entries that will be popped
3517   // from the local queue
3518   oop buffer[global_stack_transfer_size];
3519 
3520   int n = 0;
3521   oop obj;
3522   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3523     buffer[n] = obj;
3524     ++n;
3525   }
3526 
3527   if (n > 0) {
3528     // we popped at least one entry from the local queue
3529 
3530     statsOnly( ++_global_transfers_to; _local_pops += n );
3531 
3532     if (!_cm->mark_stack_push(buffer, n)) {
3533       if (_cm->verbose_low()) {
3534         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
3535                                _task_id);
3536       }
3537       set_has_aborted();
3538     } else {
3539       // the transfer was successful
3540 
3541       if (_cm->verbose_medium()) {
3542         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3543                                _task_id, n);
3544       }
3545       statsOnly( int tmp_size = _cm->mark_stack_size();
3546                  if (tmp_size > _global_max_size) {
3547                    _global_max_size = tmp_size;
3548                  }
3549                  _global_pushes += n );
3550     }
3551   }
3552 
3553   // this operation was quite expensive, so decrease the limits
3554   decrease_limits();
3555 }
3556 
3557 void CMTask::get_entries_from_global_stack() {
3558   // local array where we'll store the entries that will be popped
3559   // from the global stack.
3560   oop buffer[global_stack_transfer_size];
3561   int n;
3562   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3563   assert(n <= global_stack_transfer_size,
3564          "we should not pop more than the given limit");
3565   if (n > 0) {
3566     // yes, we did actually pop at least one entry
3567 
3568     statsOnly( ++_global_transfers_from; _global_pops += n );
3569     if (_cm->verbose_medium()) {
3570       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3571                              _task_id, n);
3572     }
3573     for (int i = 0; i < n; ++i) {
3574       bool success = _task_queue->push(buffer[i]);
3575       // We only call this when the local queue is empty or under a
3576       // given target limit. So, we do not expect this push to fail.
3577       assert(success, "invariant");
3578     }
3579 
3580     statsOnly( int tmp_size = _task_queue->size();
3581                if (tmp_size > _local_max_size) {
3582                  _local_max_size = tmp_size;
3583                }
3584                _local_pushes += n );
3585   }
3586 
3587   // this operation was quite expensive, so decrease the limits
3588   decrease_limits();
3589 }
3590 
3591 void CMTask::drain_local_queue(bool partially) {
3592   if (has_aborted()) return;
3593 
3594   // Decide what the target size is, depending whether we're going to
3595   // drain it partially (so that other tasks can steal if they run out
3596   // of things to do) or totally (at the very end).
3597   size_t target_size;
3598   if (partially) {
3599     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3600   } else {
3601     target_size = 0;
3602   }
3603 
3604   if (_task_queue->size() > target_size) {
3605     if (_cm->verbose_high()) {
3606       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3607                              _task_id, target_size);
3608     }
3609 
3610     oop obj;
3611     bool ret = _task_queue->pop_local(obj);
3612     while (ret) {
3613       statsOnly( ++_local_pops );
3614 
3615       if (_cm->verbose_high()) {
3616         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3617                                (void*) obj);
3618       }
3619 
3620       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3621       assert(!_g1h->is_on_master_free_list(
3622                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3623 
3624       scan_object(obj);
3625 
3626       if (_task_queue->size() <= target_size || has_aborted()) {
3627         ret = false;
3628       } else {
3629         ret = _task_queue->pop_local(obj);
3630       }
3631     }
3632 
3633     if (_cm->verbose_high()) {
3634       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3635                              _task_id, _task_queue->size());
3636     }
3637   }
3638 }
3639 
3640 void CMTask::drain_global_stack(bool partially) {
3641   if (has_aborted()) return;
3642 
3643   // We have a policy to drain the local queue before we attempt to
3644   // drain the global stack.
3645   assert(partially || _task_queue->size() == 0, "invariant");
3646 
3647   // Decide what the target size is, depending whether we're going to
3648   // drain it partially (so that other tasks can steal if they run out
3649   // of things to do) or totally (at the very end).  Notice that,
3650   // because we move entries from the global stack in chunks or
3651   // because another task might be doing the same, we might in fact
3652   // drop below the target. But, this is not a problem.
3653   size_t target_size;
3654   if (partially) {
3655     target_size = _cm->partial_mark_stack_size_target();
3656   } else {
3657     target_size = 0;
3658   }
3659 
3660   if (_cm->mark_stack_size() > target_size) {
3661     if (_cm->verbose_low()) {
3662       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3663                              _task_id, target_size);
3664     }
3665 
3666     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3667       get_entries_from_global_stack();
3668       drain_local_queue(partially);
3669     }
3670 
3671     if (_cm->verbose_low()) {
3672       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3673                              _task_id, _cm->mark_stack_size());
3674     }
3675   }
3676 }
3677 
// Applies a CMObjectClosure for this task to the completed SATB
// buffers until there are none left or the task aborts. Outside the
// concurrent phase (i.e. during remark), if the task has not aborted,
// the closure is also iterated over all threads. Does nothing if the
// task has already aborted.
//
// The SATB queue set makes several assumptions about whether the par
// or the non-par versions of its methods are called. This is why some
// of the code is replicated. We should really get rid of the
// single-threaded version of the code to simplify things.
void CMTask::drain_satb_buffers() {
  if (has_aborted()) return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counter productive if it did that. :-)
  _draining_satb_buffers = true;

  // Install the closure on the queue set. It lives on this stack frame
  // and is uninstalled again (set to NULL) before we return.
  CMObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_task_id, &oc);
  } else {
    satb_mq_set.set_closure(&oc);
  }

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    while (!has_aborted() &&
           satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      }
      statsOnly( ++_satb_buffers_processed );
      // give the clock a chance to raise the abort flag after each buffer
      regular_clock_call();
    }
  } else {
    while (!has_aborted() &&
           satb_mq_set.apply_closure_to_completed_buffer()) {
      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
      }
      statsOnly( ++_satb_buffers_processed );
      // give the clock a chance to raise the abort flag after each buffer
      regular_clock_call();
    }
  }

  if (!concurrent() && !has_aborted()) {
    // We should only do this during remark.
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      satb_mq_set.par_iterate_closure_all_threads(_task_id);
    } else {
      satb_mq_set.iterate_closure_all_threads();
    }
  }

  _draining_satb_buffers = false;

  // Unless we aborted or are running concurrently, no completed
  // buffers may be left at this point.
  assert(has_aborted() ||
         concurrent() ||
         satb_mq_set.completed_buffers_num() == 0, "invariant");

  // Uninstall the closure; it goes out of scope when we return.
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_task_id, NULL);
  } else {
    satb_mq_set.set_closure(NULL);
  }

  // again, this was a potentially expensive operation, decrease the
  // limits to get the regular clock call early
  decrease_limits();
}
3746 
// Prints this task's marking statistics to gclog_or_tty: the call
// count, elapsed and termination times and the step time summary.
// When _MARKING_STATS_ is enabled the detailed counters (clock
// intervals and causes, objects, local queue, global stack, regions,
// SATB buffers, steals and abort reasons) are printed as well.
void CMTask::print_stats() {
  gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
                         _task_id, _calls);
  gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
                         _elapsed_time_ms, _termination_time_ms);
  gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _step_times_ms.num(), _step_times_ms.avg(),
                         _step_times_ms.sd());
  gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
                         _step_times_ms.maximum(), _step_times_ms.sum());

#if _MARKING_STATS_
  // NOTE(review): every counter below is printed with %d; this is only
  // correct if the corresponding fields are int-sized - confirm against
  // the field declarations.
  gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
                         _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
                         _all_clock_intervals_ms.sd());
  gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
                         _all_clock_intervals_ms.maximum(),
                         _all_clock_intervals_ms.sum());
  gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
                         _clock_due_to_scanning, _clock_due_to_marking);
  gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
                         _objs_scanned, _objs_found_on_bitmap);
  gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
                         _local_pushes, _local_pops, _local_max_size);
  gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
                         _global_pushes, _global_pops, _global_max_size);
  gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
                         _global_transfers_to,_global_transfers_from);
  gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);
  gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
  gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
                         _steal_attempts, _steals);
  gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
  gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
                         _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
  gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
                         _aborted_timed_out, _aborted_satb, _aborted_termination);
#endif // _MARKING_STATS_
}
3786 
3787 /*****************************************************************************
3788 
3789     The do_marking_step(time_target_ms) method is the building block
3790     of the parallel marking framework. It can be called in parallel
3791     with other invocations of do_marking_step() on different tasks
3792     (but only one per task, obviously) and concurrently with the
3793     mutator threads, or during remark, hence it eliminates the need
3794     for two versions of the code. When called during remark, it will
3795     pick up from where the task left off during the concurrent marking
3796     phase. Interestingly, tasks are also claimable during evacuation
3797     pauses too, since do_marking_step() ensures that it aborts before
3798     it needs to yield.
3799 
    The data structures that it uses to do marking work are the
3801     following:
3802 
3803       (1) Marking Bitmap. If there are gray objects that appear only
3804       on the bitmap (this happens either when dealing with an overflow
3805       or when the initial marking phase has simply marked the roots
3806       and didn't push them on the stack), then tasks claim heap
3807       regions whose bitmap they then scan to find gray objects. A
3808       global finger indicates where the end of the last claimed region
3809       is. A local finger indicates how far into the region a task has
3810       scanned. The two fingers are used to determine how to gray an
3811       object (i.e. whether simply marking it is OK, as it will be
3812       visited by a task in the future, or whether it needs to be also
3813       pushed on a stack).
3814 
3815       (2) Local Queue. The local queue of the task which is accessed
3816       reasonably efficiently by the task. Other tasks can steal from
3817       it when they run out of work. Throughout the marking phase, a
3818       task attempts to keep its local queue short but not totally
3819       empty, so that entries are available for stealing by other
3820       tasks. Only when there is no more work, a task will totally
3821       drain its local queue.
3822 
3823       (3) Global Mark Stack. This handles local queue overflow. During
3824       marking only sets of entries are moved between it and the local
3825       queues, as access to it requires a mutex and more fine-grain
3826       interaction with it which might cause contention. If it
3827       overflows, then the marking phase should restart and iterate
3828       over the bitmap to identify gray objects. Throughout the marking
3829       phase, tasks attempt to keep the global mark stack at a small
3830       length but not totally empty, so that entries are available for
3831       popping by other tasks. Only when there is no more work, tasks
3832       will totally drain the global mark stack.
3833 
3834       (4) SATB Buffer Queue. This is where completed SATB buffers are
3835       made available. Buffers are regularly removed from this queue
3836       and scanned for roots, so that the queue doesn't get too
3837       long. During remark, all completed buffers are processed, as
3838       well as the filled in parts of any uncompleted buffers.
3839 
3840     The do_marking_step() method tries to abort when the time target
3841     has been reached. There are a few other cases when the
3842     do_marking_step() method also aborts:
3843 
3844       (1) When the marking phase has been aborted (after a Full GC).
3845 
3846       (2) When a global overflow (on the global stack) has been
3847       triggered. Before the task aborts, it will actually sync up with
3848       the other tasks to ensure that all the marking data structures
3849       (local queues, stacks, fingers etc.)  are re-initialised so that
3850       when do_marking_step() completes, the marking phase can
3851       immediately restart.
3852 
3853       (3) When enough completed SATB buffers are available. The
3854       do_marking_step() method only tries to drain SATB buffers right
3855       at the beginning. So, if enough buffers are available, the
3856       marking step aborts and the SATB buffers are processed at
3857       the beginning of the next invocation.
3858 
      (4) To yield. When we have to yield then we abort and yield
3860       right at the end of do_marking_step(). This saves us from a lot
3861       of hassle as, by yielding we might allow a Full GC. If this
3862       happens then objects will be compacted underneath our feet, the
3863       heap might shrink, etc. We save checking for this by just
3864       aborting and doing the yield right at the end.
3865 
3866     From the above it follows that the do_marking_step() method should
3867     be called in a loop (or, otherwise, regularly) until it completes.
3868 
3869     If a marking step completes without its has_aborted() flag being
3870     true, it means it has completed the current marking phase (and
3871     also all other marking tasks have done so and have all synced up).
3872 
3873     A method called regular_clock_call() is invoked "regularly" (in
3874     sub ms intervals) throughout marking. It is this clock method that
3875     checks all the abort conditions which were mentioned above and
3876     decides when the task should abort. A work-based scheme is used to
3877     trigger this clock method: when the number of object words the
3878     marking phase has scanned or the number of references the marking
3879     phase has visited reach a given limit. Additional invocations to
3880     the method clock have been planted in a few other strategic places
3881     too. The initial reason for the clock method was to avoid calling
3882     vtime too regularly, as it is quite expensive. So, once it was in
3883     place, it was natural to piggy-back all the other conditions on it
3884     too and not constantly check them throughout the code.
3885 
3886  *****************************************************************************/
3887 
3888 void CMTask::do_marking_step(double time_target_ms,
3889                              bool do_stealing,
3890                              bool do_termination) {
3891   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3892   assert(concurrent() == _cm->concurrent(), "they should be the same");
3893 
3894   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3895   assert(_task_queues != NULL, "invariant");
3896   assert(_task_queue != NULL, "invariant");
3897   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
3898 
3899   assert(!_claimed,
3900          "only one thread should claim this task at any one time");
3901 
3902   // OK, this doesn't safeguard again all possible scenarios, as it is
3903   // possible for two threads to set the _claimed flag at the same
3904   // time. But it is only for debugging purposes anyway and it will
3905   // catch most problems.
3906   _claimed = true;
3907 
3908   _start_time_ms = os::elapsedVTime() * 1000.0;
3909   statsOnly( _interval_start_time_ms = _start_time_ms );
3910 
3911   double diff_prediction_ms =
3912     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3913   _time_target_ms = time_target_ms - diff_prediction_ms;
3914 
3915   // set up the variables that are used in the work-based scheme to
3916   // call the regular clock method
3917   _words_scanned = 0;
3918   _refs_reached  = 0;
3919   recalculate_limits();
3920 
3921   // clear all flags
3922   clear_has_aborted();
3923   _has_timed_out = false;
3924   _draining_satb_buffers = false;
3925 
3926   ++_calls;
3927 
3928   if (_cm->verbose_low()) {
3929     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3930                            "target = %1.2lfms >>>>>>>>>>",
3931                            _task_id, _calls, _time_target_ms);
3932   }
3933 
3934   // Set up the bitmap and oop closures. Anything that uses them is
3935   // eventually called from this method, so it is OK to allocate these
3936   // statically.
3937   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3938   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3939   set_cm_oop_closure(&cm_oop_closure);
3940 
3941   if (_cm->has_overflown()) {
3942     // This can happen if the mark stack overflows during a GC pause
3943     // and this task, after a yield point, restarts. We have to abort
3944     // as we need to get into the overflow protocol which happens
3945     // right at the end of this task.
3946     set_has_aborted();
3947   }
3948 
3949   // First drain any available SATB buffers. After this, we will not
3950   // look at SATB buffers before the next invocation of this method.
3951   // If enough completed SATB buffers are queued up, the regular clock
3952   // will abort this task so that it restarts.
3953   drain_satb_buffers();
3954   // ...then partially drain the local queue and the global stack
3955   drain_local_queue(true);
3956   drain_global_stack(true);
3957 
3958   do {
3959     if (!has_aborted() && _curr_region != NULL) {
3960       // This means that we're already holding on to a region.
3961       assert(_finger != NULL, "if region is not NULL, then the finger "
3962              "should not be NULL either");
3963 
3964       // We might have restarted this task after an evacuation pause
3965       // which might have evacuated the region we're holding on to
3966       // underneath our feet. Let's read its limit again to make sure
3967       // that we do not iterate over a region of the heap that
3968       // contains garbage (update_region_limit() will also move
3969       // _finger to the start of the region if it is found empty).
3970       update_region_limit();
3971       // We will start from _finger not from the start of the region,
3972       // as we might be restarting this task after aborting half-way
3973       // through scanning this region. In this case, _finger points to
3974       // the address where we last found a marked object. If this is a
3975       // fresh region, _finger points to start().
3976       MemRegion mr = MemRegion(_finger, _region_limit);
3977 
3978       if (_cm->verbose_low()) {
3979         gclog_or_tty->print_cr("[%d] we're scanning part "
3980                                "["PTR_FORMAT", "PTR_FORMAT") "
3981                                "of region "PTR_FORMAT,
3982                                _task_id, _finger, _region_limit, _curr_region);
3983       }
3984 
3985       // Let's iterate over the bitmap of the part of the
3986       // region that is left.
3987       if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3988         // We successfully completed iterating over the region. Now,
3989         // let's give up the region.
3990         giveup_current_region();
3991         regular_clock_call();
3992       } else {
3993         assert(has_aborted(), "currently the only way to do so");
3994         // The only way to abort the bitmap iteration is to return
3995         // false from the do_bit() method. However, inside the
3996         // do_bit() method we move the _finger to point to the
3997         // object currently being looked at. So, if we bail out, we
3998         // have definitely set _finger to something non-null.
3999         assert(_finger != NULL, "invariant");
4000 
4001         // Region iteration was actually aborted. So now _finger
4002         // points to the address of the object we last scanned. If we
4003         // leave it there, when we restart this task, we will rescan
4004         // the object. It is easy to avoid this. We move the finger by
4005         // enough to point to the next possible object header (the
4006         // bitmap knows by how much we need to move it as it knows its
4007         // granularity).
4008         assert(_finger < _region_limit, "invariant");
4009         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
4010         // Check if bitmap iteration was aborted while scanning the last object
4011         if (new_finger >= _region_limit) {
4012           giveup_current_region();
4013         } else {
4014           move_finger_to(new_finger);
4015         }
4016       }
4017     }
4018     // At this point we have either completed iterating over the
4019     // region we were holding on to, or we have aborted.
4020 
4021     // We then partially drain the local queue and the global stack.
4022     // (Do we really need this?)
4023     drain_local_queue(true);
4024     drain_global_stack(true);
4025 
4026     // Read the note on the claim_region() method on why it might
4027     // return NULL with potentially more regions available for
4028     // claiming and why we have to check out_of_regions() to determine
4029     // whether we're done or not.
4030     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4031       // We are going to try to claim a new region. We should have
4032       // given up on the previous one.
4033       // Separated the asserts so that we know which one fires.
4034       assert(_curr_region  == NULL, "invariant");
4035       assert(_finger       == NULL, "invariant");
4036       assert(_region_limit == NULL, "invariant");
4037       if (_cm->verbose_low()) {
4038         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
4039       }
4040       HeapRegion* claimed_region = _cm->claim_region(_task_id);
4041       if (claimed_region != NULL) {
4042         // Yes, we managed to claim one
4043         statsOnly( ++_regions_claimed );
4044 
4045         if (_cm->verbose_low()) {
4046           gclog_or_tty->print_cr("[%d] we successfully claimed "
4047                                  "region "PTR_FORMAT,
4048                                  _task_id, claimed_region);
4049         }
4050 
4051         setup_for_region(claimed_region);
4052         assert(_curr_region == claimed_region, "invariant");
4053       }
4054       // It is important to call the regular clock here. It might take
4055       // a while to claim a region if, for example, we hit a large
4056       // block of empty regions. So we need to call the regular clock
4057       // method once round the loop to make sure it's called
4058       // frequently enough.
4059       regular_clock_call();
4060     }
4061 
4062     if (!has_aborted() && _curr_region == NULL) {
4063       assert(_cm->out_of_regions(),
4064              "at this point we should be out of regions");
4065     }
4066   } while ( _curr_region != NULL && !has_aborted());
4067 
4068   if (!has_aborted()) {
4069     // We cannot check whether the global stack is empty, since other
4070     // tasks might be pushing objects to it concurrently.
4071     assert(_cm->out_of_regions(),
4072            "at this point we should be out of regions");
4073 
4074     if (_cm->verbose_low()) {
4075       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4076     }
4077 
4078     // Try to reduce the number of available SATB buffers so that
4079     // remark has less work to do.
4080     drain_satb_buffers();
4081   }
4082 
4083   // Since we've done everything else, we can now totally drain the
4084   // local queue and global stack.
4085   drain_local_queue(false);
4086   drain_global_stack(false);
4087 
4088   // Attempt at work stealing from other task's queues.
4089   if (do_stealing && !has_aborted()) {
4090     // We have not aborted. This means that we have finished all that
4091     // we could. Let's try to do some stealing...
4092 
4093     // We cannot check whether the global stack is empty, since other
4094     // tasks might be pushing objects to it concurrently.
4095     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4096            "only way to reach here");
4097 
4098     if (_cm->verbose_low()) {
4099       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4100     }
4101 
4102     while (!has_aborted()) {
4103       oop obj;
4104       statsOnly( ++_steal_attempts );
4105 
4106       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4107         if (_cm->verbose_medium()) {
4108           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4109                                  _task_id, (void*) obj);
4110         }
4111 
4112         statsOnly( ++_steals );
4113 
4114         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4115                "any stolen object should be marked");
4116         scan_object(obj);
4117 
4118         // And since we're towards the end, let's totally drain the
4119         // local queue and global stack.
4120         drain_local_queue(false);
4121         drain_global_stack(false);
4122       } else {
4123         break;
4124       }
4125     }
4126   }
4127 
4128   // If we are about to wrap up and go into termination, check if we
4129   // should raise the overflow flag.
4130   if (do_termination && !has_aborted()) {
4131     if (_cm->force_overflow()->should_force()) {
4132       _cm->set_has_overflown();
4133       regular_clock_call();
4134     }
4135   }
4136 
4137   // We still haven't aborted. Now, let's try to get into the
4138   // termination protocol.
4139   if (do_termination && !has_aborted()) {
4140     // We cannot check whether the global stack is empty, since other
4141     // tasks might be concurrently pushing objects on it.
4142     // Separated the asserts so that we know which one fires.
4143     assert(_cm->out_of_regions(), "only way to reach here");
4144     assert(_task_queue->size() == 0, "only way to reach here");
4145 
4146     if (_cm->verbose_low()) {
4147       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4148     }
4149 
4150     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4151     // The CMTask class also extends the TerminatorTerminator class,
4152     // hence its should_exit_termination() method will also decide
4153     // whether to exit the termination protocol or not.
4154     bool finished = _cm->terminator()->offer_termination(this);
4155     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4156     _termination_time_ms +=
4157       termination_end_time_ms - _termination_start_time_ms;
4158 
4159     if (finished) {
4160       // We're all done.
4161 
4162       if (_task_id == 0) {
4163         // let's allow task 0 to do this
4164         if (concurrent()) {
4165           assert(_cm->concurrent_marking_in_progress(), "invariant");
4166           // we need to set this to false before the next
4167           // safepoint. This way we ensure that the marking phase
4168           // doesn't observe any more heap expansions.
4169           _cm->clear_concurrent_marking_in_progress();
4170         }
4171       }
4172 
4173       // We can now guarantee that the global stack is empty, since
4174       // all other tasks have finished. We separated the guarantees so
4175       // that, if a condition is false, we can immediately find out
4176       // which one.
4177       guarantee(_cm->out_of_regions(), "only way to reach here");
4178       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4179       guarantee(_task_queue->size() == 0, "only way to reach here");
4180       guarantee(!_cm->has_overflown(), "only way to reach here");
4181       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4182 
4183       if (_cm->verbose_low()) {
4184         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4185       }
4186     } else {
4187       // Apparently there's more work to do. Let's abort this task. It
4188       // will restart it and we can hopefully find more things to do.
4189 
4190       if (_cm->verbose_low()) {
4191         gclog_or_tty->print_cr("[%d] apparently there is more work to do",
4192                                _task_id);
4193       }
4194 
4195       set_has_aborted();
4196       statsOnly( ++_aborted_termination );
4197     }
4198   }
4199 
4200   // Mainly for debugging purposes to make sure that a pointer to the
4201   // closure which was statically allocated in this frame doesn't
4202   // escape it by accident.
4203   set_cm_oop_closure(NULL);
4204   double end_time_ms = os::elapsedVTime() * 1000.0;
4205   double elapsed_time_ms = end_time_ms - _start_time_ms;
4206   // Update the step history.
4207   _step_times_ms.add(elapsed_time_ms);
4208 
4209   if (has_aborted()) {
4210     // The task was aborted for some reason.
4211 
4212     statsOnly( ++_aborted );
4213 
4214     if (_has_timed_out) {
4215       double diff_ms = elapsed_time_ms - _time_target_ms;
4216       // Keep statistics of how well we did with respect to hitting
4217       // our target only if we actually timed out (if we aborted for
4218       // other reasons, then the results might get skewed).
4219       _marking_step_diffs_ms.add(diff_ms);
4220     }
4221 
4222     if (_cm->has_overflown()) {
4223       // This is the interesting one. We aborted because a global
4224       // overflow was raised. This means we have to restart the
4225       // marking phase and start iterating over regions. However, in
4226       // order to do this we have to make sure that all tasks stop
4227       // what they are doing and re-initialise in a safe manner. We
4228       // will achieve this with the use of two barrier sync points.
4229 
4230       if (_cm->verbose_low()) {
4231         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4232       }
4233 
4234       _cm->enter_first_sync_barrier(_task_id);
4235       // When we exit this sync barrier we know that all tasks have
4236       // stopped doing marking work. So, it's now safe to
4237       // re-initialise our data structures. At the end of this method,
4238       // task 0 will clear the global data structures.
4239 
4240       statsOnly( ++_aborted_overflow );
4241 
4242       // We clear the local state of this task...
4243       clear_region_fields();
4244 
4245       // ...and enter the second barrier.
4246       _cm->enter_second_sync_barrier(_task_id);
4247       // At this point everything has bee re-initialised and we're
4248       // ready to restart.
4249     }
4250 
4251     if (_cm->verbose_low()) {
4252       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4253                              "elapsed = %1.2lfms <<<<<<<<<<",
4254                              _task_id, _time_target_ms, elapsed_time_ms);
4255       if (_cm->has_aborted()) {
4256         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4257                                _task_id);
4258       }
4259     }
4260   } else {
4261     if (_cm->verbose_low()) {
4262       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4263                              "elapsed = %1.2lfms <<<<<<<<<<",
4264                              _task_id, _time_target_ms, elapsed_time_ms);
4265     }
4266   }
4267 
4268   _claimed = false;
4269 }
4270 
4271 CMTask::CMTask(int task_id,
4272                ConcurrentMark* cm,
4273                size_t* marked_bytes,
4274                BitMap* card_bm,
4275                CMTaskQueue* task_queue,
4276                CMTaskQueueSet* task_queues)
4277   : _g1h(G1CollectedHeap::heap()),
4278     _task_id(task_id), _cm(cm),
4279     _claimed(false),
4280     _nextMarkBitMap(NULL), _hash_seed(17),
4281     _task_queue(task_queue),
4282     _task_queues(task_queues),
4283     _cm_oop_closure(NULL),
4284     _marked_bytes_array(marked_bytes),
4285     _card_bm(card_bm) {
4286   guarantee(task_queue != NULL, "invariant");
4287   guarantee(task_queues != NULL, "invariant");
4288 
4289   statsOnly( _clock_due_to_scanning = 0;
4290              _clock_due_to_marking  = 0 );
4291 
4292   _marking_step_diffs_ms.add(0.5);
4293 }
4294 
// Formatting macros used by G1PrintRegionLivenessInfoClosure to keep
// its output consistently laid out. Each *_H_* macro formats the
// column header for the value formatted by the macro of the same name
// without the _H_, so the two must be kept in sync (same field
// width). Most of the macros carry their own leading white space,
// which makes them easy to compose by simple juxtaposition.
//
// Note: string literals and the identifiers next to them (macro names
// and macro parameters) are deliberately separated by white space.
// Since C++11, an identifier that immediately follows a string literal
// is lexed as a user-defined-literal suffix, so "  "tag":" no longer
// concatenates the way it did in C++98. With the white space the
// expansion is identical under every language level.

// All the output lines are prefixed with this string to be able to
// identify them easily in a large log file.
#define G1PPRL_LINE_PREFIX            "###"

// An address range, printed as " <start>-<end>". The header width
// differs between 64-bit and 32-bit builds.
// NOTE(review): 37 / 21 presumably equal the printed width of two
// PTR_FORMAT values plus the '-' separator on each platform - confirm
// against the definition of PTR_FORMAT.
#define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
#endif // _LP64

// For per-region info
// (the type value is left-justified in its 4-character column, whereas
// its header is right-justified)
#define G1PPRL_TYPE_FORMAT            "   %-4s"
#define G1PPRL_TYPE_H_FORMAT          "   %4s"
#define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT          "  %9s"
#define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT        "  %14s"

// For summary info
// (tag must be a string literal; it is pasted in front of the value as
// "  <tag>: <value>")
#define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
4326 
4327 G1PrintRegionLivenessInfoClosure::
4328 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4329   : _out(out),
4330     _total_used_bytes(0), _total_capacity_bytes(0),
4331     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4332     _hum_used_bytes(0), _hum_capacity_bytes(0),
4333     _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4334   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4335   MemRegion g1_committed = g1h->g1_committed();
4336   MemRegion g1_reserved = g1h->g1_reserved();
4337   double now = os::elapsedTime();
4338 
4339   // Print the header of the output.
4340   _out->cr();
4341   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4342   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4343                  G1PPRL_SUM_ADDR_FORMAT("committed")
4344                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4345                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4346                  g1_committed.start(), g1_committed.end(),
4347                  g1_reserved.start(), g1_reserved.end(),
4348                  HeapRegion::GrainBytes);
4349   _out->print_cr(G1PPRL_LINE_PREFIX);
4350   _out->print_cr(G1PPRL_LINE_PREFIX
4351                  G1PPRL_TYPE_H_FORMAT
4352                  G1PPRL_ADDR_BASE_H_FORMAT
4353                  G1PPRL_BYTE_H_FORMAT
4354                  G1PPRL_BYTE_H_FORMAT
4355                  G1PPRL_BYTE_H_FORMAT
4356                  G1PPRL_DOUBLE_H_FORMAT,
4357                  "type", "address-range",
4358                  "used", "prev-live", "next-live", "gc-eff");
4359   _out->print_cr(G1PPRL_LINE_PREFIX
4360                  G1PPRL_TYPE_H_FORMAT
4361                  G1PPRL_ADDR_BASE_H_FORMAT
4362                  G1PPRL_BYTE_H_FORMAT
4363                  G1PPRL_BYTE_H_FORMAT
4364                  G1PPRL_BYTE_H_FORMAT
4365                  G1PPRL_DOUBLE_H_FORMAT,
4366                  "", "",
4367                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4368 }
4369 
4370 // It takes as a parameter a reference to one of the _hum_* fields, it
4371 // deduces the corresponding value for a region in a humongous region
4372 // series (either the region size, or what's left if the _hum_* field
4373 // is < the region size), and updates the _hum_* field accordingly.
4374 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4375   size_t bytes = 0;
4376   // The > 0 check is to deal with the prev and next live bytes which
4377   // could be 0.
4378   if (*hum_bytes > 0) {
4379     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4380     *hum_bytes -= bytes;
4381   }
4382   return bytes;
4383 }
4384 
4385 // It deduces the values for a region in a humongous region series
4386 // from the _hum_* fields and updates those accordingly. It assumes
4387 // that that _hum_* fields have already been set up from the "starts
4388 // humongous" region and we visit the regions in address order.
4389 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4390                                                      size_t* capacity_bytes,
4391                                                      size_t* prev_live_bytes,
4392                                                      size_t* next_live_bytes) {
4393   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4394   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4395   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4396   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4397   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4398 }
4399 
4400 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4401   const char* type = "";
4402   HeapWord* bottom       = r->bottom();
4403   HeapWord* end          = r->end();
4404   size_t capacity_bytes  = r->capacity();
4405   size_t used_bytes      = r->used();
4406   size_t prev_live_bytes = r->live_bytes();
4407   size_t next_live_bytes = r->next_live_bytes();
4408   double gc_eff          = r->gc_efficiency();
4409   if (r->used() == 0) {
4410     type = "FREE";
4411   } else if (r->is_survivor()) {
4412     type = "SURV";
4413   } else if (r->is_young()) {
4414     type = "EDEN";
4415   } else if (r->startsHumongous()) {
4416     type = "HUMS";
4417 
4418     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4419            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4420            "they should have been zeroed after the last time we used them");
4421     // Set up the _hum_* fields.
4422     _hum_capacity_bytes  = capacity_bytes;
4423     _hum_used_bytes      = used_bytes;
4424     _hum_prev_live_bytes = prev_live_bytes;
4425     _hum_next_live_bytes = next_live_bytes;
4426     get_hum_bytes(&used_bytes, &capacity_bytes,
4427                   &prev_live_bytes, &next_live_bytes);
4428     end = bottom + HeapRegion::GrainWords;
4429   } else if (r->continuesHumongous()) {
4430     type = "HUMC";
4431     get_hum_bytes(&used_bytes, &capacity_bytes,
4432                   &prev_live_bytes, &next_live_bytes);
4433     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4434   } else {
4435     type = "OLD";
4436   }
4437 
4438   _total_used_bytes      += used_bytes;
4439   _total_capacity_bytes  += capacity_bytes;
4440   _total_prev_live_bytes += prev_live_bytes;
4441   _total_next_live_bytes += next_live_bytes;
4442 
4443   // Print a line for this particular region.
4444   _out->print_cr(G1PPRL_LINE_PREFIX
4445                  G1PPRL_TYPE_FORMAT
4446                  G1PPRL_ADDR_BASE_FORMAT
4447                  G1PPRL_BYTE_FORMAT
4448                  G1PPRL_BYTE_FORMAT
4449                  G1PPRL_BYTE_FORMAT
4450                  G1PPRL_DOUBLE_FORMAT,
4451                  type, bottom, end,
4452                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4453 
4454   return false;
4455 }
4456 
4457 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4458   // Print the footer of the output.
4459   _out->print_cr(G1PPRL_LINE_PREFIX);
4460   _out->print_cr(G1PPRL_LINE_PREFIX
4461                  " SUMMARY"
4462                  G1PPRL_SUM_MB_FORMAT("capacity")
4463                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4464                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4465                  G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4466                  bytes_to_mb(_total_capacity_bytes),
4467                  bytes_to_mb(_total_used_bytes),
4468                  perc(_total_used_bytes, _total_capacity_bytes),
4469                  bytes_to_mb(_total_prev_live_bytes),
4470                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4471                  bytes_to_mb(_total_next_live_bytes),
4472                  perc(_total_next_live_bytes, _total_capacity_bytes));
4473   _out->cr();
4474 }