1 /*
   2  * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/symbolTable.hpp"
  27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
  28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
  32 #include "gc_implementation/g1/g1Log.hpp"
  33 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
  34 #include "gc_implementation/g1/g1RemSet.hpp"
  35 #include "gc_implementation/g1/heapRegion.inline.hpp"
  36 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
  38 #include "gc_implementation/shared/vmGCOperations.hpp"
  39 #include "memory/genOopClosures.inline.hpp"
  40 #include "memory/referencePolicy.hpp"
  41 #include "memory/resourceArea.hpp"
  42 #include "oops/oop.inline.hpp"
  43 #include "runtime/handles.inline.hpp"
  44 #include "runtime/java.hpp"
  45 #include "services/memTracker.hpp"
  46 
  47 // Concurrent marking bit map wrapper
  48 
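// Each bit in the map covers (1 << _shifter) heap words. The marking
// bitmaps below are constructed with _shifter == MinObjAlignment - 1
// (typically 0, i.e. one bit per heap word with the default object
// alignment), so the backing store needs roughly
// _bmWordSize >> (_shifter + LogBitsPerByte) bytes.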
  49 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  50   _bm((uintptr_t*)NULL,0),
  51   _shifter(shifter) {
  52   _bmStartWord = (HeapWord*)(rs.base());
  53   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  54   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
  55                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  56 
  57   MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
  58 
  59   guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
  60   // For now we'll just commit all of the bit map up front.
  61   // Later on we'll try to be more parsimonious with swap.
  62   guarantee(_virtual_space.initialize(brs, brs.size()),
  63             "couldn't reserve backing store for concurrent marking bit map");
  64   assert(_virtual_space.committed_size() == brs.size(),
  65          "didn't reserve backing store for all of concurrent marking bit map?");
  66   _bm.set_map((uintptr_t*)_virtual_space.low());
  67   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
  68          _bmWordSize, "inconsistency in bit map sizing");
  69   _bm.set_size(_bmWordSize >> _shifter);
  70 }
  71 
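// Note on the offset arithmetic used below: heapWordToOffset() and
// offsetToHeapWord() (defined in the header) are assumed to convert
// between heap addresses and bit offsets as
// (addr - _bmStartWord) >> _shifter and its inverse, so with
// _shifter == 0 the address 100 words past _bmStartWord corresponds
// to bit offset 100.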
  72 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  73                                                HeapWord* limit) const {
  74   // First we must round addr *up* to a possible object boundary.
  75   addr = (HeapWord*)align_size_up((intptr_t)addr,
  76                                   HeapWordSize << _shifter);
  77   size_t addrOffset = heapWordToOffset(addr);
  78   if (limit == NULL) {
  79     limit = _bmStartWord + _bmWordSize;
  80   }
  81   size_t limitOffset = heapWordToOffset(limit);
  82   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  83   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  84   assert(nextAddr >= addr, "get_next_one postcondition");
  85   assert(nextAddr == limit || isMarked(nextAddr),
  86          "get_next_one postcondition");
  87   return nextAddr;
  88 }
  89 
  90 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  91                                                  HeapWord* limit) const {
  92   size_t addrOffset = heapWordToOffset(addr);
  93   if (limit == NULL) {
  94     limit = _bmStartWord + _bmWordSize;
  95   }
  96   size_t limitOffset = heapWordToOffset(limit);
  97   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  98   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  99   assert(nextAddr >= addr, "get_next_one postcondition");
 100   assert(nextAddr == limit || !isMarked(nextAddr),
 101          "get_next_one postcondition");
 102   return nextAddr;
 103 }
 104 
 105 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 106   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 107   return (int) (diff >> _shifter);
 108 }
 109 
 110 #ifndef PRODUCT
 111 bool CMBitMapRO::covers(ReservedSpace rs) const {
 112   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 113   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
 114          "size inconsistency");
 115   return _bmStartWord == (HeapWord*)(rs.base()) &&
 116          _bmWordSize  == rs.size()>>LogHeapWordSize;
 117 }
 118 #endif
 119 
 120 void CMBitMap::clearAll() {
 121   _bm.clear();
 122   return;
 123 }
 124 
 125 void CMBitMap::markRange(MemRegion mr) {
 126   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 127   assert(!mr.is_empty(), "unexpected empty region");
 128   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 129           ((HeapWord *) mr.end())),
 130          "markRange memory region end is not card aligned");
 131   // convert address range into offset range
 132   _bm.at_put_range(heapWordToOffset(mr.start()),
 133                    heapWordToOffset(mr.end()), true);
 134 }
 135 
 136 void CMBitMap::clearRange(MemRegion mr) {
 137   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 138   assert(!mr.is_empty(), "unexpected empty region");
 139   // convert address range into offset range
 140   _bm.at_put_range(heapWordToOffset(mr.start()),
 141                    heapWordToOffset(mr.end()), false);
 142 }
 143 
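// Returns the next maximal run of marked words starting at or after
// addr (clamped to end_addr) and clears the corresponding bits. The
// returned region is empty if there is nothing marked in
// [addr, end_addr).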
 144 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 145                                             HeapWord* end_addr) {
 146   HeapWord* start = getNextMarkedWordAddress(addr);
 147   start = MIN2(start, end_addr);
 148   HeapWord* end   = getNextUnmarkedWordAddress(start);
 149   end = MIN2(end, end_addr);
 150   assert(start <= end, "Consistency check");
 151   MemRegion mr(start, end);
 152   if (!mr.is_empty()) {
 153     clearRange(mr);
 154   }
 155   return mr;
 156 }
 157 
 158 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 159   _base(NULL), _cm(cm)
 160 #ifdef ASSERT
 161   , _drain_in_progress(false)
 162   , _drain_in_progress_yields(false)
 163 #endif
 164 {}
 165 
 166 void CMMarkStack::allocate(size_t size) {
 167   _base = NEW_C_HEAP_ARRAY(oop, size, mtGC);
 168   if (_base == NULL) {
 169     vm_exit_during_initialization("Failed to allocate CM region mark stack");
 170   }
 171   _index = 0;
 172   _capacity = (jint) size;
 173   _saved_index = -1;
 174   NOT_PRODUCT(_max_depth = 0);
 175 }
 176 
 177 CMMarkStack::~CMMarkStack() {
 178   if (_base != NULL) {
 179     FREE_C_HEAP_ARRAY(oop, _base, mtGC);
 180   }
 181 }
 182 
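// Lock-free push: a slot is claimed by CAS-ing _index forward and the
// oop is stored only into the successfully claimed slot. If the stack
// is full the element is dropped and _overflow is set so that callers
// can detect the overflow.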
 183 void CMMarkStack::par_push(oop ptr) {
 184   while (true) {
 185     if (isFull()) {
 186       _overflow = true;
 187       return;
 188     }
 189     // Otherwise...
 190     jint index = _index;
 191     jint next_index = index+1;
 192     jint res = Atomic::cmpxchg(next_index, &_index, index);
 193     if (res == index) {
 194       _base[index] = ptr;
 195       // Note that we don't maintain this atomically.  We could, but it
 196       // doesn't seem necessary.
 197       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 198       return;
 199     }
 200     // Otherwise, we need to try again.
 201   }
 202 }
 203 
 204 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 205   while (true) {
 206     if (isFull()) {
 207       _overflow = true;
 208       return;
 209     }
 210     // Otherwise...
 211     jint index = _index;
 212     jint next_index = index + n;
 213     if (next_index > _capacity) {
 214       _overflow = true;
 215       return;
 216     }
 217     jint res = Atomic::cmpxchg(next_index, &_index, index);
 218     if (res == index) {
 219       for (int i = 0; i < n; i++) {
 220         int ind = index + i;
 221         assert(ind < _capacity, "By overflow test above.");
 222         _base[ind] = ptr_arr[i];
 223       }
 224       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 225       return;
 226     }
 227     // Otherwise, we need to try again.
 228   }
 229 }
 230 
 231 
 232 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 233   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 234   jint start = _index;
 235   jint next_index = start + n;
 236   if (next_index > _capacity) {
 237     _overflow = true;
 238     return;
 239   }
 240   // Otherwise.
 241   _index = next_index;
 242   for (int i = 0; i < n; i++) {
 243     int ind = start + i;
 244     assert(ind < _capacity, "By overflow test above.");
 245     _base[ind] = ptr_arr[i];
 246   }
 247 }
 248 
 249 
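// Pops up to max entries into ptr_arr under ParGCRareEvent_lock (the
// same lock used by par_push_arr above); *n receives the number of
// entries actually popped. Returns false if the stack was empty.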
 250 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 251   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 252   jint index = _index;
 253   if (index == 0) {
 254     *n = 0;
 255     return false;
 256   } else {
 257     int k = MIN2(max, index);
 258     jint new_ind = index - k;
 259     for (int j = 0; j < k; j++) {
 260       ptr_arr[j] = _base[new_ind + j];
 261     }
 262     _index = new_ind;
 263     *n = k;
 264     return true;
 265   }
 266 }
 267 
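// Repeatedly pops entries and applies the closure to each popped
// object's fields. Every object on the stack is expected to be grey,
// i.e. marked in the bitmap but not yet scanned. If yield_after is
// true, a yield check is performed after each object and the method
// returns false if it bailed out early because of a yield.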
 268 template<class OopClosureClass>
 269 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 270   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 271          || SafepointSynchronize::is_at_safepoint(),
 272          "Drain recursion must be yield-safe.");
 273   bool res = true;
 274   debug_only(_drain_in_progress = true);
 275   debug_only(_drain_in_progress_yields = yield_after);
 276   while (!isEmpty()) {
 277     oop newOop = pop();
 278     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 279     assert(newOop->is_oop(), "Expected an oop");
 280     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 281            "only grey objects on this stack");
 282     newOop->oop_iterate(cl);
 283     if (yield_after && _cm->do_yield_check()) {
 284       res = false;
 285       break;
 286     }
 287   }
 288   debug_only(_drain_in_progress = false);
 289   return res;
 290 }
 291 
 292 void CMMarkStack::note_start_of_gc() {
 293   assert(_saved_index == -1,
 294          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 295   _saved_index = _index;
 296 }
 297 
 298 void CMMarkStack::note_end_of_gc() {
 299   // This is intentionally a guarantee, instead of an assert. If we
 300   // accidentally add something to the mark stack during GC, it
 301   // will be a correctness issue so it's better if we crash. We'll
 302   // only check this once per GC anyway, so it won't be a performance
 303   // issue in any way.
 304   guarantee(_saved_index == _index,
 305             err_msg("saved index: %d index: %d", _saved_index, _index));
 306   _saved_index = -1;
 307 }
 308 
 309 void CMMarkStack::oops_do(OopClosure* f) {
 310   assert(_saved_index == _index,
 311          err_msg("saved index: %d index: %d", _saved_index, _index));
 312   for (int i = 0; i < _index; i += 1) {
 313     f->do_oop(&_base[i]);
 314   }
 315 }
 316 
 317 bool ConcurrentMark::not_yet_marked(oop obj) const {
 318   return (_g1h->is_obj_ill(obj)
 319           || (_g1h->is_in_permanent(obj)
 320               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
 321 }
 322 
 323 CMRootRegions::CMRootRegions() :
 324   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 325   _should_abort(false),  _next_survivor(NULL) { }
 326 
 327 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 328   _young_list = g1h->young_list();
 329   _cm = cm;
 330 }
 331 
 332 void CMRootRegions::prepare_for_scan() {
 333   assert(!scan_in_progress(), "pre-condition");
 334 
 335   // Currently, only survivors can be root regions.
 336   assert(_next_survivor == NULL, "pre-condition");
 337   _next_survivor = _young_list->first_survivor_region();
 338   _scan_in_progress = (_next_survivor != NULL);
 339   _should_abort = false;
 340 }
 341 
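// Claims the next root region (currently always a survivor region).
// _next_survivor is first read without the lock as a quick filter and
// then re-read under RootRegionScan_lock before the claim is made, so
// concurrent claimers each obtain a distinct region (or NULL).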
 342 HeapRegion* CMRootRegions::claim_next() {
 343   if (_should_abort) {
 344     // If someone has set the should_abort flag, we return NULL to
 345     // force the caller to bail out of their loop.
 346     return NULL;
 347   }
 348 
 349   // Currently, only survivors can be root regions.
 350   HeapRegion* res = _next_survivor;
 351   if (res != NULL) {
 352     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 353     // Read it again in case it changed while we were waiting for the lock.
 354     res = _next_survivor;
 355     if (res != NULL) {
 356       if (res == _young_list->last_survivor_region()) {
 357         // We just claimed the last survivor so store NULL to indicate
 358         // that we're done.
 359         _next_survivor = NULL;
 360       } else {
 361         _next_survivor = res->get_next_young_region();
 362       }
 363     } else {
 364       // Someone else claimed the last survivor while we were trying
 365       // to take the lock so nothing else to do.
 366     }
 367   }
 368   assert(res == NULL || res->is_survivor(), "post-condition");
 369 
 370   return res;
 371 }
 372 
 373 void CMRootRegions::scan_finished() {
 374   assert(scan_in_progress(), "pre-condition");
 375 
 376   // Currently, only survivors can be root regions.
 377   if (!_should_abort) {
 378     assert(_next_survivor == NULL, "we should have claimed all survivors");
 379   }
 380   _next_survivor = NULL;
 381 
 382   {
 383     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 384     _scan_in_progress = false;
 385     RootRegionScan_lock->notify_all();
 386   }
 387 }
 388 
 389 bool CMRootRegions::wait_until_scan_finished() {
 390   if (!scan_in_progress()) return false;
 391 
 392   {
 393     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 394     while (scan_in_progress()) {
 395       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 396     }
 397   }
 398   return true;
 399 }
 400 
 401 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 402 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 403 #endif // _MSC_VER
 404 
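// Roughly one marking thread for every four parallel GC threads, with
// a minimum of one: e.g. 8 parallel GC threads scale to
// (8 + 2) / 4 = 2 marking threads.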
 405 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 406   return MAX2((n_par_threads + 2) / 4, 1U);
 407 }
 408 
 409 ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
 410   _markBitMap1(rs, MinObjAlignment - 1),
 411   _markBitMap2(rs, MinObjAlignment - 1),
 412 
 413   _parallel_marking_threads(0),
 414   _max_parallel_marking_threads(0),
 415   _sleep_factor(0.0),
 416   _marking_task_overhead(1.0),
 417   _cleanup_sleep_factor(0.0),
 418   _cleanup_task_overhead(1.0),
 419   _cleanup_list("Cleanup List"),
 420   _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
 421   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
 422            CardTableModRefBS::card_shift,
 423            false /* in_resource_area*/),
 424 
 425   _prevMarkBitMap(&_markBitMap1),
 426   _nextMarkBitMap(&_markBitMap2),
 427 
 428   _markStack(this),
 429   // _finger set in set_non_marking_state
 430 
 431   _max_task_num(MAX2((uint)ParallelGCThreads, 1U)),
 432   // _active_tasks set in set_non_marking_state
 433   // _tasks set inside the constructor
 434   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
 435   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
 436 
 437   _has_overflown(false),
 438   _concurrent(false),
 439   _has_aborted(false),
 440   _restart_for_overflow(false),
 441   _concurrent_marking_in_progress(false),
 442 
 443   // _verbose_level set below
 444 
 445   _init_times(),
 446   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 447   _cleanup_times(),
 448   _total_counting_time(0.0),
 449   _total_rs_scrub_time(0.0),
 450 
 451   _parallel_workers(NULL),
 452 
 453   _count_card_bitmaps(NULL),
 454   _count_marked_bytes(NULL) {
 455   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 456   if (verbose_level < no_verbose) {
 457     verbose_level = no_verbose;
 458   }
 459   if (verbose_level > high_verbose) {
 460     verbose_level = high_verbose;
 461   }
 462   _verbose_level = verbose_level;
 463 
 464   if (verbose_low()) {
 465     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 466                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
 467   }
 468 
 469   _markStack.allocate(MarkStackSize);
 470 
 471   // Create & start a ConcurrentMark thread.
 472   _cmThread = new ConcurrentMarkThread(this);
 473   assert(cmThread() != NULL, "CM Thread should have been created");
 474   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 475 
 476   _g1h = G1CollectedHeap::heap();
 477   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 478   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
 479   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
 480 
 481   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 482   satb_qs.set_buffer_size(G1SATBBufferSize);
 483 
 484   _root_regions.init(_g1h, this);
 485 
 486   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num, mtGC);
 487   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num, mtGC);
 488 
 489   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_task_num, mtGC);
 490   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num, mtGC);
 491 
 492   BitMap::idx_t card_bm_size = _card_bm.size();
 493 
 494   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 495   _active_tasks = _max_task_num;
 496   for (int i = 0; i < (int) _max_task_num; ++i) {
 497     CMTaskQueue* task_queue = new CMTaskQueue();
 498     task_queue->initialize();
 499     _task_queues->register_queue(i, task_queue);
 500 
 501     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 502     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC);
 503 
 504     _tasks[i] = new CMTask(i, this,
 505                            _count_marked_bytes[i],
 506                            &_count_card_bitmaps[i],
 507                            task_queue, _task_queues);
 508 
 509     _accum_task_vtime[i] = 0.0;
 510   }
 511 
 512   // Calculate the card number for the bottom of the heap. Used
 513   // in biasing indexes into the accounting card bitmaps.
 514   _heap_bottom_card_num =
 515     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 516                                 CardTableModRefBS::card_shift);
 517 
 518   // Clear all the liveness counting data
 519   clear_all_count_data();
 520 
 521   if (ConcGCThreads > ParallelGCThreads) {
 522     vm_exit_during_initialization("Can't have more ConcGCThreads "
 523                                   "than ParallelGCThreads.");
 524   }
 525   if (ParallelGCThreads == 0) {
 526     // if we are not running with any parallel GC threads we will not
 527     // spawn any marking threads either
 528     _parallel_marking_threads =       0;
 529     _max_parallel_marking_threads =   0;
 530     _sleep_factor             =     0.0;
 531     _marking_task_overhead    =     1.0;
 532   } else {
 533     if (ConcGCThreads > 0) {
 534       // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
 535       // if both are set
 536 
 537       _parallel_marking_threads = (uint) ConcGCThreads;
 538       _max_parallel_marking_threads = _parallel_marking_threads;
 539       _sleep_factor             = 0.0;
 540       _marking_task_overhead    = 1.0;
 541     } else if (G1MarkingOverheadPercent > 0) {
 542       // we will calculate the number of parallel marking threads
 543       // based on a target overhead with respect to the soft real-time
 544       // goal
 545 
 546       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 547       double overall_cm_overhead =
 548         (double) MaxGCPauseMillis * marking_overhead /
 549         (double) GCPauseIntervalMillis;
 550       double cpu_ratio = 1.0 / (double) os::processor_count();
 551       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 552       double marking_task_overhead =
 553         overall_cm_overhead / marking_thread_num *
 554                                                 (double) os::processor_count();
 555       double sleep_factor =
 556                          (1.0 - marking_task_overhead) / marking_task_overhead;
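      // Illustrative example (not the defaults): with
      // G1MarkingOverheadPercent = 10, MaxGCPauseMillis = 200,
      // GCPauseIntervalMillis = 1000 and 8 processors:
      //   overall_cm_overhead   = 200 * 0.10 / 1000   = 0.02
      //   cpu_ratio             = 1 / 8               = 0.125
      //   marking_thread_num    = ceil(0.02 / 0.125)  = 1
      //   marking_task_overhead = 0.02 / 1 * 8        = 0.16
      //   sleep_factor          = (1 - 0.16) / 0.16   = 5.25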
 557 
 558       _parallel_marking_threads = (uint) marking_thread_num;
 559       _max_parallel_marking_threads = _parallel_marking_threads;
 560       _sleep_factor             = sleep_factor;
 561       _marking_task_overhead    = marking_task_overhead;
 562     } else {
 563       _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
 564       _max_parallel_marking_threads = _parallel_marking_threads;
 565       _sleep_factor             = 0.0;
 566       _marking_task_overhead    = 1.0;
 567     }
 568 
 569     if (parallel_marking_threads() > 1) {
 570       _cleanup_task_overhead = 1.0;
 571     } else {
 572       _cleanup_task_overhead = marking_task_overhead();
 573     }
 574     _cleanup_sleep_factor =
 575                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 576 
 577 #if 0
 578     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 579     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 580     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 581     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 582     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 583 #endif
 584 
 585     guarantee(parallel_marking_threads() > 0, "peace of mind");
 586     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
 587          _max_parallel_marking_threads, false, true);
 588     if (_parallel_workers == NULL) {
 589       vm_exit_during_initialization("Failed necessary allocation.");
 590     } else {
 591       _parallel_workers->initialize_workers();
 592     }
 593   }
 594 
 595   // so that the call below can read a sensible value
 596   _heap_start = (HeapWord*) rs.base();
 597   set_non_marking_state();
 598 }
 599 
 600 void ConcurrentMark::update_g1_committed(bool force) {
 601   // If concurrent marking is not in progress, then we do not need to
 602   // update _heap_end.
 603   if (!concurrent_marking_in_progress() && !force) return;
 604 
 605   MemRegion committed = _g1h->g1_committed();
 606   assert(committed.start() == _heap_start, "start shouldn't change");
 607   HeapWord* new_end = committed.end();
 608   if (new_end > _heap_end) {
 609     // The heap has been expanded.
 610 
 611     _heap_end = new_end;
 612   }
 613   // Notice that the heap can also shrink. However, this only happens
 614   // during a Full GC (at least currently) and the entire marking
 615   // phase will bail out and the task will not be restarted. So, let's
 616   // do nothing.
 617 }
 618 
 619 void ConcurrentMark::reset() {
 620   // Starting values for these two. This should be called in a STW
 621   // phase. CM will be notified of any future g1_committed expansions;
 622   // these will happen at the end of evacuation pauses, when tasks are
 623   // inactive.
 624   MemRegion committed = _g1h->g1_committed();
 625   _heap_start = committed.start();
 626   _heap_end   = committed.end();
 627 
 628   // Separated the asserts so that we know which one fires.
 629   assert(_heap_start != NULL, "heap bounds should look ok");
 630   assert(_heap_end != NULL, "heap bounds should look ok");
 631   assert(_heap_start < _heap_end, "heap bounds should look ok");
 632 
 633   // reset all the marking data structures and any necessary flags
 634   clear_marking_state();
 635 
 636   if (verbose_low()) {
 637     gclog_or_tty->print_cr("[global] resetting");
 638   }
 639 
 640   // We do reset all of them, since different phases will use
 641   // different numbers of active threads. So, it's easiest to have all
 642   // of them ready.
 643   for (int i = 0; i < (int) _max_task_num; ++i) {
 644     _tasks[i]->reset(_nextMarkBitMap);
 645   }
 646 
 647   // we need this to make sure that the flag is on during the evac
 648   // pause with initial mark piggy-backed
 649   set_concurrent_marking_in_progress();
 650 }
 651 
 652 void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
 653   assert(active_tasks <= _max_task_num, "we should not have more");
 654 
 655   _active_tasks = active_tasks;
 656   // Need to update the three data structures below according to the
 657   // number of active threads for this phase.
 658   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 659   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 660   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 661 
 662   _concurrent = concurrent;
 663   // We propagate this to all tasks, not just the active ones.
 664   for (int i = 0; i < (int) _max_task_num; ++i)
 665     _tasks[i]->set_concurrent(concurrent);
 666 
 667   if (concurrent) {
 668     set_concurrent_marking_in_progress();
 669   } else {
 670     // We currently assume that the concurrent flag has been set to
 671     // false before we start remark. At this point we should also be
 672     // in a STW phase.
 673     assert(!concurrent_marking_in_progress(), "invariant");
 674     assert(_finger == _heap_end, "only way to get here");
 675     update_g1_committed(true);
 676   }
 677 }
 678 
 679 void ConcurrentMark::set_non_marking_state() {
 680   // We set the global marking state to some default values when we're
 681   // not doing marking.
 682   clear_marking_state();
 683   _active_tasks = 0;
 684   clear_concurrent_marking_in_progress();
 685 }
 686 
 687 ConcurrentMark::~ConcurrentMark() {
 688   // The ConcurrentMark instance is never freed.
 689   ShouldNotReachHere();
 690 }
 691 
 692 void ConcurrentMark::clearNextBitmap() {
 693   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 694   G1CollectorPolicy* g1p = g1h->g1_policy();
 695 
 696   // Make sure that the concurrent mark thread still appears to be
 697   // in the current cycle.
 698   guarantee(cmThread()->during_cycle(), "invariant");
 699 
 700   // We are finishing up the current cycle by clearing the next
 701   // marking bitmap and getting it ready for the next cycle. During
 702   // this time no other cycle can start. So, let's make sure that this
 703   // is the case.
 704   guarantee(!g1h->mark_in_progress(), "invariant");
 705 
 706   // clear the mark bitmap (no grey objects to start with).
 707   // We need to do this in chunks and offer to yield in between
 708   // each chunk.
 709   HeapWord* start  = _nextMarkBitMap->startWord();
 710   HeapWord* end    = _nextMarkBitMap->endWord();
 711   HeapWord* cur    = start;
 712   size_t chunkSize = M;
 713   while (cur < end) {
 714     HeapWord* next = cur + chunkSize;
 715     if (next > end) {
 716       next = end;
 717     }
 718     MemRegion mr(cur,next);
 719     _nextMarkBitMap->clearRange(mr);
 720     cur = next;
 721     do_yield_check();
 722 
 723     // Repeat the asserts from above. We'll do them as asserts here to
 724     // minimize their overhead on the product. However, we'll have
 725     // them as guarantees at the beginning / end of the bitmap
 726     // clearing to get some checking in the product.
 727     assert(cmThread()->during_cycle(), "invariant");
 728     assert(!g1h->mark_in_progress(), "invariant");
 729   }
 730 
 731   // Clear the liveness counting data
 732   clear_all_count_data();
 733 
 734   // Repeat the asserts from above.
 735   guarantee(cmThread()->during_cycle(), "invariant");
 736   guarantee(!g1h->mark_in_progress(), "invariant");
 737 }
 738 
 739 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 740 public:
 741   bool doHeapRegion(HeapRegion* r) {
 742     if (!r->continuesHumongous()) {
 743       r->note_start_of_marking();
 744     }
 745     return false;
 746   }
 747 };
 748 
 749 void ConcurrentMark::checkpointRootsInitialPre() {
 750   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 751   G1CollectorPolicy* g1p = g1h->g1_policy();
 752 
 753   _has_aborted = false;
 754 
 755 #ifndef PRODUCT
 756   if (G1PrintReachableAtInitialMark) {
 757     print_reachable("at-cycle-start",
 758                     VerifyOption_G1UsePrevMarking, true /* all */);
 759   }
 760 #endif
 761 
 762   // Initialise marking structures. This has to be done in a STW phase.
 763   reset();
 764 
 765   // For each region note start of marking.
 766   NoteStartOfMarkHRClosure startcl;
 767   g1h->heap_region_iterate(&startcl);
 768 }
 769 
 770 
 771 void ConcurrentMark::checkpointRootsInitialPost() {
 772   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 773 
 774   // If we force an overflow during remark, the remark operation will
 775   // actually abort and we'll restart concurrent marking. If we always
 776   // force an overflow during remark we'll never actually complete the
 777   // marking phase. So, we initialize this here, at the start of the
 778   // cycle, so that the remaining overflow number will decrease at
 779   // every remark and we'll eventually not need to cause one.
 780   force_overflow_stw()->init();
 781 
 782   // Start Concurrent Marking weak-reference discovery.
 783   ReferenceProcessor* rp = g1h->ref_processor_cm();
 784   // enable ("weak") refs discovery
 785   rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
 786   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 787 
 788   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 789   // This is the start of the marking cycle; we expect all
 790   // threads to have SATB queues with active set to false.
 791   satb_mq_set.set_active_all_threads(true, /* new active value */
 792                                      false /* expected_active */);
 793 
 794   _root_regions.prepare_for_scan();
 795 
 796   // update_g1_committed() will be called at the end of an evac pause
 797   // when marking is on. So, it's also called at the end of the
 798   // initial-mark pause to update the heap end, if the heap expands
 799   // during it. No need to call it here.
 800 }
 801 
 802 /*
 803  * Notice that in the next two methods, we actually leave the STS
 804  * during the barrier sync and join it immediately afterwards. If we
 805  * do not do this, the following deadlock can occur: one thread could
 806  * be in the barrier sync code, waiting for the other thread to also
 807  * sync up, whereas another one could be trying to yield, while also
 808  * waiting for the other threads to sync up too.
 809  *
 810  * Note, however, that this code is also used during remark and in
 811  * this case we should not attempt to leave / enter the STS, otherwise
 812  * we'll either hit an assert (debug / fastdebug) or deadlock
 813  * (product). So we should only leave / enter the STS if we are
 814  * operating concurrently.
 815  *
 816  * Because the thread that does the sync barrier has left the STS, it
 817  * is possible for a Full GC or an evacuation pause to occur while it
 818  * is waiting. This is actually safe, since entering the sync
 819  * barrier is one of the last things do_marking_step() does, and it
 820  * doesn't manipulate any data structures afterwards.
 821  */
 822 
 823 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
 824   if (verbose_low()) {
 825     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
 826   }
 827 
 828   if (concurrent()) {
 829     ConcurrentGCThread::stsLeave();
 830   }
 831   _first_overflow_barrier_sync.enter();
 832   if (concurrent()) {
 833     ConcurrentGCThread::stsJoin();
 834   }
 835   // at this point everyone should have synced up and not be doing any
 836   // more work
 837 
 838   if (verbose_low()) {
 839     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
 840   }
 841 
 842   // let task 0 do this
 843   if (task_num == 0) {
 844     // task 0 is responsible for clearing the global data structures
 845     // We should be here because of an overflow. During STW we should
 846     // not clear the overflow flag since we rely on it being true when
 847     // we exit this method to abort the pause and restart concurrent
 848     // marking.
 849     clear_marking_state(concurrent() /* clear_overflow */);
 850     force_overflow()->update();
 851 
 852     if (G1Log::fine()) {
 853       gclog_or_tty->date_stamp(PrintGCDateStamps);
 854       gclog_or_tty->stamp(PrintGCTimeStamps);
 855       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 856     }
 857   }
 858 
 859   // after this, each task should reset its own data structures and
 860   // then go into the second barrier
 861 }
 862 
 863 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
 864   if (verbose_low()) {
 865     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
 866   }
 867 
 868   if (concurrent()) {
 869     ConcurrentGCThread::stsLeave();
 870   }
 871   _second_overflow_barrier_sync.enter();
 872   if (concurrent()) {
 873     ConcurrentGCThread::stsJoin();
 874   }
 875   // at this point everything should be re-initialised and ready to go
 876 
 877   if (verbose_low()) {
 878     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
 879   }
 880 }
 881 
 882 #ifndef PRODUCT
 883 void ForceOverflowSettings::init() {
 884   _num_remaining = G1ConcMarkForceOverflow;
 885   _force = false;
 886   update();
 887 }
 888 
 889 void ForceOverflowSettings::update() {
 890   if (_num_remaining > 0) {
 891     _num_remaining -= 1;
 892     _force = true;
 893   } else {
 894     _force = false;
 895   }
 896 }
 897 
 898 bool ForceOverflowSettings::should_force() {
 899   if (_force) {
 900     _force = false;
 901     return true;
 902   } else {
 903     return false;
 904   }
 905 }
 906 #endif // !PRODUCT
 907 
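// The gang task executed by the concurrent marking workers. Each
// worker repeatedly calls do_marking_step() for roughly
// G1ConcMarkStepDurationMillis at a time, performs a yield check
// between steps, and, if a sleep factor has been configured, sleeps
// in proportion to the time spent marking so that the requested
// marking overhead is respected.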
 908 class CMConcurrentMarkingTask: public AbstractGangTask {
 909 private:
 910   ConcurrentMark*       _cm;
 911   ConcurrentMarkThread* _cmt;
 912 
 913 public:
 914   void work(uint worker_id) {
 915     assert(Thread::current()->is_ConcurrentGC_thread(),
 916            "this should only be done by a conc GC thread");
 917     ResourceMark rm;
 918 
 919     double start_vtime = os::elapsedVTime();
 920 
 921     ConcurrentGCThread::stsJoin();
 922 
 923     assert(worker_id < _cm->active_tasks(), "invariant");
 924     CMTask* the_task = _cm->task(worker_id);
 925     the_task->record_start_time();
 926     if (!_cm->has_aborted()) {
 927       do {
 928         double start_vtime_sec = os::elapsedVTime();
 929         double start_time_sec = os::elapsedTime();
 930         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 931 
 932         the_task->do_marking_step(mark_step_duration_ms,
 933                                   true /* do_stealing    */,
 934                                   true /* do_termination */);
 935 
 936         double end_time_sec = os::elapsedTime();
 937         double end_vtime_sec = os::elapsedVTime();
 938         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
 939         double elapsed_time_sec = end_time_sec - start_time_sec;
 940         _cm->clear_has_overflown();
 941 
 942         bool ret = _cm->do_yield_check(worker_id);
 943 
 944         jlong sleep_time_ms;
 945         if (!_cm->has_aborted() && the_task->has_aborted()) {
 946           sleep_time_ms =
 947             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
 948           ConcurrentGCThread::stsLeave();
 949           os::sleep(Thread::current(), sleep_time_ms, false);
 950           ConcurrentGCThread::stsJoin();
 951         }
 952         double end_time2_sec = os::elapsedTime();
 953         double elapsed_time2_sec = end_time2_sec - start_time_sec;
 954 
 955 #if 0
 956           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
 957                                  "overhead %1.4lf",
 958                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
 959                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
 960           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
 961                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
 962 #endif
 963       } while (!_cm->has_aborted() && the_task->has_aborted());
 964     }
 965     the_task->record_end_time();
 966     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
 967 
 968     ConcurrentGCThread::stsLeave();
 969 
 970     double end_vtime = os::elapsedVTime();
 971     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 972   }
 973 
 974   CMConcurrentMarkingTask(ConcurrentMark* cm,
 975                           ConcurrentMarkThread* cmt) :
 976       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
 977 
 978   ~CMConcurrentMarkingTask() { }
 979 };
 980 
 981 // Calculates the number of active workers for a concurrent
 982 // phase.
 983 uint ConcurrentMark::calc_parallel_marking_threads() {
 984   if (G1CollectedHeap::use_parallel_gc_threads()) {
 985     uint n_conc_workers = 0;
 986     if (!UseDynamicNumberOfGCThreads ||
 987         (!FLAG_IS_DEFAULT(ConcGCThreads) &&
 988          !ForceDynamicNumberOfGCThreads)) {
 989       n_conc_workers = max_parallel_marking_threads();
 990     } else {
 991       n_conc_workers =
 992         AdaptiveSizePolicy::calc_default_active_workers(
 993                                      max_parallel_marking_threads(),
 994                                      1, /* Minimum workers */
 995                                      parallel_marking_threads(),
 996                                      Threads::number_of_non_daemon_threads());
 997       // Don't scale down "n_conc_workers" by scale_parallel_threads() because
 998       // that scaling has already gone into "_max_parallel_marking_threads".
 999     }
1000     assert(n_conc_workers > 0, "Always need at least 1");
1001     return n_conc_workers;
1002   }
1003   // If we are not running with any parallel GC threads we will not
1004   // have spawned any marking threads either. Hence the number of
1005   // concurrent workers should be 0.
1006   return 0;
1007 }
1008 
1009 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1010   // Currently, only survivors can be root regions.
1011   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1012   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1013 
1014   const uintx interval = PrefetchScanIntervalInBytes;
1015   HeapWord* curr = hr->bottom();
1016   const HeapWord* end = hr->top();
1017   while (curr < end) {
1018     Prefetch::read(curr, interval);
1019     oop obj = oop(curr);
1020     int size = obj->oop_iterate(&cl);
1021     assert(size == obj->size(), "sanity");
1022     curr += size;
1023   }
1024 }
1025 
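// Gang task used for root region scanning: each worker claims root
// regions one at a time via CMRootRegions::claim_next() and scans
// them with scanRootRegion() until none are left.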
1026 class CMRootRegionScanTask : public AbstractGangTask {
1027 private:
1028   ConcurrentMark* _cm;
1029 
1030 public:
1031   CMRootRegionScanTask(ConcurrentMark* cm) :
1032     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1033 
1034   void work(uint worker_id) {
1035     assert(Thread::current()->is_ConcurrentGC_thread(),
1036            "this should only be done by a conc GC thread");
1037 
1038     CMRootRegions* root_regions = _cm->root_regions();
1039     HeapRegion* hr = root_regions->claim_next();
1040     while (hr != NULL) {
1041       _cm->scanRootRegion(hr, worker_id);
1042       hr = root_regions->claim_next();
1043     }
1044   }
1045 };
1046 
1047 void ConcurrentMark::scanRootRegions() {
1048   // scan_in_progress() will have been set to true only if there was
1049   // at least one root region to scan. So, if it's false, we
1050   // should not attempt to do any further work.
1051   if (root_regions()->scan_in_progress()) {
1052     _parallel_marking_threads = calc_parallel_marking_threads();
1053     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1054            "Maximum number of marking threads exceeded");
1055     uint active_workers = MAX2(1U, parallel_marking_threads());
1056 
1057     CMRootRegionScanTask task(this);
1058     if (parallel_marking_threads() > 0) {
1059       _parallel_workers->set_active_workers((int) active_workers);
1060       _parallel_workers->run_task(&task);
1061     } else {
1062       task.work(0);
1063     }
1064 
1065     // It's possible that has_aborted() is true here without actually
1066     // aborting the survivor scan earlier. This is OK as it's
1067     // mainly used for sanity checking.
1068     root_regions()->scan_finished();
1069   }
1070 }
1071 
1072 void ConcurrentMark::markFromRoots() {
1073   // we might be tempted to assert that:
1074   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1075   //        "inconsistent argument?");
1076   // However that wouldn't be right, because it's possible that
1077   // a safepoint is indeed in progress as a younger generation
1078   // stop-the-world GC happens even as we mark in this generation.
1079 
1080   _restart_for_overflow = false;
1081   force_overflow_conc()->init();
1082 
1083   // _g1h has _n_par_threads
1084   _parallel_marking_threads = calc_parallel_marking_threads();
1085   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1086     "Maximum number of marking threads exceeded");
1087 
1088   uint active_workers = MAX2(1U, parallel_marking_threads());
1089 
1090   // Parallel task terminator is set in "set_phase()"
1091   set_phase(active_workers, true /* concurrent */);
1092 
1093   CMConcurrentMarkingTask markingTask(this, cmThread());
1094   if (parallel_marking_threads() > 0) {
1095     _parallel_workers->set_active_workers((int)active_workers);
1096     // Don't set _n_par_threads because it affects MT in process_strong_roots()
1097     // and the decisions on that MT processing are made elsewhere.
1098     assert(_parallel_workers->active_workers() > 0, "Should have been set");
1099     _parallel_workers->run_task(&markingTask);
1100   } else {
1101     markingTask.work(0);
1102   }
1103   print_stats();
1104 }
1105 
1106 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1107   // world is stopped at this checkpoint
1108   assert(SafepointSynchronize::is_at_safepoint(),
1109          "world should be stopped");
1110 
1111   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1112 
1113   // If a full collection has happened, we shouldn't do this.
1114   if (has_aborted()) {
1115     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1116     return;
1117   }
1118 
1119   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1120 
1121   if (VerifyDuringGC) {
1122     HandleMark hm;  // handle scope
1123     gclog_or_tty->print(" VerifyDuringGC:(before)");
1124     Universe::heap()->prepare_for_verify();
1125     Universe::verify(/* silent */ false,
1126                      /* option */ VerifyOption_G1UsePrevMarking);
1127   }
1128 
1129   G1CollectorPolicy* g1p = g1h->g1_policy();
1130   g1p->record_concurrent_mark_remark_start();
1131 
1132   double start = os::elapsedTime();
1133 
1134   checkpointRootsFinalWork();
1135 
1136   double mark_work_end = os::elapsedTime();
1137 
1138   weakRefsWork(clear_all_soft_refs);
1139 
1140   if (has_overflown()) {
1141     // Oops.  We overflowed.  Restart concurrent marking.
1142     _restart_for_overflow = true;
1143     // Clear the flag. We do not need it any more.
1144     clear_has_overflown();
1145     if (G1TraceMarkStackOverflow) {
1146       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1147     }
1148   } else {
1149     // Aggregate the per-task counting data that we have accumulated
1150     // while marking.
1151     aggregate_count_data();
1152 
1153     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1154     // We're done with marking.
1155     // This is the end of the marking cycle; we expect all
1156     // threads to have SATB queues with active set to true.
1157     satb_mq_set.set_active_all_threads(false, /* new active value */
1158                                        true /* expected_active */);
1159 
1160     if (VerifyDuringGC) {
1161       HandleMark hm;  // handle scope
1162       gclog_or_tty->print(" VerifyDuringGC:(after)");
1163       Universe::heap()->prepare_for_verify();
1164       Universe::verify(/* silent */ false,
1165                        /* option */ VerifyOption_G1UseNextMarking);
1166     }
1167     assert(!restart_for_overflow(), "sanity");
1168   }
1169 
1170   // Reset the marking state if marking completed
1171   if (!restart_for_overflow()) {
1172     set_non_marking_state();
1173   }
1174 
1175 #if VERIFY_OBJS_PROCESSED
1176   _scan_obj_cl.objs_processed = 0;
1177   ThreadLocalObjQueue::objs_enqueued = 0;
1178 #endif
1179 
1180   // Statistics
1181   double now = os::elapsedTime();
1182   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1183   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1184   _remark_times.add((now - start) * 1000.0);
1185 
1186   g1p->record_concurrent_mark_remark_end();
1187 }
1188 
1189 // Base class of the closures that finalize and verify the
1190 // liveness counting data.
1191 class CMCountDataClosureBase: public HeapRegionClosure {
1192 protected:
1193   ConcurrentMark* _cm;
1194   BitMap* _region_bm;
1195   BitMap* _card_bm;
1196 
1197   void set_card_bitmap_range(BitMap::idx_t start_idx, BitMap::idx_t last_idx) {
1198     assert(start_idx <= last_idx, "sanity");
1199 
1200     // Set the inclusive bit range [start_idx, last_idx].
1201     // For small ranges (up to 8 cards) use a simple loop; otherwise
1202     // use par_at_put_range.
1203     if ((last_idx - start_idx) < 8) {
1204       for (BitMap::idx_t i = start_idx; i <= last_idx; i += 1) {
1205         _card_bm->par_set_bit(i);
1206       }
1207     } else {
1208       assert(last_idx < _card_bm->size(), "sanity");
1209       // Note BitMap::par_at_put_range() is exclusive.
1210       _card_bm->par_at_put_range(start_idx, last_idx+1, true);
1211     }
1212   }
1213 
1214   // It takes a region that's not empty (i.e., it has at least one
1215   // live object in it) and sets its corresponding bit on the region
1216   // bitmap to 1. If the region is "starts humongous" it will also set
1217   // to 1 the bits on the region bitmap that correspond to its
1218   // associated "continues humongous" regions.
1219   void set_bit_for_region(HeapRegion* hr) {
1220     assert(!hr->continuesHumongous(), "should have filtered those out");
1221 
1222     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1223     if (!hr->startsHumongous()) {
1224       // Normal (non-humongous) case: just set the bit.
1225       _region_bm->par_at_put(index, true);
1226     } else {
1227       // Starts humongous case: calculate how many regions are part of
1228       // this humongous region and then set the bit range.
1229       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1230       _region_bm->par_at_put_range(index, end_index, true);
1231     }
1232   }
1233 
1234 public:
1235   CMCountDataClosureBase(ConcurrentMark *cm,
1236                          BitMap* region_bm, BitMap* card_bm):
1237     _cm(cm), _region_bm(region_bm), _card_bm(card_bm) { }
1238 };
1239 
1240 // Closure that calculates the # live objects per region. Used
1241 // for verification purposes during the cleanup pause.
1242 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1243   CMBitMapRO* _bm;
1244   size_t _region_marked_bytes;
1245 
1246 public:
1247   CalcLiveObjectsClosure(CMBitMapRO *bm, ConcurrentMark *cm,
1248                          BitMap* region_bm, BitMap* card_bm) :
1249     CMCountDataClosureBase(cm, region_bm, card_bm),
1250     _bm(bm), _region_marked_bytes(0) { }
1251 
1252   bool doHeapRegion(HeapRegion* hr) {
1253 
1254     if (hr->continuesHumongous()) {
1255       // We will ignore these here and process them when their
1256       // associated "starts humongous" region is processed (see
1257       // set_bit_for_heap_region()). Note that we cannot rely on their
1258       // associated "starts humongous" region to have their bit set to
1259       // 1 since, due to the region chunking in the parallel region
1260       // iteration, a "continues humongous" region might be visited
1261       // before its associated "starts humongous".
1262       return false;
1263     }
1264 
1265     HeapWord* nextTop = hr->next_top_at_mark_start();
1266     HeapWord* start   = hr->bottom();
1267 
1268     assert(start <= hr->end() && start <= nextTop && nextTop <= hr->end(),
1269            err_msg("Preconditions not met - "
1270                    "start: "PTR_FORMAT", nextTop: "PTR_FORMAT", end: "PTR_FORMAT,
1271                    start, nextTop, hr->end()));
1272 
1273     // Find the first marked object at or after "start".
1274     start = _bm->getNextMarkedWordAddress(start, nextTop);
1275 
1276     size_t marked_bytes = 0;
1277 
1278     while (start < nextTop) {
1279       oop obj = oop(start);
1280       int obj_sz = obj->size();
1281       HeapWord* obj_last = start + obj_sz - 1;
1282 
1283       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1284       BitMap::idx_t last_idx = _cm->card_bitmap_index_for(obj_last);
1285 
1286       // Set the bits in the card BM for this object (inclusive).
1287       set_card_bitmap_range(start_idx, last_idx);
1288 
1289       // Add the size of this object to the number of marked bytes.
1290       marked_bytes += (size_t)obj_sz * HeapWordSize;
1291 
1292       // Find the next marked object after this one.
1293       start = _bm->getNextMarkedWordAddress(obj_last + 1, nextTop);
1294     }
1295 
1296     // Mark the allocated-since-marking portion...
1297     HeapWord* top = hr->top();
1298     if (nextTop < top) {
1299       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(nextTop);
1300       BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top - 1);
1301 
1302       set_card_bitmap_range(start_idx, last_idx);
1303 
1304       // This definitely means the region has live objects.
1305       set_bit_for_region(hr);
1306     }
1307 
1308     // Update the live region bitmap.
1309     if (marked_bytes > 0) {
1310       set_bit_for_region(hr);
1311     }
1312 
1313     // Set the marked bytes for the current region so that
1314     // it can be queried by a calling verification routine
1315     _region_marked_bytes = marked_bytes;
1316 
1317     return false;
1318   }
1319 
1320   size_t region_marked_bytes() const { return _region_marked_bytes; }
1321 };
1322 
1323 // Heap region closure used for verifying the counting data
1324 // that was accumulated concurrently and aggregated during
1325 // the remark pause. This closure is applied to the heap
1326 // regions during the STW cleanup pause.
1327 
1328 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1329   ConcurrentMark* _cm;
1330   CalcLiveObjectsClosure _calc_cl;
1331   BitMap* _region_bm;   // Region BM to be verified
1332   BitMap* _card_bm;     // Card BM to be verified
1333   bool _verbose;        // verbose output?
1334 
1335   BitMap* _exp_region_bm; // Expected Region BM values
1336   BitMap* _exp_card_bm;   // Expected card BM values
1337 
1338   int _failures;
1339 
1340 public:
1341   VerifyLiveObjectDataHRClosure(ConcurrentMark* cm,
1342                                 BitMap* region_bm,
1343                                 BitMap* card_bm,
1344                                 BitMap* exp_region_bm,
1345                                 BitMap* exp_card_bm,
1346                                 bool verbose) :
1347     _cm(cm),
1348     _calc_cl(_cm->nextMarkBitMap(), _cm, exp_region_bm, exp_card_bm),
1349     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1350     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1351     _failures(0) { }
1352 
1353   int failures() const { return _failures; }
1354 
1355   bool doHeapRegion(HeapRegion* hr) {
1356     if (hr->continuesHumongous()) {
1357       // We will ignore these here and process them when their
1358       // associated "starts humongous" region is processed (see
1359       // set_bit_for_heap_region()). Note that we cannot rely on their
1360       // associated "starts humongous" region to have their bit set to
1361       // 1 since, due to the region chunking in the parallel region
1362       // iteration, a "continues humongous" region might be visited
1363       // before its associated "starts humongous".
1364       return false;
1365     }
1366 
1367     int failures = 0;
1368 
1369     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1370     // this region and set the corresponding bits in the expected region
1371     // and card bitmaps.
1372     bool res = _calc_cl.doHeapRegion(hr);
1373     assert(res == false, "should be continuing");
1374 
1375     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1376                     Mutex::_no_safepoint_check_flag);
1377 
1378     // Verify the marked bytes for this region.
1379     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1380     size_t act_marked_bytes = hr->next_marked_bytes();
1381 
1382     // We're not OK if expected marked bytes > actual marked bytes. It means
1383     // we have missed accounting some objects during the actual marking.
1384     if (exp_marked_bytes > act_marked_bytes) {
1385       if (_verbose) {
1386         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1387                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1388                                hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1389       }
1390       failures += 1;
1391     }
1392 
1393     // Verify the bit, for this region, in the actual and expected
1394     // (which was just calculated) region bit maps.
1395     // We're not OK if the bit in the calculated expected region
1396     // bitmap is set and the bit in the actual region bitmap is not.
1397     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1398 
1399     bool expected = _exp_region_bm->at(index);
1400     bool actual = _region_bm->at(index);
1401     if (expected && !actual) {
1402       if (_verbose) {
1403         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1404                                "expected: %s, actual: %s",
1405                                hr->hrs_index(),
1406                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1407       }
1408       failures += 1;
1409     }
1410 
1411     // Verify that the card bit maps for the cards spanned by the current
1412     // region match. We have an error if we have a set bit in the expected
1413     // bit map and the corresponding bit in the actual bitmap is not set.
1414 
1415     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1416     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1417 
1418     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1419       expected = _exp_card_bm->at(i);
1420       actual = _card_bm->at(i);
1421 
1422       if (expected && !actual) {
1423         if (_verbose) {
1424           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1425                                  "expected: %s, actual: %s",
1426                                  hr->hrs_index(), i,
1427                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1428         }
1429         failures += 1;
1430       }
1431     }
1432 
1433     if (failures > 0 && _verbose)  {
1434       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1435                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1436                              HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
1437                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1438     }
1439 
1440     _failures += failures;
1441 
1442     // We could stop iteration over the heap when we
1443     // find the first violating region by returning true.
1444     return false;
1445   }
1446 };
1447 
1448 
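// Gang task, run during the cleanup pause when VerifyDuringGC is enabled,
// that recomputes the liveness counting data from the marking bitmap and
// verifies it against the data accumulated during marking.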
1449 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1450 protected:
1451   G1CollectedHeap* _g1h;
1452   ConcurrentMark* _cm;
1453   BitMap* _actual_region_bm;
1454   BitMap* _actual_card_bm;
1455 
1456   uint    _n_workers;
1457 
1458   BitMap* _expected_region_bm;
1459   BitMap* _expected_card_bm;
1460 
1461   int  _failures;
1462   bool _verbose;
1463 
1464 public:
1465   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1466                             BitMap* region_bm, BitMap* card_bm,
1467                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1468     : AbstractGangTask("G1 verify final counting"),
1469       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1470       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1471       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1472       _failures(0), _verbose(false),
1473       _n_workers(0) {
1474     assert(VerifyDuringGC, "don't call this otherwise");
1475 
1476     // Use the value already set as the number of active threads
1477     // in the call to run_task().
1478     if (G1CollectedHeap::use_parallel_gc_threads()) {
1479       assert( _g1h->workers()->active_workers() > 0,
1480         "Should have been previously set");
1481       _n_workers = _g1h->workers()->active_workers();
1482     } else {
1483       _n_workers = 1;
1484     }
1485 
1486     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1487     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1488 
1489     _verbose = _cm->verbose_medium();
1490   }
1491 
1492   void work(uint worker_id) {
1493     assert(worker_id < _n_workers, "invariant");
1494 
1495     VerifyLiveObjectDataHRClosure verify_cl(_cm,
1496                                             _actual_region_bm, _actual_card_bm,
1497                                             _expected_region_bm,
1498                                             _expected_card_bm,
1499                                             _verbose);
1500 
1501     if (G1CollectedHeap::use_parallel_gc_threads()) {
1502       _g1h->heap_region_par_iterate_chunked(&verify_cl,
1503                                             worker_id,
1504                                             _n_workers,
1505                                             HeapRegion::VerifyCountClaimValue);
1506     } else {
1507       _g1h->heap_region_iterate(&verify_cl);
1508     }
1509 
1510     Atomic::add(verify_cl.failures(), &_failures);
1511   }
1512 
1513   int failures() const { return _failures; }
1514 };
1515 
1516 // Closure that finalizes the liveness counting data.
1517 // Used during the cleanup pause.
1518 // Sets the bits corresponding to the interval [NTAMS, top]
1519 // (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit in the region liveness
// bitmap for each region that contains live data.
1522 
1523 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1524  public:
1525   FinalCountDataUpdateClosure(ConcurrentMark* cm,
1526                               BitMap* region_bm,
1527                               BitMap* card_bm) :
1528     CMCountDataClosureBase(cm, region_bm, card_bm) { }
1529 
1530   bool doHeapRegion(HeapRegion* hr) {
1531 
1532     if (hr->continuesHumongous()) {
1533       // We will ignore these here and process them when their
1534       // associated "starts humongous" region is processed (see
1535       // set_bit_for_heap_region()). Note that we cannot rely on their
1536       // associated "starts humongous" region to have their bit set to
1537       // 1 since, due to the region chunking in the parallel region
1538       // iteration, a "continues humongous" region might be visited
1539       // before its associated "starts humongous".
1540       return false;
1541     }
1542 
1543     HeapWord* ntams = hr->next_top_at_mark_start();
1544     HeapWord* top   = hr->top();
1545 
1546     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1547 
1548     // Mark the allocated-since-marking portion...
1549     if (ntams < top) {
1550       // This definitely means the region has live objects.
1551       set_bit_for_region(hr);
1552     }
1553 
1554     // Now set the bits for [ntams, top]
1555     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1556     BitMap::idx_t last_idx = _cm->card_bitmap_index_for(top);
1557     set_card_bitmap_range(start_idx, last_idx);
1558 
1559     // Set the bit for the region if it contains live data
1560     if (hr->next_marked_bytes() > 0) {
1561       set_bit_for_region(hr);
1562     }
1563 
1564     return false;
1565   }
1566 };
1567 
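// Gang task that applies FinalCountDataUpdateClosure to each heap region,
// in parallel when parallel GC threads are in use.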
1568 class G1ParFinalCountTask: public AbstractGangTask {
1569 protected:
1570   G1CollectedHeap* _g1h;
1571   ConcurrentMark* _cm;
1572   BitMap* _actual_region_bm;
1573   BitMap* _actual_card_bm;
1574 
1575   uint    _n_workers;
1576 
1577 public:
1578   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1579     : AbstractGangTask("G1 final counting"),
1580       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1581       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1582       _n_workers(0) {
1583     // Use the value already set as the number of active threads
1584     // in the call to run_task().
1585     if (G1CollectedHeap::use_parallel_gc_threads()) {
1586       assert( _g1h->workers()->active_workers() > 0,
1587         "Should have been previously set");
1588       _n_workers = _g1h->workers()->active_workers();
1589     } else {
1590       _n_workers = 1;
1591     }
1592   }
1593 
1594   void work(uint worker_id) {
1595     assert(worker_id < _n_workers, "invariant");
1596 
1597     FinalCountDataUpdateClosure final_update_cl(_cm,
1598                                                 _actual_region_bm,
1599                                                 _actual_card_bm);
1600 
1601     if (G1CollectedHeap::use_parallel_gc_threads()) {
1602       _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1603                                             worker_id,
1604                                             _n_workers,
1605                                             HeapRegion::FinalCountClaimValue);
1606     } else {
1607       _g1h->heap_region_iterate(&final_update_cl);
1608     }
1609   }
1610 };
1611 
1612 class G1ParNoteEndTask;
1613 
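// Per-worker closure applied to each region at the end of marking. It
// records end-of-marking information for the region and frees the region
// into the local cleanup list if it contains no live data.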
1614 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1615   G1CollectedHeap* _g1;
1616   int _worker_num;
1617   size_t _max_live_bytes;
1618   uint _regions_claimed;
1619   size_t _freed_bytes;
1620   FreeRegionList* _local_cleanup_list;
1621   OldRegionSet* _old_proxy_set;
1622   HumongousRegionSet* _humongous_proxy_set;
1623   HRRSCleanupTask* _hrrs_cleanup_task;
1624   double _claimed_region_time;
1625   double _max_region_time;
1626 
1627 public:
1628   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1629                              int worker_num,
1630                              FreeRegionList* local_cleanup_list,
1631                              OldRegionSet* old_proxy_set,
1632                              HumongousRegionSet* humongous_proxy_set,
1633                              HRRSCleanupTask* hrrs_cleanup_task) :
1634     _g1(g1), _worker_num(worker_num),
1635     _max_live_bytes(0), _regions_claimed(0),
1636     _freed_bytes(0),
1637     _claimed_region_time(0.0), _max_region_time(0.0),
1638     _local_cleanup_list(local_cleanup_list),
1639     _old_proxy_set(old_proxy_set),
1640     _humongous_proxy_set(humongous_proxy_set),
1641     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1642 
1643   size_t freed_bytes() { return _freed_bytes; }
1644 
1645   bool doHeapRegion(HeapRegion *hr) {
1646     if (hr->continuesHumongous()) {
1647       return false;
1648     }
1649     // We use a claim value of zero here because all regions
1650     // were claimed with value 1 in the FinalCount task.
1651     _g1->reset_gc_time_stamps(hr);
1652     double start = os::elapsedTime();
1653     _regions_claimed++;
1654     hr->note_end_of_marking();
1655     _max_live_bytes += hr->max_live_bytes();
1656     _g1->free_region_if_empty(hr,
1657                               &_freed_bytes,
1658                               _local_cleanup_list,
1659                               _old_proxy_set,
1660                               _humongous_proxy_set,
1661                               _hrrs_cleanup_task,
1662                               true /* par */);
1663     double region_time = (os::elapsedTime() - start);
1664     _claimed_region_time += region_time;
1665     if (region_time > _max_region_time) {
1666       _max_region_time = region_time;
1667     }
1668     return false;
1669   }
1670 
1671   size_t max_live_bytes() { return _max_live_bytes; }
1672   uint regions_claimed() { return _regions_claimed; }
1673   double claimed_region_time_sec() { return _claimed_region_time; }
1674   double max_region_time_sec() { return _max_region_time; }
1675 };
1676 
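// Gang task that runs G1NoteEndOfConcMarkClosure over the heap and then
// merges each worker's local cleanup list and statistics into the global
// state.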
1677 class G1ParNoteEndTask: public AbstractGangTask {
1678   friend class G1NoteEndOfConcMarkClosure;
1679 
1680 protected:
1681   G1CollectedHeap* _g1h;
1682   size_t _max_live_bytes;
1683   size_t _freed_bytes;
1684   FreeRegionList* _cleanup_list;
1685 
1686 public:
1687   G1ParNoteEndTask(G1CollectedHeap* g1h,
1688                    FreeRegionList* cleanup_list) :
1689     AbstractGangTask("G1 note end"), _g1h(g1h),
1690     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1691 
1692   void work(uint worker_id) {
1693     double start = os::elapsedTime();
1694     FreeRegionList local_cleanup_list("Local Cleanup List");
1695     OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
1696     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1697     HRRSCleanupTask hrrs_cleanup_task;
1698     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
1699                                            &old_proxy_set,
1700                                            &humongous_proxy_set,
1701                                            &hrrs_cleanup_task);
1702     if (G1CollectedHeap::use_parallel_gc_threads()) {
1703       _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1704                                             _g1h->workers()->active_workers(),
1705                                             HeapRegion::NoteEndClaimValue);
1706     } else {
1707       _g1h->heap_region_iterate(&g1_note_end);
1708     }
1709     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1710 
1711     // Now update the lists
1712     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1713                                             NULL /* free_list */,
1714                                             &old_proxy_set,
1715                                             &humongous_proxy_set,
1716                                             true /* par */);
1717     {
1718       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1719       _max_live_bytes += g1_note_end.max_live_bytes();
1720       _freed_bytes += g1_note_end.freed_bytes();
1721 
1722       // If we iterate over the global cleanup list at the end of
      // cleanup to do this printing we cannot guarantee that we only
1724       // generate output for the newly-reclaimed regions (the list
1725       // might not be empty at the beginning of cleanup; we might
1726       // still be working on its previous contents). So we do the
1727       // printing here, before we append the new regions to the global
1728       // cleanup list.
1729 
1730       G1HRPrinter* hr_printer = _g1h->hr_printer();
1731       if (hr_printer->is_active()) {
1732         HeapRegionLinkedListIterator iter(&local_cleanup_list);
1733         while (iter.more_available()) {
1734           HeapRegion* hr = iter.get_next();
1735           hr_printer->cleanup(hr);
1736         }
1737       }
1738 
1739       _cleanup_list->add_as_tail(&local_cleanup_list);
1740       assert(local_cleanup_list.is_empty(), "post-condition");
1741 
1742       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1743     }
1744   }
1745   size_t max_live_bytes() { return _max_live_bytes; }
1746   size_t freed_bytes() { return _freed_bytes; }
1747 };
1748 
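// Gang task that scrubs the remembered sets using the region and card
// liveness bitmaps computed during marking.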
1749 class G1ParScrubRemSetTask: public AbstractGangTask {
1750 protected:
1751   G1RemSet* _g1rs;
1752   BitMap* _region_bm;
1753   BitMap* _card_bm;
1754 public:
1755   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1756                        BitMap* region_bm, BitMap* card_bm) :
1757     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1758     _region_bm(region_bm), _card_bm(card_bm) { }
1759 
1760   void work(uint worker_id) {
1761     if (G1CollectedHeap::use_parallel_gc_threads()) {
1762       _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1763                        HeapRegion::ScrubRemSetClaimValue);
1764     } else {
1765       _g1rs->scrub(_region_bm, _card_bm);
1766     }
1767   }
1768 
1769 };
1770 
1771 void ConcurrentMark::cleanup() {
1772   // world is stopped at this checkpoint
1773   assert(SafepointSynchronize::is_at_safepoint(),
1774          "world should be stopped");
1775   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1776 
1777   // If a full collection has happened, we shouldn't do this.
1778   if (has_aborted()) {
1779     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1780     return;
1781   }
1782 
1783   HRSPhaseSetter x(HRSPhaseCleanup);
1784   g1h->verify_region_sets_optional();
1785 
1786   if (VerifyDuringGC) {
1787     HandleMark hm;  // handle scope
1788     gclog_or_tty->print(" VerifyDuringGC:(before)");
1789     Universe::heap()->prepare_for_verify();
1790     Universe::verify(/* silent */ false,
1791                      /* option */ VerifyOption_G1UsePrevMarking);
1792   }
1793 
1794   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1795   g1p->record_concurrent_mark_cleanup_start();
1796 
1797   double start = os::elapsedTime();
1798 
1799   HeapRegionRemSet::reset_for_cleanup_tasks();
1800 
1801   uint n_workers;
1802 
1803   // Do counting once more with the world stopped for good measure.
1804   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1805 
1806   if (G1CollectedHeap::use_parallel_gc_threads()) {
    assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
1808            "sanity check");
1809 
1810     g1h->set_par_threads();
1811     n_workers = g1h->n_par_threads();
1812     assert(g1h->n_par_threads() == n_workers,
1813            "Should not have been reset");
1814     g1h->workers()->run_task(&g1_par_count_task);
1815     // Done with the parallel phase so reset to 0.
1816     g1h->set_par_threads(0);
1817 
1818     assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
1819            "sanity check");
1820   } else {
1821     n_workers = 1;
1822     g1_par_count_task.work(0);
1823   }
1824 
1825   if (VerifyDuringGC) {
1826     // Verify that the counting data accumulated during marking matches
1827     // that calculated by walking the marking bitmap.
1828 
1829     // Bitmaps to hold expected values
1830     BitMap expected_region_bm(_region_bm.size(), false);
1831     BitMap expected_card_bm(_card_bm.size(), false);
1832 
1833     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1834                                                  &_region_bm,
1835                                                  &_card_bm,
1836                                                  &expected_region_bm,
1837                                                  &expected_card_bm);
1838 
1839     if (G1CollectedHeap::use_parallel_gc_threads()) {
1840       g1h->set_par_threads((int)n_workers);
1841       g1h->workers()->run_task(&g1_par_verify_task);
1842       // Done with the parallel phase so reset to 0.
1843       g1h->set_par_threads(0);
1844 
1845       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
1846              "sanity check");
1847     } else {
1848       g1_par_verify_task.work(0);
1849     }
1850 
1851     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1852   }
1853 
1854   size_t start_used_bytes = g1h->used();
1855   g1h->set_marking_complete();
1856 
1857   double count_end = os::elapsedTime();
1858   double this_final_counting_time = (count_end - start);
1859   _total_counting_time += this_final_counting_time;
1860 
1861   if (G1PrintRegionLivenessInfo) {
1862     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1863     _g1h->heap_region_iterate(&cl);
1864   }
1865 
  // Install the newly created mark bitmap as "prev".
1867   swapMarkBitMaps();
1868 
1869   g1h->reset_gc_time_stamp();
1870 
1871   // Note end of marking in all heap regions.
1872   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1873   if (G1CollectedHeap::use_parallel_gc_threads()) {
1874     g1h->set_par_threads((int)n_workers);
1875     g1h->workers()->run_task(&g1_par_note_end_task);
1876     g1h->set_par_threads(0);
1877 
1878     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1879            "sanity check");
1880   } else {
1881     g1_par_note_end_task.work(0);
1882   }
1883   g1h->check_gc_time_stamps();
1884 
1885   if (!cleanup_list_is_empty()) {
1886     // The cleanup list is not empty, so we'll have to process it
1887     // concurrently. Notify anyone else that might be wanting free
1888     // regions that there will be more free regions coming soon.
1889     g1h->set_free_regions_coming();
1890   }
1891 
  // Scrub the remembered sets (if enabled) before the
  // record_concurrent_mark_cleanup_end() call below, since it affects the
  // metric by which we sort the heap regions.
1894   if (G1ScrubRemSets) {
1895     double rs_scrub_start = os::elapsedTime();
1896     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1897     if (G1CollectedHeap::use_parallel_gc_threads()) {
1898       g1h->set_par_threads((int)n_workers);
1899       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1900       g1h->set_par_threads(0);
1901 
1902       assert(g1h->check_heap_region_claim_values(
1903                                             HeapRegion::ScrubRemSetClaimValue),
1904              "sanity check");
1905     } else {
1906       g1_par_scrub_rs_task.work(0);
1907     }
1908 
1909     double rs_scrub_end = os::elapsedTime();
1910     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1911     _total_rs_scrub_time += this_rs_scrub_time;
1912   }
1913 
1914   // this will also free any regions totally full of garbage objects,
1915   // and sort the regions.
1916   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
1917 
1918   // Statistics.
1919   double end = os::elapsedTime();
1920   _cleanup_times.add((end - start) * 1000.0);
1921 
1922   if (G1Log::fine()) {
1923     g1h->print_size_transition(gclog_or_tty,
1924                                start_used_bytes,
1925                                g1h->used(),
1926                                g1h->capacity());
1927   }
1928 
1929   // Clean up will have freed any regions completely full of garbage.
1930   // Update the soft reference policy with the new heap occupancy.
1931   Universe::update_heap_info_at_gc();
1932 
1933   // We need to make this be a "collection" so any collection pause that
1934   // races with it goes around and waits for completeCleanup to finish.
1935   g1h->increment_total_collections();
1936 
1937   // We reclaimed old regions so we should calculate the sizes to make
1938   // sure we update the old gen/space data.
1939   g1h->g1mm()->update_sizes();
1940 
1941   if (VerifyDuringGC) {
1942     HandleMark hm;  // handle scope
1943     gclog_or_tty->print(" VerifyDuringGC:(after)");
1944     Universe::heap()->prepare_for_verify();
1945     Universe::verify(/* silent */ false,
1946                      /* option */ VerifyOption_G1UsePrevMarking);
1947   }
1948 
1949   g1h->verify_region_sets_optional();
1950 }
1951 
1952 void ConcurrentMark::completeCleanup() {
1953   if (has_aborted()) return;
1954 
1955   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1956 
1957   _cleanup_list.verify_optional();
1958   FreeRegionList tmp_free_list("Tmp Free List");
1959 
1960   if (G1ConcRegionFreeingVerbose) {
1961     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1962                            "cleanup list has %u entries",
1963                            _cleanup_list.length());
1964   }
1965 
  // No one else should be accessing the _cleanup_list at this point,
  // so it's not necessary to take any locks.
1968   while (!_cleanup_list.is_empty()) {
1969     HeapRegion* hr = _cleanup_list.remove_head();
1970     assert(hr != NULL, "the list was not empty");
1971     hr->par_clear();
1972     tmp_free_list.add_as_tail(hr);
1973 
1974     // Instead of adding one region at a time to the secondary_free_list,
1975     // we accumulate them in the local list and move them a few at a
1976     // time. This also cuts down on the number of notify_all() calls
1977     // we do during this process. We'll also append the local list when
1978     // _cleanup_list is empty (which means we just removed the last
1979     // region from the _cleanup_list).
1980     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1981         _cleanup_list.is_empty()) {
1982       if (G1ConcRegionFreeingVerbose) {
1983         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1984                                "appending %u entries to the secondary_free_list, "
1985                                "cleanup list still has %u entries",
1986                                tmp_free_list.length(),
1987                                _cleanup_list.length());
1988       }
1989 
1990       {
1991         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1992         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
1993         SecondaryFreeList_lock->notify_all();
1994       }
1995 
1996       if (G1StressConcRegionFreeing) {
1997         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1998           os::sleep(Thread::current(), (jlong) 1, false);
1999         }
2000       }
2001     }
2002   }
2003   assert(tmp_free_list.is_empty(), "post-condition");
2004 }
2005 
// Support closures for reference processing in G1
2007 
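// An object is considered alive if it lies outside the G1 reserved heap
// or if the marking information does not show it to be dead.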
2008 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2009   HeapWord* addr = (HeapWord*)obj;
2010   return addr != NULL &&
2011          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2012 }
2013 
2014 class G1CMKeepAliveClosure: public OopClosure {
2015   G1CollectedHeap* _g1;
2016   ConcurrentMark*  _cm;
2017  public:
2018   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
2019     _g1(g1), _cm(cm) {
2020     assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
2021   }
2022 
2023   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2024   virtual void do_oop(      oop* p) { do_oop_work(p); }
2025 
2026   template <class T> void do_oop_work(T* p) {
2027     oop obj = oopDesc::load_decode_heap_oop(p);
2028     HeapWord* addr = (HeapWord*)obj;
2029 
2030     if (_cm->verbose_high()) {
2031       gclog_or_tty->print_cr("\t[0] we're looking at location "
2032                              "*"PTR_FORMAT" = "PTR_FORMAT,
2033                              p, (void*) obj);
2034     }
2035 
2036     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2037       _cm->mark_and_count(obj);
2038       _cm->mark_stack_push(obj);
2039     }
2040   }
2041 };
2042 
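// Closure that drains the global mark stack using the 'keep alive'
// closure above.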
2043 class G1CMDrainMarkingStackClosure: public VoidClosure {
2044   ConcurrentMark*               _cm;
2045   CMMarkStack*                  _markStack;
2046   G1CMKeepAliveClosure*         _oopClosure;
2047  public:
2048   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
2049                                G1CMKeepAliveClosure* oopClosure) :
2050     _cm(cm),
2051     _markStack(markStack),
2052     _oopClosure(oopClosure) { }
2053 
2054   void do_void() {
2055     _markStack->drain((OopClosure*)_oopClosure, _cm->nextMarkBitMap(), false);
2056   }
2057 };
2058 
2059 // 'Keep Alive' closure used by parallel reference processing.
2060 // An instance of this closure is used in the parallel reference processing
2061 // code rather than an instance of G1CMKeepAliveClosure. We could have used
// the G1CMKeepAliveClosure as it is MT-safe. Also, reference objects are
// only placed onto the discovered ref lists once, so we can mark and push
// them without needing to check whether each object has already been marked.
// Using the G1CMKeepAliveClosure would mean, however, having all the worker
// threads operating on the global mark stack. This means that an individual
// worker would be doing lock-free pushes while it processes its own
// discovered ref list followed by the drain call. If the discovered ref lists
2069 // are unbalanced then this could cause interference with the other
2070 // workers. Using a CMTask (and its embedded local data structures)
2071 // avoids that potential interference.
2072 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2073   ConcurrentMark*  _cm;
2074   CMTask*          _task;
2075   int              _ref_counter_limit;
2076   int              _ref_counter;
2077  public:
2078   G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
2079     _cm(cm), _task(task),
2080     _ref_counter_limit(G1RefProcDrainInterval) {
2081     assert(_ref_counter_limit > 0, "sanity");
2082     _ref_counter = _ref_counter_limit;
2083   }
2084 
2085   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2086   virtual void do_oop(      oop* p) { do_oop_work(p); }
2087 
2088   template <class T> void do_oop_work(T* p) {
2089     if (!_cm->has_overflown()) {
2090       oop obj = oopDesc::load_decode_heap_oop(p);
2091       if (_cm->verbose_high()) {
2092         gclog_or_tty->print_cr("\t[%d] we're looking at location "
2093                                "*"PTR_FORMAT" = "PTR_FORMAT,
2094                                _task->task_id(), p, (void*) obj);
2095       }
2096 
2097       _task->deal_with_reference(obj);
2098       _ref_counter--;
2099 
2100       if (_ref_counter == 0) {
2101         // We have dealt with _ref_counter_limit references, pushing them and objects
2102         // reachable from them on to the local stack (and possibly the global stack).
2103         // Call do_marking_step() to process these entries. We call the routine in a
2104         // loop, which we'll exit if there's nothing more to do (i.e. we're done
2105         // with the entries that we've pushed as a result of the deal_with_reference
2106         // calls above) or we overflow.
2107         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2108         // while there may still be some work to do. (See the comment at the
2109         // beginning of CMTask::do_marking_step() for those conditions - one of which
2110         // is reaching the specified time target.) It is only when
2111         // CMTask::do_marking_step() returns without setting the has_aborted() flag
2112         // that the marking has completed.
2113         do {
2114           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2115           _task->do_marking_step(mark_step_duration_ms,
2116                                  false /* do_stealing    */,
2117                                  false /* do_termination */);
2118         } while (_task->has_aborted() && !_cm->has_overflown());
2119         _ref_counter = _ref_counter_limit;
2120       }
2121     } else {
2122       if (_cm->verbose_high()) {
2123          gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2124       }
2125     }
2126   }
2127 };
2128 
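// 'Drain' closure used by parallel reference processing; it drains the
// local and global marking stacks via CMTask::do_marking_step().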
2129 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2130   ConcurrentMark* _cm;
2131   CMTask* _task;
2132  public:
2133   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2134     _cm(cm), _task(task) { }
2135 
2136   void do_void() {
2137     do {
2138       if (_cm->verbose_high()) {
2139         gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2140                                _task->task_id());
2141       }
2142 
2143       // We call CMTask::do_marking_step() to completely drain the local and
2144       // global marking stacks. The routine is called in a loop, which we'll
      // exit if there's nothing more to do (i.e. we've completely drained the
2146       // entries that were pushed as a result of applying the
2147       // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2148       // lists above) or we overflow the global marking stack.
2149       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2150       // while there may still be some work to do. (See the comment at the
2151       // beginning of CMTask::do_marking_step() for those conditions - one of which
2152       // is reaching the specified time target.) It is only when
2153       // CMTask::do_marking_step() returns without setting the has_aborted() flag
2154       // that the marking has completed.
2155 
2156       _task->do_marking_step(1000000000.0 /* something very large */,
2157                              true /* do_stealing    */,
2158                              true /* do_termination */);
2159     } while (_task->has_aborted() && !_cm->has_overflown());
2160   }
2161 };
2162 
2163 // Implementation of AbstractRefProcTaskExecutor for parallel
2164 // reference processing at the end of G1 concurrent marking
2165 
2166 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2167 private:
2168   G1CollectedHeap* _g1h;
2169   ConcurrentMark*  _cm;
2170   WorkGang*        _workers;
2171   int              _active_workers;
2172 
2173 public:
2174   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2175                         ConcurrentMark* cm,
2176                         WorkGang* workers,
2177                         int n_workers) :
2178     _g1h(g1h), _cm(cm),
2179     _workers(workers), _active_workers(n_workers) { }
2180 
2181   // Executes the given task using concurrent marking worker threads.
2182   virtual void execute(ProcessTask& task);
2183   virtual void execute(EnqueueTask& task);
2184 };
2185 
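// Gang task proxy that runs the given reference ProcessTask on each worker,
// wiring in the per-worker CMTask-based keep-alive and drain closures.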
2186 class G1CMRefProcTaskProxy: public AbstractGangTask {
2187   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2188   ProcessTask&     _proc_task;
2189   G1CollectedHeap* _g1h;
2190   ConcurrentMark*  _cm;
2191 
2192 public:
2193   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2194                      G1CollectedHeap* g1h,
2195                      ConcurrentMark* cm) :
2196     AbstractGangTask("Process reference objects in parallel"),
2197     _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
2198 
2199   virtual void work(uint worker_id) {
2200     CMTask* marking_task = _cm->task(worker_id);
2201     G1CMIsAliveClosure g1_is_alive(_g1h);
2202     G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
2203     G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2204 
2205     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2206   }
2207 };
2208 
2209 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2210   assert(_workers != NULL, "Need parallel worker threads.");
2211 
2212   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2213 
2214   // We need to reset the phase for each task execution so that
2215   // the termination protocol of CMTask::do_marking_step works.
2216   _cm->set_phase(_active_workers, false /* concurrent */);
2217   _g1h->set_par_threads(_active_workers);
2218   _workers->run_task(&proc_task_proxy);
2219   _g1h->set_par_threads(0);
2220 }
2221 
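// Gang task proxy that runs the given reference EnqueueTask on each worker.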
2222 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2223   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2224   EnqueueTask& _enq_task;
2225 
2226 public:
2227   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2228     AbstractGangTask("Enqueue reference objects in parallel"),
2229     _enq_task(enq_task) { }
2230 
2231   virtual void work(uint worker_id) {
2232     _enq_task.work(worker_id);
2233   }
2234 };
2235 
2236 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2237   assert(_workers != NULL, "Need parallel worker threads.");
2238 
2239   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2240 
2241   _g1h->set_par_threads(_active_workers);
2242   _workers->run_task(&enq_task_proxy);
2243   _g1h->set_par_threads(0);
2244 }
2245 
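// Reference processing performed at the end of concurrent marking: process
// the discovered references using the closures above and then unlink stale
// entries from the string and symbol tables.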
2246 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2247   ResourceMark rm;
2248   HandleMark   hm;
2249 
2250   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2251 
2252   // Is alive closure.
2253   G1CMIsAliveClosure g1_is_alive(g1h);
2254 
2255   // Inner scope to exclude the cleaning of the string and symbol
2256   // tables from the displayed time.
2257   {
2258     if (G1Log::finer()) {
2259       gclog_or_tty->put(' ');
2260     }
2261     TraceTime t("GC ref-proc", G1Log::finer(), false, gclog_or_tty);
2262 
2263     ReferenceProcessor* rp = g1h->ref_processor_cm();
2264 
2265     // See the comment in G1CollectedHeap::ref_processing_init()
2266     // about how reference processing currently works in G1.
2267 
2268     // Process weak references.
2269     rp->setup_policy(clear_all_soft_refs);
2270     assert(_markStack.isEmpty(), "mark stack should be empty");
2271 
2272     G1CMKeepAliveClosure g1_keep_alive(g1h, this);
2273     G1CMDrainMarkingStackClosure
2274       g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
2275 
2276     // We use the work gang from the G1CollectedHeap and we utilize all
2277     // the worker threads.
2278     uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
2279     active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U);
2280 
2281     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2282                                               g1h->workers(), active_workers);
2283 
2284     if (rp->processing_is_mt()) {
2285       // Set the degree of MT here.  If the discovery is done MT, there
2286       // may have been a different number of threads doing the discovery
2287       // and a different number of discovered lists may have Ref objects.
2288       // That is OK as long as the Reference lists are balanced (see
2289       // balance_all_queues() and balance_queues()).
2290       rp->set_active_mt_degree(active_workers);
2291 
2292       rp->process_discovered_references(&g1_is_alive,
2293                                       &g1_keep_alive,
2294                                       &g1_drain_mark_stack,
2295                                       &par_task_executor);
2296 
2297       // The work routines of the parallel keep_alive and drain_marking_stack
2298       // will set the has_overflown flag if we overflow the global marking
2299       // stack.
2300     } else {
2301       rp->process_discovered_references(&g1_is_alive,
2302                                         &g1_keep_alive,
2303                                         &g1_drain_mark_stack,
2304                                         NULL);
2305     }
2306 
2307     assert(_markStack.overflow() || _markStack.isEmpty(),
2308             "mark stack should be empty (unless it overflowed)");
2309     if (_markStack.overflow()) {
2310       // Should have been done already when we tried to push an
2311       // entry on to the global mark stack. But let's do it again.
2312       set_has_overflown();
2313     }
2314 
2315     if (rp->processing_is_mt()) {
2316       assert(rp->num_q() == active_workers, "why not");
2317       rp->enqueue_discovered_references(&par_task_executor);
2318     } else {
2319       rp->enqueue_discovered_references();
2320     }
2321 
2322     rp->verify_no_references_recorded();
2323     assert(!rp->discovery_enabled(), "Post condition");
2324   }
2325 
2326   // Now clean up stale oops in StringTable
2327   StringTable::unlink(&g1_is_alive);
2328   // Clean up unreferenced symbols in symbol table.
2329   SymbolTable::unlink();
2330 }
2331 
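// The "next" bitmap now holds the results of the marking that just
// completed, so swap it with the "prev" bitmap.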
2332 void ConcurrentMark::swapMarkBitMaps() {
2333   CMBitMapRO* temp = _prevMarkBitMap;
2334   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2335   _nextMarkBitMap  = (CMBitMap*)  temp;
2336 }
2337 
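// Gang task used during the remark pause: each active worker runs its
// CMTask's do_marking_step() to completion, with stealing and termination
// enabled.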
2338 class CMRemarkTask: public AbstractGangTask {
2339 private:
2340   ConcurrentMark *_cm;
2341 
2342 public:
2343   void work(uint worker_id) {
2344     // Since all available tasks are actually started, we should
    // only proceed if we're supposed to be active.
2346     if (worker_id < _cm->active_tasks()) {
2347       CMTask* task = _cm->task(worker_id);
2348       task->record_start_time();
2349       do {
2350         task->do_marking_step(1000000000.0 /* something very large */,
2351                               true /* do_stealing    */,
2352                               true /* do_termination */);
2353       } while (task->has_aborted() && !_cm->has_overflown());
2354       // If we overflow, then we do not want to restart. We instead
2355       // want to abort remark and do concurrent marking again.
2356       task->record_end_time();
2357     }
2358   }
2359 
2360   CMRemarkTask(ConcurrentMark* cm, int active_workers) :
2361     AbstractGangTask("Par Remark"), _cm(cm) {
2362     _cm->terminator()->reset_for_reuse(active_workers);
2363   }
2364 };
2365 
2366 void ConcurrentMark::checkpointRootsFinalWork() {
2367   ResourceMark rm;
2368   HandleMark   hm;
2369   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2370 
2371   g1h->ensure_parsability(false);
2372 
2373   if (G1CollectedHeap::use_parallel_gc_threads()) {
2374     G1CollectedHeap::StrongRootsScope srs(g1h);
2375     // this is remark, so we'll use up all active threads
2376     uint active_workers = g1h->workers()->active_workers();
2377     if (active_workers == 0) {
2378       assert(active_workers > 0, "Should have been set earlier");
2379       active_workers = (uint) ParallelGCThreads;
2380       g1h->workers()->set_active_workers(active_workers);
2381     }
2382     set_phase(active_workers, false /* concurrent */);
    // Leave _parallel_marking_threads at its
2384     // value originally calculated in the ConcurrentMark
2385     // constructor and pass values of the active workers
2386     // through the gang in the task.
2387 
2388     CMRemarkTask remarkTask(this, active_workers);
2389     g1h->set_par_threads(active_workers);
2390     g1h->workers()->run_task(&remarkTask);
2391     g1h->set_par_threads(0);
2392   } else {
2393     G1CollectedHeap::StrongRootsScope srs(g1h);
2394     // this is remark, so we'll use up all available threads
2395     uint active_workers = 1;
2396     set_phase(active_workers, false /* concurrent */);
2397 
2398     CMRemarkTask remarkTask(this, active_workers);
2399     // We will start all available threads, even if we decide that the
2400     // active_workers will be fewer. The extra ones will just bail out
2401     // immediately.
2402     remarkTask.work(0);
2403   }
2404   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2405   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2406 
2407   print_stats();
2408 
2409 #if VERIFY_OBJS_PROCESSED
2410   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2411     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2412                            _scan_obj_cl.objs_processed,
2413                            ThreadLocalObjQueue::objs_enqueued);
2414     guarantee(_scan_obj_cl.objs_processed ==
2415               ThreadLocalObjQueue::objs_enqueued,
2416               "Different number of objs processed and enqueued.");
2417   }
2418 #endif
2419 }
2420 
2421 #ifndef PRODUCT
2422 
2423 class PrintReachableOopClosure: public OopClosure {
2424 private:
2425   G1CollectedHeap* _g1h;
2426   outputStream*    _out;
2427   VerifyOption     _vo;
2428   bool             _all;
2429 
2430 public:
2431   PrintReachableOopClosure(outputStream* out,
2432                            VerifyOption  vo,
2433                            bool          all) :
2434     _g1h(G1CollectedHeap::heap()),
2435     _out(out), _vo(vo), _all(all) { }
2436 
2437   void do_oop(narrowOop* p) { do_oop_work(p); }
2438   void do_oop(      oop* p) { do_oop_work(p); }
2439 
2440   template <class T> void do_oop_work(T* p) {
2441     oop         obj = oopDesc::load_decode_heap_oop(p);
2442     const char* str = NULL;
2443     const char* str2 = "";
2444 
2445     if (obj == NULL) {
2446       str = "";
2447     } else if (!_g1h->is_in_g1_reserved(obj)) {
2448       str = " O";
2449     } else {
2450       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2451       guarantee(hr != NULL, "invariant");
2452       bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2453       bool marked = _g1h->is_marked(obj, _vo);
2454 
2455       if (over_tams) {
2456         str = " >";
2457         if (marked) {
2458           str2 = " AND MARKED";
2459         }
2460       } else if (marked) {
2461         str = " M";
2462       } else {
2463         str = " NOT";
2464       }
2465     }
2466 
2467     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2468                    p, (void*) obj, str, str2);
2469   }
2470 };
2471 
2472 class PrintReachableObjectClosure : public ObjectClosure {
2473 private:
2474   G1CollectedHeap* _g1h;
2475   outputStream*    _out;
2476   VerifyOption     _vo;
2477   bool             _all;
2478   HeapRegion*      _hr;
2479 
2480 public:
2481   PrintReachableObjectClosure(outputStream* out,
2482                               VerifyOption  vo,
2483                               bool          all,
2484                               HeapRegion*   hr) :
2485     _g1h(G1CollectedHeap::heap()),
2486     _out(out), _vo(vo), _all(all), _hr(hr) { }
2487 
2488   void do_object(oop o) {
2489     bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2490     bool marked = _g1h->is_marked(o, _vo);
2491     bool print_it = _all || over_tams || marked;
2492 
2493     if (print_it) {
2494       _out->print_cr(" "PTR_FORMAT"%s",
2495                      o, (over_tams) ? " >" : (marked) ? " M" : "");
2496       PrintReachableOopClosure oopCl(_out, _vo, _all);
2497       o->oop_iterate(&oopCl);
2498     }
2499   }
2500 };
2501 
2502 class PrintReachableRegionClosure : public HeapRegionClosure {
2503 private:
2504   G1CollectedHeap* _g1h;
2505   outputStream*    _out;
2506   VerifyOption     _vo;
2507   bool             _all;
2508 
2509 public:
2510   bool doHeapRegion(HeapRegion* hr) {
2511     HeapWord* b = hr->bottom();
2512     HeapWord* e = hr->end();
2513     HeapWord* t = hr->top();
2514     HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2515     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2516                    "TAMS: "PTR_FORMAT, b, e, t, p);
2517     _out->cr();
2518 
2519     HeapWord* from = b;
2520     HeapWord* to   = t;
2521 
2522     if (to > from) {
2523       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2524       _out->cr();
2525       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2526       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2527       _out->cr();
2528     }
2529 
2530     return false;
2531   }
2532 
2533   PrintReachableRegionClosure(outputStream* out,
2534                               VerifyOption  vo,
2535                               bool          all) :
2536     _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2537 };
2538 
2539 void ConcurrentMark::print_reachable(const char* str,
2540                                      VerifyOption vo,
2541                                      bool all) {
2542   gclog_or_tty->cr();
2543   gclog_or_tty->print_cr("== Doing heap dump... ");
2544 
2545   if (G1PrintReachableBaseFile == NULL) {
2546     gclog_or_tty->print_cr("  #### error: no base file defined");
2547     return;
2548   }
2549 
2550   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2551       (JVM_MAXPATHLEN - 1)) {
2552     gclog_or_tty->print_cr("  #### error: file name too long");
2553     return;
2554   }
2555 
2556   char file_name[JVM_MAXPATHLEN];
2557   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2558   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2559 
2560   fileStream fout(file_name);
2561   if (!fout.is_open()) {
2562     gclog_or_tty->print_cr("  #### error: could not open file");
2563     return;
2564   }
2565 
2566   outputStream* out = &fout;
2567   out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2568   out->cr();
2569 
2570   out->print_cr("--- ITERATING OVER REGIONS");
2571   out->cr();
2572   PrintReachableRegionClosure rcl(out, vo, all);
2573   _g1h->heap_region_iterate(&rcl);
2574   out->cr();
2575 
2576   gclog_or_tty->print_cr("  done");
2577   gclog_or_tty->flush();
2578 }
2579 
2580 #endif // PRODUCT
2581 
2582 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2583   // Note we are overriding the read-only view of the prev map here, via
2584   // the cast.
2585   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2586 }
2587 
2588 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2589   _nextMarkBitMap->clearRange(mr);
2590 }
2591 
2592 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
2593   clearRangePrevBitmap(mr);
2594   clearRangeNextBitmap(mr);
2595 }
2596 
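// Claims the next region to be scanned by atomically moving the global
// finger past it. Returns NULL if the claimed region turns out to be empty
// (in which case the caller should call claim_region() again) or if the
// finger has reached the end of the heap.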
2597 HeapRegion*
2598 ConcurrentMark::claim_region(int task_num) {
2599   // "checkpoint" the finger
2600   HeapWord* finger = _finger;
2601 
2602   // _heap_end will not change underneath our feet; it only changes at
2603   // yield points.
2604   while (finger < _heap_end) {
2605     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2606 
2607     // Note on how this code handles humongous regions. In the
2608     // normal case the finger will reach the start of a "starts
2609     // humongous" (SH) region. Its end will either be the end of the
2610     // last "continues humongous" (CH) region in the sequence, or the
2611     // standard end of the SH region (if the SH is the only region in
2612     // the sequence). That way claim_region() will skip over the CH
2613     // regions. However, there is a subtle race between a CM thread
2614     // executing this method and a mutator thread doing a humongous
2615     // object allocation. The two are not mutually exclusive as the CM
2616     // thread does not need to hold the Heap_lock when it gets
2617     // here. So there is a chance that claim_region() will come across
    // a free region that's in the process of becoming a SH or a CH
2619     // region. In the former case, it will either
2620     //   a) Miss the update to the region's end, in which case it will
2621     //      visit every subsequent CH region, will find their bitmaps
2622     //      empty, and do nothing, or
2623     //   b) Will observe the update of the region's end (in which case
2624     //      it will skip the subsequent CH regions).
2625     // If it comes across a region that suddenly becomes CH, the
2626     // scenario will be similar to b). So, the race between
2627     // claim_region() and a humongous object allocation might force us
2628     // to do a bit of unnecessary work (due to some unnecessary bitmap
    // iterations) but it should not introduce any correctness issues.
2630     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
2631     HeapWord*   bottom        = curr_region->bottom();
2632     HeapWord*   end           = curr_region->end();
2633     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2634 
2635     if (verbose_low()) {
2636       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2637                              "["PTR_FORMAT", "PTR_FORMAT"), "
2638                              "limit = "PTR_FORMAT,
2639                              task_num, curr_region, bottom, end, limit);
2640     }
2641 
2642     // Is the gap between reading the finger and doing the CAS too long?
2643     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2644     if (res == finger) {
2645       // we succeeded
2646 
      // notice that _finger == end cannot be guaranteed here since
2648       // someone else might have moved the finger even further
2649       assert(_finger >= end, "the finger should have moved forward");
2650 
2651       if (verbose_low()) {
2652         gclog_or_tty->print_cr("[%d] we were successful with region = "
2653                                PTR_FORMAT, task_num, curr_region);
2654       }
2655 
2656       if (limit > bottom) {
2657         if (verbose_low()) {
2658           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2659                                  "returning it ", task_num, curr_region);
2660         }
2661         return curr_region;
2662       } else {
2663         assert(limit == bottom,
2664                "the region limit should be at bottom");
2665         if (verbose_low()) {
2666           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2667                                  "returning NULL", task_num, curr_region);
2668         }
2669         // we return NULL and the caller should try calling
2670         // claim_region() again.
2671         return NULL;
2672       }
2673     } else {
2674       assert(_finger > finger, "the finger should have moved forward");
2675       if (verbose_low()) {
2676         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2677                                "global finger = "PTR_FORMAT", "
2678                                "our finger = "PTR_FORMAT,
2679                                task_num, _finger, finger);
2680       }
2681 
2682       // read it again
2683       finger = _finger;
2684     }
2685   }
2686 
2687   return NULL;
2688 }
2689 
2690 #ifndef PRODUCT
2691 enum VerifyNoCSetOopsPhase {
2692   VerifyNoCSetOopsStack,
2693   VerifyNoCSetOopsQueues,
2694   VerifyNoCSetOopsSATBCompleted,
2695   VerifyNoCSetOopsSATBThread
2696 };
2697 
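// Closure used by verify_no_cset_oops() below to check that no entry on
// the marking stacks, task queues, or SATB buffers points into the
// collection set.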
2698 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
2699 private:
2700   G1CollectedHeap* _g1h;
2701   VerifyNoCSetOopsPhase _phase;
2702   int _info;
2703 
2704   const char* phase_str() {
2705     switch (_phase) {
2706     case VerifyNoCSetOopsStack:         return "Stack";
2707     case VerifyNoCSetOopsQueues:        return "Queue";
2708     case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
2709     case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
2710     default:                            ShouldNotReachHere();
2711     }
2712     return NULL;
2713   }
2714 
2715   void do_object_work(oop obj) {
2716     guarantee(!_g1h->obj_in_cs(obj),
2717               err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2718                       (void*) obj, phase_str(), _info));
2719   }
2720 
2721 public:
2722   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2723 
2724   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2725     _phase = phase;
2726     _info = info;
2727   }
2728 
2729   virtual void do_oop(oop* p) {
2730     oop obj = oopDesc::load_decode_heap_oop(p);
2731     do_object_work(obj);
2732   }
2733 
2734   virtual void do_oop(narrowOop* p) {
2735     // We should not come across narrow oops while scanning marking
2736     // stacks and SATB buffers.
2737     ShouldNotReachHere();
2738   }
2739 
2740   virtual void do_object(oop obj) {
2741     do_object_work(obj);
2742   }
2743 };
2744 
2745 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
2746                                          bool verify_enqueued_buffers,
2747                                          bool verify_thread_buffers,
2748                                          bool verify_fingers) {
2749   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2750   if (!G1CollectedHeap::heap()->mark_in_progress()) {
2751     return;
2752   }
2753 
2754   VerifyNoCSetOopsClosure cl;
2755 
2756   if (verify_stacks) {
2757     // Verify entries on the global mark stack
2758     cl.set_phase(VerifyNoCSetOopsStack);
2759     _markStack.oops_do(&cl);
2760 
2761     // Verify entries on the task queues
2762     for (int i = 0; i < (int) _max_task_num; i += 1) {
2763       cl.set_phase(VerifyNoCSetOopsQueues, i);
2764       OopTaskQueue* queue = _task_queues->queue(i);
2765       queue->oops_do(&cl);
2766     }
2767   }
2768 
2769   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
2770 
2771   // Verify entries on the enqueued SATB buffers
2772   if (verify_enqueued_buffers) {
2773     cl.set_phase(VerifyNoCSetOopsSATBCompleted);
2774     satb_qs.iterate_completed_buffers_read_only(&cl);
2775   }
2776 
2777   // Verify entries on the per-thread SATB buffers
2778   if (verify_thread_buffers) {
2779     cl.set_phase(VerifyNoCSetOopsSATBThread);
2780     satb_qs.iterate_thread_buffers_read_only(&cl);
2781   }
2782 
2783   if (verify_fingers) {
2784     // Verify the global finger
2785     HeapWord* global_finger = finger();
2786     if (global_finger != NULL && global_finger < _heap_end) {
2787       // The global finger always points to a heap region boundary. We
2788       // use heap_region_containing_raw() to get the containing region
2789       // given that the global finger could be pointing to a free region
      // which subsequently becomes a continues humongous region. If that
2791       // happens, heap_region_containing() will return the bottom of the
2792       // corresponding starts humongous region and the check below will
2793       // not hold any more.
2794       HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2795       guarantee(global_finger == global_hr->bottom(),
2796                 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
2797                         global_finger, HR_FORMAT_PARAMS(global_hr)));
2798     }
2799 
2800     // Verify the task fingers
2801     assert(parallel_marking_threads() <= _max_task_num, "sanity");
2802     for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
2803       CMTask* task = _tasks[i];
2804       HeapWord* task_finger = task->finger();
2805       if (task_finger != NULL && task_finger < _heap_end) {
2806         // See above note on the global finger verification.
2807         HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2808         guarantee(task_finger == task_hr->bottom() ||
2809                   !task_hr->in_collection_set(),
2810                   err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
2811                           task_finger, HR_FORMAT_PARAMS(task_hr)));
2812       }
2813     }
2814   }
2815 }
2816 #endif // PRODUCT
2817 
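// Resets the global marking state: empties the global mark stack and the
// per-task queues and resets the finger to the start of the heap.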
2818 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2819   _markStack.setEmpty();
2820   _markStack.clear_overflow();
2821   if (clear_overflow) {
2822     clear_has_overflown();
2823   } else {
2824     assert(has_overflown(), "pre-condition");
2825   }
2826   _finger = _heap_start;
2827 
2828   for (int i = 0; i < (int)_max_task_num; ++i) {
2829     OopTaskQueue* queue = _task_queues->queue(i);
2830     queue->set_empty();
2831   }
2832 }
2833 
2834 // Aggregate the counting data that was constructed concurrently
2835 // with marking.
2836 class AggregateCountDataHRClosure: public HeapRegionClosure {
2837   ConcurrentMark* _cm;
2838   BitMap* _cm_card_bm;
2839   size_t _max_task_num;
2840 
2841  public:
2842   AggregateCountDataHRClosure(ConcurrentMark *cm,
2843                               BitMap* cm_card_bm,
2844                               size_t max_task_num) :
2845     _cm(cm), _cm_card_bm(cm_card_bm),
2846     _max_task_num(max_task_num) { }
2847 
2848   bool is_card_aligned(HeapWord* p) {
2849     return ((uintptr_t(p) & (CardTableModRefBS::card_size - 1)) == 0);
2850   }
2851 
2852   bool doHeapRegion(HeapRegion* hr) {
2853     if (hr->continuesHumongous()) {
2854       // We will ignore these here and process them when their
2855       // associated "starts humongous" region is processed.
2856       // Note that we cannot rely on their associated
2857       // "starts humongous" region to have their bit set to 1
2858       // since, due to the region chunking in the parallel region
2859       // iteration, a "continues humongous" region might be visited
2860       // before its associated "starts humongous".
2861       return false;
2862     }
2863 
2864     HeapWord* start = hr->bottom();
2865     HeapWord* limit = hr->next_top_at_mark_start();
2866     HeapWord* end = hr->end();
2867 
2868     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2869            err_msg("Preconditions not met - "
2870                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
2871                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
2872                    start, limit, hr->top(), hr->end()));
2873 
2874     assert(hr->next_marked_bytes() == 0, "Precondition");
2875 
2876     if (start == limit) {
2877       // NTAMS of this region has not been set so nothing to do.
2878       return false;
2879     }
2880 
2881     assert(is_card_aligned(start), "sanity");
2882     assert(is_card_aligned(end), "sanity");
2883 
2884     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2885     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2886     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2887 
2888     // If ntams is not card aligned then we bump the index for
2889     // limit so that we get the card spanning ntams.
2890     if (!is_card_aligned(limit)) {
2891       limit_idx += 1;
2892     }
2893 
2894     assert(limit_idx <= end_idx, "or else use atomics");
2895 
2896     // Aggregate the "stripe" in the count data associated with hr.
2897     uint hrs_index = hr->hrs_index();
2898     size_t marked_bytes = 0;
2899 
2900     for (int i = 0; (size_t)i < _max_task_num; i += 1) {
2901       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2902       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2903 
2904       // Fetch the marked_bytes in this region for task i and
2905       // add it to the running total for this region.
2906       marked_bytes += marked_bytes_array[hrs_index];
2907 
2908       // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
2909       // into the global card bitmap.
2910       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2911 
2912       while (scan_idx < limit_idx) {
2913         assert(task_card_bm->at(scan_idx) == true, "should be");
2914         _cm_card_bm->set_bit(scan_idx);
2915         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2916 
2917         // BitMap::get_next_one_offset() can handle the case when
2918         // its left_offset parameter is greater than its right_offset
2919         // parameter. It does, however, have an early exit if
2920         // left_offset == right_offset. So let's limit the value
2921         // passed in for left offset here.
2922         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2923         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2924       }
2925     }
2926 
2927     // Update the marked bytes for this region.
2928     hr->add_to_marked_bytes(marked_bytes);
2929 
2930     // Next heap region
2931     return false;
2932   }
2933 };
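// The closure above turns heap addresses into card bitmap indices and, when
// NTAMS is not card aligned, bumps limit_idx so that the card spanning NTAMS
// is also aggregated. A minimal sketch of that index arithmetic, assuming the
// usual 512-byte cards and hypothetical helpers (card_index_for() and
// limit_index_for() are illustrative, not the real BitMap / card table API):
//
//   #include <cstddef>
//   #include <cstdint>
//
//   static const int    kCardShift = 9;                 // 512-byte cards (assumed)
//   static const size_t kCardSize  = (size_t)1 << kCardShift;
//
//   // card index of addr, relative to the bottom of the heap
//   static size_t card_index_for(uintptr_t heap_bottom, uintptr_t addr) {
//     return (addr - heap_bottom) >> kCardShift;
//   }
//
//   // one past the last card to aggregate for a region whose NTAMS is at
//   // ntams: if NTAMS falls mid-card, include the card that spans it
//   static size_t limit_index_for(uintptr_t heap_bottom, uintptr_t ntams) {
//     size_t idx = card_index_for(heap_bottom, ntams);
//     if (((ntams - heap_bottom) & (kCardSize - 1)) != 0) {
//       idx += 1;
//     }
//     return idx;
//   }
//
// For example, with a region starting at heap_bottom and NTAMS 700 bytes into
// it, card_index_for() yields 1, but 700 is not a multiple of 512, so
// limit_index_for() returns 2 and the card spanning NTAMS is unioned into the
// global card bitmap as well.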
2934 
2935 class G1AggregateCountDataTask: public AbstractGangTask {
2936 protected:
2937   G1CollectedHeap* _g1h;
2938   ConcurrentMark* _cm;
2939   BitMap* _cm_card_bm;
2940   size_t _max_task_num;
2941   int _active_workers;
2942 
2943 public:
2944   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2945                            ConcurrentMark* cm,
2946                            BitMap* cm_card_bm,
2947                            size_t max_task_num,
2948                            int n_workers) :
2949     AbstractGangTask("Count Aggregation"),
2950     _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2951     _max_task_num(max_task_num),
2952     _active_workers(n_workers) { }
2953 
2954   void work(uint worker_id) {
2955     AggregateCountDataHRClosure cl(_cm, _cm_card_bm, _max_task_num);
2956 
2957     if (G1CollectedHeap::use_parallel_gc_threads()) {
2958       _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
2959                                             _active_workers,
2960                                             HeapRegion::AggregateCountClaimValue);
2961     } else {
2962       _g1h->heap_region_iterate(&cl);
2963     }
2964   }
2965 };
2966 
2967 
2968 void ConcurrentMark::aggregate_count_data() {
2969   int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
2970                         _g1h->workers()->active_workers() :
2971                         1);
2972 
2973   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
2974                                            _max_task_num, n_workers);
2975 
2976   if (G1CollectedHeap::use_parallel_gc_threads()) {
2977     assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
2978            "sanity check");
2979     _g1h->set_par_threads(n_workers);
2980     _g1h->workers()->run_task(&g1_par_agg_task);
2981     _g1h->set_par_threads(0);
2982 
2983     assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
2984            "sanity check");
2985     _g1h->reset_heap_region_claim_values();
2986   } else {
2987     g1_par_agg_task.work(0);
2988   }
2989 }
2990 
2991 // Clear the per-worker arrays used to store the per-region counting data
2992 void ConcurrentMark::clear_all_count_data() {
2993   // Clear the global card bitmap - it will be filled during
2994   // liveness count aggregation (during remark) and the
2995   // final counting task.
2996   _card_bm.clear();
2997 
2998   // Clear the global region bitmap - it will be filled as part
2999   // of the final counting task.
3000   _region_bm.clear();
3001 
3002   uint max_regions = _g1h->max_regions();
3003   assert(_max_task_num != 0, "uninitialized");
3004 
3005   for (int i = 0; (size_t) i < _max_task_num; i += 1) {
3006     BitMap* task_card_bm = count_card_bitmap_for(i);
3007     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3008 
3009     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3010     assert(marked_bytes_array != NULL, "uninitialized");
3011 
3012     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3013     task_card_bm->clear();
3014   }
3015 }
3016 
3017 void ConcurrentMark::print_stats() {
3018   if (verbose_stats()) {
3019     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3020     for (size_t i = 0; i < _active_tasks; ++i) {
3021       _tasks[i]->print_stats();
3022       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3023     }
3024   }
3025 }
3026 
3027 // abandon current marking iteration due to a Full GC
3028 void ConcurrentMark::abort() {
3029   // Clear all marks to force marking thread to do nothing
3030   _nextMarkBitMap->clearAll();
3031   // Clear the liveness counting data
3032   clear_all_count_data();
3033   // Empty mark stack
3034   clear_marking_state();
3035   for (int i = 0; i < (int)_max_task_num; ++i) {
3036     _tasks[i]->clear_region_fields();
3037   }
3038   _has_aborted = true;
3039 
3040   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3041   satb_mq_set.abandon_partial_marking();
3042   // This can be called either during or outside marking; we'll read
3043   // the expected_active value from the SATB queue set.
3044   satb_mq_set.set_active_all_threads(
3045                                  false, /* new active value */
3046                                  satb_mq_set.is_active() /* expected_active */);
3047 }
3048 
3049 static void print_ms_time_info(const char* prefix, const char* name,
3050                                NumberSeq& ns) {
3051   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3052                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3053   if (ns.num() > 0) {
3054     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3055                            prefix, ns.sd(), ns.maximum());
3056   }
3057 }
3058 
3059 void ConcurrentMark::print_summary_info() {
3060   gclog_or_tty->print_cr(" Concurrent marking:");
3061   print_ms_time_info("  ", "init marks", _init_times);
3062   print_ms_time_info("  ", "remarks", _remark_times);
3063   {
3064     print_ms_time_info("     ", "final marks", _remark_mark_times);
3065     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3066 
3067   }
3068   print_ms_time_info("  ", "cleanups", _cleanup_times);
3069   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3070                          _total_counting_time,
3071                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3072                           (double)_cleanup_times.num()
3073                          : 0.0));
3074   if (G1ScrubRemSets) {
3075     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3076                            _total_rs_scrub_time,
3077                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3078                             (double)_cleanup_times.num()
3079                            : 0.0));
3080   }
3081   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3082                          (_init_times.sum() + _remark_times.sum() +
3083                           _cleanup_times.sum())/1000.0);
3084   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3085                 "(%8.2f s marking).",
3086                 cmThread()->vtime_accum(),
3087                 cmThread()->vtime_mark_accum());
3088 }
3089 
3090 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3091   _parallel_workers->print_worker_threads_on(st);
3092 }
3093 
3094 // We take a break if someone is trying to stop the world.
3095 bool ConcurrentMark::do_yield_check(uint worker_id) {
3096   if (should_yield()) {
3097     if (worker_id == 0) {
3098       _g1h->g1_policy()->record_concurrent_pause();
3099     }
3100     cmThread()->yield();
3101     return true;
3102   } else {
3103     return false;
3104   }
3105 }
3106 
3107 bool ConcurrentMark::should_yield() {
3108   return cmThread()->should_yield();
3109 }
3110 
3111 bool ConcurrentMark::containing_card_is_marked(void* p) {
3112   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3113   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3114 }
3115 
3116 bool ConcurrentMark::containing_cards_are_marked(void* start,
3117                                                  void* last) {
3118   return containing_card_is_marked(start) &&
3119          containing_card_is_marked(last);
3120 }
3121 
3122 #ifndef PRODUCT
3123 // for debugging purposes
3124 void ConcurrentMark::print_finger() {
3125   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3126                          _heap_start, _heap_end, _finger);
3127   for (int i = 0; i < (int) _max_task_num; ++i) {
3128     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
3129   }
3130   gclog_or_tty->print_cr("");
3131 }
3132 #endif
3133 
3134 void CMTask::scan_object(oop obj) {
3135   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3136 
3137   if (_cm->verbose_high()) {
3138     gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
3139                            _task_id, (void*) obj);
3140   }
3141 
3142   size_t obj_size = obj->size();
3143   _words_scanned += obj_size;
3144 
3145   obj->oop_iterate(_cm_oop_closure);
3146   statsOnly( ++_objs_scanned );
3147   check_limits();
3148 }
3149 
3150 // Closure for iteration over bitmaps
3151 class CMBitMapClosure : public BitMapClosure {
3152 private:
3153   // the bitmap that is being iterated over
3154   CMBitMap*                   _nextMarkBitMap;
3155   ConcurrentMark*             _cm;
3156   CMTask*                     _task;
3157 
3158 public:
3159   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3160     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3161 
3162   bool do_bit(size_t offset) {
3163     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3164     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3165     assert(addr < _cm->finger(), "invariant");
3166 
3167     statsOnly( _task->increase_objs_found_on_bitmap() );
3168     assert(addr >= _task->finger(), "invariant");
3169 
3170     // We move that task's local finger along.
3171     _task->move_finger_to(addr);
3172 
3173     _task->scan_object(oop(addr));
3174     // we only partially drain the local queue and global stack
3175     _task->drain_local_queue(true);
3176     _task->drain_global_stack(true);
3177 
3178     // if the has_aborted flag has been raised, we need to bail out of
3179     // the iteration
3180     return !_task->has_aborted();
3181   }
3182 };
3183 
3184 // Closure for iterating over objects, currently only used for
3185 // processing SATB buffers.
3186 class CMObjectClosure : public ObjectClosure {
3187 private:
3188   CMTask* _task;
3189 
3190 public:
3191   void do_object(oop obj) {
3192     _task->deal_with_reference(obj);
3193   }
3194 
3195   CMObjectClosure(CMTask* task) : _task(task) { }
3196 };
3197 
3198 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3199                                ConcurrentMark* cm,
3200                                CMTask* task)
3201   : _g1h(g1h), _cm(cm), _task(task) {
3202   assert(_ref_processor == NULL, "should be initialized to NULL");
3203 
3204   if (G1UseConcMarkReferenceProcessing) {
3205     _ref_processor = g1h->ref_processor_cm();
3206     assert(_ref_processor != NULL, "should not be NULL");
3207   }
3208 }
3209 
3210 void CMTask::setup_for_region(HeapRegion* hr) {
3211   // Separated the asserts so that we know which one fires.
3212   assert(hr != NULL,
3213         "claim_region() should have filtered out continues humongous regions");
3214   assert(!hr->continuesHumongous(),
3215         "claim_region() should have filtered out continues humongous regions");
3216 
3217   if (_cm->verbose_low()) {
3218     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3219                            _task_id, hr);
3220   }
3221 
3222   _curr_region  = hr;
3223   _finger       = hr->bottom();
3224   update_region_limit();
3225 }
3226 
3227 void CMTask::update_region_limit() {
3228   HeapRegion* hr            = _curr_region;
3229   HeapWord* bottom          = hr->bottom();
3230   HeapWord* limit           = hr->next_top_at_mark_start();
3231 
3232   if (limit == bottom) {
3233     if (_cm->verbose_low()) {
3234       gclog_or_tty->print_cr("[%d] found an empty region "
3235                              "["PTR_FORMAT", "PTR_FORMAT")",
3236                              _task_id, bottom, limit);
3237     }
3238     // The region was collected underneath our feet.
3239     // We set the finger to bottom to ensure that the bitmap
3240     // iteration that will follow this will not do anything.
3241     // (this is not a condition that holds when we set the region up,
3242     // as the region is not supposed to be empty in the first place)
3243     _finger = bottom;
3244   } else if (limit >= _region_limit) {
3245     assert(limit >= _finger, "peace of mind");
3246   } else {
3247     assert(limit < _region_limit, "only way to get here");
3248     // This can happen under some pretty unusual circumstances.  An
3249     // evacuation pause empties the region underneath our feet (NTAMS
3250     // at bottom). We then do some allocation in the region (NTAMS
3251     // stays at bottom), followed by the region being used as a GC
3252     // alloc region (NTAMS will move to top() and the objects
3253     // originally below it will be grayed). All objects now marked in
3254     // the region are explicitly grayed, if below the global finger,
3255     // and we do not need in fact to scan anything else. So, we simply
3256     // set _finger to be limit to ensure that the bitmap iteration
3257     // doesn't do anything.
3258     _finger = limit;
3259   }
3260 
3261   _region_limit = limit;
3262 }
3263 
3264 void CMTask::giveup_current_region() {
3265   assert(_curr_region != NULL, "invariant");
3266   if (_cm->verbose_low()) {
3267     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3268                            _task_id, _curr_region);
3269   }
3270   clear_region_fields();
3271 }
3272 
3273 void CMTask::clear_region_fields() {
3274   // Values for these three fields that indicate that we're not
3275   // holding on to a region.
3276   _curr_region   = NULL;
3277   _finger        = NULL;
3278   _region_limit  = NULL;
3279 }
3280 
3281 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3282   if (cm_oop_closure == NULL) {
3283     assert(_cm_oop_closure != NULL, "invariant");
3284   } else {
3285     assert(_cm_oop_closure == NULL, "invariant");
3286   }
3287   _cm_oop_closure = cm_oop_closure;
3288 }
3289 
3290 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3291   guarantee(nextMarkBitMap != NULL, "invariant");
3292 
3293   if (_cm->verbose_low()) {
3294     gclog_or_tty->print_cr("[%d] resetting", _task_id);
3295   }
3296 
3297   _nextMarkBitMap                = nextMarkBitMap;
3298   clear_region_fields();
3299 
3300   _calls                         = 0;
3301   _elapsed_time_ms               = 0.0;
3302   _termination_time_ms           = 0.0;
3303   _termination_start_time_ms     = 0.0;
3304 
3305 #if _MARKING_STATS_
3306   _local_pushes                  = 0;
3307   _local_pops                    = 0;
3308   _local_max_size                = 0;
3309   _objs_scanned                  = 0;
3310   _global_pushes                 = 0;
3311   _global_pops                   = 0;
3312   _global_max_size               = 0;
3313   _global_transfers_to           = 0;
3314   _global_transfers_from         = 0;
3315   _regions_claimed               = 0;
3316   _objs_found_on_bitmap          = 0;
3317   _satb_buffers_processed        = 0;
3318   _steal_attempts                = 0;
3319   _steals                        = 0;
3320   _aborted                       = 0;
3321   _aborted_overflow              = 0;
3322   _aborted_cm_aborted            = 0;
3323   _aborted_yield                 = 0;
3324   _aborted_timed_out             = 0;
3325   _aborted_satb                  = 0;
3326   _aborted_termination           = 0;
3327 #endif // _MARKING_STATS_
3328 }
3329 
3330 bool CMTask::should_exit_termination() {
3331   regular_clock_call();
3332   // This is called when we are in the termination protocol. We should
3333   // quit if, for some reason, this task wants to abort or the global
3334   // stack is not empty (this means that we can get work from it).
3335   return !_cm->mark_stack_empty() || has_aborted();
3336 }
3337 
3338 void CMTask::reached_limit() {
3339   assert(_words_scanned >= _words_scanned_limit ||
3340          _refs_reached >= _refs_reached_limit ,
3341          "shouldn't have been called otherwise");
3342   regular_clock_call();
3343 }
3344 
3345 void CMTask::regular_clock_call() {
3346   if (has_aborted()) return;
3347 
3348   // First, we need to recalculate the words scanned and refs reached
3349   // limits for the next clock call.
3350   recalculate_limits();
3351 
3352   // During the regular clock call we do the following
3353 
3354   // (1) If an overflow has been flagged, then we abort.
3355   if (_cm->has_overflown()) {
3356     set_has_aborted();
3357     return;
3358   }
3359 
3360   // If we are not concurrent (i.e. we're doing remark) we don't need
3361   // to check anything else. The other steps are only needed during
3362   // the concurrent marking phase.
3363   if (!concurrent()) return;
3364 
3365   // (2) If marking has been aborted for Full GC, then we also abort.
3366   if (_cm->has_aborted()) {
3367     set_has_aborted();
3368     statsOnly( ++_aborted_cm_aborted );
3369     return;
3370   }
3371 
3372   double curr_time_ms = os::elapsedVTime() * 1000.0;
3373 
3374   // (3) If marking stats are enabled, then we update the step history.
3375 #if _MARKING_STATS_
3376   if (_words_scanned >= _words_scanned_limit) {
3377     ++_clock_due_to_scanning;
3378   }
3379   if (_refs_reached >= _refs_reached_limit) {
3380     ++_clock_due_to_marking;
3381   }
3382 
3383   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3384   _interval_start_time_ms = curr_time_ms;
3385   _all_clock_intervals_ms.add(last_interval_ms);
3386 
3387   if (_cm->verbose_medium()) {
3388       gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3389                         "scanned = %d%s, refs reached = %d%s",
3390                         _task_id, last_interval_ms,
3391                         _words_scanned,
3392                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3393                         _refs_reached,
3394                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3395   }
3396 #endif // _MARKING_STATS_
3397 
3398   // (4) We check whether we should yield. If we have to, then we abort.
3399   if (_cm->should_yield()) {
3400     // We should yield. To do this we abort the task. The caller is
3401     // responsible for yielding.
3402     set_has_aborted();
3403     statsOnly( ++_aborted_yield );
3404     return;
3405   }
3406 
3407   // (5) We check whether we've reached our time quota. If we have,
3408   // then we abort.
3409   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3410   if (elapsed_time_ms > _time_target_ms) {
3411     set_has_aborted();
3412     _has_timed_out = true;
3413     statsOnly( ++_aborted_timed_out );
3414     return;
3415   }
3416 
3417   // (6) Finally, we check whether there are enough completed SATB
3418   // buffers available for processing. If there are, we abort.
3419   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3420   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3421     if (_cm->verbose_low()) {
3422       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3423                              _task_id);
3424     }
3425     // we do need to process SATB buffers, so we'll abort and restart
3426     // the marking task to do so
3427     set_has_aborted();
3428     statsOnly( ++_aborted_satb );
3429     return;
3430   }
3431 }
3432 
3433 void CMTask::recalculate_limits() {
3434   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3435   _words_scanned_limit      = _real_words_scanned_limit;
3436 
3437   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3438   _refs_reached_limit       = _real_refs_reached_limit;
3439 }
3440 
3441 void CMTask::decrease_limits() {
3442   // This is called when we believe that we're going to do an infrequent
3443   // operation which will increase the per byte scanned cost (i.e. move
3444   // entries to/from the global stack). It basically tries to decrease the
3445   // scanning limit so that the clock is called earlier.
3446 
3447   if (_cm->verbose_medium()) {
3448     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3449   }
3450 
3451   _words_scanned_limit = _real_words_scanned_limit -
3452     3 * words_scanned_period / 4;
3453   _refs_reached_limit  = _real_refs_reached_limit -
3454     3 * refs_reached_period / 4;
3455 }
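// To make the adjustment above concrete: assuming a words_scanned_period of
// 12*1024 words (an assumed value for illustration), pulling the limit back
// by three quarters of a period means the next regular_clock_call() is
// triggered after only about a quarter of the usual amount of scanning work:
//
//   #include <cstddef>
//
//   static const size_t words_scanned_period = 12 * 1024;   // assumed period
//
//   // effective limit after decrease_limits(): the clock now fires after
//   // roughly period/4 more scanned words instead of a full period
//   static size_t decreased_words_limit(size_t real_limit) {
//     return real_limit - 3 * words_scanned_period / 4;
//   }
//
//   // e.g. decreased_words_limit(100000) == 100000 - 9216 == 90784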
3456 
3457 void CMTask::move_entries_to_global_stack() {
3458   // local array where we'll store the entries that will be popped
3459   // from the local queue
3460   oop buffer[global_stack_transfer_size];
3461 
3462   int n = 0;
3463   oop obj;
3464   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3465     buffer[n] = obj;
3466     ++n;
3467   }
3468 
3469   if (n > 0) {
3470     // we popped at least one entry from the local queue
3471 
3472     statsOnly( ++_global_transfers_to; _local_pops += n );
3473 
3474     if (!_cm->mark_stack_push(buffer, n)) {
3475       if (_cm->verbose_low()) {
3476         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
3477                                _task_id);
3478       }
3479       set_has_aborted();
3480     } else {
3481       // the transfer was successful
3482 
3483       if (_cm->verbose_medium()) {
3484         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3485                                _task_id, n);
3486       }
3487       statsOnly( int tmp_size = _cm->mark_stack_size();
3488                  if (tmp_size > _global_max_size) {
3489                    _global_max_size = tmp_size;
3490                  }
3491                  _global_pushes += n );
3492     }
3493   }
3494 
3495   // this operation was quite expensive, so decrease the limits
3496   decrease_limits();
3497 }
3498 
3499 void CMTask::get_entries_from_global_stack() {
3500   // local array where we'll store the entries that will be popped
3501   // from the global stack.
3502   oop buffer[global_stack_transfer_size];
3503   int n;
3504   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3505   assert(n <= global_stack_transfer_size,
3506          "we should not pop more than the given limit");
3507   if (n > 0) {
3508     // yes, we did actually pop at least one entry
3509 
3510     statsOnly( ++_global_transfers_from; _global_pops += n );
3511     if (_cm->verbose_medium()) {
3512       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3513                              _task_id, n);
3514     }
3515     for (int i = 0; i < n; ++i) {
3516       bool success = _task_queue->push(buffer[i]);
3517       // We only call this when the local queue is empty or under a
3518       // given target limit. So, we do not expect this push to fail.
3519       assert(success, "invariant");
3520     }
3521 
3522     statsOnly( int tmp_size = _task_queue->size();
3523                if (tmp_size > _local_max_size) {
3524                  _local_max_size = tmp_size;
3525                }
3526                _local_pushes += n );
3527   }
3528 
3529   // this operation was quite expensive, so decrease the limits
3530   decrease_limits();
3531 }
3532 
3533 void CMTask::drain_local_queue(bool partially) {
3534   if (has_aborted()) return;
3535 
3536   // Decide what the target size is, depending on whether we're going to
3537   // drain it partially (so that other tasks can steal if they run out
3538   // of things to do) or totally (at the very end).
3539   size_t target_size;
3540   if (partially) {
3541     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3542   } else {
3543     target_size = 0;
3544   }
3545 
3546   if (_task_queue->size() > target_size) {
3547     if (_cm->verbose_high()) {
3548       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3549                              _task_id, target_size);
3550     }
3551 
3552     oop obj;
3553     bool ret = _task_queue->pop_local(obj);
3554     while (ret) {
3555       statsOnly( ++_local_pops );
3556 
3557       if (_cm->verbose_high()) {
3558         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3559                                (void*) obj);
3560       }
3561 
3562       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3563       assert(!_g1h->is_on_master_free_list(
3564                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3565 
3566       scan_object(obj);
3567 
3568       if (_task_queue->size() <= target_size || has_aborted()) {
3569         ret = false;
3570       } else {
3571         ret = _task_queue->pop_local(obj);
3572       }
3573     }
3574 
3575     if (_cm->verbose_high()) {
3576       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3577                              _task_id, _task_queue->size());
3578     }
3579   }
3580 }
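// For a rough sense of the partial-drain target computed above: with a
// hypothetical queue capacity of 16384 entries and a GCDrainStackTargetSize
// of 64 (assumed here for illustration), the partial target is
// min(16384 / 3, 64) == 64, so a partial drain leaves up to 64 entries
// behind for other tasks to steal:
//
//   #include <algorithm>
//   #include <cstddef>
//
//   static size_t partial_drain_target(size_t max_elems,
//                                      size_t drain_stack_target) {
//     return std::min(max_elems / 3, drain_stack_target);
//   }
//
//   // partial_drain_target(16384, 64) == 64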
3581 
3582 void CMTask::drain_global_stack(bool partially) {
3583   if (has_aborted()) return;
3584 
3585   // We have a policy to drain the local queue before we attempt to
3586   // drain the global stack.
3587   assert(partially || _task_queue->size() == 0, "invariant");
3588 
3589   // Decide what the target size is, depending on whether we're going to
3590   // drain it partially (so that other tasks can steal if they run out
3591   // of things to do) or totally (at the very end).  Notice that,
3592   // because we move entries from the global stack in chunks or
3593   // because another task might be doing the same, we might in fact
3594   // drop below the target. But, this is not a problem.
3595   size_t target_size;
3596   if (partially) {
3597     target_size = _cm->partial_mark_stack_size_target();
3598   } else {
3599     target_size = 0;
3600   }
3601 
3602   if (_cm->mark_stack_size() > target_size) {
3603     if (_cm->verbose_low()) {
3604       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3605                              _task_id, target_size);
3606     }
3607 
3608     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3609       get_entries_from_global_stack();
3610       drain_local_queue(partially);
3611     }
3612 
3613     if (_cm->verbose_low()) {
3614       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3615                              _task_id, _cm->mark_stack_size());
3616     }
3617   }
3618 }
3619 
3620 // SATB Queue has several assumptions about whether to call the par or
3621 // non-par versions of the methods. This is why some of the code is
3622 // replicated. We should really get rid of the single-threaded version
3623 // of the code to simplify things.
3624 void CMTask::drain_satb_buffers() {
3625   if (has_aborted()) return;
3626 
3627   // We set this so that the regular clock knows that we're in the
3628   // middle of draining buffers and doesn't set the abort flag when it
3629   // notices that SATB buffers are available for draining. It'd be
3630   // very counterproductive if it did that. :-)
3631   _draining_satb_buffers = true;
3632 
3633   CMObjectClosure oc(this);
3634   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3635   if (G1CollectedHeap::use_parallel_gc_threads()) {
3636     satb_mq_set.set_par_closure(_task_id, &oc);
3637   } else {
3638     satb_mq_set.set_closure(&oc);
3639   }
3640 
3641   // This keeps claiming and applying the closure to completed buffers
3642   // until we run out of buffers or we need to abort.
3643   if (G1CollectedHeap::use_parallel_gc_threads()) {
3644     while (!has_aborted() &&
3645            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3646       if (_cm->verbose_medium()) {
3647         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3648       }
3649       statsOnly( ++_satb_buffers_processed );
3650       regular_clock_call();
3651     }
3652   } else {
3653     while (!has_aborted() &&
3654            satb_mq_set.apply_closure_to_completed_buffer()) {
3655       if (_cm->verbose_medium()) {
3656         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3657       }
3658       statsOnly( ++_satb_buffers_processed );
3659       regular_clock_call();
3660     }
3661   }
3662 
3663   if (!concurrent() && !has_aborted()) {
3664     // We should only do this during remark.
3665     if (G1CollectedHeap::use_parallel_gc_threads()) {
3666       satb_mq_set.par_iterate_closure_all_threads(_task_id);
3667     } else {
3668       satb_mq_set.iterate_closure_all_threads();
3669     }
3670   }
3671 
3672   _draining_satb_buffers = false;
3673 
3674   assert(has_aborted() ||
3675          concurrent() ||
3676          satb_mq_set.completed_buffers_num() == 0, "invariant");
3677 
3678   if (G1CollectedHeap::use_parallel_gc_threads()) {
3679     satb_mq_set.set_par_closure(_task_id, NULL);
3680   } else {
3681     satb_mq_set.set_closure(NULL);
3682   }
3683 
3684   // again, this was a potentially expensive operation, decrease the
3685   // limits to get the regular clock call early
3686   decrease_limits();
3687 }
3688 
3689 void CMTask::print_stats() {
3690   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3691                          _task_id, _calls);
3692   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3693                          _elapsed_time_ms, _termination_time_ms);
3694   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3695                          _step_times_ms.num(), _step_times_ms.avg(),
3696                          _step_times_ms.sd());
3697   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3698                          _step_times_ms.maximum(), _step_times_ms.sum());
3699 
3700 #if _MARKING_STATS_
3701   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3702                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3703                          _all_clock_intervals_ms.sd());
3704   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3705                          _all_clock_intervals_ms.maximum(),
3706                          _all_clock_intervals_ms.sum());
3707   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
3708                          _clock_due_to_scanning, _clock_due_to_marking);
3709   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
3710                          _objs_scanned, _objs_found_on_bitmap);
3711   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
3712                          _local_pushes, _local_pops, _local_max_size);
3713   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
3714                          _global_pushes, _global_pops, _global_max_size);
3715   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
3716                          _global_transfers_to,_global_transfers_from);
3717   gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);
3718   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
3719   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
3720                          _steal_attempts, _steals);
3721   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
3722   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
3723                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3724   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
3725                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3726 #endif // _MARKING_STATS_
3727 }
3728 
3729 /*****************************************************************************
3730 
3731     The do_marking_step(time_target_ms) method is the building block
3732     of the parallel marking framework. It can be called in parallel
3733     with other invocations of do_marking_step() on different tasks
3734     (but only one per task, obviously) and concurrently with the
3735     mutator threads, or during remark, hence it eliminates the need
3736     for two versions of the code. When called during remark, it will
3737     pick up from where the task left off during the concurrent marking
3738     phase. Interestingly, tasks are also claimable during evacuation
3739     pauses, since do_marking_step() ensures that it aborts before
3740     it needs to yield.
3741 
3742     The data structures that it uses to do marking work are the
3743     following:
3744 
3745       (1) Marking Bitmap. If there are gray objects that appear only
3746       on the bitmap (this happens either when dealing with an overflow
3747       or when the initial marking phase has simply marked the roots
3748       and didn't push them on the stack), then tasks claim heap
3749       regions whose bitmap they then scan to find gray objects. A
3750       global finger indicates where the end of the last claimed region
3751       is. A local finger indicates how far into the region a task has
3752       scanned. The two fingers are used to determine how to gray an
3753       object (i.e. whether simply marking it is OK, as it will be
3754       visited by a task in the future, or whether it needs to be also
3755       pushed on a stack).
3756 
3757       (2) Local Queue. The local queue of the task which is accessed
3758       reasonably efficiently by the task. Other tasks can steal from
3759       it when they run out of work. Throughout the marking phase, a
3760       task attempts to keep its local queue short but not totally
3761       empty, so that entries are available for stealing by other
3762       tasks. Only when there is no more work, a task will totally
3763       drain its local queue.
3764 
3765       (3) Global Mark Stack. This handles local queue overflow. During
3766       marking, only sets of entries are moved between it and the local
3767       queues, as access to it requires a mutex and more fine-grained
3768       interaction with it might cause contention. If it
3769       overflows, then the marking phase should restart and iterate
3770       over the bitmap to identify gray objects. Throughout the marking
3771       phase, tasks attempt to keep the global mark stack at a small
3772       length but not totally empty, so that entries are available for
3773       popping by other tasks. Only when there is no more work, tasks
3774       will totally drain the global mark stack.
3775 
3776       (4) SATB Buffer Queue. This is where completed SATB buffers are
3777       made available. Buffers are regularly removed from this queue
3778       and scanned for roots, so that the queue doesn't get too
3779       long. During remark, all completed buffers are processed, as
3780       well as the filled in parts of any uncompleted buffers.
3781 
3782     The do_marking_step() method tries to abort when the time target
3783     has been reached. There are a few other cases when the
3784     do_marking_step() method also aborts:
3785 
3786       (1) When the marking phase has been aborted (after a Full GC).
3787 
3788       (2) When a global overflow (on the global stack) has been
3789       triggered. Before the task aborts, it will actually sync up with
3790       the other tasks to ensure that all the marking data structures
3791       (local queues, stacks, fingers etc.)  are re-initialised so that
3792       when do_marking_step() completes, the marking phase can
3793       immediately restart.
3794 
3795       (3) When enough completed SATB buffers are available. The
3796       do_marking_step() method only tries to drain SATB buffers right
3797       at the beginning. So, if enough buffers are available, the
3798       marking step aborts and the SATB buffers are processed at
3799       the beginning of the next invocation.
3800 
3801       (4) To yield. When we have to yield, we abort and yield
3802       right at the end of do_marking_step(). This saves us from a lot
3803       of hassle as, by yielding we might allow a Full GC. If this
3804       happens then objects will be compacted underneath our feet, the
3805       heap might shrink, etc. We save checking for this by just
3806       aborting and doing the yield right at the end.
3807 
3808     From the above it follows that the do_marking_step() method should
3809     be called in a loop (or, otherwise, regularly) until it completes.
3810 
3811     If a marking step completes without its has_aborted() flag being
3812     true, it means it has completed the current marking phase (and
3813     also all other marking tasks have done so and have all synced up).
3814 
3815     A method called regular_clock_call() is invoked "regularly" (in
3816     sub ms intervals) throughout marking. It is this clock method that
3817     checks all the abort conditions which were mentioned above and
3818     decides when the task should abort. A work-based scheme is used to
3819     trigger this clock method: when the number of object words the
3820     marking phase has scanned or the number of references the marking
3821     phase has visited reaches a given limit. Additional invocations of
3822     the clock method have been planted in a few other strategic places
3823     too. The initial reason for the clock method was to avoid calling
3824     vtime too regularly, as it is quite expensive. So, once it was in
3825     place, it was natural to piggy-back all the other conditions on it
3826     too and not constantly check them throughout the code.
3827 
3828  *****************************************************************************/
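// A minimal sketch of the "call it in a loop until it completes" usage
// described above, with hypothetical arguments (the real drivers are the
// concurrent marking and remark tasks that invoke do_marking_step()):
//
//   static void drive_marking_step(CMTask* task, double target_ms,
//                                  bool do_stealing, bool do_termination) {
//     do {
//       task->do_marking_step(target_ms, do_stealing, do_termination);
//       // if the step aborted (overflow, yield, SATB buffers, time-out),
//       // the caller typically yields and/or synchronizes with the other
//       // tasks here before calling do_marking_step() again
//     } while (task->has_aborted());
//   }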
3829 
3830 void CMTask::do_marking_step(double time_target_ms,
3831                              bool do_stealing,
3832                              bool do_termination) {
3833   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3834   assert(concurrent() == _cm->concurrent(), "they should be the same");
3835 
3836   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3837   assert(_task_queues != NULL, "invariant");
3838   assert(_task_queue != NULL, "invariant");
3839   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
3840 
3841   assert(!_claimed,
3842          "only one thread should claim this task at any one time");
3843 
3844   // OK, this doesn't safeguard against all possible scenarios, as it is
3845   // possible for two threads to set the _claimed flag at the same
3846   // time. But it is only for debugging purposes anyway and it will
3847   // catch most problems.
3848   _claimed = true;
3849 
3850   _start_time_ms = os::elapsedVTime() * 1000.0;
3851   statsOnly( _interval_start_time_ms = _start_time_ms );
3852 
3853   double diff_prediction_ms =
3854     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3855   _time_target_ms = time_target_ms - diff_prediction_ms;
3856 
3857   // set up the variables that are used in the work-based scheme to
3858   // call the regular clock method
3859   _words_scanned = 0;
3860   _refs_reached  = 0;
3861   recalculate_limits();
3862 
3863   // clear all flags
3864   clear_has_aborted();
3865   _has_timed_out = false;
3866   _draining_satb_buffers = false;
3867 
3868   ++_calls;
3869 
3870   if (_cm->verbose_low()) {
3871     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3872                            "target = %1.2lfms >>>>>>>>>>",
3873                            _task_id, _calls, _time_target_ms);
3874   }
3875 
3876   // Set up the bitmap and oop closures. Anything that uses them is
3877   // eventually called from this method, so it is OK to allocate these
3878   // on the stack here.
3879   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3880   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3881   set_cm_oop_closure(&cm_oop_closure);
3882 
3883   if (_cm->has_overflown()) {
3884     // This can happen if the mark stack overflows during a GC pause
3885     // and this task, after a yield point, restarts. We have to abort
3886     // as we need to get into the overflow protocol which happens
3887     // right at the end of this task.
3888     set_has_aborted();
3889   }
3890 
3891   // First drain any available SATB buffers. After this, we will not
3892   // look at SATB buffers before the next invocation of this method.
3893   // If enough completed SATB buffers are queued up, the regular clock
3894   // will abort this task so that it restarts.
3895   drain_satb_buffers();
3896   // ...then partially drain the local queue and the global stack
3897   drain_local_queue(true);
3898   drain_global_stack(true);
3899 
3900   do {
3901     if (!has_aborted() && _curr_region != NULL) {
3902       // This means that we're already holding on to a region.
3903       assert(_finger != NULL, "if region is not NULL, then the finger "
3904              "should not be NULL either");
3905 
3906       // We might have restarted this task after an evacuation pause
3907       // which might have evacuated the region we're holding on to
3908       // underneath our feet. Let's read its limit again to make sure
3909       // that we do not iterate over a region of the heap that
3910       // contains garbage (update_region_limit() will also move
3911       // _finger to the start of the region if it is found empty).
3912       update_region_limit();
3913       // We will start from _finger not from the start of the region,
3914       // as we might be restarting this task after aborting half-way
3915       // through scanning this region. In this case, _finger points to
3916       // the address where we last found a marked object. If this is a
3917       // fresh region, _finger points to start().
3918       MemRegion mr = MemRegion(_finger, _region_limit);
3919 
3920       if (_cm->verbose_low()) {
3921         gclog_or_tty->print_cr("[%d] we're scanning part "
3922                                "["PTR_FORMAT", "PTR_FORMAT") "
3923                                "of region "PTR_FORMAT,
3924                                _task_id, _finger, _region_limit, _curr_region);
3925       }
3926 
3927       // Let's iterate over the bitmap of the part of the
3928       // region that is left.
3929       if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3930         // We successfully completed iterating over the region. Now,
3931         // let's give up the region.
3932         giveup_current_region();
3933         regular_clock_call();
3934       } else {
3935         assert(has_aborted(), "currently the only way to do so");
3936         // The only way to abort the bitmap iteration is to return
3937         // false from the do_bit() method. However, inside the
3938         // do_bit() method we move the _finger to point to the
3939         // object currently being looked at. So, if we bail out, we
3940         // have definitely set _finger to something non-null.
3941         assert(_finger != NULL, "invariant");
3942 
3943         // Region iteration was actually aborted. So now _finger
3944         // points to the address of the object we last scanned. If we
3945         // leave it there, when we restart this task, we will rescan
3946         // the object. It is easy to avoid this. We move the finger by
3947         // enough to point to the next possible object header (the
3948         // bitmap knows by how much we need to move it as it knows its
3949         // granularity).
3950         assert(_finger < _region_limit, "invariant");
3951         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
3952         // Check if bitmap iteration was aborted while scanning the last object
3953         if (new_finger >= _region_limit) {
3954           giveup_current_region();
3955         } else {
3956           move_finger_to(new_finger);
3957         }
3958       }
3959     }
3960     // At this point we have either completed iterating over the
3961     // region we were holding on to, or we have aborted.
3962 
3963     // We then partially drain the local queue and the global stack.
3964     // (Do we really need this?)
3965     drain_local_queue(true);
3966     drain_global_stack(true);
3967 
3968     // Read the note on the claim_region() method on why it might
3969     // return NULL with potentially more regions available for
3970     // claiming and why we have to check out_of_regions() to determine
3971     // whether we're done or not.
3972     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3973       // We are going to try to claim a new region. We should have
3974       // given up on the previous one.
3975       // Separated the asserts so that we know which one fires.
3976       assert(_curr_region  == NULL, "invariant");
3977       assert(_finger       == NULL, "invariant");
3978       assert(_region_limit == NULL, "invariant");
3979       if (_cm->verbose_low()) {
3980         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
3981       }
3982       HeapRegion* claimed_region = _cm->claim_region(_task_id);
3983       if (claimed_region != NULL) {
3984         // Yes, we managed to claim one
3985         statsOnly( ++_regions_claimed );
3986 
3987         if (_cm->verbose_low()) {
3988           gclog_or_tty->print_cr("[%d] we successfully claimed "
3989                                  "region "PTR_FORMAT,
3990                                  _task_id, claimed_region);
3991         }
3992 
3993         setup_for_region(claimed_region);
3994         assert(_curr_region == claimed_region, "invariant");
3995       }
3996       // It is important to call the regular clock here. It might take
3997       // a while to claim a region if, for example, we hit a large
3998       // block of empty regions. So we need to call the regular clock
3999       // method once round the loop to make sure it's called
4000       // frequently enough.
4001       regular_clock_call();
4002     }
4003 
4004     if (!has_aborted() && _curr_region == NULL) {
4005       assert(_cm->out_of_regions(),
4006              "at this point we should be out of regions");
4007     }
4008   } while ( _curr_region != NULL && !has_aborted());
4009 
4010   if (!has_aborted()) {
4011     // We cannot check whether the global stack is empty, since other
4012     // tasks might be pushing objects to it concurrently.
4013     assert(_cm->out_of_regions(),
4014            "at this point we should be out of regions");
4015 
4016     if (_cm->verbose_low()) {
4017       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4018     }
4019 
4020     // Try to reduce the number of available SATB buffers so that
4021     // remark has less work to do.
4022     drain_satb_buffers();
4023   }
4024 
4025   // Since we've done everything else, we can now totally drain the
4026   // local queue and global stack.
4027   drain_local_queue(false);
4028   drain_global_stack(false);
4029 
4030   // Attempt at work stealing from other tasks' queues.
4031   if (do_stealing && !has_aborted()) {
4032     // We have not aborted. This means that we have finished all that
4033     // we could. Let's try to do some stealing...
4034 
4035     // We cannot check whether the global stack is empty, since other
4036     // tasks might be pushing objects to it concurrently.
4037     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4038            "only way to reach here");
4039 
4040     if (_cm->verbose_low()) {
4041       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4042     }
4043 
4044     while (!has_aborted()) {
4045       oop obj;
4046       statsOnly( ++_steal_attempts );
4047 
4048       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4049         if (_cm->verbose_medium()) {
4050           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4051                                  _task_id, (void*) obj);
4052         }
4053 
4054         statsOnly( ++_steals );
4055 
4056         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4057                "any stolen object should be marked");
4058         scan_object(obj);
4059 
4060         // And since we're towards the end, let's totally drain the
4061         // local queue and global stack.
4062         drain_local_queue(false);
4063         drain_global_stack(false);
4064       } else {
4065         break;
4066       }
4067     }
4068   }
4069 
4070   // If we are about to wrap up and go into termination, check if we
4071   // should raise the overflow flag.
4072   if (do_termination && !has_aborted()) {
4073     if (_cm->force_overflow()->should_force()) {
4074       _cm->set_has_overflown();
4075       regular_clock_call();
4076     }
4077   }
4078 
4079   // We still haven't aborted. Now, let's try to get into the
4080   // termination protocol.
4081   if (do_termination && !has_aborted()) {
4082     // We cannot check whether the global stack is empty, since other
4083     // tasks might be concurrently pushing objects on it.
4084     // Separated the asserts so that we know which one fires.
4085     assert(_cm->out_of_regions(), "only way to reach here");
4086     assert(_task_queue->size() == 0, "only way to reach here");
4087 
4088     if (_cm->verbose_low()) {
4089       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4090     }
4091 
4092     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4093     // The CMTask class also extends the TerminatorTerminator class,
4094     // hence its should_exit_termination() method will also decide
4095     // whether to exit the termination protocol or not.
4096     bool finished = _cm->terminator()->offer_termination(this);
4097     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4098     _termination_time_ms +=
4099       termination_end_time_ms - _termination_start_time_ms;
4100 
4101     if (finished) {
4102       // We're all done.
4103 
4104       if (_task_id == 0) {
4105         // let's allow task 0 to do this
4106         if (concurrent()) {
4107           assert(_cm->concurrent_marking_in_progress(), "invariant");
4108           // we need to set this to false before the next
4109           // safepoint. This way we ensure that the marking phase
4110           // doesn't observe any more heap expansions.
4111           _cm->clear_concurrent_marking_in_progress();
4112         }
4113       }
4114 
4115       // We can now guarantee that the global stack is empty, since
4116       // all other tasks have finished. We separated the guarantees so
4117       // that, if a condition is false, we can immediately find out
4118       // which one.
4119       guarantee(_cm->out_of_regions(), "only way to reach here");
4120       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4121       guarantee(_task_queue->size() == 0, "only way to reach here");
4122       guarantee(!_cm->has_overflown(), "only way to reach here");
4123       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4124 
4125       if (_cm->verbose_low()) {
4126         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4127       }
4128     } else {
4129       // Apparently there's more work to do. Let's abort this task. It
4130       // will restart it and we can hopefully find more things to do.
4131 
4132       if (_cm->verbose_low()) {
4133         gclog_or_tty->print_cr("[%d] apparently there is more work to do",
4134                                _task_id);
4135       }
4136 
4137       set_has_aborted();
4138       statsOnly( ++_aborted_termination );
4139     }
4140   }
4141 
4142   // Mainly for debugging purposes to make sure that a pointer to the
4143   // closure which was stack allocated in this frame doesn't
4144   // escape it by accident.
4145   set_cm_oop_closure(NULL);
4146   double end_time_ms = os::elapsedVTime() * 1000.0;
4147   double elapsed_time_ms = end_time_ms - _start_time_ms;
4148   // Update the step history.
4149   _step_times_ms.add(elapsed_time_ms);
4150 
4151   if (has_aborted()) {
4152     // The task was aborted for some reason.
4153 
4154     statsOnly( ++_aborted );
4155 
4156     if (_has_timed_out) {
4157       double diff_ms = elapsed_time_ms - _time_target_ms;
4158       // Keep statistics of how well we did with respect to hitting
4159       // our target only if we actually timed out (if we aborted for
4160       // other reasons, then the results might get skewed).
4161       _marking_step_diffs_ms.add(diff_ms);
4162     }
4163 
4164     if (_cm->has_overflown()) {
4165       // This is the interesting one. We aborted because a global
4166       // overflow was raised. This means we have to restart the
4167       // marking phase and start iterating over regions. However, in
4168       // order to do this we have to make sure that all tasks stop
4169       // what they are doing and re-initialise in a safe manner. We
4170       // will achieve this with the use of two barrier sync points.
4171 
4172       if (_cm->verbose_low()) {
4173         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4174       }
4175 
4176       _cm->enter_first_sync_barrier(_task_id);
4177       // When we exit this sync barrier we know that all tasks have
4178       // stopped doing marking work. So, it's now safe to
4179       // re-initialise our data structures. At the end of this method,
4180       // task 0 will clear the global data structures.
4181 
4182       statsOnly( ++_aborted_overflow );
4183 
4184       // We clear the local state of this task...
4185       clear_region_fields();
4186 
4187       // ...and enter the second barrier.
4188       _cm->enter_second_sync_barrier(_task_id);
4189       // At this point everything has been re-initialised and we're
4190       // ready to restart.
4191     }
4192 
4193     if (_cm->verbose_low()) {
4194       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4195                              "elapsed = %1.2lfms <<<<<<<<<<",
4196                              _task_id, _time_target_ms, elapsed_time_ms);
4197       if (_cm->has_aborted()) {
4198         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4199                                _task_id);
4200       }
4201     }
4202   } else {
4203     if (_cm->verbose_low()) {
4204       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4205                              "elapsed = %1.2lfms <<<<<<<<<<",
4206                              _task_id, _time_target_ms, elapsed_time_ms);
4207     }
4208   }
4209 
4210   _claimed = false;
4211 }
4212 
4213 CMTask::CMTask(int task_id,
4214                ConcurrentMark* cm,
4215                size_t* marked_bytes,
4216                BitMap* card_bm,
4217                CMTaskQueue* task_queue,
4218                CMTaskQueueSet* task_queues)
4219   : _g1h(G1CollectedHeap::heap()),
4220     _task_id(task_id), _cm(cm),
4221     _claimed(false),
4222     _nextMarkBitMap(NULL), _hash_seed(17),
4223     _task_queue(task_queue),
4224     _task_queues(task_queues),
4225     _cm_oop_closure(NULL),
4226     _marked_bytes_array(marked_bytes),
4227     _card_bm(card_bm) {
4228   guarantee(task_queue != NULL, "invariant");
4229   guarantee(task_queues != NULL, "invariant");
4230 
4231   statsOnly( _clock_due_to_scanning = 0;
4232              _clock_due_to_marking  = 0 );
4233 
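  // Seed the step diff statistics with a small initial sample, presumably
  // so that the very first marking step does not start from an empty
  // history.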
4234   _marking_step_diffs_ms.add(0.5);
4235 }
4236 
4237 // These are formatting macros that are used below to ensure
4238 // consistent formatting. The *_H_* versions are used to format the
4239 // header for a particular value and they should be kept consistent
4240 // with the corresponding macro. Also note that most of the macros add
4241 // the necessary white space (as a prefix) which makes them a bit
4242 // easier to compose.
4243 
4244 // All the output lines are prefixed with this string to be able to
4245 // identify them easily in a large log file.
4246 #define G1PPRL_LINE_PREFIX            "###"
4247 
4248 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
4249 #ifdef _LP64
4250 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4251 #else // _LP64
4252 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4253 #endif // _LP64
4254 
4255 // For per-region info
4256 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4257 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4258 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
4259 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4260 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4261 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4262 
4263 // For summary info
4264 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
4265 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
4266 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
4267 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
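
// A rough sketch of how these macros compose (illustrative only, not
// verbatim output): concatenating G1PPRL_LINE_PREFIX with the value
// macros produces a per-region line along the lines of
//   ###   OLD  0x00000000f0000000-0x00000000f0100000     131072 ...
// while the matching *_H_* macros produce the aligned column headers
// that the constructor below prints before the per-region lines.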
4268 
4269 G1PrintRegionLivenessInfoClosure::
4270 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4271   : _out(out),
4272     _total_used_bytes(0), _total_capacity_bytes(0),
4273     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4274     _hum_used_bytes(0), _hum_capacity_bytes(0),
4275     _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4276   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4277   MemRegion g1_committed = g1h->g1_committed();
4278   MemRegion g1_reserved = g1h->g1_reserved();
4279   double now = os::elapsedTime();
4280 
4281   // Print the header of the output.
4282   _out->cr();
4283   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4284   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4285                  G1PPRL_SUM_ADDR_FORMAT("committed")
4286                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4287                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4288                  g1_committed.start(), g1_committed.end(),
4289                  g1_reserved.start(), g1_reserved.end(),
4290                  HeapRegion::GrainBytes);
4291   _out->print_cr(G1PPRL_LINE_PREFIX);
4292   _out->print_cr(G1PPRL_LINE_PREFIX
4293                  G1PPRL_TYPE_H_FORMAT
4294                  G1PPRL_ADDR_BASE_H_FORMAT
4295                  G1PPRL_BYTE_H_FORMAT
4296                  G1PPRL_BYTE_H_FORMAT
4297                  G1PPRL_BYTE_H_FORMAT
4298                  G1PPRL_DOUBLE_H_FORMAT,
4299                  "type", "address-range",
4300                  "used", "prev-live", "next-live", "gc-eff");
4301   _out->print_cr(G1PPRL_LINE_PREFIX
4302                  G1PPRL_TYPE_H_FORMAT
4303                  G1PPRL_ADDR_BASE_H_FORMAT
4304                  G1PPRL_BYTE_H_FORMAT
4305                  G1PPRL_BYTE_H_FORMAT
4306                  G1PPRL_BYTE_H_FORMAT
4307                  G1PPRL_DOUBLE_H_FORMAT,
4308                  "", "",
4309                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4310 }
4311 
4312 // It takes as a parameter a pointer to one of the _hum_* fields, deduces
4313 // the corresponding value for a region in a humongous region series
4314 // (either the region size, or what's left if the _hum_* field is
4315 // < the region size), and updates the _hum_* field accordingly.
4316 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4317   size_t bytes = 0;
4318   // The > 0 check is to deal with the prev and next live bytes which
4319   // could be 0.
4320   if (*hum_bytes > 0) {
4321     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4322     *hum_bytes -= bytes;
4323   }
4324   return bytes;
4325 }
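
// A worked example of the above (illustrative numbers, assuming a 1M
// region size): with *hum_bytes initially 2560K, three successive calls
// return 1024K, 1024K and 512K respectively, after which *hum_bytes is 0
// and any further calls return 0.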
4326 
4327 // It deduces the values for a region in a humongous region series
4328 // from the _hum_* fields and updates those accordingly. It assumes
4329 // that the _hum_* fields have already been set up from the "starts
4330 // humongous" region and that we visit the regions in address order.
4331 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4332                                                      size_t* capacity_bytes,
4333                                                      size_t* prev_live_bytes,
4334                                                      size_t* next_live_bytes) {
4335   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4336   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4337   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4338   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4339   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4340 }
4341 
4342 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4343   const char* type = "";
4344   HeapWord* bottom       = r->bottom();
4345   HeapWord* end          = r->end();
4346   size_t capacity_bytes  = r->capacity();
4347   size_t used_bytes      = r->used();
4348   size_t prev_live_bytes = r->live_bytes();
4349   size_t next_live_bytes = r->next_live_bytes();
4350   double gc_eff          = r->gc_efficiency();
4351   if (r->used() == 0) {
4352     type = "FREE";
4353   } else if (r->is_survivor()) {
4354     type = "SURV";
4355   } else if (r->is_young()) {
4356     type = "EDEN";
4357   } else if (r->startsHumongous()) {
4358     type = "HUMS";
4359 
4360     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4361            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4362            "they should have been zeroed after the last time we used them");
4363     // Set up the _hum_* fields.
4364     _hum_capacity_bytes  = capacity_bytes;
4365     _hum_used_bytes      = used_bytes;
4366     _hum_prev_live_bytes = prev_live_bytes;
4367     _hum_next_live_bytes = next_live_bytes;
4368     get_hum_bytes(&used_bytes, &capacity_bytes,
4369                   &prev_live_bytes, &next_live_bytes);
4370     end = bottom + HeapRegion::GrainWords;
4371   } else if (r->continuesHumongous()) {
4372     type = "HUMC";
4373     get_hum_bytes(&used_bytes, &capacity_bytes,
4374                   &prev_live_bytes, &next_live_bytes);
4375     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4376   } else {
4377     type = "OLD";
4378   }
4379 
4380   _total_used_bytes      += used_bytes;
4381   _total_capacity_bytes  += capacity_bytes;
4382   _total_prev_live_bytes += prev_live_bytes;
4383   _total_next_live_bytes += next_live_bytes;
4384 
4385   // Print a line for this particular region.
4386   _out->print_cr(G1PPRL_LINE_PREFIX
4387                  G1PPRL_TYPE_FORMAT
4388                  G1PPRL_ADDR_BASE_FORMAT
4389                  G1PPRL_BYTE_FORMAT
4390                  G1PPRL_BYTE_FORMAT
4391                  G1PPRL_BYTE_FORMAT
4392                  G1PPRL_DOUBLE_FORMAT,
4393                  type, bottom, end,
4394                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4395 
4396   return false;
4397 }
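
// For example (illustrative): with a 1M region size, a 2560K humongous
// object spans one "starts humongous" (HUMS) region and two "continues
// humongous" (HUMC) regions. The doHeapRegion() call for the HUMS region
// caches the series totals in the _hum_* fields, and each of the three
// calls then draws at most GrainBytes from them, so the used column
// reads 1024K, 1024K and 512K rather than charging the whole object to
// the first region.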
4398 
4399 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4400   // Print the footer of the output.
4401   _out->print_cr(G1PPRL_LINE_PREFIX);
4402   _out->print_cr(G1PPRL_LINE_PREFIX
4403                  " SUMMARY"
4404                  G1PPRL_SUM_MB_FORMAT("capacity")
4405                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4406                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4407                  G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4408                  bytes_to_mb(_total_capacity_bytes),
4409                  bytes_to_mb(_total_used_bytes),
4410                  perc(_total_used_bytes, _total_capacity_bytes),
4411                  bytes_to_mb(_total_prev_live_bytes),
4412                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4413                  bytes_to_mb(_total_next_live_bytes),
4414                  perc(_total_next_live_bytes, _total_capacity_bytes));
4415   _out->cr();
4416 }