1 /*
   2  * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc_implementation/g1/concurrentMark.inline.hpp"
  30 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  31 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  32 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  33 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
  34 #include "gc_implementation/g1/g1Log.hpp"
  35 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
  36 #include "gc_implementation/g1/g1RemSet.hpp"
  37 #include "gc_implementation/g1/g1StringDedup.hpp"
  38 #include "gc_implementation/g1/heapRegion.inline.hpp"
  39 #include "gc_implementation/g1/heapRegionManager.inline.hpp"
  40 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  41 #include "gc_implementation/g1/heapRegionSet.inline.hpp"
  42 #include "gc_implementation/shared/vmGCOperations.hpp"
  43 #include "gc_implementation/shared/gcTimer.hpp"
  44 #include "gc_implementation/shared/gcTrace.hpp"
  45 #include "gc_implementation/shared/gcTraceTime.hpp"
  46 #include "memory/allocation.hpp"
  47 #include "memory/genOopClosures.inline.hpp"
  48 #include "memory/referencePolicy.hpp"
  49 #include "memory/resourceArea.hpp"
  50 #include "memory/strongRootsScope.hpp"
  51 #include "oops/oop.inline.hpp"
  52 #include "runtime/handles.inline.hpp"
  53 #include "runtime/java.hpp"
  54 #include "runtime/atomic.inline.hpp"
  55 #include "runtime/prefetch.inline.hpp"
  56 #include "services/memTracker.hpp"
  57 #include "utilities/taskqueue.inline.hpp"
  58 
  59 // Concurrent marking bit map wrapper
  60 
  61 CMBitMapRO::CMBitMapRO(int shifter) :
  62   _bm(),
  63   _shifter(shifter) {
  64   _bmStartWord = 0;
  65   _bmWordSize = 0;
  66 }
  67 
  68 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  69                                                const HeapWord* limit) const {
  70   // First we must round addr *up* to a possible object boundary.
  71   addr = (HeapWord*)align_size_up((intptr_t)addr,
  72                                   HeapWordSize << _shifter);
  73   size_t addrOffset = heapWordToOffset(addr);
  74   if (limit == NULL) {
  75     limit = _bmStartWord + _bmWordSize;
  76   }
  77   size_t limitOffset = heapWordToOffset(limit);
  78   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  79   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  80   assert(nextAddr >= addr, "get_next_one postcondition");
  81   assert(nextAddr == limit || isMarked(nextAddr),
  82          "get_next_one postcondition");
  83   return nextAddr;
  84 }
  85 
  86 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
  87                                                  const HeapWord* limit) const {
  88   size_t addrOffset = heapWordToOffset(addr);
  89   if (limit == NULL) {
  90     limit = _bmStartWord + _bmWordSize;
  91   }
  92   size_t limitOffset = heapWordToOffset(limit);
  93   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  94   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  95   assert(nextAddr >= addr, "get_next_one postcondition");
  96   assert(nextAddr == limit || !isMarked(nextAddr),
  97          "get_next_one postcondition");
  98   return nextAddr;
  99 }
 100 
 101 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 102   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 103   return (int) (diff >> _shifter);
 104 }
 105 
 106 #ifndef PRODUCT
 107 bool CMBitMapRO::covers(MemRegion heap_rs) const {
 108   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 109   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
 110          "size inconsistency");
 111   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
 112          _bmWordSize  == heap_rs.word_size();
 113 }
 114 #endif
 115 
 116 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
 117   _bm.print_on_error(st, prefix);
 118 }
 119 
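     // Sizing helpers: each bitmap bit covers MinObjAlignmentInBytes of heap, so
     // one bitmap byte covers mark_distance() = MinObjAlignmentInBytes * BitsPerByte
     // heap bytes, and the bitmap itself needs heap_size / mark_distance() bytes
     // (rounded up to the reservation alignment).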
 120 size_t CMBitMap::compute_size(size_t heap_size) {
 121   return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
 122 }
 123 
 124 size_t CMBitMap::mark_distance() {
 125   return MinObjAlignmentInBytes * BitsPerByte;
 126 }
 127 
 128 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 129   _bmStartWord = heap.start();
 130   _bmWordSize = heap.word_size();
 131 
 132   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
 133   _bm.set_size(_bmWordSize >> _shifter);
 134 
 135   storage->set_mapping_changed_listener(&_listener);
 136 }
 137 
 138 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 139   if (zero_filled) {
 140     return;
 141   }
 142   // We need to clear the bitmap on commit, removing any existing information.
 143   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 144   _bm->clearRange(mr);
 145 }
 146 
 147 // Closure used for clearing the given mark bitmap.
 148 class ClearBitmapHRClosure : public HeapRegionClosure {
 149  private:
 150   ConcurrentMark* _cm;
 151   CMBitMap* _bitmap;
 152   bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 153  public:
 154   ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
 155     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
 156   }
 157 
 158   virtual bool doHeapRegion(HeapRegion* r) {
 159     size_t const chunk_size_in_words = M / HeapWordSize;
 160 
 161     HeapWord* cur = r->bottom();
 162     HeapWord* const end = r->end();
 163 
 164     while (cur < end) {
 165       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 166       _bitmap->clearRange(mr);
 167 
 168       cur += chunk_size_in_words;
 169 
 170       // Abort iteration if after yielding the marking has been aborted.
 171       if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
 172         return true;
 173       }
 174       // Repeat the asserts from before the start of the closure. We will do them
 175       // as asserts here to minimize their overhead on the product. However, we
 176       // will have them as guarantees at the beginning / end of the bitmap
 177       // clearing to get some checking in the product.
 178       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
 179       assert(!_may_yield || !G1CollectedHeap::heap()->mark_in_progress(), "invariant");
 180     }
 181 
 182     return false;
 183   }
 184 };
 185 
 186 class ParClearNextMarkBitmapTask : public AbstractGangTask {
 187   ClearBitmapHRClosure* _cl;
 188   HeapRegionClaimer     _hrclaimer;
 189   bool                  _suspendible; // If the task is suspendible, workers must join the STS.
 190 
 191 public:
 192   ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
 193       AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}
 194 
 195   void work(uint worker_id) {
 196     SuspendibleThreadSetJoiner sts_join(_suspendible);
 197     G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
 198   }
 199 };
 200 
 201 void CMBitMap::clearAll() {
 202   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 203   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
 204   uint n_workers = g1h->workers()->active_workers();
 205   ParClearNextMarkBitmapTask task(&cl, n_workers, false);
 206   g1h->workers()->run_task(&task);
 207   guarantee(cl.complete(), "Must have completed iteration.");
 208   return;
 209 }
 210 
 211 void CMBitMap::markRange(MemRegion mr) {
 212   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 213   assert(!mr.is_empty(), "unexpected empty region");
 214   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 215           ((HeapWord *) mr.end())),
 216          "markRange memory region end is not card aligned");
 217   // convert address range into offset range
 218   _bm.at_put_range(heapWordToOffset(mr.start()),
 219                    heapWordToOffset(mr.end()), true);
 220 }
 221 
 222 void CMBitMap::clearRange(MemRegion mr) {
 223   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 224   assert(!mr.is_empty(), "unexpected empty region");
 225   // convert address range into offset range
 226   _bm.at_put_range(heapWordToOffset(mr.start()),
 227                    heapWordToOffset(mr.end()), false);
 228 }
 229 
 230 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 231                                             HeapWord* end_addr) {
 232   HeapWord* start = getNextMarkedWordAddress(addr);
 233   start = MIN2(start, end_addr);
 234   HeapWord* end   = getNextUnmarkedWordAddress(start);
 235   end = MIN2(end, end_addr);
 236   assert(start <= end, "Consistency check");
 237   MemRegion mr(start, end);
 238   if (!mr.is_empty()) {
 239     clearRange(mr);
 240   }
 241   return mr;
 242 }
 243 
 244 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 245   _base(NULL), _cm(cm)
 246 #ifdef ASSERT
 247   , _drain_in_progress(false)
 248   , _drain_in_progress_yields(false)
 249 #endif
 250 {}
 251 
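     // Reserve and commit the virtual-memory backing store for a stack of
     // 'capacity' oop entries. Returns false if either the reservation or the
     // commit fails, releasing any partial reservation in the latter case.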
 252 bool CMMarkStack::allocate(size_t capacity) {
 253   // allocate a stack of the requisite depth
 254   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 255   if (!rs.is_reserved()) {
 256     warning("ConcurrentMark MarkStack allocation failure");
 257     return false;
 258   }
 259   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 260   if (!_virtual_space.initialize(rs, rs.size())) {
 261     warning("ConcurrentMark MarkStack backing store failure");
 262     // Release the virtual memory reserved for the marking stack
 263     rs.release();
 264     return false;
 265   }
 266   assert(_virtual_space.committed_size() == rs.size(),
 267          "Didn't reserve backing store for all of ConcurrentMark stack?");
 268   _base = (oop*) _virtual_space.low();
 269   setEmpty();
 270   _capacity = (jint) capacity;
 271   _saved_index = -1;
 272   _should_expand = false;
 273   NOT_PRODUCT(_max_depth = 0);
 274   return true;
 275 }
 276 
 277 void CMMarkStack::expand() {
 278   // Called during remark if we've overflowed the marking stack during marking.
 279   assert(isEmpty(), "stack should have been emptied while handling overflow");
 280   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 281   // Clear expansion flag
 282   _should_expand = false;
 283   if (_capacity == (jint) MarkStackSizeMax) {
 284     if (PrintGCDetails && Verbose) {
 285       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
 286     }
 287     return;
 288   }
 289   // Double capacity if possible
 290   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
 291   // Do not give up the existing stack until we have managed to
 292   // reserve the doubled capacity that we desire.
 293   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 294                                                            sizeof(oop)));
 295   if (rs.is_reserved()) {
 296     // Release the backing store associated with old stack
 297     _virtual_space.release();
 298     // Reinitialize virtual space for new stack
 299     if (!_virtual_space.initialize(rs, rs.size())) {
 300       fatal("Not enough swap for expanded marking stack capacity");
 301     }
 302     _base = (oop*)(_virtual_space.low());
 303     _index = 0;
 304     _capacity = new_capacity;
 305   } else {
 306     if (PrintGCDetails && Verbose) {
 307       // Failed to double capacity, continue;
 308       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
 309                           SIZE_FORMAT"K to " SIZE_FORMAT"K",
 310                           _capacity / K, new_capacity / K);
 311     }
 312   }
 313 }
 314 
 315 void CMMarkStack::set_should_expand() {
 316   // If we're resetting the marking state because of a
 317   // marking stack overflow, record that we should, if
 318   // possible, expand the stack.
 319   _should_expand = _cm->has_overflown();
 320 }
 321 
 322 CMMarkStack::~CMMarkStack() {
 323   if (_base != NULL) {
 324     _base = NULL;
 325     _virtual_space.release();
 326   }
 327 }
 328 
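     // Lock-free push of a single entry: claim a slot by CAS-ing _index forward,
     // then store the oop into the claimed slot. If the stack is full we only set
     // the overflow flag; the caller is expected to notice and handle it.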
 329 void CMMarkStack::par_push(oop ptr) {
 330   while (true) {
 331     if (isFull()) {
 332       _overflow = true;
 333       return;
 334     }
 335     // Otherwise...
 336     jint index = _index;
 337     jint next_index = index+1;
 338     jint res = Atomic::cmpxchg(next_index, &_index, index);
 339     if (res == index) {
 340       _base[index] = ptr;
 341       // Note that we don't maintain this atomically.  We could, but it
 342       // doesn't seem necessary.
 343       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 344       return;
 345     }
 346     // Otherwise, we need to try again.
 347   }
 348 }
 349 
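     // Lock-free bulk push of n entries, again by CAS-ing _index forward. As with
     // par_push(), running out of space just sets the overflow flag.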
 350 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 351   while (true) {
 352     if (isFull()) {
 353       _overflow = true;
 354       return;
 355     }
 356     // Otherwise...
 357     jint index = _index;
 358     jint next_index = index + n;
 359     if (next_index > _capacity) {
 360       _overflow = true;
 361       return;
 362     }
 363     jint res = Atomic::cmpxchg(next_index, &_index, index);
 364     if (res == index) {
 365       for (int i = 0; i < n; i++) {
 366         int  ind = index + i;
 367         assert(ind < _capacity, "By overflow test above.");
 368         _base[ind] = ptr_arr[i];
 369       }
 370       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 371       return;
 372     }
 373     // Otherwise, we need to try again.
 374   }
 375 }
 376 
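     // par_push_arr() and par_pop_arr() below move whole batches of entries and
     // are serialized with ParGCRareEvent_lock instead of using a CAS loop.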
 377 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 378   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 379   jint start = _index;
 380   jint next_index = start + n;
 381   if (next_index > _capacity) {
 382     _overflow = true;
 383     return;
 384   }
 385   // Otherwise.
 386   _index = next_index;
 387   for (int i = 0; i < n; i++) {
 388     int ind = start + i;
 389     assert(ind < _capacity, "By overflow test above.");
 390     _base[ind] = ptr_arr[i];
 391   }
 392   NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 393 }
 394 
 395 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 396   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 397   jint index = _index;
 398   if (index == 0) {
 399     *n = 0;
 400     return false;
 401   } else {
 402     int k = MIN2(max, index);
 403     jint  new_ind = index - k;
 404     for (int j = 0; j < k; j++) {
 405       ptr_arr[j] = _base[new_ind + j];
 406     }
 407     _index = new_ind;
 408     *n = k;
 409     return true;
 410   }
 411 }
 412 
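     // Drain the stack, applying 'cl' to each popped object by iterating over its
     // fields. If 'yield_after' is true we check the yield flag after each object
     // and return false if we bailed out before the stack was empty.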
 413 template<class OopClosureClass>
 414 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 415   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 416          || SafepointSynchronize::is_at_safepoint(),
 417          "Drain recursion must be yield-safe.");
 418   bool res = true;
 419   debug_only(_drain_in_progress = true);
 420   debug_only(_drain_in_progress_yields = yield_after);
 421   while (!isEmpty()) {
 422     oop newOop = pop();
 423     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 424     assert(newOop->is_oop(), "Expected an oop");
 425     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 426            "only grey objects on this stack");
 427     newOop->oop_iterate(cl);
 428     if (yield_after && _cm->do_yield_check()) {
 429       res = false;
 430       break;
 431     }
 432   }
 433   debug_only(_drain_in_progress = false);
 434   return res;
 435 }
 436 
 437 void CMMarkStack::note_start_of_gc() {
 438   assert(_saved_index == -1,
 439          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 440   _saved_index = _index;
 441 }
 442 
 443 void CMMarkStack::note_end_of_gc() {
 444   // This is intentionally a guarantee, instead of an assert. If we
 445   // accidentally add something to the mark stack during GC, it
 446   // will be a correctness issue, so it's better if we crash. We'll
 447   // only check this once per GC anyway, so it won't be a performance
 448   // issue in any way.
 449   guarantee(_saved_index == _index,
 450             err_msg("saved index: %d index: %d", _saved_index, _index));
 451   _saved_index = -1;
 452 }
 453 
 454 void CMMarkStack::oops_do(OopClosure* f) {
 455   assert(_saved_index == _index,
 456          err_msg("saved index: %d index: %d", _saved_index, _index));
 457   for (int i = 0; i < _index; i += 1) {
 458     f->do_oop(&_base[i]);
 459   }
 460 }
 461 
 462 CMRootRegions::CMRootRegions() :
 463   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 464   _should_abort(false),  _next_survivor(NULL) { }
 465 
 466 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 467   _young_list = g1h->young_list();
 468   _cm = cm;
 469 }
 470 
 471 void CMRootRegions::prepare_for_scan() {
 472   assert(!scan_in_progress(), "pre-condition");
 473 
 474   // Currently, only survivors can be root regions.
 475   assert(_next_survivor == NULL, "pre-condition");
 476   _next_survivor = _young_list->first_survivor_region();
 477   _scan_in_progress = (_next_survivor != NULL);
 478   _should_abort = false;
 479 }
 480 
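     // Claim the next root region to scan (currently always a survivor region).
     // Returns NULL when there is nothing left to claim or when an abort has
     // been requested.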
 481 HeapRegion* CMRootRegions::claim_next() {
 482   if (_should_abort) {
 483     // If someone has set the should_abort flag, we return NULL to
 484     // force the caller to bail out of their loop.
 485     return NULL;
 486   }
 487 
 488   // Currently, only survivors can be root regions.
 489   HeapRegion* res = _next_survivor;
 490   if (res != NULL) {
 491     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 492     // Read it again in case it changed while we were waiting for the lock.
 493     res = _next_survivor;
 494     if (res != NULL) {
 495       if (res == _young_list->last_survivor_region()) {
 496         // We just claimed the last survivor so store NULL to indicate
 497         // that we're done.
 498         _next_survivor = NULL;
 499       } else {
 500         _next_survivor = res->get_next_young_region();
 501       }
 502     } else {
 503       // Someone else claimed the last survivor while we were trying
 504       // to take the lock so nothing else to do.
 505     }
 506   }
 507   assert(res == NULL || res->is_survivor(), "post-condition");
 508 
 509   return res;
 510 }
 511 
 512 void CMRootRegions::scan_finished() {
 513   assert(scan_in_progress(), "pre-condition");
 514 
 515   // Currently, only survivors can be root regions.
 516   if (!_should_abort) {
 517     assert(_next_survivor == NULL, "we should have claimed all survivors");
 518   }
 519   _next_survivor = NULL;
 520 
 521   {
 522     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 523     _scan_in_progress = false;
 524     RootRegionScan_lock->notify_all();
 525   }
 526 }
 527 
 528 bool CMRootRegions::wait_until_scan_finished() {
 529   if (!scan_in_progress()) return false;
 530 
 531   {
 532     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 533     while (scan_in_progress()) {
 534       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 535     }
 536   }
 537   return true;
 538 }
 539 
 540 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 541 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 542 #endif // _MSC_VER
 543 
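     // Derive a default number of concurrent marking threads from the number of
     // parallel GC threads: roughly a quarter of them, and always at least one.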
 544 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 545   return MAX2((n_par_threads + 2) / 4, 1U);
 546 }
 547 
 548 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 549   _g1h(g1h),
 550   _markBitMap1(),
 551   _markBitMap2(),
 552   _parallel_marking_threads(0),
 553   _max_parallel_marking_threads(0),
 554   _sleep_factor(0.0),
 555   _marking_task_overhead(1.0),
 556   _cleanup_sleep_factor(0.0),
 557   _cleanup_task_overhead(1.0),
 558   _cleanup_list("Cleanup List"),
 559   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 560   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
 561             CardTableModRefBS::card_shift,
 562             false /* in_resource_area*/),
 563 
 564   _prevMarkBitMap(&_markBitMap1),
 565   _nextMarkBitMap(&_markBitMap2),
 566 
 567   _markStack(this),
 568   // _finger set in set_non_marking_state
 569 
 570   _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
 571   // _active_tasks set in set_non_marking_state
 572   // _tasks set inside the constructor
 573   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 574   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 575 
 576   _has_overflown(false),
 577   _concurrent(false),
 578   _has_aborted(false),
 579   _aborted_gc_id(GCId::undefined()),
 580   _restart_for_overflow(false),
 581   _concurrent_marking_in_progress(false),
 582 
 583   // _verbose_level set below
 584 
 585   _init_times(),
 586   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 587   _cleanup_times(),
 588   _total_counting_time(0.0),
 589   _total_rs_scrub_time(0.0),
 590 
 591   _parallel_workers(NULL),
 592 
 593   _count_card_bitmaps(NULL),
 594   _count_marked_bytes(NULL),
 595   _completed_initialization(false) {
 596   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 597   if (verbose_level < no_verbose) {
 598     verbose_level = no_verbose;
 599   }
 600   if (verbose_level > high_verbose) {
 601     verbose_level = high_verbose;
 602   }
 603   _verbose_level = verbose_level;
 604 
 605   if (verbose_low()) {
 606     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 607                            "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
 608   }
 609 
 610   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 611   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 612 
 613   // Create & start a ConcurrentMark thread.
 614   _cmThread = new ConcurrentMarkThread(this);
 615   assert(cmThread() != NULL, "CM Thread should have been created");
 616   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 617   if (_cmThread->osthread() == NULL) {
 618       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 619   }
 620 
 621   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 622   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 623   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 624 
 625   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 626   satb_qs.set_buffer_size(G1SATBBufferSize);
 627 
 628   _root_regions.init(_g1h, this);
 629 
 630   if (ConcGCThreads > ParallelGCThreads) {
 631     warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
 632             "than ParallelGCThreads (" UINTX_FORMAT ").",
 633             ConcGCThreads, ParallelGCThreads);
 634     return;
 635   }
 636   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 637     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 638     // if both are set
 639     _sleep_factor             = 0.0;
 640     _marking_task_overhead    = 1.0;
 641   } else if (G1MarkingOverheadPercent > 0) {
 642     // We will calculate the number of parallel marking threads based
 643     // on a target overhead with respect to the soft real-time goal
 644     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 645     double overall_cm_overhead =
 646       (double) MaxGCPauseMillis * marking_overhead /
 647       (double) GCPauseIntervalMillis;
 648     double cpu_ratio = 1.0 / (double) os::processor_count();
 649     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 650     double marking_task_overhead =
 651       overall_cm_overhead / marking_thread_num *
 652                                               (double) os::processor_count();
 653     double sleep_factor =
 654                        (1.0 - marking_task_overhead) / marking_task_overhead;
 655 
 656     FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
 657     _sleep_factor             = sleep_factor;
 658     _marking_task_overhead    = marking_task_overhead;
 659   } else {
 660     // Calculate the number of parallel marking threads by scaling
 661     // the number of parallel GC threads.
 662     uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
 663     FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
 664     _sleep_factor             = 0.0;
 665     _marking_task_overhead    = 1.0;
 666   }
 667 
 668   assert(ConcGCThreads > 0, "Should have been set");
 669   _parallel_marking_threads = (uint) ConcGCThreads;
 670   _max_parallel_marking_threads = _parallel_marking_threads;
 671 
 672   if (parallel_marking_threads() > 1) {
 673     _cleanup_task_overhead = 1.0;
 674   } else {
 675     _cleanup_task_overhead = marking_task_overhead();
 676   }
 677   _cleanup_sleep_factor =
 678                    (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 679 
 680 #if 0
 681   gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 682   gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 683   gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 684   gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 685   gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 686 #endif
 687 
 688   _parallel_workers = new FlexibleWorkGang("G1 Marker",
 689        _max_parallel_marking_threads, false, true);
 690   if (_parallel_workers == NULL) {
 691     vm_exit_during_initialization("Failed necessary allocation.");
 692   } else {
 693     _parallel_workers->initialize_workers();
 694   }
 695 
 696   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 697     size_t mark_stack_size =
 698       MIN2(MarkStackSizeMax,
 699           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 700     // Verify that the calculated value for MarkStackSize is in range.
 701     // It would be nice to use the private utility routine from Arguments.
 702     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 703       warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 704               "must be between 1 and " SIZE_FORMAT,
 705               mark_stack_size, MarkStackSizeMax);
 706       return;
 707     }
 708     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 709   } else {
 710     // Verify MarkStackSize is in range.
 711     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 712       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 713         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 714           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 715                   "must be between 1 and " SIZE_FORMAT,
 716                   MarkStackSize, MarkStackSizeMax);
 717           return;
 718         }
 719       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 720         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 721           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 722                   " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 723                   MarkStackSize, MarkStackSizeMax);
 724           return;
 725         }
 726       }
 727     }
 728   }
 729 
 730   if (!_markStack.allocate(MarkStackSize)) {
 731     warning("Failed to allocate CM marking stack");
 732     return;
 733   }
 734 
 735   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 736   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 737 
 738   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 739   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 740 
 741   BitMap::idx_t card_bm_size = _card_bm.size();
 742 
 743   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 744   _active_tasks = _max_worker_id;
 745 
 746   uint max_regions = _g1h->max_regions();
 747   for (uint i = 0; i < _max_worker_id; ++i) {
 748     CMTaskQueue* task_queue = new CMTaskQueue();
 749     task_queue->initialize();
 750     _task_queues->register_queue(i, task_queue);
 751 
 752     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 753     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 754 
 755     _tasks[i] = new CMTask(i, this,
 756                            _count_marked_bytes[i],
 757                            &_count_card_bitmaps[i],
 758                            task_queue, _task_queues);
 759 
 760     _accum_task_vtime[i] = 0.0;
 761   }
 762 
 763   // Calculate the card number for the bottom of the heap. Used
 764   // in biasing indexes into the accounting card bitmaps.
 765   _heap_bottom_card_num =
 766     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 767                                 CardTableModRefBS::card_shift);
 768 
 769   // Clear all the liveness counting data
 770   clear_all_count_data();
 771 
 772   // so that the call below can read a sensible value
 773   _heap_start = g1h->reserved_region().start();
 774   set_non_marking_state();
 775   _completed_initialization = true;
 776 }
 777 
 778 void ConcurrentMark::reset() {
 779   // Starting values for these two. This should be called in a STW
 780   // phase.
 781   MemRegion reserved = _g1h->g1_reserved();
 782   _heap_start = reserved.start();
 783   _heap_end   = reserved.end();
 784 
 785   // Separated the asserts so that we know which one fires.
 786   assert(_heap_start != NULL, "heap bounds should look ok");
 787   assert(_heap_end != NULL, "heap bounds should look ok");
 788   assert(_heap_start < _heap_end, "heap bounds should look ok");
 789 
 790   // Reset all the marking data structures and any necessary flags
 791   reset_marking_state();
 792 
 793   if (verbose_low()) {
 794     gclog_or_tty->print_cr("[global] resetting");
 795   }
 796 
 797   // We do reset all of them, since different phases will use
 798   // different number of active threads. So, it's easiest to have all
 799   // of them ready.
 800   for (uint i = 0; i < _max_worker_id; ++i) {
 801     _tasks[i]->reset(_nextMarkBitMap);
 802   }
 803 
 804   // we need this to make sure that the flag is on during the evac
 805   // pause with initial mark piggy-backed
 806   set_concurrent_marking_in_progress();
 807 }
 808 
 809 
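     // Reset the global marking data structures: the global mark stack, the
     // per-worker task queues and the global finger. Unless 'clear_overflow' is
     // true, the overflow flag is expected to remain set so that callers can see
     // that we are restarting because of an overflow.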
 810 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 811   _markStack.set_should_expand();
 812   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 813   if (clear_overflow) {
 814     clear_has_overflown();
 815   } else {
 816     assert(has_overflown(), "pre-condition");
 817   }
 818   _finger = _heap_start;
 819 
 820   for (uint i = 0; i < _max_worker_id; ++i) {
 821     CMTaskQueue* queue = _task_queues->queue(i);
 822     queue->set_empty();
 823   }
 824 }
 825 
 826 void ConcurrentMark::set_concurrency(uint active_tasks) {
 827   assert(active_tasks <= _max_worker_id, "we should not have more");
 828 
 829   _active_tasks = active_tasks;
 830   // Need to update the three data structures below according to the
 831   // number of active threads for this phase.
 832   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 833   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 834   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 835 }
 836 
 837 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 838   set_concurrency(active_tasks);
 839 
 840   _concurrent = concurrent;
 841   // We propagate this to all tasks, not just the active ones.
 842   for (uint i = 0; i < _max_worker_id; ++i)
 843     _tasks[i]->set_concurrent(concurrent);
 844 
 845   if (concurrent) {
 846     set_concurrent_marking_in_progress();
 847   } else {
 848     // We currently assume that the concurrent flag has been set to
 849     // false before we start remark. At this point we should also be
 850     // in a STW phase.
 851     assert(!concurrent_marking_in_progress(), "invariant");
 852     assert(out_of_regions(),
 853            err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
 854                    p2i(_finger), p2i(_heap_end)));
 855   }
 856 }
 857 
 858 void ConcurrentMark::set_non_marking_state() {
 859   // We set the global marking state to some default values when we're
 860   // not doing marking.
 861   reset_marking_state();
 862   _active_tasks = 0;
 863   clear_concurrent_marking_in_progress();
 864 }
 865 
 866 ConcurrentMark::~ConcurrentMark() {
 867   // The ConcurrentMark instance is never freed.
 868   ShouldNotReachHere();
 869 }
 870 
 871 void ConcurrentMark::clearNextBitmap() {
 872   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 873 
 874   // Make sure that the concurrent mark thread still appears to be
 875   // in the current cycle.
 876   guarantee(cmThread()->during_cycle(), "invariant");
 877 
 878   // We are finishing up the current cycle by clearing the next
 879   // marking bitmap and getting it ready for the next cycle. During
 880   // this time no other cycle can start. So, let's make sure that this
 881   // is the case.
 882   guarantee(!g1h->mark_in_progress(), "invariant");
 883 
 884   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
 885   ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
 886   _parallel_workers->run_task(&task);
 887 
 888   // Clear the liveness counting data. If the marking has been aborted, the abort()
 889   // call already did that.
 890   if (cl.complete()) {
 891     clear_all_count_data();
 892   }
 893 
 894   // Repeat the asserts from above.
 895   guarantee(cmThread()->during_cycle(), "invariant");
 896   guarantee(!g1h->mark_in_progress(), "invariant");
 897 }
 898 
 899 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 900   CMBitMap* _bitmap;
 901   bool _error;
 902  public:
 903   CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
 904   }
 905 
 906   virtual bool doHeapRegion(HeapRegion* r) {
 907     // This closure can be called concurrently with the mutator, so we must make sure
 908     // that the result of the getNextMarkedWordAddress() call is compared to the
 909     // value passed to it as limit to detect any found bits.
 910     // We can use the region's orig_end() for the limit and the comparison value
 911     // as it always contains the "real" end of the region that never changes and
 912     // has no side effects.
 913     // Due to the latter, there can also be no problem with the compiler generating
 914     // reloads of the orig_end() call.
 915     HeapWord* end = r->orig_end();
 916     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 917   }
 918 };
 919 
 920 bool ConcurrentMark::nextMarkBitmapIsClear() {
 921   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 922   _g1h->heap_region_iterate(&cl);
 923   return cl.complete();
 924 }
 925 
 926 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 927 public:
 928   bool doHeapRegion(HeapRegion* r) {
 929     if (!r->is_continues_humongous()) {
 930       r->note_start_of_marking();
 931     }
 932     return false;
 933   }
 934 };
 935 
 936 void ConcurrentMark::checkpointRootsInitialPre() {
 937   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 938   G1CollectorPolicy* g1p = g1h->g1_policy();
 939 
 940   _has_aborted = false;
 941 
 942   // Initialize marking structures. This has to be done in a STW phase.
 943   reset();
 944 
 945   // For each region note start of marking.
 946   NoteStartOfMarkHRClosure startcl;
 947   g1h->heap_region_iterate(&startcl);
 948 }
 949 
 950 
 951 void ConcurrentMark::checkpointRootsInitialPost() {
 952   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 953 
 954   // If we force an overflow during remark, the remark operation will
 955   // actually abort and we'll restart concurrent marking. If we always
 956   // force an overflow during remark we'll never actually complete the
 957   // marking phase. So, we initialize this here, at the start of the
 958   // cycle, so that the remaining overflow number will decrease at
 959   // every remark and we'll eventually not need to cause one.
 960   force_overflow_stw()->init();
 961 
 962   // Start Concurrent Marking weak-reference discovery.
 963   ReferenceProcessor* rp = g1h->ref_processor_cm();
 964   // enable ("weak") refs discovery
 965   rp->enable_discovery();
 966   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 967 
 968   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 969   // This is the start of the marking cycle; we expect all
 970   // threads to have SATB queues with active set to false.
 971   satb_mq_set.set_active_all_threads(true, /* new active value */
 972                                      false /* expected_active */);
 973 
 974   _root_regions.prepare_for_scan();
 975 
 976   // update_g1_committed() will be called at the end of an evac pause
 977   // when marking is on. So, it's also called at the end of the
 978   // initial-mark pause to update the heap end, if the heap expands
 979   // during it. No need to call it here.
 980 }
 981 
 982 /*
 983  * Notice that in the next two methods, we actually leave the STS
 984  * during the barrier sync and join it immediately afterwards. If we
 985  * do not do this, the following deadlock can occur: one thread could
 986  * be in the barrier sync code, waiting for the other thread to also
 987  * sync up, whereas another one could be trying to yield, while also
 988  * waiting for the other threads to sync up too.
 989  *
 990  * Note, however, that this code is also used during remark and in
 991  * this case we should not attempt to leave / enter the STS, otherwise
 992  * we'll either hit an assert (debug / fastdebug) or deadlock
 993  * (product). So we should only leave / enter the STS if we are
 994  * operating concurrently.
 995  *
 996  * Because the thread that does the sync barrier has left the STS, it
 997  * is possible to be suspended for a Full GC or an evacuation pause
 998  * could occur. This is actually safe, since the entering the sync
 999  * barrier is one of the last things do_marking_step() does, and it
1000  * doesn't manipulate any data structures afterwards.
1001  */
1002 
1003 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
1004   bool barrier_aborted;
1005 
1006   if (verbose_low()) {
1007     gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
1008   }
1009 
1010   {
1011     SuspendibleThreadSetLeaver sts_leave(concurrent());
1012     barrier_aborted = !_first_overflow_barrier_sync.enter();
1013   }
1014 
1015   // at this point everyone should have synced up and not be doing any
1016   // more work
1017 
1018   if (verbose_low()) {
1019     if (barrier_aborted) {
1020       gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
1021     } else {
1022       gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
1023     }
1024   }
1025 
1026   if (barrier_aborted) {
1027     // If the barrier aborted we ignore the overflow condition and
1028     // just abort the whole marking phase as quickly as possible.
1029     return;
1030   }
1031 
1032   // If we're executing the concurrent phase of marking, reset the marking
1033   // state; otherwise the marking state is reset after reference processing,
1034   // during the remark pause.
1035   // If we reset here as a result of an overflow during the remark we will
1036   // see assertion failures from any subsequent set_concurrency_and_phase()
1037   // calls.
1038   if (concurrent()) {
1039     // let the task associated with worker 0 do this
1040     if (worker_id == 0) {
1041       // task 0 is responsible for clearing the global data structures
1042       // We should be here because of an overflow. During STW we should
1043       // not clear the overflow flag since we rely on it being true when
1044       // we exit this method to abort the pause and restart concurrent
1045       // marking.
1046       reset_marking_state(true /* clear_overflow */);
1047       force_overflow()->update();
1048 
1049       if (G1Log::fine()) {
1050         gclog_or_tty->gclog_stamp(concurrent_gc_id());
1051         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
1052       }
1053     }
1054   }
1055 
1056   // after this, each task should reset its own data structures and
1057   // then go into the second barrier
1058 }
1059 
1060 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1061   bool barrier_aborted;
1062 
1063   if (verbose_low()) {
1064     gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
1065   }
1066 
1067   {
1068     SuspendibleThreadSetLeaver sts_leave(concurrent());
1069     barrier_aborted = !_second_overflow_barrier_sync.enter();
1070   }
1071 
1072   // at this point everything should be re-initialized and ready to go
1073 
1074   if (verbose_low()) {
1075     if (barrier_aborted) {
1076       gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
1077     } else {
1078       gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
1079     }
1080   }
1081 }
1082 
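     // Non-product support for stress testing the overflow handling: with
     // G1ConcMarkForceOverflow set, should_force() answers true once per update()
     // for the first few updates, which is used to artificially trigger the mark
     // stack overflow / restart path.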
1083 #ifndef PRODUCT
1084 void ForceOverflowSettings::init() {
1085   _num_remaining = G1ConcMarkForceOverflow;
1086   _force = false;
1087   update();
1088 }
1089 
1090 void ForceOverflowSettings::update() {
1091   if (_num_remaining > 0) {
1092     _num_remaining -= 1;
1093     _force = true;
1094   } else {
1095     _force = false;
1096   }
1097 }
1098 
1099 bool ForceOverflowSettings::should_force() {
1100   if (_force) {
1101     _force = false;
1102     return true;
1103   } else {
1104     return false;
1105   }
1106 }
1107 #endif // !PRODUCT
1108 
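     // The gang task that drives the concurrent phase of marking. Each worker
     // repeatedly calls do_marking_step() under the suspendible thread set; when
     // the task's step aborts (e.g. its time budget expires) while marking as a
     // whole has not, the worker sleeps for a time proportional to the step's
     // duration and the sleep factor before retrying, until marking completes or
     // is aborted.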
1109 class CMConcurrentMarkingTask: public AbstractGangTask {
1110 private:
1111   ConcurrentMark*       _cm;
1112   ConcurrentMarkThread* _cmt;
1113 
1114 public:
1115   void work(uint worker_id) {
1116     assert(Thread::current()->is_ConcurrentGC_thread(),
1117            "this should only be done by a conc GC thread");
1118     ResourceMark rm;
1119 
1120     double start_vtime = os::elapsedVTime();
1121 
1122     {
1123       SuspendibleThreadSetJoiner sts_join;
1124 
1125       assert(worker_id < _cm->active_tasks(), "invariant");
1126       CMTask* the_task = _cm->task(worker_id);
1127       the_task->record_start_time();
1128       if (!_cm->has_aborted()) {
1129         do {
1130           double start_vtime_sec = os::elapsedVTime();
1131           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1132 
1133           the_task->do_marking_step(mark_step_duration_ms,
1134                                     true  /* do_termination */,
1135                                     false /* is_serial*/);
1136 
1137           double end_vtime_sec = os::elapsedVTime();
1138           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1139           _cm->clear_has_overflown();
1140 
1141           _cm->do_yield_check(worker_id);
1142 
1143           jlong sleep_time_ms;
1144           if (!_cm->has_aborted() && the_task->has_aborted()) {
1145             sleep_time_ms =
1146               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1147             {
1148               SuspendibleThreadSetLeaver sts_leave;
1149               os::sleep(Thread::current(), sleep_time_ms, false);
1150             }
1151           }
1152         } while (!_cm->has_aborted() && the_task->has_aborted());
1153       }
1154       the_task->record_end_time();
1155       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1156     }
1157 
1158     double end_vtime = os::elapsedVTime();
1159     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1160   }
1161 
1162   CMConcurrentMarkingTask(ConcurrentMark* cm,
1163                           ConcurrentMarkThread* cmt) :
1164       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1165 
1166   ~CMConcurrentMarkingTask() { }
1167 };
1168 
1169 // Calculates the number of active workers for a concurrent
1170 // phase.
1171 uint ConcurrentMark::calc_parallel_marking_threads() {
1172   uint n_conc_workers = 0;
1173   if (!UseDynamicNumberOfGCThreads ||
1174       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1175        !ForceDynamicNumberOfGCThreads)) {
1176     n_conc_workers = max_parallel_marking_threads();
1177   } else {
1178     n_conc_workers =
1179       AdaptiveSizePolicy::calc_default_active_workers(
1180                                    max_parallel_marking_threads(),
1181                                    1, /* Minimum workers */
1182                                    parallel_marking_threads(),
1183                                    Threads::number_of_non_daemon_threads());
1184     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1185     // that scaling has already gone into "_max_parallel_marking_threads".
1186   }
1187   assert(n_conc_workers > 0, "Always need at least 1");
1188   return n_conc_workers;
1189 }
1190 
1191 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1192   // Currently, only survivors can be root regions.
1193   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1194   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1195 
1196   const uintx interval = PrefetchScanIntervalInBytes;
1197   HeapWord* curr = hr->bottom();
1198   const HeapWord* end = hr->top();
1199   while (curr < end) {
1200     Prefetch::read(curr, interval);
1201     oop obj = oop(curr);
1202     int size = obj->oop_iterate(&cl);
1203     assert(size == obj->size(), "sanity");
1204     curr += size;
1205   }
1206 }
1207 
1208 class CMRootRegionScanTask : public AbstractGangTask {
1209 private:
1210   ConcurrentMark* _cm;
1211 
1212 public:
1213   CMRootRegionScanTask(ConcurrentMark* cm) :
1214     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1215 
1216   void work(uint worker_id) {
1217     assert(Thread::current()->is_ConcurrentGC_thread(),
1218            "this should only be done by a conc GC thread");
1219 
1220     CMRootRegions* root_regions = _cm->root_regions();
1221     HeapRegion* hr = root_regions->claim_next();
1222     while (hr != NULL) {
1223       _cm->scanRootRegion(hr, worker_id);
1224       hr = root_regions->claim_next();
1225     }
1226   }
1227 };
1228 
1229 void ConcurrentMark::scanRootRegions() {
1230   // Start of concurrent marking.
1231   ClassLoaderDataGraph::clear_claimed_marks();
1232 
1233   // scan_in_progress() will have been set to true only if there was
1234   // at least one root region to scan. So, if it's false, we
1235   // should not attempt to do any further work.
1236   if (root_regions()->scan_in_progress()) {
1237     _parallel_marking_threads = calc_parallel_marking_threads();
1238     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1239            "Maximum number of marking threads exceeded");
1240     uint active_workers = MAX2(1U, parallel_marking_threads());
1241 
1242     CMRootRegionScanTask task(this);
1243     _parallel_workers->set_active_workers(active_workers);
1244     _parallel_workers->run_task(&task);
1245 
1246     // It's possible that has_aborted() is true here without actually
1247     // aborting the survivor scan earlier. This is OK as it's
1248     // mainly used for sanity checking.
1249     root_regions()->scan_finished();
1250   }
1251 }
1252 
1253 void ConcurrentMark::markFromRoots() {
1254   // we might be tempted to assert that:
1255   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1256   //        "inconsistent argument?");
1257   // However that wouldn't be right, because it's possible that
1258   // a safepoint is indeed in progress as a younger generation
1259   // stop-the-world GC happens even as we mark in this generation.
1260 
1261   _restart_for_overflow = false;
1262   force_overflow_conc()->init();
1263 
1264   // _g1h has _n_par_threads
1265   _parallel_marking_threads = calc_parallel_marking_threads();
1266   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1267     "Maximum number of marking threads exceeded");
1268 
1269   uint active_workers = MAX2(1U, parallel_marking_threads());
1270 
1271   // Parallel task terminator is set in "set_concurrency_and_phase()"
1272   set_concurrency_and_phase(active_workers, true /* concurrent */);
1273 
1274   CMConcurrentMarkingTask markingTask(this, cmThread());
1275   _parallel_workers->set_active_workers(active_workers);
1276   // Don't set _n_par_threads because it affects MT in process_roots()
1277   // and the decisions on that MT processing is made elsewhere.
1278   assert(_parallel_workers->active_workers() > 0, "Should have been set");
1279   _parallel_workers->run_task(&markingTask);
1280   print_stats();
1281 }
1282 
1283 // Helper class to get rid of some boilerplate code.
1284 class G1CMTraceTime : public GCTraceTime {
1285   static bool doit_and_prepend(bool doit) {
1286     if (doit) {
1287       gclog_or_tty->put(' ');
1288     }
1289     return doit;
1290   }
1291 
1292  public:
1293   G1CMTraceTime(const char* title, bool doit)
1294     : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
1295         G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
1296   }
1297 };
1298 
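     // The remark pause (final marking checkpoint). Finishes the marking work,
     // processes weak references and then either restarts concurrent marking (if
     // the global mark stack overflowed) or aggregates the per-task liveness
     // counting data and finalizes the marking state.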
1299 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1300   // world is stopped at this checkpoint
1301   assert(SafepointSynchronize::is_at_safepoint(),
1302          "world should be stopped");
1303 
1304   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1305 
1306   // If a full collection has happened, we shouldn't do this.
1307   if (has_aborted()) {
1308     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1309     return;
1310   }
1311 
1312   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1313 
1314   if (VerifyDuringGC) {
1315     HandleMark hm;  // handle scope
1316     g1h->prepare_for_verify();
1317     Universe::verify(VerifyOption_G1UsePrevMarking,
1318                      " VerifyDuringGC:(before)");
1319   }
1320   g1h->check_bitmaps("Remark Start");
1321 
1322   G1CollectorPolicy* g1p = g1h->g1_policy();
1323   g1p->record_concurrent_mark_remark_start();
1324 
1325   double start = os::elapsedTime();
1326 
1327   checkpointRootsFinalWork();
1328 
1329   double mark_work_end = os::elapsedTime();
1330 
1331   weakRefsWork(clear_all_soft_refs);
1332 
1333   if (has_overflown()) {
1334     // Oops.  We overflowed.  Restart concurrent marking.
1335     _restart_for_overflow = true;
1336     if (G1TraceMarkStackOverflow) {
1337       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1338     }
1339 
1340     // Verify the heap w.r.t. the previous marking bitmap.
1341     if (VerifyDuringGC) {
1342       HandleMark hm;  // handle scope
1343       g1h->prepare_for_verify();
1344       Universe::verify(VerifyOption_G1UsePrevMarking,
1345                        " VerifyDuringGC:(overflow)");
1346     }
1347 
1348     // Clear the marking state because we will be restarting
1349     // marking due to overflowing the global mark stack.
1350     reset_marking_state();
1351   } else {
1352     {
1353       G1CMTraceTime trace("GC aggregate-data", G1Log::finer());
1354 
1355       // Aggregate the per-task counting data that we have accumulated
1356       // while marking.
1357       aggregate_count_data();
1358     }
1359 
1360     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1361     // We're done with marking.
1362     // This is the end of the marking cycle; we expect all
1363     // threads to have SATB queues with active set to true.
1364     satb_mq_set.set_active_all_threads(false, /* new active value */
1365                                        true /* expected_active */);
1366 
1367     if (VerifyDuringGC) {
1368       HandleMark hm;  // handle scope
1369       g1h->prepare_for_verify();
1370       Universe::verify(VerifyOption_G1UseNextMarking,
1371                        " VerifyDuringGC:(after)");
1372     }
1373     g1h->check_bitmaps("Remark End");
1374     assert(!restart_for_overflow(), "sanity");
1375     // Completely reset the marking state since marking completed
1376     set_non_marking_state();
1377   }
1378 
1379   // Expand the marking stack, if we have to and if we can.
1380   if (_markStack.should_expand()) {
1381     _markStack.expand();
1382   }
1383 
1384   // Statistics
1385   double now = os::elapsedTime();
1386   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1387   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1388   _remark_times.add((now - start) * 1000.0);
1389 
1390   g1p->record_concurrent_mark_remark_end();
1391 
1392   G1CMIsAliveClosure is_alive(g1h);
1393   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1394 }
1395 
1396 // Base class of the closures that finalize and verify the
1397 // liveness counting data.
1398 class CMCountDataClosureBase: public HeapRegionClosure {
1399 protected:
1400   G1CollectedHeap* _g1h;
1401   ConcurrentMark* _cm;
1402   CardTableModRefBS* _ct_bs;
1403 
1404   BitMap* _region_bm;
1405   BitMap* _card_bm;
1406 
1407   // Takes a region that's not empty (i.e., it has at least one
1408   // live object in it) and sets its corresponding bit on the region
1409   // bitmap to 1. If the region is "starts humongous" it will also set
1410   // to 1 the bits on the region bitmap that correspond to its
1411   // associated "continues humongous" regions.
1412   void set_bit_for_region(HeapRegion* hr) {
1413     assert(!hr->is_continues_humongous(), "should have filtered those out");
1414 
1415     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1416     if (!hr->is_starts_humongous()) {
1417       // Normal (non-humongous) case: just set the bit.
1418       _region_bm->par_at_put(index, true);
1419     } else {
1420       // Starts humongous case: calculate how many regions are part of
1421       // this humongous region and then set the bit range.
1422       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1423       _region_bm->par_at_put_range(index, end_index, true);
1424     }
1425   }
1426 
1427 public:
1428   CMCountDataClosureBase(G1CollectedHeap* g1h,
1429                          BitMap* region_bm, BitMap* card_bm):
1430     _g1h(g1h), _cm(g1h->concurrent_mark()),
1431     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
1432     _region_bm(region_bm), _card_bm(card_bm) { }
1433 };
1434 
1435 // Closure that calculates the # live objects per region. Used
1436 // for verification purposes during the cleanup pause.
1437 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1438   CMBitMapRO* _bm;
1439   size_t _region_marked_bytes;
1440 
1441 public:
1442   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1443                          BitMap* region_bm, BitMap* card_bm) :
1444     CMCountDataClosureBase(g1h, region_bm, card_bm),
1445     _bm(bm), _region_marked_bytes(0) { }
1446 
1447   bool doHeapRegion(HeapRegion* hr) {
1448 
1449     if (hr->is_continues_humongous()) {
1450       // We will ignore these here and process them when their
1451       // associated "starts humongous" region is processed (see
1452       // set_bit_for_region()). Note that we cannot rely on their
1453       // associated "starts humongous" region to have its bit set to
1454       // 1 since, due to the region chunking in the parallel region
1455       // iteration, a "continues humongous" region might be visited
1456       // before its associated "starts humongous".
1457       return false;
1458     }
1459 
1460     HeapWord* ntams = hr->next_top_at_mark_start();
1461     HeapWord* start = hr->bottom();
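         // ntams ("next top at mark start") delimits the part of the region
         // that the concurrent marking walks; objects allocated at or above
         // ntams since marking started are treated as implicitly live (see the
         // [ntams, top) handling further down).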
1462 
1463     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1464            err_msg("Preconditions not met - "
1465                    "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1466                    p2i(start), p2i(ntams), p2i(hr->end())));
1467 
1468     // Find the first marked object at or after "start".
1469     start = _bm->getNextMarkedWordAddress(start, ntams);
1470 
1471     size_t marked_bytes = 0;
1472 
1473     while (start < ntams) {
1474       oop obj = oop(start);
1475       int obj_sz = obj->size();
1476       HeapWord* obj_end = start + obj_sz;
1477 
1478       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1479       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1480 
1481       // Note: if we're looking at the last region in heap - obj_end
1482       // could be actually just beyond the end of the heap; end_idx
1483       // will then correspond to a (non-existent) card that is also
1484       // just beyond the heap.
1485       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1486         // end of object is not card aligned - increment to cover
1487         // all the cards spanned by the object
1488         end_idx += 1;
1489       }
1490 
1491       // Set the bits in the card BM for the cards spanned by this object.
1492       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1493 
1494       // Add the size of this object to the number of marked bytes.
1495       marked_bytes += (size_t)obj_sz * HeapWordSize;
1496 
1497       // Find the next marked object after this one.
1498       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1499     }
1500 
1501     // Mark the allocated-since-marking portion...
1502     HeapWord* top = hr->top();
1503     if (ntams < top) {
1504       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1505       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1506 
1507       // Note: if we're looking at the last region in heap - top
1508       // could be actually just beyond the end of the heap; end_idx
1509       // will then correspond to a (non-existent) card that is also
1510       // just beyond the heap.
1511       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1512         // end of object is not card aligned - increment to cover
1513         // all the cards spanned by the object
1514         end_idx += 1;
1515       }
1516       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1517 
1518       // This definitely means the region has live objects.
1519       set_bit_for_region(hr);
1520     }
1521 
1522     // Update the live region bitmap.
1523     if (marked_bytes > 0) {
1524       set_bit_for_region(hr);
1525     }
1526 
1527     // Set the marked bytes for the current region so that
1528     // it can be queried by a calling verification routine
1529     _region_marked_bytes = marked_bytes;
1530 
1531     return false;
1532   }
1533 
1534   size_t region_marked_bytes() const { return _region_marked_bytes; }
1535 };
1536 
1537 // Heap region closure used for verifying the counting data
1538 // that was accumulated concurrently and aggregated during
1539 // the remark pause. This closure is applied to the heap
1540 // regions during the STW cleanup pause.
1541 
1542 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1543   G1CollectedHeap* _g1h;
1544   ConcurrentMark* _cm;
1545   CalcLiveObjectsClosure _calc_cl;
1546   BitMap* _region_bm;   // Region BM to be verified
1547   BitMap* _card_bm;     // Card BM to be verified
1548   bool _verbose;        // verbose output?
1549 
1550   BitMap* _exp_region_bm; // Expected Region BM values
1551   BitMap* _exp_card_bm;   // Expected card BM values
1552 
1553   int _failures;
1554 
1555 public:
1556   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1557                                 BitMap* region_bm,
1558                                 BitMap* card_bm,
1559                                 BitMap* exp_region_bm,
1560                                 BitMap* exp_card_bm,
1561                                 bool verbose) :
1562     _g1h(g1h), _cm(g1h->concurrent_mark()),
1563     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1564     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1565     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1566     _failures(0) { }
1567 
1568   int failures() const { return _failures; }
1569 
1570   bool doHeapRegion(HeapRegion* hr) {
1571     if (hr->is_continues_humongous()) {
1572       // We will ignore these here and process them when their
1573       // associated "starts humongous" region is processed (see
1574       // set_bit_for_region()). Note that we cannot rely on their
1575       // associated "starts humongous" region to have its bit set to
1576       // 1 since, due to the region chunking in the parallel region
1577       // iteration, a "continues humongous" region might be visited
1578       // before its associated "starts humongous".
1579       return false;
1580     }
1581 
1582     int failures = 0;
1583 
1584     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1585     // this region and set the corresponding bits in the expected region
1586     // and card bitmaps.
1587     bool res = _calc_cl.doHeapRegion(hr);
1588     assert(res == false, "should be continuing");
1589 
1590     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1591                     Mutex::_no_safepoint_check_flag);
1592 
1593     // Verify the marked bytes for this region.
1594     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1595     size_t act_marked_bytes = hr->next_marked_bytes();
1596 
1597     // We're not OK if expected marked bytes > actual marked bytes. It means
1598     // we have failed to account for some objects during the actual marking.
1599     if (exp_marked_bytes > act_marked_bytes) {
1600       if (_verbose) {
1601         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1602                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1603                                hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
1604       }
1605       failures += 1;
1606     }
1607 
1608     // Verify the bit, for this region, in the actual and expected
1609     // (which was just calculated) region bit maps.
1610     // We're not OK if the bit in the calculated expected region
1611     // bitmap is set and the bit in the actual region bitmap is not.
1612     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1613 
1614     bool expected = _exp_region_bm->at(index);
1615     bool actual = _region_bm->at(index);
1616     if (expected && !actual) {
1617       if (_verbose) {
1618         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1619                                "expected: %s, actual: %s",
1620                                hr->hrm_index(),
1621                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1622       }
1623       failures += 1;
1624     }
1625 
1626     // Verify that the card bit maps for the cards spanned by the current
1627     // region match. We have an error if we have a set bit in the expected
1628     // bit map and the corresponding bit in the actual bitmap is not set.
1629 
1630     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1631     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1632 
1633     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1634       expected = _exp_card_bm->at(i);
1635       actual = _card_bm->at(i);
1636 
1637       if (expected && !actual) {
1638         if (_verbose) {
1639           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1640                                  "expected: %s, actual: %s",
1641                                  hr->hrm_index(), i,
1642                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1643         }
1644         failures += 1;
1645       }
1646     }
1647 
1648     if (failures > 0 && _verbose)  {
1649       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1650                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1651                              HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
1652                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1653     }
1654 
1655     _failures += failures;
1656 
1657     // We could stop iteration over the heap when we
1658     // find the first violating region by returning true.
1659     return false;
1660   }
1661 };
1662 
1663 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1664 protected:
1665   G1CollectedHeap* _g1h;
1666   ConcurrentMark* _cm;
1667   BitMap* _actual_region_bm;
1668   BitMap* _actual_card_bm;
1669 
1670   uint    _n_workers;
1671 
1672   BitMap* _expected_region_bm;
1673   BitMap* _expected_card_bm;
1674 
1675   int  _failures;
1676   bool _verbose;
1677 
1678   HeapRegionClaimer _hrclaimer;
1679 
1680 public:
1681   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1682                             BitMap* region_bm, BitMap* card_bm,
1683                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1684     : AbstractGangTask("G1 verify final counting"),
1685       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1686       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1687       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1688       _failures(0), _verbose(false),
1689       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1690     assert(VerifyDuringGC, "don't call this otherwise");
1691     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1692     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1693 
1694     _verbose = _cm->verbose_medium();
1695   }
1696 
1697   void work(uint worker_id) {
1698     assert(worker_id < _n_workers, "invariant");
1699 
1700     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1701                                             _actual_region_bm, _actual_card_bm,
1702                                             _expected_region_bm,
1703                                             _expected_card_bm,
1704                                             _verbose);
1705 
1706     _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
1707 
1708     Atomic::add(verify_cl.failures(), &_failures);
1709   }
1710 
1711   int failures() const { return _failures; }
1712 };
1713 
1714 // Closure that finalizes the liveness counting data.
1715 // Used during the cleanup pause.
1716 // Sets the bits corresponding to the interval [NTAMS, top]
1717 // (which contains the implicitly live objects) in the
1718 // card liveness bitmap. Also sets the bit for each region,
1719 // containing live data, in the region liveness bitmap.
1720 
1721 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1722  public:
1723   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1724                               BitMap* region_bm,
1725                               BitMap* card_bm) :
1726     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1727 
1728   bool doHeapRegion(HeapRegion* hr) {
1729 
1730     if (hr->is_continues_humongous()) {
1731       // We will ignore these here and process them when their
1732       // associated "starts humongous" region is processed (see
1733       // set_bit_for_region()). Note that we cannot rely on their
1734       // associated "starts humongous" region to have its bit set to
1735       // 1 since, due to the region chunking in the parallel region
1736       // iteration, a "continues humongous" region might be visited
1737       // before its associated "starts humongous".
1738       return false;
1739     }
1740 
1741     HeapWord* ntams = hr->next_top_at_mark_start();
1742     HeapWord* top   = hr->top();
1743 
1744     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1745 
1746     // Mark the allocated-since-marking portion...
1747     if (ntams < top) {
1748       // This definitely means the region has live objects.
1749       set_bit_for_region(hr);
1750 
1751       // Now set the bits in the card bitmap for [ntams, top)
1752       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1753       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1754 
1755       // Note: if we're looking at the last region in heap - top
1756       // could be actually just beyond the end of the heap; end_idx
1757       // will then correspond to a (non-existent) card that is also
1758       // just beyond the heap.
1759       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1760         // end of object is not card aligned - increment to cover
1761         // all the cards spanned by the object
1762         end_idx += 1;
1763       }
1764 
1765       assert(end_idx <= _card_bm->size(),
1766              err_msg("oob: end_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1767                      end_idx, _card_bm->size()));
1768       assert(start_idx < _card_bm->size(),
1769              err_msg("oob: start_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1770                      start_idx, _card_bm->size()));
1771 
1772       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1773     }
1774 
1775     // Set the bit for the region if it contains live data
1776     if (hr->next_marked_bytes() > 0) {
1777       set_bit_for_region(hr);
1778     }
1779 
1780     return false;
1781   }
1782 };
1783 
1784 class G1ParFinalCountTask: public AbstractGangTask {
1785 protected:
1786   G1CollectedHeap* _g1h;
1787   ConcurrentMark* _cm;
1788   BitMap* _actual_region_bm;
1789   BitMap* _actual_card_bm;
1790 
1791   uint    _n_workers;
1792   HeapRegionClaimer _hrclaimer;
1793 
1794 public:
1795   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1796     : AbstractGangTask("G1 final counting"),
1797       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1798       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1799       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1800   }
1801 
1802   void work(uint worker_id) {
1803     assert(worker_id < _n_workers, "invariant");
1804 
1805     FinalCountDataUpdateClosure final_update_cl(_g1h,
1806                                                 _actual_region_bm,
1807                                                 _actual_card_bm);
1808 
1809     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1810   }
1811 };
1812 
1813 class G1ParNoteEndTask;
1814 
1815 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1816   G1CollectedHeap* _g1;
1817   size_t _max_live_bytes;
1818   uint _regions_claimed;
1819   size_t _freed_bytes;
1820   FreeRegionList* _local_cleanup_list;
1821   HeapRegionSetCount _old_regions_removed;
1822   HeapRegionSetCount _humongous_regions_removed;
1823   HRRSCleanupTask* _hrrs_cleanup_task;
1824   double _claimed_region_time;
1825   double _max_region_time;
1826 
1827 public:
1828   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1829                              FreeRegionList* local_cleanup_list,
1830                              HRRSCleanupTask* hrrs_cleanup_task) :
1831     _g1(g1),
1832     _max_live_bytes(0), _regions_claimed(0),
1833     _freed_bytes(0),
1834     _claimed_region_time(0.0), _max_region_time(0.0),
1835     _local_cleanup_list(local_cleanup_list),
1836     _old_regions_removed(),
1837     _humongous_regions_removed(),
1838     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1839 
1840   size_t freed_bytes() { return _freed_bytes; }
1841   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1842   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1843 
1844   bool doHeapRegion(HeapRegion *hr) {
1845     if (hr->is_continues_humongous()) {
1846       return false;
1847     }
1848     // We use a claim value of zero here because all regions
1849     // were claimed with value 1 in the FinalCount task.
1850     _g1->reset_gc_time_stamps(hr);
1851     double start = os::elapsedTime();
1852     _regions_claimed++;
1853     hr->note_end_of_marking();
1854     _max_live_bytes += hr->max_live_bytes();
1855 
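         // A non-young region that has allocated space but no live data left
         // after marking can be reclaimed immediately: free it and record it
         // in the appropriate removed-region counters.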
1856     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1857       _freed_bytes += hr->used();
1858       hr->set_containing_set(NULL);
1859       if (hr->is_humongous()) {
1860         assert(hr->is_starts_humongous(), "we should only see starts humongous");
1861         _humongous_regions_removed.increment(1u, hr->capacity());
1862         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1863       } else {
1864         _old_regions_removed.increment(1u, hr->capacity());
1865         _g1->free_region(hr, _local_cleanup_list, true);
1866       }
1867     } else {
1868       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1869     }
1870 
1871     double region_time = (os::elapsedTime() - start);
1872     _claimed_region_time += region_time;
1873     if (region_time > _max_region_time) {
1874       _max_region_time = region_time;
1875     }
1876     return false;
1877   }
1878 
1879   size_t max_live_bytes() { return _max_live_bytes; }
1880   uint regions_claimed() { return _regions_claimed; }
1881   double claimed_region_time_sec() { return _claimed_region_time; }
1882   double max_region_time_sec() { return _max_region_time; }
1883 };
1884 
1885 class G1ParNoteEndTask: public AbstractGangTask {
1886   friend class G1NoteEndOfConcMarkClosure;
1887 
1888 protected:
1889   G1CollectedHeap* _g1h;
1890   size_t _max_live_bytes;
1891   size_t _freed_bytes;
1892   FreeRegionList* _cleanup_list;
1893   HeapRegionClaimer _hrclaimer;
1894 
1895 public:
1896   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1897       AbstractGangTask("G1 note end"), _g1h(g1h), _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1898   }
1899 
1900   void work(uint worker_id) {
1901     FreeRegionList local_cleanup_list("Local Cleanup List");
1902     HRRSCleanupTask hrrs_cleanup_task;
1903     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1904                                            &hrrs_cleanup_task);
1905     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1906     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1907 
1908     // Now update the lists
1909     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1910     {
1911       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1912       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1913       _max_live_bytes += g1_note_end.max_live_bytes();
1914       _freed_bytes += g1_note_end.freed_bytes();
1915 
1916       // If we iterate over the global cleanup list at the end of
1917       // cleanup to do this printing we will not guarantee to only
1918       // generate output for the newly-reclaimed regions (the list
1919       // might not be empty at the beginning of cleanup; we might
1920       // still be working on its previous contents). So we do the
1921       // printing here, before we append the new regions to the global
1922       // cleanup list.
1923 
1924       G1HRPrinter* hr_printer = _g1h->hr_printer();
1925       if (hr_printer->is_active()) {
1926         FreeRegionListIterator iter(&local_cleanup_list);
1927         while (iter.more_available()) {
1928           HeapRegion* hr = iter.get_next();
1929           hr_printer->cleanup(hr);
1930         }
1931       }
1932 
1933       _cleanup_list->add_ordered(&local_cleanup_list);
1934       assert(local_cleanup_list.is_empty(), "post-condition");
1935 
1936       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1937     }
1938   }
1939   size_t max_live_bytes() { return _max_live_bytes; }
1940   size_t freed_bytes() { return _freed_bytes; }
1941 };
1942 
1943 class G1ParScrubRemSetTask: public AbstractGangTask {
1944 protected:
1945   G1RemSet* _g1rs;
1946   BitMap* _region_bm;
1947   BitMap* _card_bm;
1948   HeapRegionClaimer _hrclaimer;
1949 
1950 public:
1951   G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
1952       AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) {
1953   }
1954 
1955   void work(uint worker_id) {
1956     _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
1957   }
1958 
1959 };
1960 
1961 void ConcurrentMark::cleanup() {
1962   // world is stopped at this checkpoint
1963   assert(SafepointSynchronize::is_at_safepoint(),
1964          "world should be stopped");
1965   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1966 
1967   // If a full collection has happened, we shouldn't do this.
1968   if (has_aborted()) {
1969     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1970     return;
1971   }
1972 
1973   g1h->verify_region_sets_optional();
1974 
1975   if (VerifyDuringGC) {
1976     HandleMark hm;  // handle scope
1977     g1h->prepare_for_verify();
1978     Universe::verify(VerifyOption_G1UsePrevMarking,
1979                      " VerifyDuringGC:(before)");
1980   }
1981   g1h->check_bitmaps("Cleanup Start");
1982 
1983   G1CollectorPolicy* g1p = g1h->g1_policy();
1984   g1p->record_concurrent_mark_cleanup_start();
1985 
1986   double start = os::elapsedTime();
1987 
1988   HeapRegionRemSet::reset_for_cleanup_tasks();
1989 
1990   uint n_workers;
1991 
1992   // Do counting once more with the world stopped for good measure.
1993   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1994 
1995   g1h->set_par_threads();
1996   n_workers = g1h->n_par_threads();
1997   assert(g1h->n_par_threads() == n_workers,
1998          "Should not have been reset");
1999   g1h->workers()->run_task(&g1_par_count_task);
2000   // Done with the parallel phase so reset to 0.
2001   g1h->set_par_threads(0);
2002 
2003   if (VerifyDuringGC) {
2004     // Verify that the counting data accumulated during marking matches
2005     // that calculated by walking the marking bitmap.
2006 
2007     // Bitmaps to hold expected values
2008     BitMap expected_region_bm(_region_bm.size(), true);
2009     BitMap expected_card_bm(_card_bm.size(), true);
2010 
2011     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2012                                                  &_region_bm,
2013                                                  &_card_bm,
2014                                                  &expected_region_bm,
2015                                                  &expected_card_bm);
2016 
2017     g1h->set_par_threads((int)n_workers);
2018     g1h->workers()->run_task(&g1_par_verify_task);
2019     // Done with the parallel phase so reset to 0.
2020     g1h->set_par_threads(0);
2021 
2022     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2023   }
2024 
2025   size_t start_used_bytes = g1h->used();
2026   g1h->set_marking_complete();
2027 
2028   double count_end = os::elapsedTime();
2029   double this_final_counting_time = (count_end - start);
2030   _total_counting_time += this_final_counting_time;
2031 
2032   if (G1PrintRegionLivenessInfo) {
2033     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2034     _g1h->heap_region_iterate(&cl);
2035   }
2036 
2037   // Install the "next" mark bitmap, which now holds the completed marking, as "prev".
2038   swapMarkBitMaps();
2039 
2040   g1h->reset_gc_time_stamp();
2041 
2042   // Note end of marking in all heap regions.
2043   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
2044   g1h->set_par_threads((int)n_workers);
2045   g1h->workers()->run_task(&g1_par_note_end_task);
2046   g1h->set_par_threads(0);
2047   g1h->check_gc_time_stamps();
2048 
2049   if (!cleanup_list_is_empty()) {
2050     // The cleanup list is not empty, so we'll have to process it
2051     // concurrently. Notify anyone else that might be wanting free
2052     // regions that there will be more free regions coming soon.
2053     g1h->set_free_regions_coming();
2054   }
2055 
2056   // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
2057   // call below, since it affects the metric by which we sort the heap regions.
2058   if (G1ScrubRemSets) {
2059     double rs_scrub_start = os::elapsedTime();
2060     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
2061     g1h->set_par_threads((int)n_workers);
2062     g1h->workers()->run_task(&g1_par_scrub_rs_task);
2063     g1h->set_par_threads(0);
2064 
2065     double rs_scrub_end = os::elapsedTime();
2066     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2067     _total_rs_scrub_time += this_rs_scrub_time;
2068   }
2069 
2070   // this will also free any regions totally full of garbage objects,
2071   // and sort the regions.
2072   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2073 
2074   // Statistics.
2075   double end = os::elapsedTime();
2076   _cleanup_times.add((end - start) * 1000.0);
2077 
2078   if (G1Log::fine()) {
2079     g1h->g1_policy()->print_heap_transition(start_used_bytes);
2080   }
2081 
2082   // Clean up will have freed any regions completely full of garbage.
2083   // Update the soft reference policy with the new heap occupancy.
2084   Universe::update_heap_info_at_gc();
2085 
2086   if (VerifyDuringGC) {
2087     HandleMark hm;  // handle scope
2088     g1h->prepare_for_verify();
2089     Universe::verify(VerifyOption_G1UsePrevMarking,
2090                      " VerifyDuringGC:(after)");
2091   }
2092 
2093   g1h->check_bitmaps("Cleanup End");
2094 
2095   g1h->verify_region_sets_optional();
2096 
2097   // We need to make this be a "collection" so any collection pause that
2098   // races with it goes around and waits for completeCleanup to finish.
2099   g1h->increment_total_collections();
2100 
2101   // Clean out dead classes and update Metaspace sizes.
2102   if (ClassUnloadingWithConcurrentMark) {
2103     ClassLoaderDataGraph::purge();
2104   }
2105   MetaspaceGC::compute_new_size();
2106 
2107   // We reclaimed old regions so we should calculate the sizes to make
2108   // sure we update the old gen/space data.
2109   g1h->g1mm()->update_sizes();
2110   g1h->allocation_context_stats().update_after_mark();
2111 
2112   g1h->trace_heap_after_concurrent_cycle();
2113 }
2114 
2115 void ConcurrentMark::completeCleanup() {
2116   if (has_aborted()) return;
2117 
2118   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2119 
2120   _cleanup_list.verify_optional();
2121   FreeRegionList tmp_free_list("Tmp Free List");
2122 
2123   if (G1ConcRegionFreeingVerbose) {
2124     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2125                            "cleanup list has %u entries",
2126                            _cleanup_list.length());
2127   }
2128 
2129   // No one else should be accessing the _cleanup_list at this point,
2130   // so it is not necessary to take any locks
2131   while (!_cleanup_list.is_empty()) {
2132     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
2133     assert(hr != NULL, "Got NULL from a non-empty list");
2134     hr->par_clear();
2135     tmp_free_list.add_ordered(hr);
2136 
2137     // Instead of adding one region at a time to the secondary_free_list,
2138     // we accumulate them in the local list and move them a few at a
2139     // time. This also cuts down on the number of notify_all() calls
2140     // we do during this process. We'll also append the local list when
2141     // _cleanup_list is empty (which means we just removed the last
2142     // region from the _cleanup_list).
2143     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2144         _cleanup_list.is_empty()) {
2145       if (G1ConcRegionFreeingVerbose) {
2146         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2147                                "appending %u entries to the secondary_free_list, "
2148                                "cleanup list still has %u entries",
2149                                tmp_free_list.length(),
2150                                _cleanup_list.length());
2151       }
2152 
2153       {
2154         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2155         g1h->secondary_free_list_add(&tmp_free_list);
2156         SecondaryFreeList_lock->notify_all();
2157       }
2158 #ifndef PRODUCT
2159       if (G1StressConcRegionFreeing) {
2160         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2161           os::sleep(Thread::current(), (jlong) 1, false);
2162         }
2163       }
2164 #endif
2165     }
2166   }
2167   assert(tmp_free_list.is_empty(), "post-condition");
2168 }
2169 
2170 // Supporting Object and Oop closures for reference discovery
2171 // and processing during marking
2172 
2173 bool G1CMIsAliveClosure::do_object_b(oop obj) {
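       // An entry is considered live if it is non-NULL and either lies outside
       // the G1 reserved heap or has not been found dead by the marking
       // (i.e. it is not "ill").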
2174   HeapWord* addr = (HeapWord*)obj;
2175   return addr != NULL &&
2176          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2177 }
2178 
2179 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2180 // Uses the CMTask associated with a worker thread (for serial reference
2181 // processing the CMTask for worker 0 is used) to preserve (mark) and
2182 // trace referent objects.
2183 //
2184 // Using the CMTask and embedded local queues avoids having the worker
2185 // threads operating on the global mark stack. This reduces the risk
2186 // of overflowing the stack - which we would rather avoid at this late
2187 // stage. Also using the tasks' local queues removes the potential
2188 // of the workers interfering with each other that could occur if
2189 // operating on the global stack.
2190 
2191 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2192   ConcurrentMark* _cm;
2193   CMTask*         _task;
2194   int             _ref_counter_limit;
2195   int             _ref_counter;
2196   bool            _is_serial;
2197  public:
2198   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2199     _cm(cm), _task(task), _is_serial(is_serial),
2200     _ref_counter_limit(G1RefProcDrainInterval) {
2201     assert(_ref_counter_limit > 0, "sanity");
2202     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2203     _ref_counter = _ref_counter_limit;
2204   }
2205 
2206   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2207   virtual void do_oop(      oop* p) { do_oop_work(p); }
2208 
2209   template <class T> void do_oop_work(T* p) {
2210     if (!_cm->has_overflown()) {
2211       oop obj = oopDesc::load_decode_heap_oop(p);
2212       if (_cm->verbose_high()) {
2213         gclog_or_tty->print_cr("\t[%u] we're looking at location "
2214                                "*"PTR_FORMAT" = "PTR_FORMAT,
2215                                _task->worker_id(), p2i(p), p2i((void*) obj));
2216       }
2217 
2218       _task->deal_with_reference(obj);
2219       _ref_counter--;
2220 
2221       if (_ref_counter == 0) {
2222         // We have dealt with _ref_counter_limit references, pushing them
2223         // and objects reachable from them on to the local stack (and
2224         // possibly the global stack). Call CMTask::do_marking_step() to
2225         // process these entries.
2226         //
2227         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2228         // there's nothing more to do (i.e. we're done with the entries that
2229         // were pushed as a result of the CMTask::deal_with_reference() calls
2230         // above) or we overflow.
2231         //
2232         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2233         // flag while there may still be some work to do. (See the comment at
2234         // the beginning of CMTask::do_marking_step() for those conditions -
2235         // one of which is reaching the specified time target.) It is only
2236         // when CMTask::do_marking_step() returns without setting the
2237         // has_aborted() flag that the marking step has completed.
2238         do {
2239           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2240           _task->do_marking_step(mark_step_duration_ms,
2241                                  false      /* do_termination */,
2242                                  _is_serial);
2243         } while (_task->has_aborted() && !_cm->has_overflown());
2244         _ref_counter = _ref_counter_limit;
2245       }
2246     } else {
2247       if (_cm->verbose_high()) {
2248          gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2249       }
2250     }
2251   }
2252 };
2253 
2254 // 'Drain' oop closure used by both serial and parallel reference processing.
2255 // Uses the CMTask associated with a given worker thread (for serial
2256 // reference processing the CMTask for worker 0 is used). Calls the
2257 // do_marking_step routine, with an unbelievably large timeout value,
2258 // to drain the marking data structures of the remaining entries
2259 // added by the 'keep alive' oop closure above.
2260 
2261 class G1CMDrainMarkingStackClosure: public VoidClosure {
2262   ConcurrentMark* _cm;
2263   CMTask*         _task;
2264   bool            _is_serial;
2265  public:
2266   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2267     _cm(cm), _task(task), _is_serial(is_serial) {
2268     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2269   }
2270 
2271   void do_void() {
2272     do {
2273       if (_cm->verbose_high()) {
2274         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2275                                _task->worker_id(), BOOL_TO_STR(_is_serial));
2276       }
2277 
2278       // We call CMTask::do_marking_step() to completely drain the local
2279       // and global marking stacks of entries pushed by the 'keep alive'
2280       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2281       //
2282       // CMTask::do_marking_step() is called in a loop, which we'll exit
2283       // if there's nothing more to do (i.e. we've completely drained the
2284       // entries that were pushed as a result of applying the 'keep alive'
2285       // closure to the entries on the discovered ref lists) or we overflow
2286       // the global marking stack.
2287       //
2288       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2289       // flag while there may still be some work to do. (See the comment at
2290       // the beginning of CMTask::do_marking_step() for those conditions -
2291       // one of which is reaching the specified time target.) It is only
2292       // when CMTask::do_marking_step() returns without setting the
2293       // has_aborted() flag that the marking step has completed.
2294 
2295       _task->do_marking_step(1000000000.0 /* something very large */,
2296                              true         /* do_termination */,
2297                              _is_serial);
2298     } while (_task->has_aborted() && !_cm->has_overflown());
2299   }
2300 };
2301 
2302 // Implementation of AbstractRefProcTaskExecutor for parallel
2303 // reference processing at the end of G1 concurrent marking
2304 
2305 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2306 private:
2307   G1CollectedHeap* _g1h;
2308   ConcurrentMark*  _cm;
2309   WorkGang*        _workers;
2310   uint             _active_workers;
2311 
2312 public:
2313   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2314                           ConcurrentMark* cm,
2315                           WorkGang* workers,
2316                           uint n_workers) :
2317     _g1h(g1h), _cm(cm),
2318     _workers(workers), _active_workers(n_workers) { }
2319 
2320   // Executes the given task using concurrent marking worker threads.
2321   virtual void execute(ProcessTask& task);
2322   virtual void execute(EnqueueTask& task);
2323 };
2324 
2325 class G1CMRefProcTaskProxy: public AbstractGangTask {
2326   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2327   ProcessTask&     _proc_task;
2328   G1CollectedHeap* _g1h;
2329   ConcurrentMark*  _cm;
2330 
2331 public:
2332   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2333                      G1CollectedHeap* g1h,
2334                      ConcurrentMark* cm) :
2335     AbstractGangTask("Process reference objects in parallel"),
2336     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2337     ReferenceProcessor* rp = _g1h->ref_processor_cm();
2338     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2339   }
2340 
2341   virtual void work(uint worker_id) {
2342     ResourceMark rm;
2343     HandleMark hm;
2344     CMTask* task = _cm->task(worker_id);
2345     G1CMIsAliveClosure g1_is_alive(_g1h);
2346     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2347     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2348 
2349     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2350   }
2351 };
2352 
2353 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2354   assert(_workers != NULL, "Need parallel worker threads.");
2355   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2356 
2357   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2358 
2359   // We need to reset the concurrency level before each
2360   // proxy task execution, so that the termination protocol
2361   // and overflow handling in CMTask::do_marking_step() knows
2362   // how many workers to wait for.
2363   _cm->set_concurrency(_active_workers);
2364   _g1h->set_par_threads(_active_workers);
2365   _workers->run_task(&proc_task_proxy);
2366   _g1h->set_par_threads(0);
2367 }
2368 
2369 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2370   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2371   EnqueueTask& _enq_task;
2372 
2373 public:
2374   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2375     AbstractGangTask("Enqueue reference objects in parallel"),
2376     _enq_task(enq_task) { }
2377 
2378   virtual void work(uint worker_id) {
2379     _enq_task.work(worker_id);
2380   }
2381 };
2382 
2383 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2384   assert(_workers != NULL, "Need parallel worker threads.");
2385   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2386 
2387   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2388 
2389   // Not strictly necessary but...
2390   //
2391   // We need to reset the concurrency level before each
2392   // proxy task execution, so that the termination protocol
2393   // and overflow handling in CMTask::do_marking_step() knows
2394   // how many workers to wait for.
2395   _cm->set_concurrency(_active_workers);
2396   _g1h->set_par_threads(_active_workers);
2397   _workers->run_task(&enq_task_proxy);
2398   _g1h->set_par_threads(0);
2399 }
2400 
2401 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2402   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2403 }
2404 
2405 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2406   if (has_overflown()) {
2407     // Skip processing the discovered references if we have
2408     // overflown the global marking stack. Reference objects
2409     // only get discovered once so it is OK to not
2410     // de-populate the discovered reference lists. We could have,
2411     // but the only benefit would be that, when marking restarts,
2412     // fewer reference objects are discovered.
2413     return;
2414   }
2415 
2416   ResourceMark rm;
2417   HandleMark   hm;
2418 
2419   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2420 
2421   // Is alive closure.
2422   G1CMIsAliveClosure g1_is_alive(g1h);
2423 
2424   // Inner scope to exclude the cleaning of the string and symbol
2425   // tables from the displayed time.
2426   {
2427     G1CMTraceTime t("GC ref-proc", G1Log::finer());
2428 
2429     ReferenceProcessor* rp = g1h->ref_processor_cm();
2430 
2431     // See the comment in G1CollectedHeap::ref_processing_init()
2432     // about how reference processing currently works in G1.
2433 
2434     // Set the soft reference policy
2435     rp->setup_policy(clear_all_soft_refs);
2436     assert(_markStack.isEmpty(), "mark stack should be empty");
2437 
2438     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2439     // in serial reference processing. Note these closures are also
2440     // used for serially processing (by the current thread) the
2441     // JNI references during parallel reference processing.
2442     //
2443     // These closures do not need to synchronize with the worker
2444     // threads involved in parallel reference processing as these
2445     // instances are executed serially by the current thread (e.g.
2446     // reference processing is not multi-threaded and is thus
2447     // performed by the current thread instead of a gang worker).
2448     //
2449     // The gang tasks involved in parallel reference processing create
2450     // their own instances of these closures, which do their own
2451     // synchronization among themselves.
2452     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2453     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2454 
2455     // We need at least one active thread. If reference processing
2456     // is not multi-threaded we use the current (VMThread) thread,
2457     // otherwise we use the work gang from the G1CollectedHeap and
2458     // we utilize all the worker threads we can.
2459     bool processing_is_mt = rp->processing_is_mt();
2460     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
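         // Clamp to the range [1, _max_worker_id]: each reference processing
         // worker uses the CMTask with the matching worker id, so we cannot
         // use more workers than we have marking tasks.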
2461     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2462 
2463     // Parallel processing task executor.
2464     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2465                                               g1h->workers(), active_workers);
2466     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2467 
2468     // Set the concurrency level. The phase was already set prior to
2469     // executing the remark task.
2470     set_concurrency(active_workers);
2471 
2472     // Set the degree of MT processing here.  If the discovery was done MT,
2473     // the number of threads involved during discovery could differ from
2474     // the number of active workers.  This is OK as long as the discovered
2475     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2476     rp->set_active_mt_degree(active_workers);
2477 
2478     // Process the weak references.
2479     const ReferenceProcessorStats& stats =
2480         rp->process_discovered_references(&g1_is_alive,
2481                                           &g1_keep_alive,
2482                                           &g1_drain_mark_stack,
2483                                           executor,
2484                                           g1h->gc_timer_cm(),
2485                                           concurrent_gc_id());
2486     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2487 
2488     // The do_oop work routines of the keep_alive and drain_marking_stack
2489     // oop closures will set the has_overflown flag if we overflow the
2490     // global marking stack.
2491 
2492     assert(_markStack.overflow() || _markStack.isEmpty(),
2493             "mark stack should be empty (unless it overflowed)");
2494 
2495     if (_markStack.overflow()) {
2496       // This should have been done already when we tried to push an
2497       // entry on to the global mark stack. But let's do it again.
2498       set_has_overflown();
2499     }
2500 
2501     assert(rp->num_q() == active_workers, "why not");
2502 
2503     rp->enqueue_discovered_references(executor);
2504 
2505     rp->verify_no_references_recorded();
2506     assert(!rp->discovery_enabled(), "Post condition");
2507   }
2508 
2509   if (has_overflown()) {
2510     // We can not trust g1_is_alive if the marking stack overflowed
2511     return;
2512   }
2513 
2514   assert(_markStack.isEmpty(), "Marking should have completed");
2515 
2516   // Unload Klasses, String, Symbols, Code Cache, etc.
2517   {
2518     G1CMTraceTime trace("Unloading", G1Log::finer());
2519 
2520     if (ClassUnloadingWithConcurrentMark) {
2521       bool purged_classes;
2522 
2523       {
2524         G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest());
2525         purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2526       }
2527 
2528       {
2529         G1CMTraceTime trace("Parallel Unloading", G1Log::finest());
2530         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2531       }
2532     }
2533 
2534     if (G1StringDedup::is_enabled()) {
2535       G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest());
2536       G1StringDedup::unlink(&g1_is_alive);
2537     }
2538   }
2539 }
2540 
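     // Called at the end of the cleanup pause: the "next" bitmap now contains
     // the complete marking information for the cycle that just finished, so
     // it becomes the new "prev" bitmap; the old "prev" bitmap is expected to
     // be cleared and reused as "next" for the following cycle.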
2541 void ConcurrentMark::swapMarkBitMaps() {
2542   CMBitMapRO* temp = _prevMarkBitMap;
2543   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2544   _nextMarkBitMap  = (CMBitMap*)  temp;
2545 }
2546 
2547 // Closure for marking entries in SATB buffers.
2548 class CMSATBBufferClosure : public SATBBufferClosure {
2549 private:
2550   CMTask* _task;
2551   G1CollectedHeap* _g1h;
2552 
2553   // This is very similar to CMTask::deal_with_reference, but with
2554   // more relaxed requirements for the argument, so this must be more
2555   // circumspect about treating the argument as an object.
2556   void do_entry(void* entry) const {
2557     _task->increment_refs_reached();
2558     HeapRegion* hr = _g1h->heap_region_containing_raw(entry);
2559     if (entry < hr->next_top_at_mark_start()) {
2560       // Until we get here, we don't know whether entry refers to a valid
2561       // object; it could instead have been a stale reference.
2562       oop obj = static_cast<oop>(entry);
2563       assert(obj->is_oop(true /* ignore mark word */),
2564              err_msg("Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj)));
2565       _task->make_reference_grey(obj, hr);
2566     }
2567   }
2568 
2569 public:
2570   CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2571     : _task(task), _g1h(g1h) { }
2572 
2573   virtual void do_buffer(void** buffer, size_t size) {
2574     for (size_t i = 0; i < size; ++i) {
2575       do_entry(buffer[i]);
2576     }
2577   }
2578 };
2579 
2580 class G1RemarkThreadsClosure : public ThreadClosure {
2581   CMSATBBufferClosure _cm_satb_cl;
2582   G1CMOopClosure _cm_cl;
2583   MarkingCodeBlobClosure _code_cl;
2584   int _thread_parity;
2585 
2586  public:
2587   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2588     _cm_satb_cl(task, g1h),
2589     _cm_cl(g1h, g1h->concurrent_mark(), task),
2590     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2591     _thread_parity(Threads::thread_claim_parity()) {}
2592 
2593   void do_thread(Thread* thread) {
2594     if (thread->is_Java_thread()) {
2595       if (thread->claim_oops_do(true, _thread_parity)) {
2596         JavaThread* jt = (JavaThread*)thread;
2597 
2598         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking,
2599         // however oops reachable from nmethods have very complex lifecycles:
2600         // * Alive if on the stack of an executing method
2601         // * Weakly reachable otherwise
2602         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2603         // live by the SATB invariant but other oops recorded in nmethods may behave differently.
2604         jt->nmethods_do(&_code_cl);
2605 
2606         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2607       }
2608     } else if (thread->is_VM_thread()) {
2609       if (thread->claim_oops_do(true, _thread_parity)) {
2610         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2611       }
2612     }
2613   }
2614 };
2615 
2616 class CMRemarkTask: public AbstractGangTask {
2617 private:
2618   ConcurrentMark* _cm;
2619 public:
2620   void work(uint worker_id) {
2621     // Since all available tasks are actually started, we should
2622     // only proceed if we're supposed to be active.
2623     if (worker_id < _cm->active_tasks()) {
2624       CMTask* task = _cm->task(worker_id);
2625       task->record_start_time();
2626       {
2627         ResourceMark rm;
2628         HandleMark hm;
2629 
2630         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2631         Threads::threads_do(&threads_f);
2632       }
2633 
2634       do {
2635         task->do_marking_step(1000000000.0 /* something very large */,
2636                               true         /* do_termination       */,
2637                               false        /* is_serial            */);
2638       } while (task->has_aborted() && !_cm->has_overflown());
2639       // If we overflow, then we do not want to restart. We instead
2640       // want to abort remark and do concurrent marking again.
2641       task->record_end_time();
2642     }
2643   }
2644 
2645   CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2646     AbstractGangTask("Par Remark"), _cm(cm) {
2647     _cm->terminator()->reset_for_reuse(active_workers);
2648   }
2649 };
2650 
2651 void ConcurrentMark::checkpointRootsFinalWork() {
2652   ResourceMark rm;
2653   HandleMark   hm;
2654   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2655 
2656   G1CMTraceTime trace("Finalize Marking", G1Log::finer());
2657 
2658   g1h->ensure_parsability(false);
2659 
2660   StrongRootsScope srs;
2661   // this is remark, so we'll use up all active threads
2662   uint active_workers = g1h->workers()->active_workers();
2663   if (active_workers == 0) {
2664     assert(active_workers > 0, "Should have been set earlier");
2665     active_workers = (uint) ParallelGCThreads;
2666     g1h->workers()->set_active_workers(active_workers);
2667   }
2668   set_concurrency_and_phase(active_workers, false /* concurrent */);
2669   // Leave _parallel_marking_threads at its
2670   // value originally calculated in the ConcurrentMark
2671   // constructor and pass values of the active workers
2672   // through the gang in the task.
2673 
2674   CMRemarkTask remarkTask(this, active_workers);
2675   // We will start all available threads, even if we decide that the
2676   // active_workers will be fewer. The extra ones will just bail out
2677   // immediately.
2678   g1h->set_par_threads(active_workers);
2679   g1h->workers()->run_task(&remarkTask);
2680   g1h->set_par_threads(0);
2681 
2682   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2683   guarantee(has_overflown() ||
2684             satb_mq_set.completed_buffers_num() == 0,
2685             err_msg("Invariant: has_overflown = %s, num buffers = %d",
2686                     BOOL_TO_STR(has_overflown()),
2687                     satb_mq_set.completed_buffers_num()));
2688 
2689   print_stats();
2690 }
2691 
2692 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2693   // Note we are overriding the read-only view of the prev map here, via
2694   // the cast.
2695   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2696 }
2697 
2698 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2699   _nextMarkBitMap->clearRange(mr);
2700 }
2701 
2702 HeapRegion*
2703 ConcurrentMark::claim_region(uint worker_id) {
2704   // "checkpoint" the finger
2705   HeapWord* finger = _finger;
2706 
2707   // _heap_end will not change underneath our feet; it only changes at
2708   // yield points.
2709   while (finger < _heap_end) {
2710     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2711 
2712     // Note on how this code handles humongous regions. In the
2713     // normal case the finger will reach the start of a "starts
2714     // humongous" (SH) region. Its end will either be the end of the
2715     // last "continues humongous" (CH) region in the sequence, or the
2716     // standard end of the SH region (if the SH is the only region in
2717     // the sequence). That way claim_region() will skip over the CH
2718     // regions. However, there is a subtle race between a CM thread
2719     // executing this method and a mutator thread doing a humongous
2720     // object allocation. The two are not mutually exclusive as the CM
2721     // thread does not need to hold the Heap_lock when it gets
2722     // here. So there is a chance that claim_region() will come across
2723     // a free region that's in the progress of becoming a SH or a CH
2724     // region. In the former case, it will either
2725     //   a) Miss the update to the region's end, in which case it will
2726     //      visit every subsequent CH region, will find their bitmaps
2727     //      empty, and do nothing, or
2728     //   b) Will observe the update of the region's end (in which case
2729     //      it will skip the subsequent CH regions).
2730     // If it comes across a region that suddenly becomes CH, the
2731     // scenario will be similar to b). So, the race between
2732     // claim_region() and a humongous object allocation might force us
2733     // to do a bit of unnecessary work (due to some unnecessary bitmap
2734     // iterations) but it should not introduce any correctness issues.
2735     HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2736 
2737     // Above heap_region_containing_raw may return NULL as we always claim
2738     // until the end of the heap. In this case, just jump to the next region.
2739     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2740 
2741     // Is the gap between reading the finger and doing the CAS too long?
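         // Try to atomically advance the global finger from 'finger' to 'end'.
         // If the CAS succeeds this worker has claimed [finger, end); if it
         // fails, another worker moved the finger first and we retry below
         // with the re-read value.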
2742     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2743     if (res == finger && curr_region != NULL) {
2744       // we succeeded
2745       HeapWord*   bottom        = curr_region->bottom();
2746       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2747 
2748       if (verbose_low()) {
2749         gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2750                                "["PTR_FORMAT", "PTR_FORMAT"), "
2751                                "limit = "PTR_FORMAT,
2752                                worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2753       }
2754 
2755       // notice that _finger == end cannot be guaranteed here since
2756       // someone else might have moved the finger even further.
2757       assert(_finger >= end, "the finger should have moved forward");
2758 
2759       if (verbose_low()) {
2760         gclog_or_tty->print_cr("[%u] we were successful with region = "
2761                                PTR_FORMAT, worker_id, p2i(curr_region));
2762       }
2763 
2764       if (limit > bottom) {
2765         if (verbose_low()) {
2766           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
2767                                  "returning it ", worker_id, p2i(curr_region));
2768         }
2769         return curr_region;
2770       } else {
2771         assert(limit == bottom,
2772                "the region limit should be at bottom");
2773         if (verbose_low()) {
2774           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
2775                                  "returning NULL", worker_id, p2i(curr_region));
2776         }
2777         // we return NULL and the caller should try calling
2778         // claim_region() again.
2779         return NULL;
2780       }
2781     } else {
2782       assert(_finger > finger, "the finger should have moved forward");
2783       if (verbose_low()) {
2784         if (curr_region == NULL) {
2785           gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, "
2786                                  "global finger = "PTR_FORMAT", "
2787                                  "our finger = "PTR_FORMAT,
2788                                  worker_id, p2i(_finger), p2i(finger));
2789         } else {
2790           gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
2791                                  "global finger = "PTR_FORMAT", "
2792                                  "our finger = "PTR_FORMAT,
2793                                  worker_id, p2i(_finger), p2i(finger));
2794         }
2795       }
2796 
2797       // read it again
2798       finger = _finger;
2799     }
2800   }
2801 
2802   return NULL;
2803 }
2804 
2805 #ifndef PRODUCT
2806 enum VerifyNoCSetOopsPhase {
2807   VerifyNoCSetOopsStack,
2808   VerifyNoCSetOopsQueues
2809 };
2810 
2811 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
2812 private:
2813   G1CollectedHeap* _g1h;
2814   VerifyNoCSetOopsPhase _phase;
2815   int _info;
2816 
2817   const char* phase_str() {
2818     switch (_phase) {
2819     case VerifyNoCSetOopsStack:         return "Stack";
2820     case VerifyNoCSetOopsQueues:        return "Queue";
2821     default:                            ShouldNotReachHere();
2822     }
2823     return NULL;
2824   }
2825 
2826   void do_object_work(oop obj) {
2827     guarantee(!_g1h->obj_in_cs(obj),
2828               err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2829                       p2i((void*) obj), phase_str(), _info));
2830   }
2831 
2832 public:
2833   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2834 
2835   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2836     _phase = phase;
2837     _info = info;
2838   }
2839 
2840   virtual void do_oop(oop* p) {
2841     oop obj = oopDesc::load_decode_heap_oop(p);
2842     do_object_work(obj);
2843   }
2844 
2845   virtual void do_oop(narrowOop* p) {
2846     // We should not come across narrow oops while scanning marking
2847     // stacks
2848     ShouldNotReachHere();
2849   }
2850 
2851   virtual void do_object(oop obj) {
2852     do_object_work(obj);
2853   }
2854 };
2855 
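     // Verify, at a safepoint while marking is in progress, that no entry on the
     // global mark stack or on any task queue points into the collection set, that
     // the global finger points at a region boundary, and that no task finger points
     // into the middle of a collection set region.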
2856 void ConcurrentMark::verify_no_cset_oops() {
2857   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2858   if (!G1CollectedHeap::heap()->mark_in_progress()) {
2859     return;
2860   }
2861 
2862   VerifyNoCSetOopsClosure cl;
2863 
2864   // Verify entries on the global mark stack
2865   cl.set_phase(VerifyNoCSetOopsStack);
2866   _markStack.oops_do(&cl);
2867 
2868   // Verify entries on the task queues
2869   for (uint i = 0; i < _max_worker_id; i += 1) {
2870     cl.set_phase(VerifyNoCSetOopsQueues, i);
2871     CMTaskQueue* queue = _task_queues->queue(i);
2872     queue->oops_do(&cl);
2873   }
2874 
2875   // Verify the global finger
2876   HeapWord* global_finger = finger();
2877   if (global_finger != NULL && global_finger < _heap_end) {
2878     // The global finger always points to a heap region boundary. We
2879     // use heap_region_containing_raw() to get the containing region
2880     // given that the global finger could be pointing to a free region
2881     // which subsequently becomes a continues humongous region. If
2882     // that happens, heap_region_containing() would return the
2883     // corresponding starts humongous region, whose bottom() differs
2884     // from the finger, so the check below would no longer hold.
2885     // Since we always iterate over all regions, we might get a NULL HeapRegion
2886     // here.
2887     HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2888     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2889               err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
2890                       p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
2891   }
2892 
2893   // Verify the task fingers
2894   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2895   for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
2896     CMTask* task = _tasks[i];
2897     HeapWord* task_finger = task->finger();
2898     if (task_finger != NULL && task_finger < _heap_end) {
2899       // See above note on the global finger verification.
2900       HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2901       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2902                 !task_hr->in_collection_set(),
2903                 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
2904                         p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
2905     }
2906   }
2907 }
2908 #endif // PRODUCT
2909 
2910 // Aggregate the counting data that was constructed concurrently
2911 // with marking.
2912 class AggregateCountDataHRClosure: public HeapRegionClosure {
2913   G1CollectedHeap* _g1h;
2914   ConcurrentMark* _cm;
2915   CardTableModRefBS* _ct_bs;
2916   BitMap* _cm_card_bm;
2917   uint _max_worker_id;
2918 
2919  public:
2920   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2921                               BitMap* cm_card_bm,
2922                               uint max_worker_id) :
2923     _g1h(g1h), _cm(g1h->concurrent_mark()),
2924     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2925     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2926 
2927   bool doHeapRegion(HeapRegion* hr) {
2928     if (hr->is_continues_humongous()) {
2929       // We will ignore these here and process them when their
2930       // associated "starts humongous" region is processed.
2931       // Note that we cannot rely on their associated
2932       // "starts humongous" region to have its bit set to 1
2933       // since, due to the region chunking in the parallel region
2934       // iteration, a "continues humongous" region might be visited
2935       // before its associated "starts humongous".
2936       return false;
2937     }
2938 
2939     HeapWord* start = hr->bottom();
2940     HeapWord* limit = hr->next_top_at_mark_start();
2941     HeapWord* end = hr->end();
2942 
2943     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2944            err_msg("Preconditions not met - "
2945                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
2946                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
2947                    p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));
2948 
2949     assert(hr->next_marked_bytes() == 0, "Precondition");
2950 
2951     if (start == limit) {
2952       // NTAMS of this region has not been set so nothing to do.
2953       return false;
2954     }
2955 
2956     // 'start' should be in the heap.
2957     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2958     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2959     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2960 
2961     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2962     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2963     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2964 
2965     // If ntams is not card aligned then we bump the card bitmap index
2966     // for limit so that we get all the cards spanned by
2967     // the object ending at ntams.
2968     // Note: if this is the last region in the heap then ntams
2969     // could actually be just beyond the end of the heap;
2970     // limit_idx will then correspond to a (non-existent) card
2971     // that is also outside the heap.
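         // Illustrative example (assuming the usual 512-byte cards): if limit (ntams)
         // falls part-way into card k, then card_bitmap_index_for(limit) == k and the
         // bump below makes limit_idx == k + 1, so the half-open range
         // [start_idx, limit_idx) still covers card k, which is partially spanned by
         // the last object below ntams.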
2972     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2973       limit_idx += 1;
2974     }
2975 
2976     assert(limit_idx <= end_idx, "or else use atomics");
2977 
2978     // Aggregate the "stripe" in the count data associated with hr.
2979     uint hrm_index = hr->hrm_index();
2980     size_t marked_bytes = 0;
2981 
2982     for (uint i = 0; i < _max_worker_id; i += 1) {
2983       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2984       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2985 
2986       // Fetch the marked_bytes in this region for task i and
2987       // add it to the running total for this region.
2988       marked_bytes += marked_bytes_array[hrm_index];
2989 
2990       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2991       // into the global card bitmap.
2992       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2993 
2994       while (scan_idx < limit_idx) {
2995         assert(task_card_bm->at(scan_idx) == true, "should be");
2996         _cm_card_bm->set_bit(scan_idx);
2997         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2998 
2999         // BitMap::get_next_one_offset() can handle the case when
3000         // its left_offset parameter is greater than its right_offset
3001         // parameter. It does, however, have an early exit if
3002         // left_offset == right_offset. So let's limit the value
3003         // passed in for left offset here.
3004         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3005         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3006       }
3007     }
3008 
3009     // Update the marked bytes for this region.
3010     hr->add_to_marked_bytes(marked_bytes);
3011 
3012     // Next heap region
3013     return false;
3014   }
3015 };
3016 
3017 class G1AggregateCountDataTask: public AbstractGangTask {
3018 protected:
3019   G1CollectedHeap* _g1h;
3020   ConcurrentMark* _cm;
3021   BitMap* _cm_card_bm;
3022   uint _max_worker_id;
3023   uint _active_workers;
3024   HeapRegionClaimer _hrclaimer;
3025 
3026 public:
3027   G1AggregateCountDataTask(G1CollectedHeap* g1h,
3028                            ConcurrentMark* cm,
3029                            BitMap* cm_card_bm,
3030                            uint max_worker_id,
3031                            uint n_workers) :
3032       AbstractGangTask("Count Aggregation"),
3033       _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3034       _max_worker_id(max_worker_id),
3035       _active_workers(n_workers),
3036       _hrclaimer(_active_workers) {
3037   }
3038 
3039   void work(uint worker_id) {
3040     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
3041 
3042     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
3043   }
3044 };
3045 
3046 
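     // Run a gang task that folds each worker's per-region marked-bytes counts and
     // per-worker card bitmaps into the global counting structures.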
3047 void ConcurrentMark::aggregate_count_data() {
3048   uint n_workers = _g1h->workers()->active_workers();
3049 
3050   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3051                                            _max_worker_id, n_workers);
3052 
3053   _g1h->set_par_threads(n_workers);
3054   _g1h->workers()->run_task(&g1_par_agg_task);
3055   _g1h->set_par_threads(0);
3056 }
3057 
3058 // Clear the global counting bitmaps as well as the per-worker per-region counting data
3059 void ConcurrentMark::clear_all_count_data() {
3060   // Clear the global card bitmap - it will be filled during
3061   // liveness count aggregation (during remark) and the
3062   // final counting task.
3063   _card_bm.clear();
3064 
3065   // Clear the global region bitmap - it will be filled as part
3066   // of the final counting task.
3067   _region_bm.clear();
3068 
3069   uint max_regions = _g1h->max_regions();
3070   assert(_max_worker_id > 0, "uninitialized");
3071 
3072   for (uint i = 0; i < _max_worker_id; i += 1) {
3073     BitMap* task_card_bm = count_card_bitmap_for(i);
3074     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3075 
3076     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3077     assert(marked_bytes_array != NULL, "uninitialized");
3078 
3079     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3080     task_card_bm->clear();
3081   }
3082 }
3083 
3084 void ConcurrentMark::print_stats() {
3085   if (verbose_stats()) {
3086     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3087     for (size_t i = 0; i < _active_tasks; ++i) {
3088       _tasks[i]->print_stats();
3089       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3090     }
3091   }
3092 }
3093 
3094 // abandon current marking iteration due to a Full GC
3095 void ConcurrentMark::abort() {
3096   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
3097   // concurrent bitmap clearing.
3098   _nextMarkBitMap->clearAll();
3099 
3100   // Note we cannot clear the previous marking bitmap here
3101   // since VerifyDuringGC verifies the objects marked during
3102   // a full GC against the previous bitmap.
3103 
3104   // Clear the liveness counting data
3105   clear_all_count_data();
3106   // Empty mark stack
3107   reset_marking_state();
3108   for (uint i = 0; i < _max_worker_id; ++i) {
3109     _tasks[i]->clear_region_fields();
3110   }
3111   _first_overflow_barrier_sync.abort();
3112   _second_overflow_barrier_sync.abort();
3113   const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id();
3114   if (!gc_id.is_undefined()) {
3115     // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance
3116     // to detect that it was aborted. Only keep track of the first GC id that we aborted.
3117     _aborted_gc_id = gc_id;
3118    }
3119   _has_aborted = true;
3120 
3121   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3122   satb_mq_set.abandon_partial_marking();
3123   // This can be called either during or outside marking; we'll read
3124   // the expected_active value from the SATB queue set.
3125   satb_mq_set.set_active_all_threads(
3126                                  false, /* new active value */
3127                                  satb_mq_set.is_active() /* expected_active */);
3128 
3129   _g1h->trace_heap_after_concurrent_cycle();
3130   _g1h->register_concurrent_cycle_end();
3131 }
3132 
3133 const GCId& ConcurrentMark::concurrent_gc_id() {
3134   if (has_aborted()) {
3135     return _aborted_gc_id;
3136   }
3137   return _g1h->gc_tracer_cm()->gc_id();
3138 }
3139 
3140 static void print_ms_time_info(const char* prefix, const char* name,
3141                                NumberSeq& ns) {
3142   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3143                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3144   if (ns.num() > 0) {
3145     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3146                            prefix, ns.sd(), ns.maximum());
3147   }
3148 }
3149 
3150 void ConcurrentMark::print_summary_info() {
3151   gclog_or_tty->print_cr(" Concurrent marking:");
3152   print_ms_time_info("  ", "init marks", _init_times);
3153   print_ms_time_info("  ", "remarks", _remark_times);
3154   {
3155     print_ms_time_info("     ", "final marks", _remark_mark_times);
3156     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3157 
3158   }
3159   print_ms_time_info("  ", "cleanups", _cleanup_times);
3160   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3161                          _total_counting_time,
3162                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3163                           (double)_cleanup_times.num()
3164                          : 0.0));
3165   if (G1ScrubRemSets) {
3166     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3167                            _total_rs_scrub_time,
3168                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3169                             (double)_cleanup_times.num()
3170                            : 0.0));
3171   }
3172   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3173                          (_init_times.sum() + _remark_times.sum() +
3174                           _cleanup_times.sum())/1000.0);
3175   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3176                 "(%8.2f s marking).",
3177                 cmThread()->vtime_accum(),
3178                 cmThread()->vtime_mark_accum());
3179 }
3180 
3181 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3182   _parallel_workers->print_worker_threads_on(st);
3183 }
3184 
3185 void ConcurrentMark::print_on_error(outputStream* st) const {
3186   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
3187       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
3188   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
3189   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
3190 }
3191 
3192 // We take a break if someone is trying to stop the world.
3193 bool ConcurrentMark::do_yield_check(uint worker_id) {
3194   if (SuspendibleThreadSet::should_yield()) {
3195     if (worker_id == 0) {
3196       _g1h->g1_policy()->record_concurrent_pause();
3197     }
3198     SuspendibleThreadSet::yield();
3199     return true;
3200   } else {
3201     return false;
3202   }
3203 }
3204 
3205 #ifndef PRODUCT
3206 // for debugging purposes
3207 void ConcurrentMark::print_finger() {
3208   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3209                          p2i(_heap_start), p2i(_heap_end), p2i(_finger));
3210   for (uint i = 0; i < _max_worker_id; ++i) {
3211     gclog_or_tty->print("   %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
3212   }
3213   gclog_or_tty->cr();
3214 }
3215 #endif
3216 
3217 template<bool scan>
3218 inline void CMTask::process_grey_object(oop obj) {
3219   assert(scan || obj->is_typeArray(), "Skipping scan of grey non-typeArray");
3220   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3221 
3222   if (_cm->verbose_high()) {
3223     gclog_or_tty->print_cr("[%u] processing grey object " PTR_FORMAT,
3224                            _worker_id, p2i((void*) obj));
3225   }
3226 
3227   size_t obj_size = obj->size();
3228   _words_scanned += obj_size;
3229 
3230   if (scan) {
3231     obj->oop_iterate(_cm_oop_closure);
3232   }
3233   statsOnly( ++_objs_scanned );
3234   check_limits();
3235 }
3236 
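     // Two instantiations are used: <true> iterates over the object's references,
     // while <false> only accounts for the object's size without scanning it (valid
     // only for typeArrays, whose elements contain no references).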
3237 template void CMTask::process_grey_object<true>(oop);
3238 template void CMTask::process_grey_object<false>(oop);
3239 
3240 // Closure for iteration over bitmaps
3241 class CMBitMapClosure : public BitMapClosure {
3242 private:
3243   // the bitmap that is being iterated over
3244   CMBitMap*                   _nextMarkBitMap;
3245   ConcurrentMark*             _cm;
3246   CMTask*                     _task;
3247 
3248 public:
3249   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3250     _nextMarkBitMap(nextMarkBitMap), _cm(cm), _task(task) { }
3251 
3252   bool do_bit(size_t offset) {
3253     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3254     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3255     assert( addr < _cm->finger(), "invariant");
3256 
3257     statsOnly( _task->increase_objs_found_on_bitmap() );
3258     assert(addr >= _task->finger(), "invariant");
3259 
3260     // We move that task's local finger along.
3261     _task->move_finger_to(addr);
3262 
3263     _task->scan_object(oop(addr));
3264     // we only partially drain the local queue and global stack
3265     _task->drain_local_queue(true);
3266     _task->drain_global_stack(true);
3267 
3268     // if the has_aborted flag has been raised, we need to bail out of
3269     // the iteration
3270     return !_task->has_aborted();
3271   }
3272 };
3273 
3274 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3275                                ConcurrentMark* cm,
3276                                CMTask* task)
3277   : _g1h(g1h), _cm(cm), _task(task) {
3278   assert(_ref_processor == NULL, "should be initialized to NULL");
3279 
3280   if (G1UseConcMarkReferenceProcessing) {
3281     _ref_processor = g1h->ref_processor_cm();
3282     assert(_ref_processor != NULL, "should not be NULL");
3283   }
3284 }
3285 
3286 void CMTask::setup_for_region(HeapRegion* hr) {
3287   assert(hr != NULL,
3288         "claim_region() should have filtered out NULL regions");
3289   assert(!hr->is_continues_humongous(),
3290         "claim_region() should have filtered out continues humongous regions");
3291 
3292   if (_cm->verbose_low()) {
3293     gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3294                            _worker_id, p2i(hr));
3295   }
3296 
3297   _curr_region  = hr;
3298   _finger       = hr->bottom();
3299   update_region_limit();
3300 }
3301 
3302 void CMTask::update_region_limit() {
3303   HeapRegion* hr            = _curr_region;
3304   HeapWord* bottom          = hr->bottom();
3305   HeapWord* limit           = hr->next_top_at_mark_start();
3306 
3307   if (limit == bottom) {
3308     if (_cm->verbose_low()) {
3309       gclog_or_tty->print_cr("[%u] found an empty region "
3310                              "["PTR_FORMAT", "PTR_FORMAT")",
3311                              _worker_id, p2i(bottom), p2i(limit));
3312     }
3313     // The region was collected underneath our feet.
3314     // We set the finger to bottom to ensure that the bitmap
3315     // iteration that will follow this will not do anything.
3316     // (this is not a condition that holds when we set the region up,
3317     // as the region is not supposed to be empty in the first place)
3318     _finger = bottom;
3319   } else if (limit >= _region_limit) {
3320     assert(limit >= _finger, "peace of mind");
3321   } else {
3322     assert(limit < _region_limit, "only way to get here");
3323     // This can happen under some pretty unusual circumstances.  An
3324     // evacuation pause empties the region underneath our feet (NTAMS
3325     // at bottom). We then do some allocation in the region (NTAMS
3326     // stays at bottom), followed by the region being used as a GC
3327     // alloc region (NTAMS will move to top() and the objects
3328     // originally below it will be grayed). All objects now marked in
3329     // the region are explicitly grayed, if below the global finger,
3330     // and in fact we do not need to scan anything else. So, we simply
3331     // set _finger to be limit to ensure that the bitmap iteration
3332     // doesn't do anything.
3333     _finger = limit;
3334   }
3335 
3336   _region_limit = limit;
3337 }
3338 
3339 void CMTask::giveup_current_region() {
3340   assert(_curr_region != NULL, "invariant");
3341   if (_cm->verbose_low()) {
3342     gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3343                            _worker_id, p2i(_curr_region));
3344   }
3345   clear_region_fields();
3346 }
3347 
3348 void CMTask::clear_region_fields() {
3349   // Values for these three fields that indicate that we're not
3350   // holding on to a region.
3351   _curr_region   = NULL;
3352   _finger        = NULL;
3353   _region_limit  = NULL;
3354 }
3355 
3356 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3357   if (cm_oop_closure == NULL) {
3358     assert(_cm_oop_closure != NULL, "invariant");
3359   } else {
3360     assert(_cm_oop_closure == NULL, "invariant");
3361   }
3362   _cm_oop_closure = cm_oop_closure;
3363 }
3364 
3365 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3366   guarantee(nextMarkBitMap != NULL, "invariant");
3367 
3368   if (_cm->verbose_low()) {
3369     gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3370   }
3371 
3372   _nextMarkBitMap                = nextMarkBitMap;
3373   clear_region_fields();
3374 
3375   _calls                         = 0;
3376   _elapsed_time_ms               = 0.0;
3377   _termination_time_ms           = 0.0;
3378   _termination_start_time_ms     = 0.0;
3379 
3380 #if _MARKING_STATS_
3381   _aborted                       = 0;
3382   _aborted_overflow              = 0;
3383   _aborted_cm_aborted            = 0;
3384   _aborted_yield                 = 0;
3385   _aborted_timed_out             = 0;
3386   _aborted_satb                  = 0;
3387   _aborted_termination           = 0;
3388   _steal_attempts                = 0;
3389   _steals                        = 0;
3390   _local_pushes                  = 0;
3391   _local_pops                    = 0;
3392   _local_max_size                = 0;
3393   _objs_scanned                  = 0;
3394   _global_pushes                 = 0;
3395   _global_pops                   = 0;
3396   _global_max_size               = 0;
3397   _global_transfers_to           = 0;
3398   _global_transfers_from         = 0;
3399   _regions_claimed               = 0;
3400   _objs_found_on_bitmap          = 0;
3401   _satb_buffers_processed        = 0;
3402 #endif // _MARKING_STATS_
3403 }
3404 
3405 bool CMTask::should_exit_termination() {
3406   regular_clock_call();
3407   // This is called when we are in the termination protocol. We should
3408   // quit if, for some reason, this task wants to abort or the global
3409   // stack is not empty (this means that we can get work from it).
3410   return !_cm->mark_stack_empty() || has_aborted();
3411 }
3412 
3413 void CMTask::reached_limit() {
3414   assert(_words_scanned >= _words_scanned_limit ||
3415          _refs_reached >= _refs_reached_limit ,
3416          "shouldn't have been called otherwise");
3417   regular_clock_call();
3418 }
3419 
3420 void CMTask::regular_clock_call() {
3421   if (has_aborted()) return;
3422 
3423   // First, we need to recalculate the words scanned and refs reached
3424   // limits for the next clock call.
3425   recalculate_limits();
3426 
3427   // During the regular clock call we do the following
3428 
3429   // (1) If an overflow has been flagged, then we abort.
3430   if (_cm->has_overflown()) {
3431     set_has_aborted();
3432     return;
3433   }
3434 
3435   // If we are not concurrent (i.e. we're doing remark) we don't need
3436   // to check anything else. The other steps are only needed during
3437   // the concurrent marking phase.
3438   if (!concurrent()) return;
3439 
3440   // (2) If marking has been aborted for Full GC, then we also abort.
3441   if (_cm->has_aborted()) {
3442     set_has_aborted();
3443     statsOnly( ++_aborted_cm_aborted );
3444     return;
3445   }
3446 
3447   double curr_time_ms = os::elapsedVTime() * 1000.0;
3448 
3449   // (3) If marking stats are enabled, then we update the step history.
3450 #if _MARKING_STATS_
3451   if (_words_scanned >= _words_scanned_limit) {
3452     ++_clock_due_to_scanning;
3453   }
3454   if (_refs_reached >= _refs_reached_limit) {
3455     ++_clock_due_to_marking;
3456   }
3457 
3458   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3459   _interval_start_time_ms = curr_time_ms;
3460   _all_clock_intervals_ms.add(last_interval_ms);
3461 
3462   if (_cm->verbose_medium()) {
3463       gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3464                         "scanned = "SIZE_FORMAT"%s, refs reached = "SIZE_FORMAT"%s",
3465                         _worker_id, last_interval_ms,
3466                         _words_scanned,
3467                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3468                         _refs_reached,
3469                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3470   }
3471 #endif // _MARKING_STATS_
3472 
3473   // (4) We check whether we should yield. If we have to, then we abort.
3474   if (SuspendibleThreadSet::should_yield()) {
3475     // We should yield. To do this we abort the task. The caller is
3476     // responsible for yielding.
3477     set_has_aborted();
3478     statsOnly( ++_aborted_yield );
3479     return;
3480   }
3481 
3482   // (5) We check whether we've reached our time quota. If we have,
3483   // then we abort.
3484   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3485   if (elapsed_time_ms > _time_target_ms) {
3486     set_has_aborted();
3487     _has_timed_out = true;
3488     statsOnly( ++_aborted_timed_out );
3489     return;
3490   }
3491 
3492   // (6) Finally, we check whether there are enough completed SATB
3493   // buffers available for processing. If there are, we abort.
3494   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3495   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3496     if (_cm->verbose_low()) {
3497       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3498                              _worker_id);
3499     }
3500     // we do need to process SATB buffers, we'll abort and restart
3501     // the marking task to do so
3502     set_has_aborted();
3503     statsOnly( ++_aborted_satb );
3504     return;
3505   }
3506 }
3507 
3508 void CMTask::recalculate_limits() {
3509   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3510   _words_scanned_limit      = _real_words_scanned_limit;
3511 
3512   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3513   _refs_reached_limit       = _real_refs_reached_limit;
3514 }
3515 
3516 void CMTask::decrease_limits() {
3517   // This is called when we believe that we're going to do an infrequent
3518   // operation which will increase the per byte scanned cost (i.e. move
3519   // entries to/from the global stack). It basically tries to decrease the
3520   // scanning limit so that the clock is called earlier.
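       // Illustrative effect: with a scanning period of P words, the limit becomes
       // real_limit - 3P/4, so the next regular_clock_call() fires after at most
       // roughly P/4 further words scanned (and similarly for refs reached).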
3521 
3522   if (_cm->verbose_medium()) {
3523     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3524   }
3525 
3526   _words_scanned_limit = _real_words_scanned_limit -
3527     3 * words_scanned_period / 4;
3528   _refs_reached_limit  = _real_refs_reached_limit -
3529     3 * refs_reached_period / 4;
3530 }
3531 
3532 void CMTask::move_entries_to_global_stack() {
3533   // local array where we'll store the entries that will be popped
3534   // from the local queue
3535   oop buffer[global_stack_transfer_size];
3536 
3537   int n = 0;
3538   oop obj;
3539   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3540     buffer[n] = obj;
3541     ++n;
3542   }
3543 
3544   if (n > 0) {
3545     // we popped at least one entry from the local queue
3546 
3547     statsOnly( ++_global_transfers_to; _local_pops += n );
3548 
3549     if (!_cm->mark_stack_push(buffer, n)) {
3550       if (_cm->verbose_low()) {
3551         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3552                                _worker_id);
3553       }
3554       set_has_aborted();
3555     } else {
3556       // the transfer was successful
3557 
3558       if (_cm->verbose_medium()) {
3559         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3560                                _worker_id, n);
3561       }
3562       statsOnly( size_t tmp_size = _cm->mark_stack_size();
3563                  if (tmp_size > _global_max_size) {
3564                    _global_max_size = tmp_size;
3565                  }
3566                  _global_pushes += n );
3567     }
3568   }
3569 
3570   // this operation was quite expensive, so decrease the limits
3571   decrease_limits();
3572 }
3573 
3574 void CMTask::get_entries_from_global_stack() {
3575   // local array where we'll store the entries that will be popped
3576   // from the global stack.
3577   oop buffer[global_stack_transfer_size];
3578   int n;
3579   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3580   assert(n <= global_stack_transfer_size,
3581          "we should not pop more than the given limit");
3582   if (n > 0) {
3583     // yes, we did actually pop at least one entry
3584 
3585     statsOnly( ++_global_transfers_from; _global_pops += n );
3586     if (_cm->verbose_medium()) {
3587       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3588                              _worker_id, n);
3589     }
3590     for (int i = 0; i < n; ++i) {
3591       bool success = _task_queue->push(buffer[i]);
3592       // We only call this when the local queue is empty or under a
3593       // given target limit. So, we do not expect this push to fail.
3594       assert(success, "invariant");
3595     }
3596 
3597     statsOnly( size_t tmp_size = (size_t)_task_queue->size();
3598                if (tmp_size > _local_max_size) {
3599                  _local_max_size = tmp_size;
3600                }
3601                _local_pushes += n );
3602   }
3603 
3604   // this operation was quite expensive, so decrease the limits
3605   decrease_limits();
3606 }
3607 
3608 void CMTask::drain_local_queue(bool partially) {
3609   if (has_aborted()) return;
3610 
3611   // Decide what the target size is, depending on whether we're going to
3612   // drain it partially (so that other tasks can steal if they run out
3613   // of things to do) or totally (at the very end).
3614   size_t target_size;
3615   if (partially) {
3616     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3617   } else {
3618     target_size = 0;
3619   }
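       // With a partial drain the queue keeps at most
       // min(max_elems/3, GCDrainStackTargetSize) entries so that other tasks still
       // have entries to steal; a total drain (target_size == 0) empties it.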
3620 
3621   if (_task_queue->size() > target_size) {
3622     if (_cm->verbose_high()) {
3623       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3624                              _worker_id, target_size);
3625     }
3626 
3627     oop obj;
3628     bool ret = _task_queue->pop_local(obj);
3629     while (ret) {
3630       statsOnly( ++_local_pops );
3631 
3632       if (_cm->verbose_high()) {
3633         gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3634                                p2i((void*) obj));
3635       }
3636 
3637       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3638       assert(!_g1h->is_on_master_free_list(
3639                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3640 
3641       scan_object(obj);
3642 
3643       if (_task_queue->size() <= target_size || has_aborted()) {
3644         ret = false;
3645       } else {
3646         ret = _task_queue->pop_local(obj);
3647       }
3648     }
3649 
3650     if (_cm->verbose_high()) {
3651       gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3652                              _worker_id, _task_queue->size());
3653     }
3654   }
3655 }
3656 
3657 void CMTask::drain_global_stack(bool partially) {
3658   if (has_aborted()) return;
3659 
3660   // We have a policy to drain the local queue before we attempt to
3661   // drain the global stack.
3662   assert(partially || _task_queue->size() == 0, "invariant");
3663 
3664   // Decide what the target size is, depending on whether we're going to
3665   // drain it partially (so that other tasks can steal if they run out
3666   // of things to do) or totally (at the very end).  Notice that,
3667   // because we move entries from the global stack in chunks or
3668   // because another task might be doing the same, we might in fact
3669   // drop below the target. But, this is not a problem.
3670   size_t target_size;
3671   if (partially) {
3672     target_size = _cm->partial_mark_stack_size_target();
3673   } else {
3674     target_size = 0;
3675   }
3676 
3677   if (_cm->mark_stack_size() > target_size) {
3678     if (_cm->verbose_low()) {
3679       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3680                              _worker_id, target_size);
3681     }
3682 
3683     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3684       get_entries_from_global_stack();
3685       drain_local_queue(partially);
3686     }
3687 
3688     if (_cm->verbose_low()) {
3689       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3690                              _worker_id, _cm->mark_stack_size());
3691     }
3692   }
3693 }
3694 
3695 // The SATB queue makes several assumptions about whether to call the par or
3696 // non-par versions of the methods. This is why some of the code is
3697 // replicated. We should really get rid of the single-threaded version
3698 // of the code to simplify things.
3699 void CMTask::drain_satb_buffers() {
3700   if (has_aborted()) return;
3701 
3702   // We set this so that the regular clock knows that we're in the
3703   // middle of draining buffers and doesn't set the abort flag when it
3704   // notices that SATB buffers are available for draining. It'd be
3705   // very counterproductive if it did that. :-)
3706   _draining_satb_buffers = true;
3707 
3708   CMSATBBufferClosure satb_cl(this, _g1h);
3709   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3710 
3711   // This keeps claiming and applying the closure to completed buffers
3712   // until we run out of buffers or we need to abort.
3713   while (!has_aborted() &&
3714          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3715     if (_cm->verbose_medium()) {
3716       gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3717     }
3718     statsOnly( ++_satb_buffers_processed );
3719     regular_clock_call();
3720   }
3721 
3722   _draining_satb_buffers = false;
3723 
3724   assert(has_aborted() ||
3725          concurrent() ||
3726          satb_mq_set.completed_buffers_num() == 0, "invariant");
3727 
3728   // again, this was a potentially expensive operation, decrease the
3729   // limits to get the regular clock call early
3730   decrease_limits();
3731 }
3732 
3733 void CMTask::print_stats() {
3734   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3735                          _worker_id, _calls);
3736   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3737                          _elapsed_time_ms, _termination_time_ms);
3738   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3739                          _step_times_ms.num(), _step_times_ms.avg(),
3740                          _step_times_ms.sd());
3741   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3742                          _step_times_ms.maximum(), _step_times_ms.sum());
3743 
3744 #if _MARKING_STATS_
3745   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3746                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3747                          _all_clock_intervals_ms.sd());
3748   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3749                          _all_clock_intervals_ms.maximum(),
3750                          _all_clock_intervals_ms.sum());
3751   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = " SIZE_FORMAT ", marking = " SIZE_FORMAT,
3752                          _clock_due_to_scanning, _clock_due_to_marking);
3753   gclog_or_tty->print_cr("  Objects: scanned = " SIZE_FORMAT ", found on the bitmap = " SIZE_FORMAT,
3754                          _objs_scanned, _objs_found_on_bitmap);
3755   gclog_or_tty->print_cr("  Local Queue:  pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT,
3756                          _local_pushes, _local_pops, _local_max_size);
3757   gclog_or_tty->print_cr("  Global Stack: pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT,
3758                          _global_pushes, _global_pops, _global_max_size);
3759   gclog_or_tty->print_cr("                transfers to = " SIZE_FORMAT ", transfers from = " SIZE_FORMAT,
3760                          _global_transfers_to,_global_transfers_from);
3761   gclog_or_tty->print_cr("  Regions: claimed = " SIZE_FORMAT, _regions_claimed);
3762   gclog_or_tty->print_cr("  SATB buffers: processed = " SIZE_FORMAT, _satb_buffers_processed);
3763   gclog_or_tty->print_cr("  Steals: attempts = " SIZE_FORMAT ", successes = " SIZE_FORMAT,
3764                          _steal_attempts, _steals);
3765   gclog_or_tty->print_cr("  Aborted: " SIZE_FORMAT ", due to", _aborted);
3766   gclog_or_tty->print_cr("    overflow: " SIZE_FORMAT ", global abort: " SIZE_FORMAT ", yield: " SIZE_FORMAT,
3767                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3768   gclog_or_tty->print_cr("    time out: " SIZE_FORMAT ", SATB: " SIZE_FORMAT ", termination: " SIZE_FORMAT,
3769                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3770 #endif // _MARKING_STATS_
3771 }
3772 
3773 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3774   return _task_queues->steal(worker_id, hash_seed, obj);
3775 }
3776 
3777 /*****************************************************************************
3778 
3779     The do_marking_step(time_target_ms, ...) method is the building
3780     block of the parallel marking framework. It can be called in parallel
3781     with other invocations of do_marking_step() on different tasks
3782     (but only one per task, obviously) and concurrently with the
3783     mutator threads, or during remark, hence it eliminates the need
3784     for two versions of the code. When called during remark, it will
3785     pick up from where the task left off during the concurrent marking
3786     phase. Interestingly, tasks are claimable during evacuation
3787     pauses too, since do_marking_step() ensures that it aborts before
3788     it needs to yield.
3789 
3790     The data structures that it uses to do marking work are the
3791     following:
3792 
3793       (1) Marking Bitmap. If there are gray objects that appear only
3794       on the bitmap (this happens either when dealing with an overflow
3795       or when the initial marking phase has simply marked the roots
3796       and didn't push them on the stack), then tasks claim heap
3797       regions whose bitmap they then scan to find gray objects. A
3798       global finger indicates where the end of the last claimed region
3799       is. A local finger indicates how far into the region a task has
3800       scanned. The two fingers are used to determine how to gray an
3801       object (i.e. whether simply marking it is OK, as it will be
3802       visited by a task in the future, or whether it needs to be also
3803       pushed on a stack).
3804 
3805       (2) Local Queue. The local queue of the task which is accessed
3806       reasonably efficiently by the task. Other tasks can steal from
3807       it when they run out of work. Throughout the marking phase, a
3808       task attempts to keep its local queue short but not totally
3809       empty, so that entries are available for stealing by other
3810       tasks. Only when there is no more work will a task totally
3811       drain its local queue.
3812 
3813       (3) Global Mark Stack. This handles local queue overflow. During
3814       marking only sets of entries are moved between it and the local
3815       queues, as access to it requires a mutex and more fine-grained
3816       interaction with it might cause contention. If it
3817       overflows, then the marking phase should restart and iterate
3818       over the bitmap to identify gray objects. Throughout the marking
3819       phase, tasks attempt to keep the global mark stack at a small
3820       length but not totally empty, so that entries are available for
3821       popping by other tasks. Only when there is no more work will
3822       tasks totally drain the global mark stack.
3823 
3824       (4) SATB Buffer Queue. This is where completed SATB buffers are
3825       made available. Buffers are regularly removed from this queue
3826       and scanned for roots, so that the queue doesn't get too
3827       long. During remark, all completed buffers are processed, as
3828       well as the filled in parts of any uncompleted buffers.
3829 
3830     The do_marking_step() method tries to abort when the time target
3831     has been reached. There are a few other cases when the
3832     do_marking_step() method also aborts:
3833 
3834       (1) When the marking phase has been aborted (after a Full GC).
3835 
3836       (2) When a global overflow (on the global stack) has been
3837       triggered. Before the task aborts, it will actually sync up with
3838       the other tasks to ensure that all the marking data structures
3839       (local queues, stacks, fingers etc.)  are re-initialized so that
3840       when do_marking_step() completes, the marking phase can
3841       immediately restart.
3842 
3843       (3) When enough completed SATB buffers are available. The
3844       do_marking_step() method only tries to drain SATB buffers right
3845       at the beginning. So, if enough buffers are available, the
3846       marking step aborts and the SATB buffers are processed at
3847       the beginning of the next invocation.
3848 
3849       (4) To yield. When we have to yield, we abort and do the yield
3850       right at the end of do_marking_step(). This saves us from a lot
3851       of hassle, as by yielding we might allow a Full GC. If this
3852       happens then objects will be compacted underneath our feet, the
3853       heap might shrink, etc. We avoid having to check for this by just
3854       aborting and doing the yield right at the end.
3855 
3856     From the above it follows that the do_marking_step() method should
3857     be called in a loop (or, otherwise, regularly) until it completes.
3858 
3859     If a marking step completes without its has_aborted() flag being
3860     true, it means it has completed the current marking phase (and
3861     also all other marking tasks have done so and have all synced up).
3862 
3863     A method called regular_clock_call() is invoked "regularly" (in
3864     sub ms intervals) throughout marking. It is this clock method that
3865     checks all the abort conditions which were mentioned above and
3866     decides when the task should abort. A work-based scheme is used to
3867     trigger this clock method: when the number of object words the
3868     marking phase has scanned or the number of references the marking
3869     phase has visited reaches a given limit. Additional invocations of
3870     the clock method have been planted in a few other strategic places
3871     too. The initial reason for the clock method was to avoid calling
3872     vtime too regularly, as it is quite expensive. So, once it was in
3873     place, it was natural to piggy-back all the other conditions on it
3874     too and not constantly check them throughout the code.
3875 
3876     If do_termination is true then do_marking_step will enter its
3877     termination protocol.
3878 
3879     The value of is_serial must be true when do_marking_step is being
3880     called serially (i.e. by the VMThread) and do_marking_step should
3881     skip any synchronization in the termination and overflow code.
3882     Examples include the serial remark code and the serial reference
3883     processing closures.
3884 
3885     The value of is_serial must be false when do_marking_step is
3886     being called by any of the worker threads in a work gang.
3887     Examples include the concurrent marking code (CMMarkingTask),
3888     the MT remark code, and the MT reference processing closures.
3889 
3890  *****************************************************************************/
3891 
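     // Illustrative (not verbatim) caller pattern, per the notes above: invoke
     // do_marking_step() in a loop until the task completes without aborting, e.g.
     //
     //   do {
     //     task->do_marking_step(target_ms, true /* do_termination */, false /* is_serial */);
     //     // handle any required yield / overflow / SATB processing between steps
     //   } while (!cm->has_aborted() && task->has_aborted());
     //
     // (cm, task and target_ms are placeholders here, not actual locals.)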
3892 void CMTask::do_marking_step(double time_target_ms,
3893                              bool do_termination,
3894                              bool is_serial) {
3895   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3896   assert(concurrent() == _cm->concurrent(), "they should be the same");
3897 
3898   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3899   assert(_task_queues != NULL, "invariant");
3900   assert(_task_queue != NULL, "invariant");
3901   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3902 
3903   assert(!_claimed,
3904          "only one thread should claim this task at any one time");
3905 
3906   // OK, this doesn't safeguard against all possible scenarios, as it is
3907   // possible for two threads to set the _claimed flag at the same
3908   // time. But it is only for debugging purposes anyway and it will
3909   // catch most problems.
3910   _claimed = true;
3911 
3912   _start_time_ms = os::elapsedVTime() * 1000.0;
3913   statsOnly( _interval_start_time_ms = _start_time_ms );
3914 
3915   // If do_stealing is true then do_marking_step will attempt to
3916   // steal work from the other CMTasks. It only makes sense to
3917   // enable stealing when the termination protocol is enabled
3918   // and do_marking_step() is not being called serially.
3919   bool do_stealing = do_termination && !is_serial;
3920 
3921   double diff_prediction_ms =
3922     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3923   _time_target_ms = time_target_ms - diff_prediction_ms;
3924 
3925   // set up the variables that are used in the work-based scheme to
3926   // call the regular clock method
3927   _words_scanned = 0;
3928   _refs_reached  = 0;
3929   recalculate_limits();
3930 
3931   // clear all flags
3932   clear_has_aborted();
3933   _has_timed_out = false;
3934   _draining_satb_buffers = false;
3935 
3936   ++_calls;
3937 
3938   if (_cm->verbose_low()) {
3939     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
3940                            "target = %1.2lfms >>>>>>>>>>",
3941                            _worker_id, _calls, _time_target_ms);
3942   }
3943 
3944   // Set up the bitmap and oop closures. Anything that uses them is
3945   // eventually called from this method, so it is OK to allocate these
3946   // locally, on this method's stack.
3947   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3948   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3949   set_cm_oop_closure(&cm_oop_closure);
3950 
3951   if (_cm->has_overflown()) {
3952     // This can happen if the mark stack overflows during a GC pause
3953     // and this task, after a yield point, restarts. We have to abort
3954     // as we need to get into the overflow protocol which happens
3955     // right at the end of this task.
3956     set_has_aborted();
3957   }
3958 
3959   // First drain any available SATB buffers. After this, we will not
3960   // look at SATB buffers before the next invocation of this method.
3961   // If enough completed SATB buffers are queued up, the regular clock
3962   // will abort this task so that it restarts.
3963   drain_satb_buffers();
3964   // ...then partially drain the local queue and the global stack
3965   drain_local_queue(true);
3966   drain_global_stack(true);
3967 
3968   do {
3969     if (!has_aborted() && _curr_region != NULL) {
3970       // This means that we're already holding on to a region.
3971       assert(_finger != NULL, "if region is not NULL, then the finger "
3972              "should not be NULL either");
3973 
3974       // We might have restarted this task after an evacuation pause
3975       // which might have evacuated the region we're holding on to
3976       // underneath our feet. Let's read its limit again to make sure
3977       // that we do not iterate over a region of the heap that
3978       // contains garbage (update_region_limit() will also move
3979       // _finger to the start of the region if it is found empty).
3980       update_region_limit();
3981       // We will start from _finger not from the start of the region,
3982       // as we might be restarting this task after aborting half-way
3983       // through scanning this region. In this case, _finger points to
3984       // the address where we last found a marked object. If this is a
3985       // fresh region, _finger points to start().
3986       MemRegion mr = MemRegion(_finger, _region_limit);
3987 
3988       if (_cm->verbose_low()) {
3989         gclog_or_tty->print_cr("[%u] we're scanning part "
3990                                "["PTR_FORMAT", "PTR_FORMAT") "
3991                                "of region "HR_FORMAT,
3992                                _worker_id, p2i(_finger), p2i(_region_limit),
3993                                HR_FORMAT_PARAMS(_curr_region));
3994       }
3995 
3996       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
3997              "humongous regions should go around loop once only");
3998 
3999       // Some special cases:
4000       // If the memory region is empty, we can just give up the region.
4001       // If the current region is humongous then we only need to check
4002       // the bitmap for the bit associated with the start of the object,
4003       // scan the object if it's live, and give up the region.
4004       // Otherwise, let's iterate over the bitmap of the part of the region
4005       // that is left.
4006       // If the iteration is successful, give up the region.
4007       if (mr.is_empty()) {
4008         giveup_current_region();
4009         regular_clock_call();
4010       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
4011         if (_nextMarkBitMap->isMarked(mr.start())) {
4012           // The object is marked - apply the closure
4013           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
4014           bitmap_closure.do_bit(offset);
4015         }
4016         // Even if this task aborted while scanning the humongous object
4017         // we can (and should) give up the current region.
4018         giveup_current_region();
4019         regular_clock_call();
4020       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4021         giveup_current_region();
4022         regular_clock_call();
4023       } else {
4024         assert(has_aborted(), "currently the only way to do so");
4025         // The only way to abort the bitmap iteration is to return
4026         // false from the do_bit() method. However, inside the
4027         // do_bit() method we move the _finger to point to the
4028         // object currently being looked at. So, if we bail out, we
4029         // have definitely set _finger to something non-null.
4030         assert(_finger != NULL, "invariant");
4031 
4032         // Region iteration was actually aborted. So now _finger
4033         // points to the address of the object we last scanned. If we
4034         // leave it there, when we restart this task, we will rescan
4035         // the object. It is easy to avoid this. We move the finger by
4036         // enough to point to the next possible object header (the
4037         // bitmap knows by how much we need to move it as it knows its
4038         // granularity).
4039         assert(_finger < _region_limit, "invariant");
4040         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
4041         // Check if bitmap iteration was aborted while scanning the last object
4042         if (new_finger >= _region_limit) {
4043           giveup_current_region();
4044         } else {
4045           move_finger_to(new_finger);
4046         }
4047       }
4048     }
4049     // At this point we have either completed iterating over the
4050     // region we were holding on to, or we have aborted.
4051 
4052     // We then partially drain the local queue and the global stack.
4053     // (Do we really need this?)
4054     drain_local_queue(true);
4055     drain_global_stack(true);
4056 
4057     // Read the note on the claim_region() method on why it might
4058     // return NULL with potentially more regions available for
4059     // claiming and why we have to check out_of_regions() to determine
4060     // whether we're done or not.
4061     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4062       // We are going to try to claim a new region. We should have
4063       // given up on the previous one.
4064       // Separated the asserts so that we know which one fires.
4065       assert(_curr_region  == NULL, "invariant");
4066       assert(_finger       == NULL, "invariant");
4067       assert(_region_limit == NULL, "invariant");
4068       if (_cm->verbose_low()) {
4069         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4070       }
4071       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4072       if (claimed_region != NULL) {
4073         // Yes, we managed to claim one
4074         statsOnly( ++_regions_claimed );
4075 
4076         if (_cm->verbose_low()) {
4077           gclog_or_tty->print_cr("[%u] we successfully claimed "
4078                                  "region "PTR_FORMAT,
4079                                  _worker_id, p2i(claimed_region));
4080         }
4081 
4082         setup_for_region(claimed_region);
4083         assert(_curr_region == claimed_region, "invariant");
4084       }
4085       // It is important to call the regular clock here. It might take
4086       // a while to claim a region if, for example, we hit a large
4087       // block of empty regions. So we need to call the regular clock
4088       // method once per loop iteration to make sure it's called
4089       // frequently enough.
4090       regular_clock_call();
4091     }
4092 
4093     if (!has_aborted() && _curr_region == NULL) {
4094       assert(_cm->out_of_regions(),
4095              "at this point we should be out of regions");
4096     }
4097   } while ( _curr_region != NULL && !has_aborted());
4098 
4099   if (!has_aborted()) {
4100     // We cannot check whether the global stack is empty, since other
4101     // tasks might be pushing objects to it concurrently.
4102     assert(_cm->out_of_regions(),
4103            "at this point we should be out of regions");
4104 
4105     if (_cm->verbose_low()) {
4106       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4107     }
4108 
4109     // Try to reduce the number of available SATB buffers so that
4110     // remark has less work to do.
4111     drain_satb_buffers();
4112   }
4113 
4114   // Since we've done everything else, we can now totally drain the
4115   // local queue and global stack.
4116   drain_local_queue(false);
4117   drain_global_stack(false);
4118 
4119   // Attempt to steal work from other tasks' queues.
4120   if (do_stealing && !has_aborted()) {
4121     // We have not aborted. This means that we have finished all that
4122     // we could. Let's try to do some stealing...
4123 
4124     // We cannot check whether the global stack is empty, since other
4125     // tasks might be pushing objects to it concurrently.
4126     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4127            "only way to reach here");
4128 
4129     if (_cm->verbose_low()) {
4130       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4131     }
4132 
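         // try_stealing() forwards to the shared task queue set, which picks
         // victim queues (pseudo-)randomly using _hash_seed. A failed attempt
         // means no other queue had work to spare, so we stop stealing.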
4133     while (!has_aborted()) {
4134       oop obj;
4135       statsOnly( ++_steal_attempts );
4136 
4137       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4138         if (_cm->verbose_medium()) {
4139           gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4140                                  _worker_id, p2i((void*) obj));
4141         }
4142 
4143         statsOnly( ++_steals );
4144 
4145         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4146                "any stolen object should be marked");
4147         scan_object(obj);
4148 
4149         // And since we're towards the end, let's totally drain the
4150         // local queue and global stack.
4151         drain_local_queue(false);
4152         drain_global_stack(false);
4153       } else {
4154         break;
4155       }
4156     }
4157   }
4158 
4159   // If we are about to wrap up and go into termination, check if we
4160   // should raise the overflow flag.
4161   if (do_termination && !has_aborted()) {
4162     if (_cm->force_overflow()->should_force()) {
4163       _cm->set_has_overflown();
4164       regular_clock_call();
4165     }
4166   }
4167 
4168   // We still haven't aborted. Now, let's try to get into the
4169   // termination protocol.
4170   if (do_termination && !has_aborted()) {
4171     // We cannot check whether the global stack is empty, since other
4172     // tasks might be concurrently pushing objects on it.
4173     // Separated the asserts so that we know which one fires.
4174     assert(_cm->out_of_regions(), "only way to reach here");
4175     assert(_task_queue->size() == 0, "only way to reach here");
4176 
4177     if (_cm->verbose_low()) {
4178       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4179     }
4180 
4181     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4182 
4183     // The CMTask class also extends the TerminatorTerminator class,
4184     // hence its should_exit_termination() method will also decide
4185     // whether to exit the termination protocol or not.
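         // In the serial case there are no other tasks to coordinate with,
         // so this task can consider termination finished immediately.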
4186     bool finished = (is_serial ||
4187                      _cm->terminator()->offer_termination(this));
4188     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4189     _termination_time_ms +=
4190       termination_end_time_ms - _termination_start_time_ms;
4191 
4192     if (finished) {
4193       // We're all done.
4194 
4195       if (_worker_id == 0) {
4196         // let's allow task 0 to do this
4197         if (concurrent()) {
4198           assert(_cm->concurrent_marking_in_progress(), "invariant");
4199           // we need to set this to false before the next
4200           // safepoint. This way we ensure that the marking phase
4201           // doesn't observe any more heap expansions.
4202           _cm->clear_concurrent_marking_in_progress();
4203         }
4204       }
4205 
4206       // We can now guarantee that the global stack is empty, since
4207       // all other tasks have finished. We separated the guarantees so
4208       // that, if a condition is false, we can immediately find out
4209       // which one.
4210       guarantee(_cm->out_of_regions(), "only way to reach here");
4211       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4212       guarantee(_task_queue->size() == 0, "only way to reach here");
4213       guarantee(!_cm->has_overflown(), "only way to reach here");
4214       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4215 
4216       if (_cm->verbose_low()) {
4217         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4218       }
4219     } else {
4220       // Apparently there's more work to do. Let's abort this task. The
4221       // caller will restart it and we can hopefully find more things to do.
4222 
4223       if (_cm->verbose_low()) {
4224         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4225                                _worker_id);
4226       }
4227 
4228       set_has_aborted();
4229       statsOnly( ++_aborted_termination );
4230     }
4231   }
4232 
4233   // Mainly for debugging purposes: make sure that a pointer to the
4234   // closure, which was allocated on this method's stack frame, doesn't
4235   // escape it by accident.
4236   set_cm_oop_closure(NULL);
4237   double end_time_ms = os::elapsedVTime() * 1000.0;
4238   double elapsed_time_ms = end_time_ms - _start_time_ms;
4239   // Update the step history.
4240   _step_times_ms.add(elapsed_time_ms);
4241 
4242   if (has_aborted()) {
4243     // The task was aborted for some reason.
4244 
4245     statsOnly( ++_aborted );
4246 
4247     if (_has_timed_out) {
4248       double diff_ms = elapsed_time_ms - _time_target_ms;
4249       // Keep statistics of how well we did with respect to hitting
4250       // our target only if we actually timed out (if we aborted for
4251       // other reasons, then the results might get skewed).
4252       _marking_step_diffs_ms.add(diff_ms);
4253     }
4254 
4255     if (_cm->has_overflown()) {
4256       // This is the interesting one. We aborted because a global
4257       // overflow was raised. This means we have to restart the
4258       // marking phase and start iterating over regions. However, in
4259       // order to do this we have to make sure that all tasks stop
4260       // what they are doing and re-initialize in a safe manner. We
4261       // will achieve this with the use of two barrier sync points.
4262 
4263       if (_cm->verbose_low()) {
4264         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4265       }
4266 
4267       if (!is_serial) {
4268         // We only need to enter the sync barrier if being called
4269         // from a parallel context
4270         _cm->enter_first_sync_barrier(_worker_id);
4271 
4272         // When we exit this sync barrier we know that all tasks have
4273         // stopped doing marking work. So, it's now safe to
4274         // re-initialize our data structures. At the end of this method,
4275         // task 0 will clear the global data structures.
4276       }
4277 
4278       statsOnly( ++_aborted_overflow );
4279 
4280       // We clear the local state of this task...
4281       clear_region_fields();
4282 
4283       if (!is_serial) {
4284         // ...and enter the second barrier.
4285         _cm->enter_second_sync_barrier(_worker_id);
4286       }
4287       // At this point, if we're in the concurrent phase of
4288       // marking, everything has been re-initialized and we're
4289       // ready to restart.
4290     }
4291 
4292     if (_cm->verbose_low()) {
4293       gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4294                              "elapsed = %1.2lfms <<<<<<<<<<",
4295                              _worker_id, _time_target_ms, elapsed_time_ms);
4296       if (_cm->has_aborted()) {
4297         gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4298                                _worker_id);
4299       }
4300     }
4301   } else {
4302     if (_cm->verbose_low()) {
4303       gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4304                              "elapsed = %1.2lfms <<<<<<<<<<",
4305                              _worker_id, _time_target_ms, elapsed_time_ms);
4306     }
4307   }
4308 
4309   _claimed = false;
4310 }
4311 
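     // CMTask constructor. This only wires the task to its queues and the
     // per-worker accounting structures; the per-cycle marking state (note
     // that _nextMarkBitMap starts out NULL) is set up separately, before
     // the task is used for marking.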
4312 CMTask::CMTask(uint worker_id,
4313                ConcurrentMark* cm,
4314                size_t* marked_bytes,
4315                BitMap* card_bm,
4316                CMTaskQueue* task_queue,
4317                CMTaskQueueSet* task_queues)
4318   : _g1h(G1CollectedHeap::heap()),
4319     _worker_id(worker_id), _cm(cm),
4320     _claimed(false),
4321     _nextMarkBitMap(NULL), _hash_seed(17),
4322     _task_queue(task_queue),
4323     _task_queues(task_queues),
4324     _cm_oop_closure(NULL),
4325     _marked_bytes_array(marked_bytes),
4326     _card_bm(card_bm) {
4327   guarantee(task_queue != NULL, "invariant");
4328   guarantee(task_queues != NULL, "invariant");
4329 
4330   statsOnly( _clock_due_to_scanning = 0;
4331              _clock_due_to_marking  = 0 );
4332 
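       // Seed the step-time diff sequence with a small non-zero value so the
       // very first marking steps have something to base their pacing on (an
       // inference from how _marking_step_diffs_ms is used; 0.5ms is just a seed).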
4333   _marking_step_diffs_ms.add(0.5);
4334 }
4335 
4336 // These are formatting macros that are used below to ensure
4337 // consistent formatting. The *_H_* versions are used to format the
4338 // header for a particular value and they should be kept consistent
4339 // with the corresponding macro. Also note that most of the macros add
4340 // the necessary white space (as a prefix) which makes them a bit
4341 // easier to compose.
4342 
4343 // All the output lines are prefixed with this string to be able to
4344 // identify them easily in a large log file.
4345 #define G1PPRL_LINE_PREFIX            "###"
4346 
4347 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
4348 #ifdef _LP64
4349 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4350 #else // _LP64
4351 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4352 #endif // _LP64
4353 
4354 // For per-region info
4355 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4356 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4357 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
4358 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4359 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4360 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4361 
4362 // For summary info
4363 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
4364 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
4365 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
4366 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
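
     // For illustration only: a summary line composed from the macros above
     // comes out roughly as
     //   ### SUMMARY  capacity: 2048.00 MB  used: 512.00 MB / 25.00 %  ...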
4367 
4368 G1PrintRegionLivenessInfoClosure::
4369 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4370   : _out(out),
4371     _total_used_bytes(0), _total_capacity_bytes(0),
4372     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4373     _hum_used_bytes(0), _hum_capacity_bytes(0),
4374     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
4375     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
4376   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4377   MemRegion g1_reserved = g1h->g1_reserved();
4378   double now = os::elapsedTime();
4379 
4380   // Print the header of the output.
4381   _out->cr();
4382   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4383   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4384                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4385                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4386                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4387                  HeapRegion::GrainBytes);
4388   _out->print_cr(G1PPRL_LINE_PREFIX);
4389   _out->print_cr(G1PPRL_LINE_PREFIX
4390                 G1PPRL_TYPE_H_FORMAT
4391                 G1PPRL_ADDR_BASE_H_FORMAT
4392                 G1PPRL_BYTE_H_FORMAT
4393                 G1PPRL_BYTE_H_FORMAT
4394                 G1PPRL_BYTE_H_FORMAT
4395                 G1PPRL_DOUBLE_H_FORMAT
4396                 G1PPRL_BYTE_H_FORMAT
4397                 G1PPRL_BYTE_H_FORMAT,
4398                 "type", "address-range",
4399                 "used", "prev-live", "next-live", "gc-eff",
4400                 "remset", "code-roots");
4401   _out->print_cr(G1PPRL_LINE_PREFIX
4402                 G1PPRL_TYPE_H_FORMAT
4403                 G1PPRL_ADDR_BASE_H_FORMAT
4404                 G1PPRL_BYTE_H_FORMAT
4405                 G1PPRL_BYTE_H_FORMAT
4406                 G1PPRL_BYTE_H_FORMAT
4407                 G1PPRL_DOUBLE_H_FORMAT
4408                 G1PPRL_BYTE_H_FORMAT
4409                 G1PPRL_BYTE_H_FORMAT,
4410                 "", "",
4411                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4412                 "(bytes)", "(bytes)");
4413 }
4414 
4415 // Takes a pointer to one of the _hum_* fields, deduces the
4416 // corresponding value for a region in a humongous region series
4417 // (either the region size, or whatever is left if the _hum_* field
4418 // is smaller than the region size), and updates the _hum_* field accordingly.
4419 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4420   size_t bytes = 0;
4421   // The > 0 check is to deal with the prev and next live bytes which
4422   // could be 0.
4423   if (*hum_bytes > 0) {
4424     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4425     *hum_bytes -= bytes;
4426   }
4427   return bytes;
4428 }
4429 
4430 // Deduces the values for a region in a humongous region series
4431 // from the _hum_* fields and updates those accordingly. It assumes
4432 // that the _hum_* fields have already been set up from the "starts
4433 // humongous" region and that we visit the regions in address order.
4434 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4435                                                      size_t* capacity_bytes,
4436                                                      size_t* prev_live_bytes,
4437                                                      size_t* next_live_bytes) {
4438   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4439   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4440   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4441   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4442   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4443 }
4444 
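     // Called for each region, in address order. Prints one line per region
     // and accumulates the totals reported in the footer. For a humongous
     // series the liveness figures are redistributed across the series via
     // the _hum_* fields. Returning false tells the heap region iteration
     // to continue with the remaining regions.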
4445 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4446   const char* type       = r->get_type_str();
4447   HeapWord* bottom       = r->bottom();
4448   HeapWord* end          = r->end();
4449   size_t capacity_bytes  = r->capacity();
4450   size_t used_bytes      = r->used();
4451   size_t prev_live_bytes = r->live_bytes();
4452   size_t next_live_bytes = r->next_live_bytes();
4453   double gc_eff          = r->gc_efficiency();
4454   size_t remset_bytes    = r->rem_set()->mem_size();
4455   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4456 
4457   if (r->is_starts_humongous()) {
4458     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4459            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4460            "they should have been zeroed after the last time we used them");
4461     // Set up the _hum_* fields.
4462     _hum_capacity_bytes  = capacity_bytes;
4463     _hum_used_bytes      = used_bytes;
4464     _hum_prev_live_bytes = prev_live_bytes;
4465     _hum_next_live_bytes = next_live_bytes;
4466     get_hum_bytes(&used_bytes, &capacity_bytes,
4467                   &prev_live_bytes, &next_live_bytes);
4468     end = bottom + HeapRegion::GrainWords;
4469   } else if (r->is_continues_humongous()) {
4470     get_hum_bytes(&used_bytes, &capacity_bytes,
4471                   &prev_live_bytes, &next_live_bytes);
4472     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4473   }
4474 
4475   _total_used_bytes      += used_bytes;
4476   _total_capacity_bytes  += capacity_bytes;
4477   _total_prev_live_bytes += prev_live_bytes;
4478   _total_next_live_bytes += next_live_bytes;
4479   _total_remset_bytes    += remset_bytes;
4480   _total_strong_code_roots_bytes += strong_code_roots_bytes;
4481 
4482   // Print a line for this particular region.
4483   _out->print_cr(G1PPRL_LINE_PREFIX
4484                  G1PPRL_TYPE_FORMAT
4485                  G1PPRL_ADDR_BASE_FORMAT
4486                  G1PPRL_BYTE_FORMAT
4487                  G1PPRL_BYTE_FORMAT
4488                  G1PPRL_BYTE_FORMAT
4489                  G1PPRL_DOUBLE_FORMAT
4490                  G1PPRL_BYTE_FORMAT
4491                  G1PPRL_BYTE_FORMAT,
4492                  type, p2i(bottom), p2i(end),
4493                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4494                  remset_bytes, strong_code_roots_bytes);
4495 
4496   return false;
4497 }
4498 
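     // The footer (the SUMMARY line) is printed from the destructor, i.e.
     // once the closure has been applied to all regions and goes out of scope.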
4499 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4500   // Add the static memory usage to the remembered set sizes.
4501   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
4502   // Print the footer of the output.
4503   _out->print_cr(G1PPRL_LINE_PREFIX);
4504   _out->print_cr(G1PPRL_LINE_PREFIX
4505                  " SUMMARY"
4506                  G1PPRL_SUM_MB_FORMAT("capacity")
4507                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4508                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4509                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
4510                  G1PPRL_SUM_MB_FORMAT("remset")
4511                  G1PPRL_SUM_MB_FORMAT("code-roots"),
4512                  bytes_to_mb(_total_capacity_bytes),
4513                  bytes_to_mb(_total_used_bytes),
4514                  perc(_total_used_bytes, _total_capacity_bytes),
4515                  bytes_to_mb(_total_prev_live_bytes),
4516                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4517                  bytes_to_mb(_total_next_live_bytes),
4518                  perc(_total_next_live_bytes, _total_capacity_bytes),
4519                  bytes_to_mb(_total_remset_bytes),
4520                  bytes_to_mb(_total_strong_code_roots_bytes));
4521   _out->cr();
4522 }