1 /*
   2  * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMark.inline.hpp"
  30 #include "gc/g1/concurrentMarkThread.inline.hpp"
  31 #include "gc/g1/g1CollectedHeap.inline.hpp"
  32 #include "gc/g1/g1CollectorPolicy.hpp"
  33 #include "gc/g1/g1ErgoVerbose.hpp"
  34 #include "gc/g1/g1Log.hpp"
  35 #include "gc/g1/g1OopClosures.inline.hpp"
  36 #include "gc/g1/g1RemSet.hpp"
  37 #include "gc/g1/g1StringDedup.hpp"
  38 #include "gc/g1/heapRegion.inline.hpp"
  39 #include "gc/g1/heapRegionManager.inline.hpp"
  40 #include "gc/g1/heapRegionRemSet.hpp"
  41 #include "gc/g1/heapRegionSet.inline.hpp"
  42 #include "gc/g1/suspendibleThreadSet.hpp"
  43 #include "gc/shared/gcTimer.hpp"
  44 #include "gc/shared/gcTrace.hpp"
  45 #include "gc/shared/gcTraceTime.hpp"
  46 #include "gc/shared/genOopClosures.inline.hpp"
  47 #include "gc/shared/referencePolicy.hpp"
  48 #include "gc/shared/strongRootsScope.hpp"
  49 #include "gc/shared/taskqueue.inline.hpp"
  50 #include "gc/shared/vmGCOperations.hpp"
  51 #include "memory/allocation.hpp"
  52 #include "memory/resourceArea.hpp"
  53 #include "oops/oop.inline.hpp"
  54 #include "runtime/atomic.inline.hpp"
  55 #include "runtime/handles.inline.hpp"
  56 #include "runtime/java.hpp"
  57 #include "runtime/prefetch.inline.hpp"
  58 #include "services/memTracker.hpp"
  59 
  60 // Concurrent marking bit map wrapper
  61 
  62 CMBitMapRO::CMBitMapRO(int shifter) :
  63   _bm(),
  64   _shifter(shifter) {
  65   _bmStartWord = 0;
  66   _bmWordSize = 0;
  67 }
  68 
  69 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  70                                                const HeapWord* limit) const {
  71   // First we must round addr *up* to a possible object boundary.
  72   addr = (HeapWord*)align_size_up((intptr_t)addr,
  73                                   HeapWordSize << _shifter);
  74   size_t addrOffset = heapWordToOffset(addr);
  75   if (limit == NULL) {
  76     limit = _bmStartWord + _bmWordSize;
  77   }
  78   size_t limitOffset = heapWordToOffset(limit);
  79   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  80   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  81   assert(nextAddr >= addr, "get_next_one postcondition");
  82   assert(nextAddr == limit || isMarked(nextAddr),
  83          "get_next_one postcondition");
  84   return nextAddr;
  85 }
  86 
  87 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
  88                                                  const HeapWord* limit) const {
  89   size_t addrOffset = heapWordToOffset(addr);
  90   if (limit == NULL) {
  91     limit = _bmStartWord + _bmWordSize;
  92   }
  93   size_t limitOffset = heapWordToOffset(limit);
  94   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  95   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
   96   assert(nextAddr >= addr, "get_next_zero postcondition");
   97   assert(nextAddr == limit || !isMarked(nextAddr),
   98          "get_next_zero postcondition");
  99   return nextAddr;
 100 }
 101 
 102 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 103   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 104   return (int) (diff >> _shifter);
 105 }
 106 
 107 #ifndef PRODUCT
 108 bool CMBitMapRO::covers(MemRegion heap_rs) const {
 109   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 110   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
 111          "size inconsistency");
 112   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
 113          _bmWordSize  == heap_rs.word_size();
 114 }
 115 #endif
 116 
 117 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
 118   _bm.print_on_error(st, prefix);
 119 }
 120 
 121 size_t CMBitMap::compute_size(size_t heap_size) {
 122   return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
 123 }
 124 
 125 size_t CMBitMap::mark_distance() {
 126   return MinObjAlignmentInBytes * BitsPerByte;
 127 }
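      // Worked example, assuming the usual 8-byte minimum object alignment on
      // a 64-bit VM: mark_distance() is 8 * 8 = 64, i.e. one bitmap bit covers
      // one 8-byte-aligned heap address and one bitmap byte covers 64 heap
      // bytes, so compute_size() reserves roughly heap_size / 64 bytes for the
      // bitmap, rounded up to the reservation alignment.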
 128 
 129 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 130   _bmStartWord = heap.start();
 131   _bmWordSize = heap.word_size();
 132 
 133   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
 134   _bm.set_size(_bmWordSize >> _shifter);
 135 
 136   storage->set_mapping_changed_listener(&_listener);
 137 }
 138 
 139 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 140   if (zero_filled) {
 141     return;
 142   }
 143   // We need to clear the bitmap on commit, removing any existing information.
 144   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 145   _bm->clearRange(mr);
 146 }
 147 
 148 // Closure used for clearing the given mark bitmap.
 149 class ClearBitmapHRClosure : public HeapRegionClosure {
 150  private:
 151   ConcurrentMark* _cm;
 152   CMBitMap* _bitmap;
  153   bool _may_yield;      // Whether the closure may yield during iteration. If marking is aborted after a yield, the iteration is aborted too.
 154  public:
 155   ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
 156     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
 157   }
 158 
 159   virtual bool doHeapRegion(HeapRegion* r) {
 160     size_t const chunk_size_in_words = M / HeapWordSize;
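          // Clear in chunks of one heap megabyte so that, when _may_yield is
          // set, we can honor a pending yield request between chunks instead
          // of keeping a safepoint waiting for an entire region.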
 161 
 162     HeapWord* cur = r->bottom();
 163     HeapWord* const end = r->end();
 164 
 165     while (cur < end) {
 166       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 167       _bitmap->clearRange(mr);
 168 
 169       cur += chunk_size_in_words;
 170 
 171       // Abort iteration if after yielding the marking has been aborted.
 172       if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
 173         return true;
 174       }
 175       // Repeat the asserts from before the start of the closure. We will do them
 176       // as asserts here to minimize their overhead on the product. However, we
 177       // will have them as guarantees at the beginning / end of the bitmap
 178       // clearing to get some checking in the product.
 179       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
 180       assert(!_may_yield || !G1CollectedHeap::heap()->mark_in_progress(), "invariant");
 181     }
 182 
 183     return false;
 184   }
 185 };
 186 
 187 class ParClearNextMarkBitmapTask : public AbstractGangTask {
 188   ClearBitmapHRClosure* _cl;
 189   HeapRegionClaimer     _hrclaimer;
 190   bool                  _suspendible; // If the task is suspendible, workers must join the STS.
 191 
 192 public:
 193   ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
  194       AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}
 195 
 196   void work(uint worker_id) {
 197     SuspendibleThreadSetJoiner sts_join(_suspendible);
 198     G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
 199   }
 200 };
 201 
 202 void CMBitMap::clearAll() {
 203   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 204   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
 205   uint n_workers = g1h->workers()->active_workers();
 206   ParClearNextMarkBitmapTask task(&cl, n_workers, false);
 207   g1h->workers()->run_task(&task);
 208   guarantee(cl.complete(), "Must have completed iteration.");
 209   return;
 210 }
 211 
 212 void CMBitMap::markRange(MemRegion mr) {
  213   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 214   assert(!mr.is_empty(), "unexpected empty region");
 215   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 216           ((HeapWord *) mr.end())),
 217          "markRange memory region end is not card aligned");
 218   // convert address range into offset range
 219   _bm.at_put_range(heapWordToOffset(mr.start()),
 220                    heapWordToOffset(mr.end()), true);
 221 }
 222 
 223 void CMBitMap::clearRange(MemRegion mr) {
  224   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 225   assert(!mr.is_empty(), "unexpected empty region");
 226   // convert address range into offset range
 227   _bm.at_put_range(heapWordToOffset(mr.start()),
 228                    heapWordToOffset(mr.end()), false);
 229 }
 230 
 231 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 232                                             HeapWord* end_addr) {
 233   HeapWord* start = getNextMarkedWordAddress(addr);
 234   start = MIN2(start, end_addr);
 235   HeapWord* end   = getNextUnmarkedWordAddress(start);
 236   end = MIN2(end, end_addr);
 237   assert(start <= end, "Consistency check");
 238   MemRegion mr(start, end);
 239   if (!mr.is_empty()) {
 240     clearRange(mr);
 241   }
 242   return mr;
 243 }
 244 
 245 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 246   _base(NULL), _cm(cm)
 247 #ifdef ASSERT
 248   , _drain_in_progress(false)
 249   , _drain_in_progress_yields(false)
 250 #endif
 251 {}
 252 
 253 bool CMMarkStack::allocate(size_t capacity) {
 254   // allocate a stack of the requisite depth
 255   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 256   if (!rs.is_reserved()) {
 257     warning("ConcurrentMark MarkStack allocation failure");
 258     return false;
 259   }
 260   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 261   if (!_virtual_space.initialize(rs, rs.size())) {
 262     warning("ConcurrentMark MarkStack backing store failure");
 263     // Release the virtual memory reserved for the marking stack
 264     rs.release();
 265     return false;
 266   }
 267   assert(_virtual_space.committed_size() == rs.size(),
 268          "Didn't reserve backing store for all of ConcurrentMark stack?");
 269   _base = (oop*) _virtual_space.low();
 270   setEmpty();
 271   _capacity = (jint) capacity;
 272   _saved_index = -1;
 273   _should_expand = false;
 274   return true;
 275 }
 276 
 277 void CMMarkStack::expand() {
  278   // Called during remark if we've overflown the marking stack during marking.
  279   assert(isEmpty(), "stack should have been emptied while handling overflow");
 280   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 281   // Clear expansion flag
 282   _should_expand = false;
 283   if (_capacity == (jint) MarkStackSizeMax) {
 284     if (PrintGCDetails && Verbose) {
 285       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
 286     }
 287     return;
 288   }
 289   // Double capacity if possible
 290   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  291   // Do not give up the existing stack until we have managed to
  292   // get the doubled capacity that we want.
 293   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 294                                                            sizeof(oop)));
 295   if (rs.is_reserved()) {
 296     // Release the backing store associated with old stack
 297     _virtual_space.release();
 298     // Reinitialize virtual space for new stack
 299     if (!_virtual_space.initialize(rs, rs.size())) {
 300       fatal("Not enough swap for expanded marking stack capacity");
 301     }
 302     _base = (oop*)(_virtual_space.low());
 303     _index = 0;
 304     _capacity = new_capacity;
 305   } else {
 306     if (PrintGCDetails && Verbose) {
 307       // Failed to double capacity, continue;
 308       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
 309                           SIZE_FORMAT"K to " SIZE_FORMAT"K",
 310                           _capacity / K, new_capacity / K);
 311     }
 312   }
 313 }
 314 
 315 void CMMarkStack::set_should_expand() {
  316   // If we're resetting the marking state because of a
 317   // marking stack overflow, record that we should, if
 318   // possible, expand the stack.
 319   _should_expand = _cm->has_overflown();
 320 }
 321 
 322 CMMarkStack::~CMMarkStack() {
 323   if (_base != NULL) {
 324     _base = NULL;
 325     _virtual_space.release();
 326   }
 327 }
 328 
 329 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 330   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 331   jint start = _index;
 332   jint next_index = start + n;
 333   if (next_index > _capacity) {
 334     _overflow = true;
 335     return;
 336   }
 337   // Otherwise.
 338   _index = next_index;
 339   for (int i = 0; i < n; i++) {
 340     int ind = start + i;
 341     assert(ind < _capacity, "By overflow test above.");
 342     _base[ind] = ptr_arr[i];
 343   }
 344 }
 345 
 346 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 347   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 348   jint index = _index;
 349   if (index == 0) {
 350     *n = 0;
 351     return false;
 352   } else {
 353     int k = MIN2(max, index);
 354     jint  new_ind = index - k;
 355     for (int j = 0; j < k; j++) {
 356       ptr_arr[j] = _base[new_ind + j];
 357     }
 358     _index = new_ind;
 359     *n = k;
 360     return true;
 361   }
 362 }
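      // Note that par_push_arr() and par_pop_arr() both serialize on
      // ParGCRareEvent_lock, so bulk transfers against the global stack never
      // interleave; a push that does not fit simply sets the _overflow flag
      // and leaves the stack contents and _index unchanged.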
 363 
 364 template<class OopClosureClass>
 365 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 366   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 367          || SafepointSynchronize::is_at_safepoint(),
 368          "Drain recursion must be yield-safe.");
 369   bool res = true;
 370   debug_only(_drain_in_progress = true);
 371   debug_only(_drain_in_progress_yields = yield_after);
 372   while (!isEmpty()) {
 373     oop newOop = pop();
 374     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 375     assert(newOop->is_oop(), "Expected an oop");
 376     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 377            "only grey objects on this stack");
 378     newOop->oop_iterate(cl);
 379     if (yield_after && _cm->do_yield_check()) {
 380       res = false;
 381       break;
 382     }
 383   }
 384   debug_only(_drain_in_progress = false);
 385   return res;
 386 }
 387 
 388 void CMMarkStack::note_start_of_gc() {
 389   assert(_saved_index == -1,
 390          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 391   _saved_index = _index;
 392 }
 393 
 394 void CMMarkStack::note_end_of_gc() {
 395   // This is intentionally a guarantee, instead of an assert. If we
 396   // accidentally add something to the mark stack during GC, it
  397   // will be a correctness issue, so it's better if we crash. We'll
 398   // only check this once per GC anyway, so it won't be a performance
 399   // issue in any way.
 400   guarantee(_saved_index == _index,
 401             err_msg("saved index: %d index: %d", _saved_index, _index));
 402   _saved_index = -1;
 403 }
 404 
 405 void CMMarkStack::oops_do(OopClosure* f) {
 406   assert(_saved_index == _index,
 407          err_msg("saved index: %d index: %d", _saved_index, _index));
 408   for (int i = 0; i < _index; i += 1) {
 409     f->do_oop(&_base[i]);
 410   }
 411 }
 412 
 413 CMRootRegions::CMRootRegions() :
 414   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 415   _should_abort(false),  _next_survivor(NULL) { }
 416 
 417 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 418   _young_list = g1h->young_list();
 419   _cm = cm;
 420 }
 421 
 422 void CMRootRegions::prepare_for_scan() {
 423   assert(!scan_in_progress(), "pre-condition");
 424 
 425   // Currently, only survivors can be root regions.
 426   assert(_next_survivor == NULL, "pre-condition");
 427   _next_survivor = _young_list->first_survivor_region();
 428   _scan_in_progress = (_next_survivor != NULL);
 429   _should_abort = false;
 430 }
 431 
 432 HeapRegion* CMRootRegions::claim_next() {
 433   if (_should_abort) {
 434     // If someone has set the should_abort flag, we return NULL to
 435     // force the caller to bail out of their loop.
 436     return NULL;
 437   }
 438 
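        // Unsynchronized read of _next_survivor as a fast path: if it is
        // already NULL all root regions have been claimed and we can return
        // without taking the lock. Otherwise we re-read it under
        // RootRegionScan_lock below, since another worker may have claimed
        // regions in the meantime.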
 439   // Currently, only survivors can be root regions.
 440   HeapRegion* res = _next_survivor;
 441   if (res != NULL) {
 442     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 443     // Read it again in case it changed while we were waiting for the lock.
 444     res = _next_survivor;
 445     if (res != NULL) {
 446       if (res == _young_list->last_survivor_region()) {
 447         // We just claimed the last survivor so store NULL to indicate
 448         // that we're done.
 449         _next_survivor = NULL;
 450       } else {
 451         _next_survivor = res->get_next_young_region();
 452       }
 453     } else {
 454       // Someone else claimed the last survivor while we were trying
 455       // to take the lock so nothing else to do.
 456     }
 457   }
 458   assert(res == NULL || res->is_survivor(), "post-condition");
 459 
 460   return res;
 461 }
 462 
 463 void CMRootRegions::scan_finished() {
 464   assert(scan_in_progress(), "pre-condition");
 465 
 466   // Currently, only survivors can be root regions.
 467   if (!_should_abort) {
 468     assert(_next_survivor == NULL, "we should have claimed all survivors");
 469   }
 470   _next_survivor = NULL;
 471 
 472   {
 473     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 474     _scan_in_progress = false;
 475     RootRegionScan_lock->notify_all();
 476   }
 477 }
 478 
 479 bool CMRootRegions::wait_until_scan_finished() {
 480   if (!scan_in_progress()) return false;
 481 
 482   {
 483     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 484     while (scan_in_progress()) {
 485       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 486     }
 487   }
 488   return true;
 489 }
 490 
 491 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 492 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 493 #endif // _MSC_VER
 494 
 495 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 496   return MAX2((n_par_threads + 2) / 4, 1U);
 497 }
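      // For example, with ParallelGCThreads == 8 this yields (8 + 2) / 4 == 2
      // marking threads, i.e. roughly a quarter of the parallel GC threads,
      // but never fewer than one.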
 498 
 499 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 500   _g1h(g1h),
 501   _markBitMap1(),
 502   _markBitMap2(),
 503   _parallel_marking_threads(0),
 504   _max_parallel_marking_threads(0),
 505   _sleep_factor(0.0),
 506   _marking_task_overhead(1.0),
 507   _cleanup_sleep_factor(0.0),
 508   _cleanup_task_overhead(1.0),
 509   _cleanup_list("Cleanup List"),
 510   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 511   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
 512             CardTableModRefBS::card_shift,
 513             false /* in_resource_area*/),
 514 
 515   _prevMarkBitMap(&_markBitMap1),
 516   _nextMarkBitMap(&_markBitMap2),
 517 
 518   _markStack(this),
 519   // _finger set in set_non_marking_state
 520 
 521   _max_worker_id((uint)ParallelGCThreads),
 522   // _active_tasks set in set_non_marking_state
 523   // _tasks set inside the constructor
 524   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 525   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 526 
 527   _has_overflown(false),
 528   _concurrent(false),
 529   _has_aborted(false),
 530   _aborted_gc_id(GCId::undefined()),
 531   _restart_for_overflow(false),
 532   _concurrent_marking_in_progress(false),
 533 
 534   // _verbose_level set below
 535 
 536   _init_times(),
 537   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 538   _cleanup_times(),
 539   _total_counting_time(0.0),
 540   _total_rs_scrub_time(0.0),
 541 
 542   _parallel_workers(NULL),
 543 
 544   _count_card_bitmaps(NULL),
 545   _count_marked_bytes(NULL),
 546   _completed_initialization(false) {
 547   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 548   if (verbose_level < no_verbose) {
 549     verbose_level = no_verbose;
 550   }
 551   if (verbose_level > high_verbose) {
 552     verbose_level = high_verbose;
 553   }
 554   _verbose_level = verbose_level;
 555 
 556   if (verbose_low()) {
 557     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 558                            "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
 559   }
 560 
 561   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 562   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 563 
 564   // Create & start a ConcurrentMark thread.
 565   _cmThread = new ConcurrentMarkThread(this);
 566   assert(cmThread() != NULL, "CM Thread should have been created");
 567   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 568   if (_cmThread->osthread() == NULL) {
 569       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 570   }
 571 
 572   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 573   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 574   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 575 
 576   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 577   satb_qs.set_buffer_size(G1SATBBufferSize);
 578 
 579   _root_regions.init(_g1h, this);
 580 
 581   if (ConcGCThreads > ParallelGCThreads) {
 582     warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
 583             "than ParallelGCThreads (" UINTX_FORMAT ").",
 584             ConcGCThreads, ParallelGCThreads);
 585     return;
 586   }
 587   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 588     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 589     // if both are set
 590     _sleep_factor             = 0.0;
 591     _marking_task_overhead    = 1.0;
 592   } else if (G1MarkingOverheadPercent > 0) {
 593     // We will calculate the number of parallel marking threads based
 594     // on a target overhead with respect to the soft real-time goal
 595     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 596     double overall_cm_overhead =
 597       (double) MaxGCPauseMillis * marking_overhead /
 598       (double) GCPauseIntervalMillis;
 599     double cpu_ratio = 1.0 / (double) os::processor_count();
 600     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 601     double marking_task_overhead =
 602       overall_cm_overhead / marking_thread_num *
 603                                               (double) os::processor_count();
 604     double sleep_factor =
 605                        (1.0 - marking_task_overhead) / marking_task_overhead;
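          // Illustrative numbers (example values, not defaults): with
          // G1MarkingOverheadPercent = 10, MaxGCPauseMillis = 200,
          // GCPauseIntervalMillis = 1000 and 8 processors:
          //   overall_cm_overhead   = 200 * 0.10 / 1000  = 0.02
          //   cpu_ratio             = 1.0 / 8            = 0.125
          //   marking_thread_num    = ceil(0.02 / 0.125) = 1
          //   marking_task_overhead = 0.02 / 1 * 8       = 0.16
          //   sleep_factor          = (1 - 0.16) / 0.16  = 5.25
          // i.e. the single marking thread would sleep about 5.25 times as
          // long as it runs in order to meet the requested overhead.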
 606 
 607     FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
 608     _sleep_factor             = sleep_factor;
 609     _marking_task_overhead    = marking_task_overhead;
 610   } else {
 611     // Calculate the number of parallel marking threads by scaling
 612     // the number of parallel GC threads.
 613     uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
 614     FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
 615     _sleep_factor             = 0.0;
 616     _marking_task_overhead    = 1.0;
 617   }
 618 
 619   assert(ConcGCThreads > 0, "Should have been set");
 620   _parallel_marking_threads = (uint) ConcGCThreads;
 621   _max_parallel_marking_threads = _parallel_marking_threads;
 622 
 623   if (parallel_marking_threads() > 1) {
 624     _cleanup_task_overhead = 1.0;
 625   } else {
 626     _cleanup_task_overhead = marking_task_overhead();
 627   }
 628   _cleanup_sleep_factor =
 629                    (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 630 
 631 #if 0
 632   gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 633   gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 634   gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 635   gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 636   gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 637 #endif
 638 
 639   _parallel_workers = new FlexibleWorkGang("G1 Marker",
 640        _max_parallel_marking_threads, false, true);
 641   if (_parallel_workers == NULL) {
 642     vm_exit_during_initialization("Failed necessary allocation.");
 643   } else {
 644     _parallel_workers->initialize_workers();
 645   }
 646 
 647   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 648     size_t mark_stack_size =
 649       MIN2(MarkStackSizeMax,
 650           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 651     // Verify that the calculated value for MarkStackSize is in range.
 652     // It would be nice to use the private utility routine from Arguments.
 653     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 654       warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 655               "must be between 1 and " SIZE_FORMAT,
 656               mark_stack_size, MarkStackSizeMax);
 657       return;
 658     }
 659     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 660   } else {
 661     // Verify MarkStackSize is in range.
 662     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 663       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 664         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 665           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 666                   "must be between 1 and " SIZE_FORMAT,
 667                   MarkStackSize, MarkStackSizeMax);
 668           return;
 669         }
 670       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 671         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 672           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 673                   " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 674                   MarkStackSize, MarkStackSizeMax);
 675           return;
 676         }
 677       }
 678     }
 679   }
 680 
 681   if (!_markStack.allocate(MarkStackSize)) {
 682     warning("Failed to allocate CM marking stack");
 683     return;
 684   }
 685 
 686   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 687   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 688 
 689   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 690   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 691 
 692   BitMap::idx_t card_bm_size = _card_bm.size();
 693 
 694   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 695   _active_tasks = _max_worker_id;
 696 
 697   uint max_regions = _g1h->max_regions();
 698   for (uint i = 0; i < _max_worker_id; ++i) {
 699     CMTaskQueue* task_queue = new CMTaskQueue();
 700     task_queue->initialize();
 701     _task_queues->register_queue(i, task_queue);
 702 
 703     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 704     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 705 
 706     _tasks[i] = new CMTask(i, this,
 707                            _count_marked_bytes[i],
 708                            &_count_card_bitmaps[i],
 709                            task_queue, _task_queues);
 710 
 711     _accum_task_vtime[i] = 0.0;
 712   }
 713 
 714   // Calculate the card number for the bottom of the heap. Used
 715   // in biasing indexes into the accounting card bitmaps.
 716   _heap_bottom_card_num =
 717     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 718                                 CardTableModRefBS::card_shift);
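        // With the usual 512-byte cards (card_shift == 9) this is simply the
        // heap start address divided by 512. It is the bias mentioned above,
        // which lets the per-worker accounting card bitmaps be indexed from
        // zero.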
 719 
 720   // Clear all the liveness counting data
 721   clear_all_count_data();
 722 
 723   // so that the call below can read a sensible value
 724   _heap_start = g1h->reserved_region().start();
 725   set_non_marking_state();
 726   _completed_initialization = true;
 727 }
 728 
 729 void ConcurrentMark::reset() {
 730   // Starting values for these two. This should be called in a STW
 731   // phase.
 732   MemRegion reserved = _g1h->g1_reserved();
 733   _heap_start = reserved.start();
 734   _heap_end   = reserved.end();
 735 
 736   // Separated the asserts so that we know which one fires.
 737   assert(_heap_start != NULL, "heap bounds should look ok");
 738   assert(_heap_end != NULL, "heap bounds should look ok");
 739   assert(_heap_start < _heap_end, "heap bounds should look ok");
 740 
 741   // Reset all the marking data structures and any necessary flags
 742   reset_marking_state();
 743 
 744   if (verbose_low()) {
 745     gclog_or_tty->print_cr("[global] resetting");
 746   }
 747 
 748   // We do reset all of them, since different phases will use
  749   // different numbers of active threads. So, it's easiest to have all
 750   // of them ready.
 751   for (uint i = 0; i < _max_worker_id; ++i) {
 752     _tasks[i]->reset(_nextMarkBitMap);
 753   }
 754 
 755   // we need this to make sure that the flag is on during the evac
 756   // pause with initial mark piggy-backed
 757   set_concurrent_marking_in_progress();
 758 }
 759 
 760 
 761 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 762   _markStack.set_should_expand();
 763   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 764   if (clear_overflow) {
 765     clear_has_overflown();
 766   } else {
 767     assert(has_overflown(), "pre-condition");
 768   }
 769   _finger = _heap_start;
 770 
 771   for (uint i = 0; i < _max_worker_id; ++i) {
 772     CMTaskQueue* queue = _task_queues->queue(i);
 773     queue->set_empty();
 774   }
 775 }
 776 
 777 void ConcurrentMark::set_concurrency(uint active_tasks) {
 778   assert(active_tasks <= _max_worker_id, "we should not have more");
 779 
 780   _active_tasks = active_tasks;
 781   // Need to update the three data structures below according to the
 782   // number of active threads for this phase.
 783   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 784   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 785   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 786 }
 787 
 788 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 789   set_concurrency(active_tasks);
 790 
 791   _concurrent = concurrent;
 792   // We propagate this to all tasks, not just the active ones.
 793   for (uint i = 0; i < _max_worker_id; ++i)
 794     _tasks[i]->set_concurrent(concurrent);
 795 
 796   if (concurrent) {
 797     set_concurrent_marking_in_progress();
 798   } else {
 799     // We currently assume that the concurrent flag has been set to
 800     // false before we start remark. At this point we should also be
 801     // in a STW phase.
 802     assert(!concurrent_marking_in_progress(), "invariant");
 803     assert(out_of_regions(),
 804            err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
 805                    p2i(_finger), p2i(_heap_end)));
 806   }
 807 }
 808 
 809 void ConcurrentMark::set_non_marking_state() {
 810   // We set the global marking state to some default values when we're
 811   // not doing marking.
 812   reset_marking_state();
 813   _active_tasks = 0;
 814   clear_concurrent_marking_in_progress();
 815 }
 816 
 817 ConcurrentMark::~ConcurrentMark() {
 818   // The ConcurrentMark instance is never freed.
 819   ShouldNotReachHere();
 820 }
 821 
 822 void ConcurrentMark::clearNextBitmap() {
 823   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 824 
 825   // Make sure that the concurrent mark thread looks to still be in
 826   // the current cycle.
 827   guarantee(cmThread()->during_cycle(), "invariant");
 828 
 829   // We are finishing up the current cycle by clearing the next
 830   // marking bitmap and getting it ready for the next cycle. During
 831   // this time no other cycle can start. So, let's make sure that this
 832   // is the case.
 833   guarantee(!g1h->mark_in_progress(), "invariant");
 834 
 835   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
 836   ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
 837   _parallel_workers->run_task(&task);
 838 
 839   // Clear the liveness counting data. If the marking has been aborted, the abort()
 840   // call already did that.
 841   if (cl.complete()) {
 842     clear_all_count_data();
 843   }
 844 
 845   // Repeat the asserts from above.
 846   guarantee(cmThread()->during_cycle(), "invariant");
 847   guarantee(!g1h->mark_in_progress(), "invariant");
 848 }
 849 
 850 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 851   CMBitMap* _bitmap;
 852   bool _error;
 853  public:
 854   CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
 855   }
 856 
 857   virtual bool doHeapRegion(HeapRegion* r) {
  858     // This closure can be called concurrently with the mutator, so we must make sure
 859     // that the result of the getNextMarkedWordAddress() call is compared to the
 860     // value passed to it as limit to detect any found bits.
 861     // We can use the region's orig_end() for the limit and the comparison value
 862     // as it always contains the "real" end of the region that never changes and
 863     // has no side effects.
 864     // Due to the latter, there can also be no problem with the compiler generating
 865     // reloads of the orig_end() call.
 866     HeapWord* end = r->orig_end();
 867     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 868   }
 869 };
 870 
 871 bool ConcurrentMark::nextMarkBitmapIsClear() {
 872   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 873   _g1h->heap_region_iterate(&cl);
 874   return cl.complete();
 875 }
 876 
 877 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 878 public:
 879   bool doHeapRegion(HeapRegion* r) {
 880     if (!r->is_continues_humongous()) {
 881       r->note_start_of_marking();
 882     }
 883     return false;
 884   }
 885 };
 886 
 887 void ConcurrentMark::checkpointRootsInitialPre() {
 888   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 889   G1CollectorPolicy* g1p = g1h->g1_policy();
 890 
 891   _has_aborted = false;
 892 
 893   // Initialize marking structures. This has to be done in a STW phase.
 894   reset();
 895 
 896   // For each region note start of marking.
 897   NoteStartOfMarkHRClosure startcl;
 898   g1h->heap_region_iterate(&startcl);
 899 }
 900 
 901 
 902 void ConcurrentMark::checkpointRootsInitialPost() {
 903   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 904 
 905   // If we force an overflow during remark, the remark operation will
 906   // actually abort and we'll restart concurrent marking. If we always
 907   // force an overflow during remark we'll never actually complete the
 908   // marking phase. So, we initialize this here, at the start of the
  909   // cycle, so that the remaining overflow number will decrease at
 910   // every remark and we'll eventually not need to cause one.
 911   force_overflow_stw()->init();
 912 
 913   // Start Concurrent Marking weak-reference discovery.
 914   ReferenceProcessor* rp = g1h->ref_processor_cm();
 915   // enable ("weak") refs discovery
 916   rp->enable_discovery();
 917   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 918 
 919   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  920   // This is the start of the marking cycle; we expect all
 921   // threads to have SATB queues with active set to false.
 922   satb_mq_set.set_active_all_threads(true, /* new active value */
 923                                      false /* expected_active */);
 924 
 925   _root_regions.prepare_for_scan();
 926 
 927   // update_g1_committed() will be called at the end of an evac pause
 928   // when marking is on. So, it's also called at the end of the
 929   // initial-mark pause to update the heap end, if the heap expands
 930   // during it. No need to call it here.
 931 }
 932 
 933 /*
 934  * Notice that in the next two methods, we actually leave the STS
 935  * during the barrier sync and join it immediately afterwards. If we
 936  * do not do this, the following deadlock can occur: one thread could
 937  * be in the barrier sync code, waiting for the other thread to also
 938  * sync up, whereas another one could be trying to yield, while also
 939  * waiting for the other threads to sync up too.
 940  *
 941  * Note, however, that this code is also used during remark and in
 942  * this case we should not attempt to leave / enter the STS, otherwise
 943  * we'll either hit an assert (debug / fastdebug) or deadlock
 944  * (product). So we should only leave / enter the STS if we are
 945  * operating concurrently.
 946  *
 947  * Because the thread that does the sync barrier has left the STS, it
  948  * is possible that a Full GC or an evacuation pause could occur
  949  * while it is suspended. This is actually safe, since entering the sync
 950  * barrier is one of the last things do_marking_step() does, and it
 951  * doesn't manipulate any data structures afterwards.
 952  */
 953 
 954 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 955   bool barrier_aborted;
 956 
 957   if (verbose_low()) {
 958     gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
 959   }
 960 
 961   {
 962     SuspendibleThreadSetLeaver sts_leave(concurrent());
 963     barrier_aborted = !_first_overflow_barrier_sync.enter();
 964   }
 965 
 966   // at this point everyone should have synced up and not be doing any
 967   // more work
 968 
 969   if (verbose_low()) {
 970     if (barrier_aborted) {
 971       gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
 972     } else {
 973       gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
 974     }
 975   }
 976 
 977   if (barrier_aborted) {
 978     // If the barrier aborted we ignore the overflow condition and
 979     // just abort the whole marking phase as quickly as possible.
 980     return;
 981   }
 982 
 983   // If we're executing the concurrent phase of marking, reset the marking
 984   // state; otherwise the marking state is reset after reference processing,
 985   // during the remark pause.
 986   // If we reset here as a result of an overflow during the remark we will
 987   // see assertion failures from any subsequent set_concurrency_and_phase()
 988   // calls.
 989   if (concurrent()) {
  990     // let the task associated with worker 0 do this
 991     if (worker_id == 0) {
 992       // task 0 is responsible for clearing the global data structures
 993       // We should be here because of an overflow. During STW we should
 994       // not clear the overflow flag since we rely on it being true when
 995       // we exit this method to abort the pause and restart concurrent
 996       // marking.
 997       reset_marking_state(true /* clear_overflow */);
 998       force_overflow()->update();
 999 
1000       if (G1Log::fine()) {
1001         gclog_or_tty->gclog_stamp(concurrent_gc_id());
1002         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
1003       }
1004     }
1005   }
1006 
1007   // after this, each task should reset its own data structures then
 1008   // go into the second barrier
1009 }
1010 
1011 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1012   bool barrier_aborted;
1013 
1014   if (verbose_low()) {
1015     gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
1016   }
1017 
1018   {
1019     SuspendibleThreadSetLeaver sts_leave(concurrent());
1020     barrier_aborted = !_second_overflow_barrier_sync.enter();
1021   }
1022 
1023   // at this point everything should be re-initialized and ready to go
1024 
1025   if (verbose_low()) {
1026     if (barrier_aborted) {
1027       gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
1028     } else {
1029       gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
1030     }
1031   }
1032 }
1033 
1034 #ifndef PRODUCT
1035 void ForceOverflowSettings::init() {
1036   _num_remaining = G1ConcMarkForceOverflow;
1037   _force = false;
1038   update();
1039 }
1040 
1041 void ForceOverflowSettings::update() {
1042   if (_num_remaining > 0) {
1043     _num_remaining -= 1;
1044     _force = true;
1045   } else {
1046     _force = false;
1047   }
1048 }
1049 
1050 bool ForceOverflowSettings::should_force() {
1051   if (_force) {
1052     _force = false;
1053     return true;
1054   } else {
1055     return false;
1056   }
1057 }
1058 #endif // !PRODUCT
1059 
1060 class CMConcurrentMarkingTask: public AbstractGangTask {
1061 private:
1062   ConcurrentMark*       _cm;
1063   ConcurrentMarkThread* _cmt;
1064 
1065 public:
1066   void work(uint worker_id) {
1067     assert(Thread::current()->is_ConcurrentGC_thread(),
1068            "this should only be done by a conc GC thread");
1069     ResourceMark rm;
1070 
1071     double start_vtime = os::elapsedVTime();
1072 
1073     {
1074       SuspendibleThreadSetJoiner sts_join;
1075 
1076       assert(worker_id < _cm->active_tasks(), "invariant");
1077       CMTask* the_task = _cm->task(worker_id);
1078       the_task->record_start_time();
1079       if (!_cm->has_aborted()) {
1080         do {
1081           double start_vtime_sec = os::elapsedVTime();
1082           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1083 
1084           the_task->do_marking_step(mark_step_duration_ms,
1085                                     true  /* do_termination */,
1086                                     false /* is_serial*/);
1087 
1088           double end_vtime_sec = os::elapsedVTime();
1089           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1090           _cm->clear_has_overflown();
1091 
1092           _cm->do_yield_check(worker_id);
1093 
1094           jlong sleep_time_ms;
1095           if (!_cm->has_aborted() && the_task->has_aborted()) {
1096             sleep_time_ms =
1097               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1098             {
1099               SuspendibleThreadSetLeaver sts_leave;
1100               os::sleep(Thread::current(), sleep_time_ms, false);
1101             }
1102           }
1103         } while (!_cm->has_aborted() && the_task->has_aborted());
1104       }
1105       the_task->record_end_time();
1106       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1107     }
1108 
1109     double end_vtime = os::elapsedVTime();
1110     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1111   }
1112 
1113   CMConcurrentMarkingTask(ConcurrentMark* cm,
1114                           ConcurrentMarkThread* cmt) :
1115       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1116 
1117   ~CMConcurrentMarkingTask() { }
1118 };
1119 
1120 // Calculates the number of active workers for a concurrent
1121 // phase.
1122 uint ConcurrentMark::calc_parallel_marking_threads() {
1123   uint n_conc_workers = 0;
1124   if (!UseDynamicNumberOfGCThreads ||
1125       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1126        !ForceDynamicNumberOfGCThreads)) {
1127     n_conc_workers = max_parallel_marking_threads();
1128   } else {
1129     n_conc_workers =
1130       AdaptiveSizePolicy::calc_default_active_workers(
1131                                    max_parallel_marking_threads(),
1132                                    1, /* Minimum workers */
1133                                    parallel_marking_threads(),
1134                                    Threads::number_of_non_daemon_threads());
1135     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1136     // that scaling has already gone into "_max_parallel_marking_threads".
1137   }
1138   assert(n_conc_workers > 0, "Always need at least 1");
1139   return n_conc_workers;
1140 }
1141 
1142 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1143   // Currently, only survivors can be root regions.
1144   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1145   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1146 
1147   const uintx interval = PrefetchScanIntervalInBytes;
1148   HeapWord* curr = hr->bottom();
1149   const HeapWord* end = hr->top();
1150   while (curr < end) {
1151     Prefetch::read(curr, interval);
1152     oop obj = oop(curr);
1153     int size = obj->oop_iterate(&cl);
1154     assert(size == obj->size(), "sanity");
1155     curr += size;
1156   }
1157 }
1158 
1159 class CMRootRegionScanTask : public AbstractGangTask {
1160 private:
1161   ConcurrentMark* _cm;
1162 
1163 public:
1164   CMRootRegionScanTask(ConcurrentMark* cm) :
1165     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1166 
1167   void work(uint worker_id) {
1168     assert(Thread::current()->is_ConcurrentGC_thread(),
1169            "this should only be done by a conc GC thread");
1170 
1171     CMRootRegions* root_regions = _cm->root_regions();
1172     HeapRegion* hr = root_regions->claim_next();
1173     while (hr != NULL) {
1174       _cm->scanRootRegion(hr, worker_id);
1175       hr = root_regions->claim_next();
1176     }
1177   }
1178 };
1179 
1180 void ConcurrentMark::scanRootRegions() {
1181   // Start of concurrent marking.
1182   ClassLoaderDataGraph::clear_claimed_marks();
1183 
1184   // scan_in_progress() will have been set to true only if there was
1185   // at least one root region to scan. So, if it's false, we
1186   // should not attempt to do any further work.
1187   if (root_regions()->scan_in_progress()) {
1188     _parallel_marking_threads = calc_parallel_marking_threads();
1189     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1190            "Maximum number of marking threads exceeded");
1191     uint active_workers = MAX2(1U, parallel_marking_threads());
1192 
1193     CMRootRegionScanTask task(this);
1194     _parallel_workers->set_active_workers(active_workers);
1195     _parallel_workers->run_task(&task);
1196 
1197     // It's possible that has_aborted() is true here without actually
1198     // aborting the survivor scan earlier. This is OK as it's
1199     // mainly used for sanity checking.
1200     root_regions()->scan_finished();
1201   }
1202 }
1203 
1204 void ConcurrentMark::markFromRoots() {
1205   // we might be tempted to assert that:
1206   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1207   //        "inconsistent argument?");
1208   // However that wouldn't be right, because it's possible that
1209   // a safepoint is indeed in progress as a younger generation
1210   // stop-the-world GC happens even as we mark in this generation.
1211 
1212   _restart_for_overflow = false;
1213   force_overflow_conc()->init();
1214 
1215   // _g1h has _n_par_threads
1216   _parallel_marking_threads = calc_parallel_marking_threads();
1217   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1218     "Maximum number of marking threads exceeded");
1219 
1220   uint active_workers = MAX2(1U, parallel_marking_threads());
1221   assert(active_workers > 0, "Should have been set");
1222 
1223   // Parallel task terminator is set in "set_concurrency_and_phase()"
1224   set_concurrency_and_phase(active_workers, true /* concurrent */);
1225 
1226   CMConcurrentMarkingTask markingTask(this, cmThread());
1227   _parallel_workers->set_active_workers(active_workers);
1228   _parallel_workers->run_task(&markingTask);
1229   print_stats();
1230 }
1231 
1232 // Helper class to get rid of some boilerplate code.
1233 class G1CMTraceTime : public GCTraceTime {
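        // Prepends a single space to the log output when tracing is enabled
        // (presumably so the nested message lines up under the enclosing GC
        // log entry) and passes doit through unchanged, so the result can feed
        // the GCTraceTime constructor directly.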
1234   static bool doit_and_prepend(bool doit) {
1235     if (doit) {
1236       gclog_or_tty->put(' ');
1237     }
1238     return doit;
1239   }
1240 
1241  public:
1242   G1CMTraceTime(const char* title, bool doit)
1243     : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
1244         G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
1245   }
1246 };
1247 
1248 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1249   // world is stopped at this checkpoint
1250   assert(SafepointSynchronize::is_at_safepoint(),
1251          "world should be stopped");
1252 
1253   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1254 
1255   // If a full collection has happened, we shouldn't do this.
1256   if (has_aborted()) {
1257     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1258     return;
1259   }
1260 
1261   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1262 
1263   if (VerifyDuringGC) {
1264     HandleMark hm;  // handle scope
1265     g1h->prepare_for_verify();
1266     Universe::verify(VerifyOption_G1UsePrevMarking,
1267                      " VerifyDuringGC:(before)");
1268   }
1269   g1h->check_bitmaps("Remark Start");
1270 
1271   G1CollectorPolicy* g1p = g1h->g1_policy();
1272   g1p->record_concurrent_mark_remark_start();
1273 
1274   double start = os::elapsedTime();
1275 
1276   checkpointRootsFinalWork();
1277 
1278   double mark_work_end = os::elapsedTime();
1279 
1280   weakRefsWork(clear_all_soft_refs);
1281 
1282   if (has_overflown()) {
1283     // Oops.  We overflowed.  Restart concurrent marking.
1284     _restart_for_overflow = true;
1285     if (G1TraceMarkStackOverflow) {
1286       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1287     }
1288 
1289     // Verify the heap w.r.t. the previous marking bitmap.
1290     if (VerifyDuringGC) {
1291       HandleMark hm;  // handle scope
1292       g1h->prepare_for_verify();
1293       Universe::verify(VerifyOption_G1UsePrevMarking,
1294                        " VerifyDuringGC:(overflow)");
1295     }
1296 
1297     // Clear the marking state because we will be restarting
1298     // marking due to overflowing the global mark stack.
1299     reset_marking_state();
1300   } else {
1301     {
1302       G1CMTraceTime trace("GC aggregate-data", G1Log::finer());
1303 
1304       // Aggregate the per-task counting data that we have accumulated
1305       // while marking.
1306       aggregate_count_data();
1307     }
1308 
1309     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1310     // We're done with marking.
 1311     // This is the end of the marking cycle; we expect all
1312     // threads to have SATB queues with active set to true.
1313     satb_mq_set.set_active_all_threads(false, /* new active value */
1314                                        true /* expected_active */);
1315 
1316     if (VerifyDuringGC) {
1317       HandleMark hm;  // handle scope
1318       g1h->prepare_for_verify();
1319       Universe::verify(VerifyOption_G1UseNextMarking,
1320                        " VerifyDuringGC:(after)");
1321     }
1322     g1h->check_bitmaps("Remark End");
1323     assert(!restart_for_overflow(), "sanity");
1324     // Completely reset the marking state since marking completed
1325     set_non_marking_state();
1326   }
1327 
1328   // Expand the marking stack, if we have to and if we can.
1329   if (_markStack.should_expand()) {
1330     _markStack.expand();
1331   }
1332 
1333   // Statistics
1334   double now = os::elapsedTime();
1335   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1336   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1337   _remark_times.add((now - start) * 1000.0);
1338 
1339   g1p->record_concurrent_mark_remark_end();
1340 
1341   G1CMIsAliveClosure is_alive(g1h);
1342   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1343 }
1344 
1345 // Base class of the closures that finalize and verify the
1346 // liveness counting data.
1347 class CMCountDataClosureBase: public HeapRegionClosure {
1348 protected:
1349   G1CollectedHeap* _g1h;
1350   ConcurrentMark* _cm;
1351   CardTableModRefBS* _ct_bs;
1352 
1353   BitMap* _region_bm;
1354   BitMap* _card_bm;
1355 
1356   // Takes a region that's not empty (i.e., it has at least one
 1357   // live object in it) and sets its corresponding bit on the region
1358   // bitmap to 1. If the region is "starts humongous" it will also set
1359   // to 1 the bits on the region bitmap that correspond to its
1360   // associated "continues humongous" regions.
1361   void set_bit_for_region(HeapRegion* hr) {
1362     assert(!hr->is_continues_humongous(), "should have filtered those out");
1363 
1364     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1365     if (!hr->is_starts_humongous()) {
1366       // Normal (non-humongous) case: just set the bit.
1367       _region_bm->par_at_put(index, true);
1368     } else {
1369       // Starts humongous case: calculate how many regions are part of
1370       // this humongous region and then set the bit range.
1371       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1372       _region_bm->par_at_put_range(index, end_index, true);
1373     }
1374   }
1375 
1376 public:
1377   CMCountDataClosureBase(G1CollectedHeap* g1h,
1378                          BitMap* region_bm, BitMap* card_bm):
1379     _g1h(g1h), _cm(g1h->concurrent_mark()),
1380     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
1381     _region_bm(region_bm), _card_bm(card_bm) { }
1382 };
1383 
1384 // Closure that calculates the # live objects per region. Used
1385 // for verification purposes during the cleanup pause.
1386 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1387   CMBitMapRO* _bm;
1388   size_t _region_marked_bytes;
1389 
1390 public:
1391   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1392                          BitMap* region_bm, BitMap* card_bm) :
1393     CMCountDataClosureBase(g1h, region_bm, card_bm),
1394     _bm(bm), _region_marked_bytes(0) { }
1395 
1396   bool doHeapRegion(HeapRegion* hr) {
1397 
1398     if (hr->is_continues_humongous()) {
1399       // We will ignore these here and process them when their
1400       // associated "starts humongous" region is processed (see
1401       // set_bit_for_heap_region()). Note that we cannot rely on their
1402       // associated "starts humongous" region to have their bit set to
1403       // 1 since, due to the region chunking in the parallel region
1404       // iteration, a "continues humongous" region might be visited
1405       // before its associated "starts humongous".
1406       return false;
1407     }
1408 
1409     HeapWord* ntams = hr->next_top_at_mark_start();
1410     HeapWord* start = hr->bottom();
1411 
1412     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1413            err_msg("Preconditions not met - "
1414                    "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1415                    p2i(start), p2i(ntams), p2i(hr->end())));
1416 
1417     // Find the first marked object at or after "start".
1418     start = _bm->getNextMarkedWordAddress(start, ntams);
1419 
1420     size_t marked_bytes = 0;
1421 
1422     while (start < ntams) {
1423       oop obj = oop(start);
1424       int obj_sz = obj->size();
1425       HeapWord* obj_end = start + obj_sz;
1426 
1427       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1428       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1429 
1430       // Note: if we're looking at the last region in the heap - obj_end
1431       // could actually be just beyond the end of the heap; end_idx
1432       // will then correspond to a (non-existent) card that is also
1433       // just beyond the heap.
1434       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1435         // end of object is not card aligned - increment to cover
1436         // all the cards spanned by the object
1437         end_idx += 1;
1438       }
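      // Worked example (illustrative only, assuming 512-byte cards and a
      // heap starting at address 0): an object occupying [0x1000, 0x1280)
      // starts in card 8 (0x1000 / 512) and ends in card 9; since 0x1280 is
      // not card aligned, end_idx is bumped from 9 to 10 so that the
      // half-open range [8, 10) covers both cards the object spans.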
1439 
1440       // Set the bits in the card BM for the cards spanned by this object.
1441       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1442 
1443       // Add the size of this object to the number of marked bytes.
1444       marked_bytes += (size_t)obj_sz * HeapWordSize;
1445 
1446       // Find the next marked object after this one.
1447       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1448     }
1449 
1450     // Mark the allocated-since-marking portion...
1451     HeapWord* top = hr->top();
1452     if (ntams < top) {
1453       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1454       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1455 
1456       // Note: if we're looking at the last region in the heap - top
1457       // could actually be just beyond the end of the heap; end_idx
1458       // will then correspond to a (non-existent) card that is also
1459       // just beyond the heap.
1460       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1461         // top is not card aligned - increment to cover
1462         // all the cards spanned by the allocated-since-marking portion
1463         end_idx += 1;
1464       }
1465       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1466 
1467       // This definitely means the region has live objects.
1468       set_bit_for_region(hr);
1469     }
1470 
1471     // Update the live region bitmap.
1472     if (marked_bytes > 0) {
1473       set_bit_for_region(hr);
1474     }
1475 
1476     // Set the marked bytes for the current region so that
1477     // it can be queried by a calling verification routine
1478     _region_marked_bytes = marked_bytes;
1479 
1480     return false;
1481   }
1482 
1483   size_t region_marked_bytes() const { return _region_marked_bytes; }
1484 };
1485 
1486 // Heap region closure used for verifying the counting data
1487 // that was accumulated concurrently and aggregated during
1488 // the remark pause. This closure is applied to the heap
1489 // regions during the STW cleanup pause.
1490 
1491 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1492   G1CollectedHeap* _g1h;
1493   ConcurrentMark* _cm;
1494   CalcLiveObjectsClosure _calc_cl;
1495   BitMap* _region_bm;   // Region BM to be verified
1496   BitMap* _card_bm;     // Card BM to be verified
1497   bool _verbose;        // verbose output?
1498 
1499   BitMap* _exp_region_bm; // Expected Region BM values
1500   BitMap* _exp_card_bm;   // Expected card BM values
1501 
1502   int _failures;
1503 
1504 public:
1505   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1506                                 BitMap* region_bm,
1507                                 BitMap* card_bm,
1508                                 BitMap* exp_region_bm,
1509                                 BitMap* exp_card_bm,
1510                                 bool verbose) :
1511     _g1h(g1h), _cm(g1h->concurrent_mark()),
1512     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1513     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1514     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1515     _failures(0) { }
1516 
1517   int failures() const { return _failures; }
1518 
1519   bool doHeapRegion(HeapRegion* hr) {
1520     if (hr->is_continues_humongous()) {
1521       // We will ignore these here and process them when their
1522       // associated "starts humongous" region is processed (see
1523       // set_bit_for_region()). Note that we cannot rely on their
1524       // associated "starts humongous" region to have its bit set to
1525       // 1 since, due to the region chunking in the parallel region
1526       // iteration, a "continues humongous" region might be visited
1527       // before its associated "starts humongous".
1528       return false;
1529     }
1530 
1531     int failures = 0;
1532 
1533     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1534     // this region and set the corresponding bits in the expected region
1535     // and card bitmaps.
1536     bool res = _calc_cl.doHeapRegion(hr);
1537     assert(res == false, "should be continuing");
1538 
1539     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1540                     Mutex::_no_safepoint_check_flag);
1541 
1542     // Verify the marked bytes for this region.
1543     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1544     size_t act_marked_bytes = hr->next_marked_bytes();
1545 
1546     // We're not OK if expected marked bytes > actual marked bytes. It means
1547     // we have missed accounting for some objects during the actual marking.
1548     if (exp_marked_bytes > act_marked_bytes) {
1549       if (_verbose) {
1550         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1551                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1552                                hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
1553       }
1554       failures += 1;
1555     }
1556 
1557     // Verify the bit for this region in the actual region bitmap
1558     // and in the just-calculated expected region bitmap.
1559     // We're not OK if the bit in the calculated expected region
1560     // bitmap is set and the bit in the actual region bitmap is not.
1561     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1562 
1563     bool expected = _exp_region_bm->at(index);
1564     bool actual = _region_bm->at(index);
1565     if (expected && !actual) {
1566       if (_verbose) {
1567         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1568                                "expected: %s, actual: %s",
1569                                hr->hrm_index(),
1570                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1571       }
1572       failures += 1;
1573     }
1574 
1575     // Verify that the card bit maps for the cards spanned by the current
1576     // region match. We have an error if we have a set bit in the expected
1577     // bit map and the corresponding bit in the actual bitmap is not set.
1578 
1579     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1580     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1581 
1582     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1583       expected = _exp_card_bm->at(i);
1584       actual = _card_bm->at(i);
1585 
1586       if (expected && !actual) {
1587         if (_verbose) {
1588           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1589                                  "expected: %s, actual: %s",
1590                                  hr->hrm_index(), i,
1591                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1592         }
1593         failures += 1;
1594       }
1595     }
1596 
1597     if (failures > 0 && _verbose)  {
1598       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1599                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1600                              HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
1601                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1602     }
1603 
1604     _failures += failures;
1605 
1606     // We could stop iteration over the heap when we
1607     // find the first violating region by returning true.
1608     return false;
1609   }
1610 };
1611 
1612 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1613 protected:
1614   G1CollectedHeap* _g1h;
1615   ConcurrentMark* _cm;
1616   BitMap* _actual_region_bm;
1617   BitMap* _actual_card_bm;
1618 
1619   uint    _n_workers;
1620 
1621   BitMap* _expected_region_bm;
1622   BitMap* _expected_card_bm;
1623 
1624   int  _failures;
1625   bool _verbose;
1626 
1627   HeapRegionClaimer _hrclaimer;
1628 
1629 public:
1630   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1631                             BitMap* region_bm, BitMap* card_bm,
1632                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1633     : AbstractGangTask("G1 verify final counting"),
1634       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1635       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1636       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1637       _failures(0), _verbose(false),
1638       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1639     assert(VerifyDuringGC, "don't call this otherwise");
1640     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1641     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1642 
1643     _verbose = _cm->verbose_medium();
1644   }
1645 
1646   void work(uint worker_id) {
1647     assert(worker_id < _n_workers, "invariant");
1648 
1649     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1650                                             _actual_region_bm, _actual_card_bm,
1651                                             _expected_region_bm,
1652                                             _expected_card_bm,
1653                                             _verbose);
1654 
1655     _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
1656 
1657     Atomic::add(verify_cl.failures(), &_failures);
1658   }
1659 
1660   int failures() const { return _failures; }
1661 };
1662 
1663 // Closure that finalizes the liveness counting data.
1664 // Used during the cleanup pause.
1665 // Sets the bits corresponding to the interval [NTAMS, top)
1666 // (which contains the implicitly live objects) in the
1667 // card liveness bitmap. Also sets, in the region liveness
1668 // bitmap, the bit for each region containing live data.
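//
// Illustrative region layout during this pass (sketch):
//
//   bottom               ntams                 top                 end
//     |  marked objects    |  allocated since    |     unused        |
//     |  (counted during   |  marking started    |                   |
//     |  the marking cycle)|  (implicitly live)  |                   |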
1669 
1670 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1671  public:
1672   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1673                               BitMap* region_bm,
1674                               BitMap* card_bm) :
1675     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1676 
1677   bool doHeapRegion(HeapRegion* hr) {
1678 
1679     if (hr->is_continues_humongous()) {
1680       // We will ignore these here and process them when their
1681       // associated "starts humongous" region is processed (see
1682       // set_bit_for_region()). Note that we cannot rely on their
1683       // associated "starts humongous" region to have its bit set to
1684       // 1 since, due to the region chunking in the parallel region
1685       // iteration, a "continues humongous" region might be visited
1686       // before its associated "starts humongous".
1687       return false;
1688     }
1689 
1690     HeapWord* ntams = hr->next_top_at_mark_start();
1691     HeapWord* top   = hr->top();
1692 
1693     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1694 
1695     // Mark the allocated-since-marking portion...
1696     if (ntams < top) {
1697       // This definitely means the region has live objects.
1698       set_bit_for_region(hr);
1699 
1700       // Now set the bits in the card bitmap for [ntams, top)
1701       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1702       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1703 
1704       // Note: if we're looking at the last region in the heap - top
1705       // could actually be just beyond the end of the heap; end_idx
1706       // will then correspond to a (non-existent) card that is also
1707       // just beyond the heap.
1708       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1709         // top is not card aligned - increment to cover
1710         // all the cards spanned by the allocated-since-marking portion
1711         end_idx += 1;
1712       }
1713 
1714       assert(end_idx <= _card_bm->size(),
1715              err_msg("oob: end_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1716                      end_idx, _card_bm->size()));
1717       assert(start_idx < _card_bm->size(),
1718              err_msg("oob: start_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1719                      start_idx, _card_bm->size()));
1720 
1721       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1722     }
1723 
1724     // Set the bit for the region if it contains live data
1725     if (hr->next_marked_bytes() > 0) {
1726       set_bit_for_region(hr);
1727     }
1728 
1729     return false;
1730   }
1731 };
1732 
1733 class G1ParFinalCountTask: public AbstractGangTask {
1734 protected:
1735   G1CollectedHeap* _g1h;
1736   ConcurrentMark* _cm;
1737   BitMap* _actual_region_bm;
1738   BitMap* _actual_card_bm;
1739 
1740   uint    _n_workers;
1741   HeapRegionClaimer _hrclaimer;
1742 
1743 public:
1744   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1745     : AbstractGangTask("G1 final counting"),
1746       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1747       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1748       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1749   }
1750 
1751   void work(uint worker_id) {
1752     assert(worker_id < _n_workers, "invariant");
1753 
1754     FinalCountDataUpdateClosure final_update_cl(_g1h,
1755                                                 _actual_region_bm,
1756                                                 _actual_card_bm);
1757 
1758     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1759   }
1760 };
1761 
1762 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1763   G1CollectedHeap* _g1;
1764   size_t _freed_bytes;
1765   FreeRegionList* _local_cleanup_list;
1766   HeapRegionSetCount _old_regions_removed;
1767   HeapRegionSetCount _humongous_regions_removed;
1768   HRRSCleanupTask* _hrrs_cleanup_task;
1769 
1770 public:
1771   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1772                              FreeRegionList* local_cleanup_list,
1773                              HRRSCleanupTask* hrrs_cleanup_task) :
1774     _g1(g1),
1775     _freed_bytes(0),
1776     _local_cleanup_list(local_cleanup_list),
1777     _old_regions_removed(),
1778     _humongous_regions_removed(),
1779     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1780 
1781   size_t freed_bytes() { return _freed_bytes; }
1782   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1783   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1784 
1785   bool doHeapRegion(HeapRegion *hr) {
1786     if (hr->is_continues_humongous()) {
1787       return false;
1788     }
1789     // We use a claim value of zero here because all regions
1790     // were claimed with value 1 in the FinalCount task.
1791     _g1->reset_gc_time_stamps(hr);
1792     hr->note_end_of_marking();
1793 
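    // A non-young region with no live bytes left after marking is entirely
    // garbage, so it is reclaimed right here; young regions are reclaimed
    // by evacuation pauses instead.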
1794     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1795       _freed_bytes += hr->used();
1796       hr->set_containing_set(NULL);
1797       if (hr->is_humongous()) {
1798         assert(hr->is_starts_humongous(), "we should only see starts humongous");
1799         _humongous_regions_removed.increment(1u, hr->capacity());
1800         _g1->free_humongous_region(hr, _local_cleanup_list, true /* par */);
1801       } else {
1802         _old_regions_removed.increment(1u, hr->capacity());
1803         _g1->free_region(hr, _local_cleanup_list, true /* par */);
1804       }
1805     } else {
1806       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1807     }
1808 
1809     return false;
1810   }
1811 };
1812 
1813 class G1ParNoteEndTask: public AbstractGangTask {
1814   friend class G1NoteEndOfConcMarkClosure;
1815 
1816 protected:
1817   G1CollectedHeap* _g1h;
1818   FreeRegionList* _cleanup_list;
1819   HeapRegionClaimer _hrclaimer;
1820 
1821 public:
1822   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1823       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1824   }
1825 
1826   void work(uint worker_id) {
1827     FreeRegionList local_cleanup_list("Local Cleanup List");
1828     HRRSCleanupTask hrrs_cleanup_task;
1829     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1830                                            &hrrs_cleanup_task);
1831     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1832     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1833 
1834     // Now update the lists
1835     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1836     {
1837       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1838       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1839 
1840       // If we iterate over the global cleanup list at the end of
1841       // cleanup to do this printing we will not guarantee to only
1842       // generate output for the newly-reclaimed regions (the list
1843       // might not be empty at the beginning of cleanup; we might
1844       // still be working on its previous contents). So we do the
1845       // printing here, before we append the new regions to the global
1846       // cleanup list.
1847 
1848       G1HRPrinter* hr_printer = _g1h->hr_printer();
1849       if (hr_printer->is_active()) {
1850         FreeRegionListIterator iter(&local_cleanup_list);
1851         while (iter.more_available()) {
1852           HeapRegion* hr = iter.get_next();
1853           hr_printer->cleanup(hr);
1854         }
1855       }
1856 
1857       _cleanup_list->add_ordered(&local_cleanup_list);
1858       assert(local_cleanup_list.is_empty(), "post-condition");
1859 
1860       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1861     }
1862   }
1863 };
1864 
1865 class G1ParScrubRemSetTask: public AbstractGangTask {
1866 protected:
1867   G1RemSet* _g1rs;
1868   BitMap* _region_bm;
1869   BitMap* _card_bm;
1870   HeapRegionClaimer _hrclaimer;
1871 
1872 public:
1873   G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
1874       AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) {
1875   }
1876 
1877   void work(uint worker_id) {
1878     _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
1879   }
1880 
1881 };
1882 
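// The STW cleanup pause: finalize the liveness counting data (and optionally
// verify it), install the next mark bitmap as "prev", note the end of marking
// in every region (freeing the regions that turned out to be completely
// empty), and optionally scrub the remembered sets. The reclaimed regions are
// handed back to the allocator concurrently, in completeCleanup() below.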
1883 void ConcurrentMark::cleanup() {
1884   // world is stopped at this checkpoint
1885   assert(SafepointSynchronize::is_at_safepoint(),
1886          "world should be stopped");
1887   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1888 
1889   // If a full collection has happened, we shouldn't do this.
1890   if (has_aborted()) {
1891     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1892     return;
1893   }
1894 
1895   g1h->verify_region_sets_optional();
1896 
1897   if (VerifyDuringGC) {
1898     HandleMark hm;  // handle scope
1899     g1h->prepare_for_verify();
1900     Universe::verify(VerifyOption_G1UsePrevMarking,
1901                      " VerifyDuringGC:(before)");
1902   }
1903   g1h->check_bitmaps("Cleanup Start");
1904 
1905   G1CollectorPolicy* g1p = g1h->g1_policy();
1906   g1p->record_concurrent_mark_cleanup_start();
1907 
1908   double start = os::elapsedTime();
1909 
1910   HeapRegionRemSet::reset_for_cleanup_tasks();
1911 
1912   // Do counting once more with the world stopped for good measure.
1913   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1914 
1915   g1h->workers()->run_task(&g1_par_count_task);
1916 
1917   if (VerifyDuringGC) {
1918     // Verify that the counting data accumulated during marking matches
1919     // that calculated by walking the marking bitmap.
1920 
1921     // Bitmaps to hold expected values
1922     BitMap expected_region_bm(_region_bm.size(), true);
1923     BitMap expected_card_bm(_card_bm.size(), true);
1924 
1925     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1926                                                  &_region_bm,
1927                                                  &_card_bm,
1928                                                  &expected_region_bm,
1929                                                  &expected_card_bm);
1930 
1931     g1h->workers()->run_task(&g1_par_verify_task);
1932 
1933     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1934   }
1935 
1936   size_t start_used_bytes = g1h->used();
1937   g1h->set_marking_complete();
1938 
1939   double count_end = os::elapsedTime();
1940   double this_final_counting_time = (count_end - start);
1941   _total_counting_time += this_final_counting_time;
1942 
1943   if (G1PrintRegionLivenessInfo) {
1944     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1945     _g1h->heap_region_iterate(&cl);
1946   }
1947 
1948   // Install the newly created mark bitmap as "prev".
1949   swapMarkBitMaps();
1950 
1951   g1h->reset_gc_time_stamp();
1952 
1953   uint n_workers = _g1h->workers()->active_workers();
1954 
1955   // Note end of marking in all heap regions.
1956   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1957   g1h->workers()->run_task(&g1_par_note_end_task);
1958   g1h->check_gc_time_stamps();
1959 
1960   if (!cleanup_list_is_empty()) {
1961     // The cleanup list is not empty, so we'll have to process it
1962     // concurrently. Notify anyone else that might be wanting free
1963     // regions that there will be more free regions coming soon.
1964     g1h->set_free_regions_coming();
1965   }
1966 
1967   // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
1968   // call below, since it affects the metric by which we sort the heap regions.
1969   if (G1ScrubRemSets) {
1970     double rs_scrub_start = os::elapsedTime();
1971     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
1972     g1h->workers()->run_task(&g1_par_scrub_rs_task);
1973 
1974     double rs_scrub_end = os::elapsedTime();
1975     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1976     _total_rs_scrub_time += this_rs_scrub_time;
1977   }
1978 
1979   // this will also free any regions totally full of garbage objects,
1980   // and sort the regions.
1981   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1982 
1983   // Statistics.
1984   double end = os::elapsedTime();
1985   _cleanup_times.add((end - start) * 1000.0);
1986 
1987   if (G1Log::fine()) {
1988     g1h->g1_policy()->print_heap_transition(start_used_bytes);
1989   }
1990 
1991   // Clean up will have freed any regions completely full of garbage.
1992   // Update the soft reference policy with the new heap occupancy.
1993   Universe::update_heap_info_at_gc();
1994 
1995   if (VerifyDuringGC) {
1996     HandleMark hm;  // handle scope
1997     g1h->prepare_for_verify();
1998     Universe::verify(VerifyOption_G1UsePrevMarking,
1999                      " VerifyDuringGC:(after)");
2000   }
2001 
2002   g1h->check_bitmaps("Cleanup End");
2003 
2004   g1h->verify_region_sets_optional();
2005 
2006   // We need to make this be a "collection" so any collection pause that
2007   // races with it goes around and waits for completeCleanup to finish.
2008   g1h->increment_total_collections();
2009 
2010   // Clean out dead classes and update Metaspace sizes.
2011   if (ClassUnloadingWithConcurrentMark) {
2012     ClassLoaderDataGraph::purge();
2013   }
2014   MetaspaceGC::compute_new_size();
2015 
2016   // We reclaimed old regions so we should calculate the sizes to make
2017   // sure we update the old gen/space data.
2018   g1h->g1mm()->update_sizes();
2019   g1h->allocation_context_stats().update_after_mark();
2020 
2021   g1h->trace_heap_after_concurrent_cycle();
2022 }
2023 
2024 void ConcurrentMark::completeCleanup() {
2025   if (has_aborted()) return;
2026 
2027   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2028 
2029   _cleanup_list.verify_optional();
2030   FreeRegionList tmp_free_list("Tmp Free List");
2031 
2032   if (G1ConcRegionFreeingVerbose) {
2033     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2034                            "cleanup list has %u entries",
2035                            _cleanup_list.length());
2036   }
2037 
2038   // No one else should be accessing the _cleanup_list at this point,
2039   // so it is not necessary to take any locks
2040   while (!_cleanup_list.is_empty()) {
2041     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
2042     assert(hr != NULL, "Got NULL from a non-empty list");
2043     hr->par_clear();
2044     tmp_free_list.add_ordered(hr);
2045 
2046     // Instead of adding one region at a time to the secondary_free_list,
2047     // we accumulate them in the local list and move them a few at a
2048     // time. This also cuts down on the number of notify_all() calls
2049     // we do during this process. We'll also append the local list when
2050     // _cleanup_list is empty (which means we just removed the last
2051     // region from the _cleanup_list).
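    // For example (value chosen purely for illustration), with
    // G1SecondaryFreeListAppendLength == 5 the handover and the notify_all()
    // below happen roughly once per five cleared regions rather than once
    // per region.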
2052     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2053         _cleanup_list.is_empty()) {
2054       if (G1ConcRegionFreeingVerbose) {
2055         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2056                                "appending %u entries to the secondary_free_list, "
2057                                "cleanup list still has %u entries",
2058                                tmp_free_list.length(),
2059                                _cleanup_list.length());
2060       }
2061 
2062       {
2063         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2064         g1h->secondary_free_list_add(&tmp_free_list);
2065         SecondaryFreeList_lock->notify_all();
2066       }
2067 #ifndef PRODUCT
2068       if (G1StressConcRegionFreeing) {
2069         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2070           os::sleep(Thread::current(), (jlong) 1, false);
2071         }
2072       }
2073 #endif
2074     }
2075   }
2076   assert(tmp_free_list.is_empty(), "post-condition");
2077 }
2078 
2079 // Supporting Object and Oop closures for reference discovery
2080 // and processing during marking
2081 
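// An object is treated as alive here unless it lies inside the G1 reserved
// heap and is_obj_ill() reports it as dead - roughly (an assumption about
// is_obj_ill()'s semantics) an object allocated before marking started that
// has not been marked in the next bitmap.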
2082 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2083   HeapWord* addr = (HeapWord*)obj;
2084   return addr != NULL &&
2085          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2086 }
2087 
2088 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2089 // Uses the CMTask associated with a worker thread (for serial reference
2090 // processing the CMTask for worker 0 is used) to preserve (mark) and
2091 // trace referent objects.
2092 //
2093 // Using the CMTask and embedded local queues avoids having the worker
2094 // threads operating on the global mark stack. This reduces the risk
2095 // of overflowing the stack - which we would rather avoid at this late
2096 // stage. Also, using the tasks' local queues removes the potential
2097 // for the workers to interfere with each other, which could occur if
2098 // they were all operating on the global stack.
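//
// Both this closure and the drain closure below are handed to
// ReferenceProcessor::process_discovered_references() in weakRefsWork().
// Per discovered reference the processing pattern is roughly (sketch):
//
//   task->deal_with_reference(obj);        // grey the referent
//   if (--counter == 0) {                  // every G1RefProcDrainInterval refs
//     do {
//       task->do_marking_step(...);        // drain local/global mark stacks
//     } while (task->has_aborted() && !cm->has_overflown());
//     counter = limit;
//   }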
2099 
2100 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2101   ConcurrentMark* _cm;
2102   CMTask*         _task;
2103   int             _ref_counter_limit;
2104   int             _ref_counter;
2105   bool            _is_serial;
2106  public:
2107   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2108     _cm(cm), _task(task), _is_serial(is_serial),
2109     _ref_counter_limit(G1RefProcDrainInterval) {
2110     assert(_ref_counter_limit > 0, "sanity");
2111     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2112     _ref_counter = _ref_counter_limit;
2113   }
2114 
2115   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2116   virtual void do_oop(      oop* p) { do_oop_work(p); }
2117 
2118   template <class T> void do_oop_work(T* p) {
2119     if (!_cm->has_overflown()) {
2120       oop obj = oopDesc::load_decode_heap_oop(p);
2121       if (_cm->verbose_high()) {
2122         gclog_or_tty->print_cr("\t[%u] we're looking at location "
2123                                "*"PTR_FORMAT" = "PTR_FORMAT,
2124                                _task->worker_id(), p2i(p), p2i((void*) obj));
2125       }
2126 
2127       _task->deal_with_reference(obj);
2128       _ref_counter--;
2129 
2130       if (_ref_counter == 0) {
2131         // We have dealt with _ref_counter_limit references, pushing them
2132         // and objects reachable from them on to the local stack (and
2133         // possibly the global stack). Call CMTask::do_marking_step() to
2134         // process these entries.
2135         //
2136         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2137         // there's nothing more to do (i.e. we're done with the entries that
2138         // were pushed as a result of the CMTask::deal_with_reference() calls
2139         // above) or we overflow.
2140         //
2141         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2142         // flag while there may still be some work to do. (See the comment at
2143         // the beginning of CMTask::do_marking_step() for those conditions -
2144         // one of which is reaching the specified time target.) It is only
2145         // when CMTask::do_marking_step() returns without setting the
2146         // has_aborted() flag that the marking step has completed.
2147         do {
2148           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2149           _task->do_marking_step(mark_step_duration_ms,
2150                                  false      /* do_termination */,
2151                                  _is_serial);
2152         } while (_task->has_aborted() && !_cm->has_overflown());
2153         _ref_counter = _ref_counter_limit;
2154       }
2155     } else {
2156       if (_cm->verbose_high()) {
2157          gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2158       }
2159     }
2160   }
2161 };
2162 
2163 // 'Drain' oop closure used by both serial and parallel reference processing.
2164 // Uses the CMTask associated with a given worker thread (for serial
2165 // reference processing the CMTask for worker 0 is used). Calls the
2166 // do_marking_step routine, with an unbelievably large timeout value,
2167 // to drain the marking data structures of the remaining entries
2168 // added by the 'keep alive' oop closure above.
2169 
2170 class G1CMDrainMarkingStackClosure: public VoidClosure {
2171   ConcurrentMark* _cm;
2172   CMTask*         _task;
2173   bool            _is_serial;
2174  public:
2175   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2176     _cm(cm), _task(task), _is_serial(is_serial) {
2177     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2178   }
2179 
2180   void do_void() {
2181     do {
2182       if (_cm->verbose_high()) {
2183         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2184                                _task->worker_id(), BOOL_TO_STR(_is_serial));
2185       }
2186 
2187       // We call CMTask::do_marking_step() to completely drain the local
2188       // and global marking stacks of entries pushed by the 'keep alive'
2189       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2190       //
2191       // CMTask::do_marking_step() is called in a loop, which we'll exit
2192       // if there's nothing more to do (i.e. we've completely drained the
2193       // entries that were pushed as a result of applying the 'keep alive'
2194       // closure to the entries on the discovered ref lists) or we overflow
2195       // the global marking stack.
2196       //
2197       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2198       // flag while there may still be some work to do. (See the comment at
2199       // the beginning of CMTask::do_marking_step() for those conditions -
2200       // one of which is reaching the specified time target.) It is only
2201       // when CMTask::do_marking_step() returns without setting the
2202       // has_aborted() flag that the marking step has completed.
2203 
2204       _task->do_marking_step(1000000000.0 /* something very large */,
2205                              true         /* do_termination */,
2206                              _is_serial);
2207     } while (_task->has_aborted() && !_cm->has_overflown());
2208   }
2209 };
2210 
2211 // Implementation of AbstractRefProcTaskExecutor for parallel
2212 // reference processing at the end of G1 concurrent marking
2213 
2214 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2215 private:
2216   G1CollectedHeap* _g1h;
2217   ConcurrentMark*  _cm;
2218   WorkGang*        _workers;
2219   uint             _active_workers;
2220 
2221 public:
2222   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2223                           ConcurrentMark* cm,
2224                           WorkGang* workers,
2225                           uint n_workers) :
2226     _g1h(g1h), _cm(cm),
2227     _workers(workers), _active_workers(n_workers) { }
2228 
2229   // Executes the given task using concurrent marking worker threads.
2230   virtual void execute(ProcessTask& task);
2231   virtual void execute(EnqueueTask& task);
2232 };
2233 
2234 class G1CMRefProcTaskProxy: public AbstractGangTask {
2235   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2236   ProcessTask&     _proc_task;
2237   G1CollectedHeap* _g1h;
2238   ConcurrentMark*  _cm;
2239 
2240 public:
2241   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2242                      G1CollectedHeap* g1h,
2243                      ConcurrentMark* cm) :
2244     AbstractGangTask("Process reference objects in parallel"),
2245     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2246     ReferenceProcessor* rp = _g1h->ref_processor_cm();
2247     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2248   }
2249 
2250   virtual void work(uint worker_id) {
2251     ResourceMark rm;
2252     HandleMark hm;
2253     CMTask* task = _cm->task(worker_id);
2254     G1CMIsAliveClosure g1_is_alive(_g1h);
2255     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2256     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2257 
2258     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2259   }
2260 };
2261 
2262 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2263   assert(_workers != NULL, "Need parallel worker threads.");
2264   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2265 
2266   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2267 
2268   // We need to reset the concurrency level before each
2269   // proxy task execution, so that the termination protocol
2270   // and overflow handling in CMTask::do_marking_step() knows
2271   // how many workers to wait for.
2272   _cm->set_concurrency(_active_workers);
2273   _workers->run_task(&proc_task_proxy);
2274 }
2275 
2276 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2277   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2278   EnqueueTask& _enq_task;
2279 
2280 public:
2281   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2282     AbstractGangTask("Enqueue reference objects in parallel"),
2283     _enq_task(enq_task) { }
2284 
2285   virtual void work(uint worker_id) {
2286     _enq_task.work(worker_id);
2287   }
2288 };
2289 
2290 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2291   assert(_workers != NULL, "Need parallel worker threads.");
2292   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2293 
2294   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2295 
2296   // Not strictly necessary but...
2297   //
2298   // We need to reset the concurrency level before each
2299   // proxy task execution, so that the termination protocol
2300   // and overflow handling in CMTask::do_marking_step() knows
2301   // how many workers to wait for.
2302   _cm->set_concurrency(_active_workers);
2303   _workers->run_task(&enq_task_proxy);
2304 }
2305 
2306 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2307   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true /* process_strings */, true /* process_symbols */, purged_classes);
2308 }
2309 
2310 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2311   if (has_overflown()) {
2312     // Skip processing the discovered references if we have
2313     // overflown the global marking stack. Reference objects
2314     // only get discovered once so it is OK to not
2315     // de-populate the discovered reference lists. We could have,
2316     // but the only benefit would be that, when marking restarts,
2317     // less reference objects are discovered.
2318     return;
2319   }
2320 
2321   ResourceMark rm;
2322   HandleMark   hm;
2323 
2324   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2325 
2326   // Is alive closure.
2327   G1CMIsAliveClosure g1_is_alive(g1h);
2328 
2329   // Inner scope to exclude the cleaning of the string and symbol
2330   // tables from the displayed time.
2331   {
2332     G1CMTraceTime t("GC ref-proc", G1Log::finer());
2333 
2334     ReferenceProcessor* rp = g1h->ref_processor_cm();
2335 
2336     // See the comment in G1CollectedHeap::ref_processing_init()
2337     // about how reference processing currently works in G1.
2338 
2339     // Set the soft reference policy
2340     rp->setup_policy(clear_all_soft_refs);
2341     assert(_markStack.isEmpty(), "mark stack should be empty");
2342 
2343     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2344     // in serial reference processing. Note these closures are also
2345     // used for serially processing (by the current thread) the
2346     // JNI references during parallel reference processing.
2347     //
2348     // These closures do not need to synchronize with the worker
2349     // threads involved in parallel reference processing as these
2350     // instances are executed serially by the current thread (i.e., when
2351     // reference processing is not multi-threaded it is performed by the
2352     // current thread instead of a gang worker).
2353     //
2354     // The gang tasks involved in parallel reference processing create
2355     // their own instances of these closures, which do their own
2356     // synchronization among themselves.
2357     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2358     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2359 
2360     // We need at least one active thread. If reference processing
2361     // is not multi-threaded we use the current (VMThread) thread,
2362     // otherwise we use the work gang from the G1CollectedHeap and
2363     // we utilize all the worker threads we can.
2364     bool processing_is_mt = rp->processing_is_mt();
2365     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2366     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2367 
2368     // Parallel processing task executor.
2369     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2370                                               g1h->workers(), active_workers);
2371     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
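    // A NULL executor makes the reference processor run serially on the
    // current thread using the serial closures created above; otherwise the
    // proxy tasks are spawned through the parallel task executor.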
2372 
2373     // Set the concurrency level. The phase was already set prior to
2374     // executing the remark task.
2375     set_concurrency(active_workers);
2376 
2377     // Set the degree of MT processing here.  If the discovery was done MT,
2378     // the number of threads involved during discovery could differ from
2379     // the number of active workers.  This is OK as long as the discovered
2380     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2381     rp->set_active_mt_degree(active_workers);
2382 
2383     // Process the weak references.
2384     const ReferenceProcessorStats& stats =
2385         rp->process_discovered_references(&g1_is_alive,
2386                                           &g1_keep_alive,
2387                                           &g1_drain_mark_stack,
2388                                           executor,
2389                                           g1h->gc_timer_cm(),
2390                                           concurrent_gc_id());
2391     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2392 
2393     // The do_oop work routines of the keep_alive and drain_marking_stack
2394     // oop closures will set the has_overflown flag if we overflow the
2395     // global marking stack.
2396 
2397     assert(_markStack.overflow() || _markStack.isEmpty(),
2398             "mark stack should be empty (unless it overflowed)");
2399 
2400     if (_markStack.overflow()) {
2401       // This should have been done already when we tried to push an
2402       // entry on to the global mark stack. But let's do it again.
2403       set_has_overflown();
2404     }
2405 
2406     assert(rp->num_q() == active_workers, "why not");
2407 
2408     rp->enqueue_discovered_references(executor);
2409 
2410     rp->verify_no_references_recorded();
2411     assert(!rp->discovery_enabled(), "Post condition");
2412   }
2413 
2414   if (has_overflown()) {
2415     // We cannot trust g1_is_alive if the marking stack overflowed.
2416     return;
2417   }
2418 
2419   assert(_markStack.isEmpty(), "Marking should have completed");
2420 
2421   // Unload Klasses, String, Symbols, Code Cache, etc.
2422   {
2423     G1CMTraceTime trace("Unloading", G1Log::finer());
2424 
2425     if (ClassUnloadingWithConcurrentMark) {
2426       bool purged_classes;
2427 
2428       {
2429         G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest());
2430         purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2431       }
2432 
2433       {
2434         G1CMTraceTime trace("Parallel Unloading", G1Log::finest());
2435         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2436       }
2437     }
2438 
2439     if (G1StringDedup::is_enabled()) {
2440       G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest());
2441       G1StringDedup::unlink(&g1_is_alive);
2442     }
2443   }
2444 }
2445 
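// After a completed marking cycle the "next" bitmap holds the full liveness
// information, so it becomes the new "prev" bitmap; the old "prev" bitmap is
// cleared later and reused as "next" in the following cycle.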
2446 void ConcurrentMark::swapMarkBitMaps() {
2447   CMBitMapRO* temp = _prevMarkBitMap;
2448   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2449   _nextMarkBitMap  = (CMBitMap*)  temp;
2450 }
2451 
2452 // Closure for marking entries in SATB buffers.
2453 class CMSATBBufferClosure : public SATBBufferClosure {
2454 private:
2455   CMTask* _task;
2456   G1CollectedHeap* _g1h;
2457 
2458   // This is very similar to CMTask::deal_with_reference, but with
2459   // more relaxed requirements for the argument, so this must be more
2460   // circumspect about treating the argument as an object.
2461   void do_entry(void* entry) const {
2462     _task->increment_refs_reached();
2463     HeapRegion* hr = _g1h->heap_region_containing_raw(entry);
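    // Entries at or above NTAMS need no greying: objects allocated since
    // marking started are treated as implicitly live and are never marked
    // through the bitmap.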
2464     if (entry < hr->next_top_at_mark_start()) {
2465       // Until we get here, we don't know whether entry refers to a valid
2466       // object; it could instead have been a stale reference.
2467       oop obj = static_cast<oop>(entry);
2468       assert(obj->is_oop(true /* ignore mark word */),
2469              err_msg("Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj)));
2470       _task->make_reference_grey(obj, hr);
2471     }
2472   }
2473 
2474 public:
2475   CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2476     : _task(task), _g1h(g1h) { }
2477 
2478   virtual void do_buffer(void** buffer, size_t size) {
2479     for (size_t i = 0; i < size; ++i) {
2480       do_entry(buffer[i]);
2481     }
2482   }
2483 };
2484 
2485 class G1RemarkThreadsClosure : public ThreadClosure {
2486   CMSATBBufferClosure _cm_satb_cl;
2487   G1CMOopClosure _cm_cl;
2488   MarkingCodeBlobClosure _code_cl;
2489   int _thread_parity;
2490 
2491  public:
2492   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2493     _cm_satb_cl(task, g1h),
2494     _cm_cl(g1h, g1h->concurrent_mark(), task),
2495     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2496     _thread_parity(Threads::thread_claim_parity()) {}
2497 
2498   void do_thread(Thread* thread) {
2499     if (thread->is_Java_thread()) {
2500       if (thread->claim_oops_do(true, _thread_parity)) {
2501         JavaThread* jt = (JavaThread*)thread;
2502 
2503         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2504         // however, oops reachable from nmethods have very complex lifecycles:
2505         // * Alive if on the stack of an executing method
2506         // * Weakly reachable otherwise
2507         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2508         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2509         jt->nmethods_do(&_code_cl);
2510 
2511         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2512       }
2513     } else if (thread->is_VM_thread()) {
2514       if (thread->claim_oops_do(true, _thread_parity)) {
2515         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2516       }
2517     }
2518   }
2519 };
2520 
2521 class CMRemarkTask: public AbstractGangTask {
2522 private:
2523   ConcurrentMark* _cm;
2524 public:
2525   void work(uint worker_id) {
2526     // Since all available tasks are actually started, we should
2527     // only proceed if we're supposed to be active.
2528     if (worker_id < _cm->active_tasks()) {
2529       CMTask* task = _cm->task(worker_id);
2530       task->record_start_time();
2531       {
2532         ResourceMark rm;
2533         HandleMark hm;
2534 
2535         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2536         Threads::threads_do(&threads_f);
2537       }
2538 
2539       do {
2540         task->do_marking_step(1000000000.0 /* something very large */,
2541                               true         /* do_termination       */,
2542                               false        /* is_serial            */);
2543       } while (task->has_aborted() && !_cm->has_overflown());
2544       // If we overflow, then we do not want to restart. We instead
2545       // want to abort remark and do concurrent marking again.
2546       task->record_end_time();
2547     }
2548   }
2549 
2550   CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2551     AbstractGangTask("Par Remark"), _cm(cm) {
2552     _cm->terminator()->reset_for_reuse(active_workers);
2553   }
2554 };
2555 
2556 void ConcurrentMark::checkpointRootsFinalWork() {
2557   ResourceMark rm;
2558   HandleMark   hm;
2559   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2560 
2561   G1CMTraceTime trace("Finalize Marking", G1Log::finer());
2562 
2563   g1h->ensure_parsability(false);
2564 
2565   // this is remark, so we'll use up all active threads
2566   uint active_workers = g1h->workers()->active_workers();
2567   set_concurrency_and_phase(active_workers, false /* concurrent */);
2568   // Leave _parallel_marking_threads at its
2569   // value originally calculated in the ConcurrentMark
2570   // constructor and pass values of the active workers
2571   // through the gang in the task.
2572 
2573   {
2574     StrongRootsScope srs(active_workers);
2575 
2576     CMRemarkTask remarkTask(this, active_workers);
2577     // We will start all available threads, even if we decide that the
2578     // active_workers will be fewer. The extra ones will just bail out
2579     // immediately.
2580     g1h->workers()->run_task(&remarkTask);
2581   }
2582 
2583   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2584   guarantee(has_overflown() ||
2585             satb_mq_set.completed_buffers_num() == 0,
2586             err_msg("Invariant: has_overflown = %s, num buffers = %d",
2587                     BOOL_TO_STR(has_overflown()),
2588                     satb_mq_set.completed_buffers_num()));
2589 
2590   print_stats();
2591 }
2592 
2593 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2594   // Note we are overriding the read-only view of the prev map here, via
2595   // the cast.
2596   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2597 }
2598 
2599 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2600   _nextMarkBitMap->clearRange(mr);
2601 }
2602 
2603 HeapRegion*
2604 ConcurrentMark::claim_region(uint worker_id) {
2605   // "checkpoint" the finger
2606   HeapWord* finger = _finger;
2607 
2608   // _heap_end will not change underneath our feet; it only changes at
2609   // yield points.
2610   while (finger < _heap_end) {
2611     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2612 
2613     // Note on how this code handles humongous regions. In the
2614     // normal case the finger will reach the start of a "starts
2615     // humongous" (SH) region. Its end will either be the end of the
2616     // last "continues humongous" (CH) region in the sequence, or the
2617     // standard end of the SH region (if the SH is the only region in
2618     // the sequence). That way claim_region() will skip over the CH
2619     // regions. However, there is a subtle race between a CM thread
2620     // executing this method and a mutator thread doing a humongous
2621     // object allocation. The two are not mutually exclusive as the CM
2622     // thread does not need to hold the Heap_lock when it gets
2623     // here. So there is a chance that claim_region() will come across
2624     // a free region that's in the process of becoming a SH or a CH
2625     // region. In the former case, it will either
2626     //   a) Miss the update to the region's end, in which case it will
2627     //      visit every subsequent CH region, will find their bitmaps
2628     //      empty, and do nothing, or
2629     //   b) Will observe the update of the region's end (in which case
2630     //      it will skip the subsequent CH regions).
2631     // If it comes across a region that suddenly becomes CH, the
2632     // scenario will be similar to b). So, the race between
2633     // claim_region() and a humongous object allocation might force us
2634     // to do a bit of unnecessary work (due to some unnecessary bitmap
2635     // iterations) but it should not introduce any correctness issues.
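    // The claim protocol below, in sketch form:
    //
    //   end = limit of the region containing the finger (or finger +
    //         GrainWords if that part of the heap is not committed);
    //   if CAS(&_finger, finger, end) succeeds, we own [finger, end) and
    //     return the region if it has anything to scan;
    //   otherwise somebody else moved the finger first - re-read and retry.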
2636     HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2637 
2638     // heap_region_containing_raw() above may return NULL as we always claim
2639     // up to the end of the heap. In this case, just jump to the next region.
2640     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2641 
2642     // Is the gap between reading the finger and doing the CAS too long?
2643     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2644     if (res == finger && curr_region != NULL) {
2645       // we succeeded
2646       HeapWord*   bottom        = curr_region->bottom();
2647       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2648 
2649       if (verbose_low()) {
2650         gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2651                                "["PTR_FORMAT", "PTR_FORMAT"), "
2652                                "limit = "PTR_FORMAT,
2653                                worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2654       }
2655 
2656       // notice that _finger == end cannot be guaranteed here since
2657       // someone else might have moved the finger even further
2658       assert(_finger >= end, "the finger should have moved forward");
2659 
2660       if (verbose_low()) {
2661         gclog_or_tty->print_cr("[%u] we were successful with region = "
2662                                PTR_FORMAT, worker_id, p2i(curr_region));
2663       }
2664 
2665       if (limit > bottom) {
2666         if (verbose_low()) {
2667           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
2668                                  "returning it ", worker_id, p2i(curr_region));
2669         }
2670         return curr_region;
2671       } else {
2672         assert(limit == bottom,
2673                "the region limit should be at bottom");
2674         if (verbose_low()) {
2675           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
2676                                  "returning NULL", worker_id, p2i(curr_region));
2677         }
2678         // we return NULL and the caller should try calling
2679         // claim_region() again.
2680         return NULL;
2681       }
2682     } else {
2683       assert(_finger > finger, "the finger should have moved forward");
2684       if (verbose_low()) {
2685         if (curr_region == NULL) {
2686           gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, "
2687                                  "global finger = "PTR_FORMAT", "
2688                                  "our finger = "PTR_FORMAT,
2689                                  worker_id, p2i(_finger), p2i(finger));
2690         } else {
2691           gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
2692                                  "global finger = "PTR_FORMAT", "
2693                                  "our finger = "PTR_FORMAT,
2694                                  worker_id, p2i(_finger), p2i(finger));
2695         }
2696       }
2697 
2698       // read it again
2699       finger = _finger;
2700     }
2701   }
2702 
2703   return NULL;
2704 }
2705 
2706 #ifndef PRODUCT
2707 enum VerifyNoCSetOopsPhase {
2708   VerifyNoCSetOopsStack,
2709   VerifyNoCSetOopsQueues
2710 };
2711 
2712 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
2713 private:
2714   G1CollectedHeap* _g1h;
2715   VerifyNoCSetOopsPhase _phase;
2716   int _info;
2717 
2718   const char* phase_str() {
2719     switch (_phase) {
2720     case VerifyNoCSetOopsStack:         return "Stack";
2721     case VerifyNoCSetOopsQueues:        return "Queue";
2722     default:                            ShouldNotReachHere();
2723     }
2724     return NULL;
2725   }
2726 
2727   void do_object_work(oop obj) {
2728     guarantee(!_g1h->obj_in_cs(obj),
2729               err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2730                       p2i((void*) obj), phase_str(), _info));
2731   }
2732 
2733 public:
2734   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2735 
2736   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2737     _phase = phase;
2738     _info = info;
2739   }
2740 
2741   virtual void do_oop(oop* p) {
2742     oop obj = oopDesc::load_decode_heap_oop(p);
2743     do_object_work(obj);
2744   }
2745 
2746   virtual void do_oop(narrowOop* p) {
2747     // We should not come across narrow oops while scanning marking
2748     // stacks
2749     ShouldNotReachHere();
2750   }
2751 
2752   virtual void do_object(oop obj) {
2753     do_object_work(obj);
2754   }
2755 };
2756 
2757 void ConcurrentMark::verify_no_cset_oops() {
2758   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2759   if (!G1CollectedHeap::heap()->mark_in_progress()) {
2760     return;
2761   }
2762 
2763   VerifyNoCSetOopsClosure cl;
2764 
2765   // Verify entries on the global mark stack
2766   cl.set_phase(VerifyNoCSetOopsStack);
2767   _markStack.oops_do(&cl);
2768 
2769   // Verify entries on the task queues
2770   for (uint i = 0; i < _max_worker_id; i += 1) {
2771     cl.set_phase(VerifyNoCSetOopsQueues, i);
2772     CMTaskQueue* queue = _task_queues->queue(i);
2773     queue->oops_do(&cl);
2774   }
2775 
2776   // Verify the global finger
2777   HeapWord* global_finger = finger();
2778   if (global_finger != NULL && global_finger < _heap_end) {
2779     // The global finger always points to a heap region boundary. We
2780     // use heap_region_containing_raw() to get the containing region
2781     // given that the global finger could be pointing to a free region
2782     // which subsequently becomes a continues humongous region. If that
2783     // happens, heap_region_containing() will return the bottom of the
2784     // corresponding starts humongous region and the check below will
2785     // not hold any more.
2786     // Since we always iterate over all regions, we might get a NULL HeapRegion
2787     // here.
2788     HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2789     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2790               err_msg("global finger: " PTR_FORMAT " region: " HR_FORMAT,
2791                       p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
2792   }
2793 
2794   // Verify the task fingers
2795   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2796   for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
2797     CMTask* task = _tasks[i];
2798     HeapWord* task_finger = task->finger();
2799     if (task_finger != NULL && task_finger < _heap_end) {
2800       // See above note on the global finger verification.
2801       HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2802       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2803                 !task_hr->in_collection_set(),
2804                 err_msg("task finger: " PTR_FORMAT " region: " HR_FORMAT,
2805                         p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
2806     }
2807   }
2808 }
2809 #endif // PRODUCT
2810 
2811 // Aggregate the counting data that was constructed concurrently
2812 // with marking.
2813 class AggregateCountDataHRClosure: public HeapRegionClosure {
2814   G1CollectedHeap* _g1h;
2815   ConcurrentMark* _cm;
2816   CardTableModRefBS* _ct_bs;
2817   BitMap* _cm_card_bm;
2818   uint _max_worker_id;
2819 
2820  public:
2821   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2822                               BitMap* cm_card_bm,
2823                               uint max_worker_id) :
2824     _g1h(g1h), _cm(g1h->concurrent_mark()),
2825     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2826     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2827 
2828   bool doHeapRegion(HeapRegion* hr) {
2829     if (hr->is_continues_humongous()) {
2830       // We will ignore these here and process them when their
2831       // associated "starts humongous" region is processed.
2832       // Note that we cannot rely on their associated
2833       // "starts humongous" region to have their bit set to 1
2834       // since, due to the region chunking in the parallel region
2835       // iteration, a "continues humongous" region might be visited
2836       // before its associated "starts humongous".
2837       return false;
2838     }
2839 
2840     HeapWord* start = hr->bottom();
2841     HeapWord* limit = hr->next_top_at_mark_start();
2842     HeapWord* end = hr->end();
2843 
2844     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2845            err_msg("Preconditions not met - "
2846                    "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2847                    "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2848                    p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));
2849 
2850     assert(hr->next_marked_bytes() == 0, "Precondition");
2851 
2852     if (start == limit) {
2853       // NTAMS of this region has not been set so nothing to do.
2854       return false;
2855     }
2856 
2857     // 'start' should be in the heap.
2858     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2859     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2860     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2861 
2862     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2863     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2864     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2865 
2866     // If ntams is not card aligned then we bump the card bitmap index
2867     // for limit so that we get all the cards spanned by
2868     // the object ending at ntams.
2869     // Note: if this is the last region in the heap then ntams
2870     // could actually be just beyond the end of the heap;
2871     // limit_idx will then correspond to a (non-existent) card
2872     // that is also outside the heap.
2873     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2874       limit_idx += 1;
2875     }
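         // For example: if limit falls in the middle of a card, the bump above
         // makes limit_idx include that partially-covered card, so the cards
         // spanned by the object ending at ntams are not dropped.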
2876 
2877     assert(limit_idx <= end_idx, "or else use atomics");
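         // The plain (non-atomic) set_bit() calls below rely on the property
         // checked by the assert above: the cards we set all lie within this
         // region, and each region is handled by a single worker during this
         // parallel iteration.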
2878 
2879     // Aggregate the "stripe" in the count data associated with hr.
2880     uint hrm_index = hr->hrm_index();
2881     size_t marked_bytes = 0;
2882 
2883     for (uint i = 0; i < _max_worker_id; i += 1) {
2884       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2885       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2886 
2887       // Fetch the marked_bytes in this region for task i and
2888       // add it to the running total for this region.
2889       marked_bytes += marked_bytes_array[hrm_index];
2890 
2891       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2892       // into the global card bitmap.
2893       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2894 
2895       while (scan_idx < limit_idx) {
2896         assert(task_card_bm->at(scan_idx) == true, "should be");
2897         _cm_card_bm->set_bit(scan_idx);
2898         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2899 
2900         // BitMap::get_next_one_offset() can handle the case when
2901         // its left_offset parameter is greater than its right_offset
2902         // parameter. It does, however, have an early exit if
2903         // left_offset == right_offset. So let's limit the value
2904         // passed in for left offset here.
2905         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2906         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2907       }
2908     }
2909 
2910     // Update the marked bytes for this region.
2911     hr->add_to_marked_bytes(marked_bytes);
2912 
2913     // Next heap region
2914     return false;
2915   }
2916 };
2917 
2918 class G1AggregateCountDataTask: public AbstractGangTask {
2919 protected:
2920   G1CollectedHeap* _g1h;
2921   ConcurrentMark* _cm;
2922   BitMap* _cm_card_bm;
2923   uint _max_worker_id;
2924   uint _active_workers;
2925   HeapRegionClaimer _hrclaimer;
2926 
2927 public:
2928   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2929                            ConcurrentMark* cm,
2930                            BitMap* cm_card_bm,
2931                            uint max_worker_id,
2932                            uint n_workers) :
2933       AbstractGangTask("Count Aggregation"),
2934       _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2935       _max_worker_id(max_worker_id),
2936       _active_workers(n_workers),
2937       _hrclaimer(_active_workers) {
2938   }
2939 
2940   void work(uint worker_id) {
2941     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
2942 
2943     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
2944   }
2945 };
2946 
2947 
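     // Run the aggregation task defined above across all active workers.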
2948 void ConcurrentMark::aggregate_count_data() {
2949   uint n_workers = _g1h->workers()->active_workers();
2950 
2951   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
2952                                            _max_worker_id, n_workers);
2953 
2954   _g1h->workers()->run_task(&g1_par_agg_task);
2955 }
2956 
2957 // Clear the per-worker arrays used to store the per-region counting data
2958 void ConcurrentMark::clear_all_count_data() {
2959   // Clear the global card bitmap - it will be filled during
2960   // liveness count aggregation (during remark) and the
2961   // final counting task.
2962   _card_bm.clear();
2963 
2964   // Clear the global region bitmap - it will be filled as part
2965   // of the final counting task.
2966   _region_bm.clear();
2967 
2968   uint max_regions = _g1h->max_regions();
2969   assert(_max_worker_id > 0, "uninitialized");
2970 
2971   for (uint i = 0; i < _max_worker_id; i += 1) {
2972     BitMap* task_card_bm = count_card_bitmap_for(i);
2973     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
2974 
2975     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
2976     assert(marked_bytes_array != NULL, "uninitialized");
2977 
2978     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
2979     task_card_bm->clear();
2980   }
2981 }
2982 
2983 void ConcurrentMark::print_stats() {
2984   if (verbose_stats()) {
2985     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2986     for (size_t i = 0; i < _active_tasks; ++i) {
2987       _tasks[i]->print_stats();
2988       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2989     }
2990   }
2991 }
2992 
2993 // abandon current marking iteration due to a Full GC
2994 void ConcurrentMark::abort() {
2995   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2996   // concurrent bitmap clearing.
2997   _nextMarkBitMap->clearAll();
2998 
2999   // Note we cannot clear the previous marking bitmap here
3000   // since VerifyDuringGC verifies the objects marked during
3001   // a full GC against the previous bitmap.
3002 
3003   // Clear the liveness counting data
3004   clear_all_count_data();
3005   // Empty mark stack
3006   reset_marking_state();
3007   for (uint i = 0; i < _max_worker_id; ++i) {
3008     _tasks[i]->clear_region_fields();
3009   }
3010   _first_overflow_barrier_sync.abort();
3011   _second_overflow_barrier_sync.abort();
3012   const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id();
3013   if (!gc_id.is_undefined()) {
3014     // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance
3015     // to detect that it was aborted. Only keep track of the first GC id that we aborted.
3016     _aborted_gc_id = gc_id;
3017   }
3018   _has_aborted = true;
3019 
3020   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3021   satb_mq_set.abandon_partial_marking();
3022   // This can be called either during or outside marking; we'll read
3023   // the expected_active value from the SATB queue set.
3024   satb_mq_set.set_active_all_threads(
3025                                  false, /* new active value */
3026                                  satb_mq_set.is_active() /* expected_active */);
3027 
3028   _g1h->trace_heap_after_concurrent_cycle();
3029   _g1h->register_concurrent_cycle_end();
3030 }
3031 
3032 const GCId& ConcurrentMark::concurrent_gc_id() {
3033   if (has_aborted()) {
3034     return _aborted_gc_id;
3035   }
3036   return _g1h->gc_tracer_cm()->gc_id();
3037 }
3038 
3039 static void print_ms_time_info(const char* prefix, const char* name,
3040                                NumberSeq& ns) {
3041   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3042                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3043   if (ns.num() > 0) {
3044     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3045                            prefix, ns.sd(), ns.maximum());
3046   }
3047 }
3048 
3049 void ConcurrentMark::print_summary_info() {
3050   gclog_or_tty->print_cr(" Concurrent marking:");
3051   print_ms_time_info("  ", "init marks", _init_times);
3052   print_ms_time_info("  ", "remarks", _remark_times);
3053   {
3054     print_ms_time_info("     ", "final marks", _remark_mark_times);
3055     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3056 
3057   }
3058   print_ms_time_info("  ", "cleanups", _cleanup_times);
3059   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3060                          _total_counting_time,
3061                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3062                           (double)_cleanup_times.num()
3063                          : 0.0));
3064   if (G1ScrubRemSets) {
3065     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3066                            _total_rs_scrub_time,
3067                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3068                             (double)_cleanup_times.num()
3069                            : 0.0));
3070   }
3071   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3072                          (_init_times.sum() + _remark_times.sum() +
3073                           _cleanup_times.sum())/1000.0);
3074   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3075                 "(%8.2f s marking).",
3076                 cmThread()->vtime_accum(),
3077                 cmThread()->vtime_mark_accum());
3078 }
3079 
3080 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3081   _parallel_workers->print_worker_threads_on(st);
3082 }
3083 
3084 void ConcurrentMark::print_on_error(outputStream* st) const {
3085   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
3086       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
3087   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
3088   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
3089 }
3090 
3091 // We take a break if someone is trying to stop the world.
3092 bool ConcurrentMark::do_yield_check(uint worker_id) {
3093   if (SuspendibleThreadSet::should_yield()) {
3094     if (worker_id == 0) {
3095       _g1h->g1_policy()->record_concurrent_pause();
3096     }
3097     SuspendibleThreadSet::yield();
3098     return true;
3099   } else {
3100     return false;
3101   }
3102 }
3103 
3104 #ifndef PRODUCT
3105 // for debugging purposes
3106 void ConcurrentMark::print_finger() {
3107   gclog_or_tty->print_cr("heap [" PTR_FORMAT ", " PTR_FORMAT "), global finger = " PTR_FORMAT,
3108                          p2i(_heap_start), p2i(_heap_end), p2i(_finger));
3109   for (uint i = 0; i < _max_worker_id; ++i) {
3110     gclog_or_tty->print("   %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
3111   }
3112   gclog_or_tty->cr();
3113 }
3114 #endif
3115 
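     // When 'scan' is true the object's reference fields are iterated with the
     // CM oop closure; when it is false (only allowed for typeArrays, which
     // contain no references) the object is merely counted towards the
     // words-scanned limit.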
3116 template<bool scan>
3117 inline void CMTask::process_grey_object(oop obj) {
3118   assert(scan || obj->is_typeArray(), "Skipping scan of grey non-typeArray");
3119   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3120 
3121   if (_cm->verbose_high()) {
3122     gclog_or_tty->print_cr("[%u] processing grey object " PTR_FORMAT,
3123                            _worker_id, p2i((void*) obj));
3124   }
3125 
3126   size_t obj_size = obj->size();
3127   _words_scanned += obj_size;
3128 
3129   if (scan) {
3130     obj->oop_iterate(_cm_oop_closure);
3131   }
3132   statsOnly( ++_objs_scanned );
3133   check_limits();
3134 }
3135 
3136 template void CMTask::process_grey_object<true>(oop);
3137 template void CMTask::process_grey_object<false>(oop);
3138 
3139 // Closure for iteration over bitmaps
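     // do_bit() is invoked for each set (marked) bit in the claimed range;
     // returning false aborts the iteration (see the abort handling in
     // CMTask::do_marking_step()).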
3140 class CMBitMapClosure : public BitMapClosure {
3141 private:
3142   // the bitmap that is being iterated over
3143   CMBitMap*                   _nextMarkBitMap;
3144   ConcurrentMark*             _cm;
3145   CMTask*                     _task;
3146 
3147 public:
3148   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3149     _nextMarkBitMap(nextMarkBitMap), _cm(cm), _task(task) { }
3150 
3151   bool do_bit(size_t offset) {
3152     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3153     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3154     assert(addr < _cm->finger(), "invariant");
3155 
3156     statsOnly( _task->increase_objs_found_on_bitmap() );
3157     assert(addr >= _task->finger(), "invariant");
3158 
3159     // We move that task's local finger along.
3160     _task->move_finger_to(addr);
3161 
3162     _task->scan_object(oop(addr));
3163     // we only partially drain the local queue and global stack
3164     _task->drain_local_queue(true);
3165     _task->drain_global_stack(true);
3166 
3167     // if the has_aborted flag has been raised, we need to bail out of
3168     // the iteration
3169     return !_task->has_aborted();
3170   }
3171 };
3172 
3173 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3174                                ConcurrentMark* cm,
3175                                CMTask* task)
3176   : _g1h(g1h), _cm(cm), _task(task) {
3177   assert(_ref_processor == NULL, "should be initialized to NULL");
3178 
3179   if (G1UseConcMarkReferenceProcessing) {
3180     _ref_processor = g1h->ref_processor_cm();
3181     assert(_ref_processor != NULL, "should not be NULL");
3182   }
3183 }
3184 
3185 void CMTask::setup_for_region(HeapRegion* hr) {
3186   assert(hr != NULL,
3187         "claim_region() should have filtered out NULL regions");
3188   assert(!hr->is_continues_humongous(),
3189         "claim_region() should have filtered out continues humongous regions");
3190 
3191   if (_cm->verbose_low()) {
3192     gclog_or_tty->print_cr("[%u] setting up for region " PTR_FORMAT,
3193                            _worker_id, p2i(hr));
3194   }
3195 
3196   _curr_region  = hr;
3197   _finger       = hr->bottom();
3198   update_region_limit();
3199 }
3200 
3201 void CMTask::update_region_limit() {
3202   HeapRegion* hr            = _curr_region;
3203   HeapWord* bottom          = hr->bottom();
3204   HeapWord* limit           = hr->next_top_at_mark_start();
3205 
3206   if (limit == bottom) {
3207     if (_cm->verbose_low()) {
3208       gclog_or_tty->print_cr("[%u] found an empty region "
3209                              "[" PTR_FORMAT ", " PTR_FORMAT ")",
3210                              _worker_id, p2i(bottom), p2i(limit));
3211     }
3212     // The region was collected underneath our feet.
3213     // We set the finger to bottom to ensure that the bitmap
3214     // iteration that will follow this will not do anything.
3215     // (this is not a condition that holds when we set the region up,
3216     // as the region is not supposed to be empty in the first place)
3217     _finger = bottom;
3218   } else if (limit >= _region_limit) {
3219     assert(limit >= _finger, "peace of mind");
3220   } else {
3221     assert(limit < _region_limit, "only way to get here");
3222     // This can happen under some pretty unusual circumstances.  An
3223     // evacuation pause empties the region underneath our feet (NTAMS
3224     // at bottom). We then do some allocation in the region (NTAMS
3225     // stays at bottom), followed by the region being used as a GC
3226     // alloc region (NTAMS will move to top() and the objects
3227     // originally below it will be grayed). All objects now marked in
3228     // the region are explicitly grayed, if below the global finger,
3229     // and we do not need in fact to scan anything else. So, we simply
3230     // set _finger to be limit to ensure that the bitmap iteration
3231     // doesn't do anything.
3232     _finger = limit;
3233   }
3234 
3235   _region_limit = limit;
3236 }
3237 
3238 void CMTask::giveup_current_region() {
3239   assert(_curr_region != NULL, "invariant");
3240   if (_cm->verbose_low()) {
3241     gclog_or_tty->print_cr("[%u] giving up region " PTR_FORMAT,
3242                            _worker_id, p2i(_curr_region));
3243   }
3244   clear_region_fields();
3245 }
3246 
3247 void CMTask::clear_region_fields() {
3248   // Values for these three fields that indicate that we're not
3249   // holding on to a region.
3250   _curr_region   = NULL;
3251   _finger        = NULL;
3252   _region_limit  = NULL;
3253 }
3254 
3255 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3256   if (cm_oop_closure == NULL) {
3257     assert(_cm_oop_closure != NULL, "invariant");
3258   } else {
3259     assert(_cm_oop_closure == NULL, "invariant");
3260   }
3261   _cm_oop_closure = cm_oop_closure;
3262 }
3263 
3264 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3265   guarantee(nextMarkBitMap != NULL, "invariant");
3266 
3267   if (_cm->verbose_low()) {
3268     gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3269   }
3270 
3271   _nextMarkBitMap                = nextMarkBitMap;
3272   clear_region_fields();
3273 
3274   _calls                         = 0;
3275   _elapsed_time_ms               = 0.0;
3276   _termination_time_ms           = 0.0;
3277   _termination_start_time_ms     = 0.0;
3278 
3279 #if _MARKING_STATS_
3280   _aborted                       = 0;
3281   _aborted_overflow              = 0;
3282   _aborted_cm_aborted            = 0;
3283   _aborted_yield                 = 0;
3284   _aborted_timed_out             = 0;
3285   _aborted_satb                  = 0;
3286   _aborted_termination           = 0;
3287   _steal_attempts                = 0;
3288   _steals                        = 0;
3289   _local_pushes                  = 0;
3290   _local_pops                    = 0;
3291   _local_max_size                = 0;
3292   _objs_scanned                  = 0;
3293   _global_pushes                 = 0;
3294   _global_pops                   = 0;
3295   _global_max_size               = 0;
3296   _global_transfers_to           = 0;
3297   _global_transfers_from         = 0;
3298   _regions_claimed               = 0;
3299   _objs_found_on_bitmap          = 0;
3300   _satb_buffers_processed        = 0;
3301 #endif // _MARKING_STATS_
3302 }
3303 
3304 bool CMTask::should_exit_termination() {
3305   regular_clock_call();
3306   // This is called when we are in the termination protocol. We should
3307   // quit if, for some reason, this task wants to abort or the global
3308   // stack is not empty (this means that we can get work from it).
3309   return !_cm->mark_stack_empty() || has_aborted();
3310 }
3311 
3312 void CMTask::reached_limit() {
3313   assert(_words_scanned >= _words_scanned_limit ||
3314          _refs_reached >= _refs_reached_limit,
3315          "shouldn't have been called otherwise");
3316   regular_clock_call();
3317 }
3318 
3319 void CMTask::regular_clock_call() {
3320   if (has_aborted()) return;
3321 
3322   // First, we need to recalculate the words scanned and refs reached
3323   // limits for the next clock call.
3324   recalculate_limits();
3325 
3326   // During the regular clock call we do the following
3327 
3328   // (1) If an overflow has been flagged, then we abort.
3329   if (_cm->has_overflown()) {
3330     set_has_aborted();
3331     return;
3332   }
3333 
3334   // If we are not concurrent (i.e. we're doing remark) we don't need
3335   // to check anything else. The other steps are only needed during
3336   // the concurrent marking phase.
3337   if (!concurrent()) return;
3338 
3339   // (2) If marking has been aborted for Full GC, then we also abort.
3340   if (_cm->has_aborted()) {
3341     set_has_aborted();
3342     statsOnly( ++_aborted_cm_aborted );
3343     return;
3344   }
3345 
3346   double curr_time_ms = os::elapsedVTime() * 1000.0;
3347 
3348   // (3) If marking stats are enabled, then we update the clock statistics.
3349 #if _MARKING_STATS_
3350   if (_words_scanned >= _words_scanned_limit) {
3351     ++_clock_due_to_scanning;
3352   }
3353   if (_refs_reached >= _refs_reached_limit) {
3354     ++_clock_due_to_marking;
3355   }
3356 
3357   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3358   _interval_start_time_ms = curr_time_ms;
3359   _all_clock_intervals_ms.add(last_interval_ms);
3360 
3361   if (_cm->verbose_medium()) {
3362       gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3363                         "scanned = " SIZE_FORMAT "%s, refs reached = " SIZE_FORMAT "%s",
3364                         _worker_id, last_interval_ms,
3365                         _words_scanned,
3366                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3367                         _refs_reached,
3368                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3369   }
3370 #endif // _MARKING_STATS_
3371 
3372   // (4) We check whether we should yield. If we have to, then we abort.
3373   if (SuspendibleThreadSet::should_yield()) {
3374     // We should yield. To do this we abort the task. The caller is
3375     // responsible for yielding.
3376     set_has_aborted();
3377     statsOnly( ++_aborted_yield );
3378     return;
3379   }
3380 
3381   // (5) We check whether we've reached our time quota. If we have,
3382   // then we abort.
3383   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3384   if (elapsed_time_ms > _time_target_ms) {
3385     set_has_aborted();
3386     _has_timed_out = true;
3387     statsOnly( ++_aborted_timed_out );
3388     return;
3389   }
3390 
3391   // (6) Finally, we check whether there are enough completed SATB
3392   // buffers available for processing. If there are, we abort.
3393   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3394   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3395     if (_cm->verbose_low()) {
3396       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3397                              _worker_id);
3398     }
3399     // we do need to process SATB buffers, so we'll abort and restart
3400     // the marking task to do so
3401     set_has_aborted();
3402     statsOnly( ++_aborted_satb );
3403     return;
3404   }
3405 }
3406 
3407 void CMTask::recalculate_limits() {
3408   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3409   _words_scanned_limit      = _real_words_scanned_limit;
3410 
3411   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3412   _refs_reached_limit       = _real_refs_reached_limit;
3413 }
3414 
3415 void CMTask::decrease_limits() {
3416   // This is called when we believe that we're going to do an infrequent
3417   // operation which will increase the per byte scanned cost (i.e. move
3418   // entries to/from the global stack). It basically tries to decrease the
3419   // scanning limit so that the clock is called earlier.
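       // E.g. the words-scanned limit drops from (value at last recalculation
       // + period) to (value at last recalculation + period / 4), so the next
       // regular_clock_call() happens after at most a quarter of the normal
       // scanning period.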
3420 
3421   if (_cm->verbose_medium()) {
3422     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3423   }
3424 
3425   _words_scanned_limit = _real_words_scanned_limit -
3426     3 * words_scanned_period / 4;
3427   _refs_reached_limit  = _real_refs_reached_limit -
3428     3 * refs_reached_period / 4;
3429 }
3430 
3431 void CMTask::move_entries_to_global_stack() {
3432   // local array where we'll store the entries that will be popped
3433   // from the local queue
3434   oop buffer[global_stack_transfer_size];
3435 
3436   int n = 0;
3437   oop obj;
3438   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3439     buffer[n] = obj;
3440     ++n;
3441   }
3442 
3443   if (n > 0) {
3444     // we popped at least one entry from the local queue
3445 
3446     statsOnly( ++_global_transfers_to; _local_pops += n );
3447 
3448     if (!_cm->mark_stack_push(buffer, n)) {
3449       if (_cm->verbose_low()) {
3450         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3451                                _worker_id);
3452       }
3453       set_has_aborted();
3454     } else {
3455       // the transfer was successful
3456 
3457       if (_cm->verbose_medium()) {
3458         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3459                                _worker_id, n);
3460       }
3461       statsOnly( size_t tmp_size = _cm->mark_stack_size();
3462                  if (tmp_size > _global_max_size) {
3463                    _global_max_size = tmp_size;
3464                  }
3465                  _global_pushes += n );
3466     }
3467   }
3468 
3469   // this operation was quite expensive, so decrease the limits
3470   decrease_limits();
3471 }
3472 
3473 void CMTask::get_entries_from_global_stack() {
3474   // local array where we'll store the entries that will be popped
3475   // from the global stack.
3476   oop buffer[global_stack_transfer_size];
3477   int n;
3478   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3479   assert(n <= global_stack_transfer_size,
3480          "we should not pop more than the given limit");
3481   if (n > 0) {
3482     // yes, we did actually pop at least one entry
3483 
3484     statsOnly( ++_global_transfers_from; _global_pops += n );
3485     if (_cm->verbose_medium()) {
3486       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3487                              _worker_id, n);
3488     }
3489     for (int i = 0; i < n; ++i) {
3490       bool success = _task_queue->push(buffer[i]);
3491       // We only call this when the local queue is empty or under a
3492       // given target limit. So, we do not expect this push to fail.
3493       assert(success, "invariant");
3494     }
3495 
3496     statsOnly( size_t tmp_size = (size_t)_task_queue->size();
3497                if (tmp_size > _local_max_size) {
3498                  _local_max_size = tmp_size;
3499                }
3500                _local_pushes += n );
3501   }
3502 
3503   // this operation was quite expensive, so decrease the limits
3504   decrease_limits();
3505 }
3506 
3507 void CMTask::drain_local_queue(bool partially) {
3508   if (has_aborted()) return;
3509 
3510   // Decide what the target size is, depending whether we're going to
3511   // drain it partially (so that other tasks can steal if they run out
3512   // of things to do) or totally (at the very end).
3513   size_t target_size;
3514   if (partially) {
3515     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3516   } else {
3517     target_size = 0;
3518   }
3519 
3520   if (_task_queue->size() > target_size) {
3521     if (_cm->verbose_high()) {
3522       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3523                              _worker_id, target_size);
3524     }
3525 
3526     oop obj;
3527     bool ret = _task_queue->pop_local(obj);
3528     while (ret) {
3529       statsOnly( ++_local_pops );
3530 
3531       if (_cm->verbose_high()) {
3532         gclog_or_tty->print_cr("[%u] popped " PTR_FORMAT, _worker_id,
3533                                p2i((void*) obj));
3534       }
3535 
3536       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3537       assert(!_g1h->is_on_master_free_list(
3538                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3539 
3540       scan_object(obj);
3541 
3542       if (_task_queue->size() <= target_size || has_aborted()) {
3543         ret = false;
3544       } else {
3545         ret = _task_queue->pop_local(obj);
3546       }
3547     }
3548 
3549     if (_cm->verbose_high()) {
3550       gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3551                              _worker_id, _task_queue->size());
3552     }
3553   }
3554 }
3555 
3556 void CMTask::drain_global_stack(bool partially) {
3557   if (has_aborted()) return;
3558 
3559   // We have a policy to drain the local queue before we attempt to
3560   // drain the global stack.
3561   assert(partially || _task_queue->size() == 0, "invariant");
3562 
3563   // Decide what the target size is, depending whether we're going to
3564   // drain it partially (so that other tasks can steal if they run out
3565   // of things to do) or totally (at the very end).  Notice that,
3566   // because we move entries from the global stack in chunks or
3567   // because another task might be doing the same, we might in fact
3568   // drop below the target. But, this is not a problem.
3569   size_t target_size;
3570   if (partially) {
3571     target_size = _cm->partial_mark_stack_size_target();
3572   } else {
3573     target_size = 0;
3574   }
3575 
3576   if (_cm->mark_stack_size() > target_size) {
3577     if (_cm->verbose_low()) {
3578       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3579                              _worker_id, target_size);
3580     }
3581 
3582     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3583       get_entries_from_global_stack();
3584       drain_local_queue(partially);
3585     }
3586 
3587     if (_cm->verbose_low()) {
3588       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3589                              _worker_id, _cm->mark_stack_size());
3590     }
3591   }
3592 }
3593 
3594 // SATB Queue has several assumptions on whether to call the par or
3595 // non-par versions of the methods. This is why some of the code is
3596 // replicated. We should really get rid of the single-threaded version
3597 // of the code to simplify things.
3598 void CMTask::drain_satb_buffers() {
3599   if (has_aborted()) return;
3600 
3601   // We set this so that the regular clock knows that we're in the
3602   // middle of draining buffers and doesn't set the abort flag when it
3603   // notices that SATB buffers are available for draining. It'd be
3604   // very counterproductive if it did that. :-)
3605   _draining_satb_buffers = true;
3606 
3607   CMSATBBufferClosure satb_cl(this, _g1h);
3608   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3609 
3610   // This keeps claiming and applying the closure to completed buffers
3611   // until we run out of buffers or we need to abort.
3612   while (!has_aborted() &&
3613          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3614     if (_cm->verbose_medium()) {
3615       gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3616     }
3617     statsOnly( ++_satb_buffers_processed );
3618     regular_clock_call();
3619   }
3620 
3621   _draining_satb_buffers = false;
3622 
3623   assert(has_aborted() ||
3624          concurrent() ||
3625          satb_mq_set.completed_buffers_num() == 0, "invariant");
3626 
3627   // again, this was a potentially expensive operation, decrease the
3628   // limits to get the regular clock call early
3629   decrease_limits();
3630 }
3631 
3632 void CMTask::print_stats() {
3633   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3634                          _worker_id, _calls);
3635   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3636                          _elapsed_time_ms, _termination_time_ms);
3637   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3638                          _step_times_ms.num(), _step_times_ms.avg(),
3639                          _step_times_ms.sd());
3640   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3641                          _step_times_ms.maximum(), _step_times_ms.sum());
3642 
3643 #if _MARKING_STATS_
3644   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3645                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3646                          _all_clock_intervals_ms.sd());
3647   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3648                          _all_clock_intervals_ms.maximum(),
3649                          _all_clock_intervals_ms.sum());
3650   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = " SIZE_FORMAT ", marking = " SIZE_FORMAT,
3651                          _clock_due_to_scanning, _clock_due_to_marking);
3652   gclog_or_tty->print_cr("  Objects: scanned = " SIZE_FORMAT ", found on the bitmap = " SIZE_FORMAT,
3653                          _objs_scanned, _objs_found_on_bitmap);
3654   gclog_or_tty->print_cr("  Local Queue:  pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT,
3655                          _local_pushes, _local_pops, _local_max_size);
3656   gclog_or_tty->print_cr("  Global Stack: pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT,
3657                          _global_pushes, _global_pops, _global_max_size);
3658   gclog_or_tty->print_cr("                transfers to = " SIZE_FORMAT ", transfers from = " SIZE_FORMAT,
3659                          _global_transfers_to,_global_transfers_from);
3660   gclog_or_tty->print_cr("  Regions: claimed = " SIZE_FORMAT, _regions_claimed);
3661   gclog_or_tty->print_cr("  SATB buffers: processed = " SIZE_FORMAT, _satb_buffers_processed);
3662   gclog_or_tty->print_cr("  Steals: attempts = " SIZE_FORMAT ", successes = " SIZE_FORMAT,
3663                          _steal_attempts, _steals);
3664   gclog_or_tty->print_cr("  Aborted: " SIZE_FORMAT ", due to", _aborted);
3665   gclog_or_tty->print_cr("    overflow: " SIZE_FORMAT ", global abort: " SIZE_FORMAT ", yield: " SIZE_FORMAT,
3666                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3667   gclog_or_tty->print_cr("    time out: " SIZE_FORMAT ", SATB: " SIZE_FORMAT ", termination: " SIZE_FORMAT,
3668                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3669 #endif // _MARKING_STATS_
3670 }
3671 
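     // Try to steal an entry from another task's queue; used near the end of
     // do_marking_step(), once this task has run out of regions and drained
     // its own local queue.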
3672 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3673   return _task_queues->steal(worker_id, hash_seed, obj);
3674 }
3675 
3676 /*****************************************************************************
3677 
3678     The do_marking_step(time_target_ms, ...) method is the building
3679     block of the parallel marking framework. It can be called in parallel
3680     with other invocations of do_marking_step() on different tasks
3681     (but only one per task, obviously) and concurrently with the
3682     mutator threads, or during remark, hence it eliminates the need
3683     for two versions of the code. When called during remark, it will
3684     pick up from where the task left off during the concurrent marking
3685     phase. Interestingly, tasks are also claimable during evacuation
3686     pauses, since do_marking_step() ensures that it aborts before
3687     it needs to yield.
3688 
3689     The data structures that it uses to do marking work are the
3690     following:
3691 
3692       (1) Marking Bitmap. If there are gray objects that appear only
3693       on the bitmap (this happens either when dealing with an overflow
3694       or when the initial marking phase has simply marked the roots
3695       and didn't push them on the stack), then tasks claim heap
3696       regions whose bitmap they then scan to find gray objects. A
3697       global finger indicates where the end of the last claimed region
3698       is. A local finger indicates how far into the region a task has
3699       scanned. The two fingers are used to determine how to gray an
3700       object (i.e. whether simply marking it is OK, as it will be
3701       visited by a task in the future, or whether it also needs to be
3702       pushed on a stack).
3703 
3704       (2) Local Queue. The local queue of the task which is accessed
3705       reasonably efficiently by the task. Other tasks can steal from
3706       it when they run out of work. Throughout the marking phase, a
3707       task attempts to keep its local queue short but not totally
3708       empty, so that entries are available for stealing by other
3709       tasks. Only when there is no more work, a task will totally
3710       drain its local queue.
3711 
3712       (3) Global Mark Stack. This handles local queue overflow. During
3713       marking, only sets of entries are moved between it and the local
3714       queues, as access to it requires a mutex and more fine-grained
3715       interaction with it might cause contention. If it
3716       overflows, then the marking phase should restart and iterate
3717       over the bitmap to identify gray objects. Throughout the marking
3718       phase, tasks attempt to keep the global mark stack at a small
3719       length but not totally empty, so that entries are available for
3720       popping by other tasks. Only when there is no more work, tasks
3721       will totally drain the global mark stack.
3722 
3723       (4) SATB Buffer Queue. This is where completed SATB buffers are
3724       made available. Buffers are regularly removed from this queue
3725       and scanned for roots, so that the queue doesn't get too
3726       long. During remark, all completed buffers are processed, as
3727       well as the filled in parts of any uncompleted buffers.
3728 
3729     The do_marking_step() method tries to abort when the time target
3730     has been reached. There are a few other cases when the
3731     do_marking_step() method also aborts:
3732 
3733       (1) When the marking phase has been aborted (after a Full GC).
3734 
3735       (2) When a global overflow (on the global stack) has been
3736       triggered. Before the task aborts, it will actually sync up with
3737       the other tasks to ensure that all the marking data structures
3738       (local queues, stacks, fingers etc.)  are re-initialized so that
3739       when do_marking_step() completes, the marking phase can
3740       immediately restart.
3741 
3742       (3) When enough completed SATB buffers are available. The
3743       do_marking_step() method only tries to drain SATB buffers right
3744       at the beginning. So, if enough buffers are available, the
3745       marking step aborts and the SATB buffers are processed at
3746       the beginning of the next invocation.
3747 
3748       (4) To yield. When we have to yield then we abort and yield
3749       right at the end of do_marking_step(). This saves us from a lot
3750       of hassle as, by yielding, we might allow a Full GC. If this
3751       happens then objects will be compacted underneath our feet, the
3752       heap might shrink, etc. We save checking for this by just
3753       aborting and doing the yield right at the end.
3754 
3755     From the above it follows that the do_marking_step() method should
3756     be called in a loop (or, otherwise, regularly) until it completes.
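         For example, the concurrent driver calls it along these lines (a
         sketch, not the exact code; the real loop also handles yielding,
         overflow and abort):

           do {
             task->do_marking_step(G1ConcMarkStepDurationMillis,
                                   true,    // do_termination
                                   false);  // is_serial
             // deal with yielding / overflow / abort here
           } while (!cm->has_aborted() && task->has_aborted());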
3757 
3758     If a marking step completes without its has_aborted() flag being
3759     true, it means it has completed the current marking phase (and
3760     also all other marking tasks have done so and have all synced up).
3761 
3762     A method called regular_clock_call() is invoked "regularly" (in
3763     sub ms intervals) throughout marking. It is this clock method that
3764     checks all the abort conditions which were mentioned above and
3765     decides when the task should abort. A work-based scheme is used to
3766     trigger this clock method: when the number of object words the
3767     marking phase has scanned or the number of references the marking
3768     phase has visited reaches a given limit. Additional invocations of
3769     the clock method have been planted in a few other strategic places
3770     too. The initial reason for the clock method was to avoid calling
3771     vtime too regularly, as it is quite expensive. So, once it was in
3772     place, it was natural to piggy-back all the other conditions on it
3773     too and not constantly check them throughout the code.
3774 
3775     If do_termination is true then do_marking_step will enter its
3776     termination protocol.
3777 
3778     The value of is_serial must be true when do_marking_step is being
3779     called serially (i.e. by the VMThread) and do_marking_step should
3780     skip any synchronization in the termination and overflow code.
3781     Examples include the serial remark code and the serial reference
3782     processing closures.
3783 
3784     The value of is_serial must be false when do_marking_step is
3785     being called by any of the worker threads in a work gang.
3786     Examples include the concurrent marking code (CMMarkingTask),
3787     the MT remark code, and the MT reference processing closures.
3788 
3789  *****************************************************************************/
3790 
3791 void CMTask::do_marking_step(double time_target_ms,
3792                              bool do_termination,
3793                              bool is_serial) {
3794   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3795   assert(concurrent() == _cm->concurrent(), "they should be the same");
3796 
3797   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3798   assert(_task_queues != NULL, "invariant");
3799   assert(_task_queue != NULL, "invariant");
3800   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3801 
3802   assert(!_claimed,
3803          "only one thread should claim this task at any one time");
3804 
3805   // OK, this doesn't safeguard against all possible scenarios, as it is
3806   // possible for two threads to set the _claimed flag at the same
3807   // time. But it is only for debugging purposes anyway and it will
3808   // catch most problems.
3809   _claimed = true;
3810 
3811   _start_time_ms = os::elapsedVTime() * 1000.0;
3812   statsOnly( _interval_start_time_ms = _start_time_ms );
3813 
3814   // If do_stealing is true then do_marking_step will attempt to
3815   // steal work from the other CMTasks. It only makes sense to
3816   // enable stealing when the termination protocol is enabled
3817   // and do_marking_step() is not being called serially.
3818   bool do_stealing = do_termination && !is_serial;
3819 
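       // Shave the predicted overshoot of previous steps off the target, so
       // that the actual duration of this step lands closer to the caller's
       // time_target_ms.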
3820   double diff_prediction_ms =
3821     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3822   _time_target_ms = time_target_ms - diff_prediction_ms;
3823 
3824   // set up the variables that are used in the work-based scheme to
3825   // call the regular clock method
3826   _words_scanned = 0;
3827   _refs_reached  = 0;
3828   recalculate_limits();
3829 
3830   // clear all flags
3831   clear_has_aborted();
3832   _has_timed_out = false;
3833   _draining_satb_buffers = false;
3834 
3835   ++_calls;
3836 
3837   if (_cm->verbose_low()) {
3838     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
3839                            "target = %1.2lfms >>>>>>>>>>",
3840                            _worker_id, _calls, _time_target_ms);
3841   }
3842 
3843   // Set up the bitmap and oop closures. Anything that uses them is
3844   // eventually called from this method, so it is OK to allocate these
3845   // statically.
3846   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3847   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3848   set_cm_oop_closure(&cm_oop_closure);
3849 
3850   if (_cm->has_overflown()) {
3851     // This can happen if the mark stack overflows during a GC pause
3852     // and this task, after a yield point, restarts. We have to abort
3853     // as we need to get into the overflow protocol which happens
3854     // right at the end of this task.
3855     set_has_aborted();
3856   }
3857 
3858   // First drain any available SATB buffers. After this, we will not
3859   // look at SATB buffers before the next invocation of this method.
3860   // If enough completed SATB buffers are queued up, the regular clock
3861   // will abort this task so that it restarts.
3862   drain_satb_buffers();
3863   // ...then partially drain the local queue and the global stack
3864   drain_local_queue(true);
3865   drain_global_stack(true);
3866 
3867   do {
3868     if (!has_aborted() && _curr_region != NULL) {
3869       // This means that we're already holding on to a region.
3870       assert(_finger != NULL, "if region is not NULL, then the finger "
3871              "should not be NULL either");
3872 
3873       // We might have restarted this task after an evacuation pause
3874       // which might have evacuated the region we're holding on to
3875       // underneath our feet. Let's read its limit again to make sure
3876       // that we do not iterate over a region of the heap that
3877       // contains garbage (update_region_limit() will also move
3878       // _finger to the start of the region if it is found empty).
3879       update_region_limit();
3880       // We will start from _finger not from the start of the region,
3881       // as we might be restarting this task after aborting half-way
3882       // through scanning this region. In this case, _finger points to
3883       // the address where we last found a marked object. If this is a
3884       // fresh region, _finger points to start().
3885       MemRegion mr = MemRegion(_finger, _region_limit);
3886 
3887       if (_cm->verbose_low()) {
3888         gclog_or_tty->print_cr("[%u] we're scanning part "
3889                                "[" PTR_FORMAT ", " PTR_FORMAT ") "
3890                                "of region " HR_FORMAT,
3891                                _worker_id, p2i(_finger), p2i(_region_limit),
3892                                HR_FORMAT_PARAMS(_curr_region));
3893       }
3894 
3895       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
3896              "humongous regions should go around loop once only");
3897 
3898       // Some special cases:
3899       // If the memory region is empty, we can just give up the region.
3900       // If the current region is humongous then we only need to check
3901       // the bitmap for the bit associated with the start of the object,
3902       // scan the object if it's live, and give up the region.
3903       // Otherwise, let's iterate over the bitmap of the part of the region
3904       // that is left.
3905       // If the iteration is successful, give up the region.
3906       if (mr.is_empty()) {
3907         giveup_current_region();
3908         regular_clock_call();
3909       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
3910         if (_nextMarkBitMap->isMarked(mr.start())) {
3911           // The object is marked - apply the closure
3912           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
3913           bitmap_closure.do_bit(offset);
3914         }
3915         // Even if this task aborted while scanning the humongous object
3916         // we can (and should) give up the current region.
3917         giveup_current_region();
3918         regular_clock_call();
3919       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3920         giveup_current_region();
3921         regular_clock_call();
3922       } else {
3923         assert(has_aborted(), "currently the only way to do so");
3924         // The only way to abort the bitmap iteration is to return
3925         // false from the do_bit() method. However, inside the
3926         // do_bit() method we move the _finger to point to the
3927         // object currently being looked at. So, if we bail out, we
3928         // have definitely set _finger to something non-null.
3929         assert(_finger != NULL, "invariant");
3930 
3931         // Region iteration was actually aborted. So now _finger
3932         // points to the address of the object we last scanned. If we
3933         // leave it there, when we restart this task, we will rescan
3934         // the object. It is easy to avoid this. We move the finger by
3935         // enough to point to the next possible object header (the
3936         // bitmap knows by how much we need to move it as it knows its
3937         // granularity).
3938         assert(_finger < _region_limit, "invariant");
3939         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
3940         // Check if bitmap iteration was aborted while scanning the last object
3941         if (new_finger >= _region_limit) {
3942           giveup_current_region();
3943         } else {
3944           move_finger_to(new_finger);
3945         }
3946       }
3947     }
3948     // At this point we have either completed iterating over the
3949     // region we were holding on to, or we have aborted.
3950 
3951     // We then partially drain the local queue and the global stack.
3952     // (Do we really need this?)
3953     drain_local_queue(true);
3954     drain_global_stack(true);
3955 
3956     // Read the note on the claim_region() method on why it might
3957     // return NULL with potentially more regions available for
3958     // claiming and why we have to check out_of_regions() to determine
3959     // whether we're done or not.
3960     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3961       // We are going to try to claim a new region. We should have
3962       // given up on the previous one.
3963       // Separated the asserts so that we know which one fires.
3964       assert(_curr_region  == NULL, "invariant");
3965       assert(_finger       == NULL, "invariant");
3966       assert(_region_limit == NULL, "invariant");
3967       if (_cm->verbose_low()) {
3968         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
3969       }
3970       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3971       if (claimed_region != NULL) {
3972         // Yes, we managed to claim one
3973         statsOnly( ++_regions_claimed );
3974 
3975         if (_cm->verbose_low()) {
3976           gclog_or_tty->print_cr("[%u] we successfully claimed "
3977                                  "region " PTR_FORMAT,
3978                                  _worker_id, p2i(claimed_region));
3979         }
3980 
3981         setup_for_region(claimed_region);
3982         assert(_curr_region == claimed_region, "invariant");
3983       }
3984       // It is important to call the regular clock here. It might take
3985       // a while to claim a region if, for example, we hit a large
3986       // block of empty regions. So we need to call the regular clock
3987       // method once round the loop to make sure it's called
3988       // frequently enough.
3989       regular_clock_call();
3990     }
3991 
3992     if (!has_aborted() && _curr_region == NULL) {
3993       assert(_cm->out_of_regions(),
3994              "at this point we should be out of regions");
3995     }
3996   } while ( _curr_region != NULL && !has_aborted());
3997 
3998   if (!has_aborted()) {
3999     // We cannot check whether the global stack is empty, since other
4000     // tasks might be pushing objects to it concurrently.
4001     assert(_cm->out_of_regions(),
4002            "at this point we should be out of regions");
4003 
4004     if (_cm->verbose_low()) {
4005       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4006     }
4007 
4008     // Try to reduce the number of available SATB buffers so that
4009     // remark has less work to do.
4010     drain_satb_buffers();
4011   }
4012 
4013   // Since we've done everything else, we can now totally drain the
4014   // local queue and global stack.
4015   drain_local_queue(false);
4016   drain_global_stack(false);
4017 
4018   // Attempt at work stealing from other tasks' queues.
4019   if (do_stealing && !has_aborted()) {
4020     // We have not aborted. This means that we have finished all that
4021     // we could. Let's try to do some stealing...
4022 
4023     // We cannot check whether the global stack is empty, since other
4024     // tasks might be pushing objects to it concurrently.
4025     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4026            "only way to reach here");
4027 
4028     if (_cm->verbose_low()) {
4029       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4030     }
4031 
4032     while (!has_aborted()) {
4033       oop obj;
4034       statsOnly( ++_steal_attempts );
4035 
4036       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4037         if (_cm->verbose_medium()) {
4038           gclog_or_tty->print_cr("[%u] stolen " PTR_FORMAT " successfully",
4039                                  _worker_id, p2i((void*) obj));
4040         }
4041 
4042         statsOnly( ++_steals );
4043 
4044         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4045                "any stolen object should be marked");
4046         scan_object(obj);
4047 
4048         // And since we're towards the end, let's totally drain the
4049         // local queue and global stack.
4050         drain_local_queue(false);
4051         drain_global_stack(false);
4052       } else {
4053         break;
4054       }
4055     }
4056   }
4057 
4058   // If we are about to wrap up and go into termination, check if we
4059   // should raise the overflow flag.
4060   if (do_termination && !has_aborted()) {
4061     if (_cm->force_overflow()->should_force()) {
4062       _cm->set_has_overflown();
4063       regular_clock_call();
4064     }
4065   }
4066 
4067   // We still haven't aborted. Now, let's try to get into the
4068   // termination protocol.
4069   if (do_termination && !has_aborted()) {
4070     // We cannot check whether the global stack is empty, since other
4071     // tasks might be concurrently pushing objects on it.
4072     // Separated the asserts so that we know which one fires.
4073     assert(_cm->out_of_regions(), "only way to reach here");
4074     assert(_task_queue->size() == 0, "only way to reach here");
4075 
4076     if (_cm->verbose_low()) {
4077       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4078     }
4079 
4080     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4081 
4082     // The CMTask class also extends the TerminatorTerminator class,
4083     // so its should_exit_termination() method also decides whether
4084     // the termination protocol should be exited.
4085     bool finished = (is_serial ||
4086                      _cm->terminator()->offer_termination(this));
4087     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4088     _termination_time_ms +=
4089       termination_end_time_ms - _termination_start_time_ms;
4090 
4091     if (finished) {
4092       // We're all done.
4093 
4094       if (_worker_id == 0) {
4095         // let's allow task 0 to do this
4096         if (concurrent()) {
4097           assert(_cm->concurrent_marking_in_progress(), "invariant");
4098           // we need to set this to false before the next
4099           // safepoint. This way we ensure that the marking phase
4100           // doesn't observe any more heap expansions.
4101           _cm->clear_concurrent_marking_in_progress();
4102         }
4103       }
4104 
4105       // We can now guarantee that the global stack is empty, since
4106       // all other tasks have finished. We separated the guarantees so
4107       // that, if a condition is false, we can immediately find out
4108       // which one.
4109       guarantee(_cm->out_of_regions(), "only way to reach here");
4110       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4111       guarantee(_task_queue->size() == 0, "only way to reach here");
4112       guarantee(!_cm->has_overflown(), "only way to reach here");
4113       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4114 
4115       if (_cm->verbose_low()) {
4116         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4117       }
4118     } else {
4119       // Apparently there's more work to do. Let's abort this task; it
4120       // will be restarted and hopefully we'll find more things to do.
4121 
4122       if (_cm->verbose_low()) {
4123         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4124                                _worker_id);
4125       }
4126 
4127       set_has_aborted();
4128       statsOnly( ++_aborted_termination );
4129     }
4130   }
4131 
4132   // Mainly for debugging purposes: make sure that a pointer to the
4133   // closure, which was stack-allocated in this frame, doesn't
4134   // escape it by accident.
4135   set_cm_oop_closure(NULL);
4136   double end_time_ms = os::elapsedVTime() * 1000.0;
4137   double elapsed_time_ms = end_time_ms - _start_time_ms;
4138   // Update the step history.
4139   _step_times_ms.add(elapsed_time_ms);
4140 
4141   if (has_aborted()) {
4142     // The task was aborted for some reason.
4143 
4144     statsOnly( ++_aborted );
4145 
4146     if (_has_timed_out) {
4147       double diff_ms = elapsed_time_ms - _time_target_ms;
4148       // Keep statistics of how well we did with respect to hitting
4149       // our target only if we actually timed out (if we aborted for
4150       // other reasons, then the results might get skewed).
4151       _marking_step_diffs_ms.add(diff_ms);
4152     }
4153 
4154     if (_cm->has_overflown()) {
4155       // This is the interesting one. We aborted because a global
4156       // overflow was raised. This means we have to restart the
4157       // marking phase and start iterating over regions. However, in
4158       // order to do this we have to make sure that all tasks stop
4159       // what they are doing and re-initialize in a safe manner. We
4160       // will achieve this with the use of two barrier sync points.
4161 
4162       if (_cm->verbose_low()) {
4163         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4164       }
4165 
4166       if (!is_serial) {
4167         // We only need to enter the sync barrier if we are being
4168         // called from a parallel context.
4169         _cm->enter_first_sync_barrier(_worker_id);
4170 
4171         // When we exit this sync barrier we know that all tasks have
4172         // stopped doing marking work. So, it's now safe to
4173         // re-initialize our data structures. At the end of this method,
4174         // task 0 will clear the global data structures.
4175       }
4176 
4177       statsOnly( ++_aborted_overflow );
4178 
4179       // We clear the local state of this task...
4180       clear_region_fields();
4181 
4182       if (!is_serial) {
4183         // ...and enter the second barrier.
4184         _cm->enter_second_sync_barrier(_worker_id);
4185       }
4186       // At this point, if we're in the concurrent phase of
4187       // marking, everything has been re-initialized and we're
4188       // ready to restart.
4189     }
4190 
4191     if (_cm->verbose_low()) {
4192       gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4193                              "elapsed = %1.2lfms <<<<<<<<<<",
4194                              _worker_id, _time_target_ms, elapsed_time_ms);
4195       if (_cm->has_aborted()) {
4196         gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4197                                _worker_id);
4198       }
4199     }
4200   } else {
4201     if (_cm->verbose_low()) {
4202       gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4203                              "elapsed = %1.2lfms <<<<<<<<<<",
4204                              _worker_id, _time_target_ms, elapsed_time_ms);
4205     }
4206   }
4207 
4208   _claimed = false;
4209 }
4210 
4211 CMTask::CMTask(uint worker_id,
4212                ConcurrentMark* cm,
4213                size_t* marked_bytes,
4214                BitMap* card_bm,
4215                CMTaskQueue* task_queue,
4216                CMTaskQueueSet* task_queues)
4217   : _g1h(G1CollectedHeap::heap()),
4218     _worker_id(worker_id), _cm(cm),
4219     _claimed(false),
4220     _nextMarkBitMap(NULL), _hash_seed(17),
4221     _task_queue(task_queue),
4222     _task_queues(task_queues),
4223     _cm_oop_closure(NULL),
4224     _marked_bytes_array(marked_bytes),
4225     _card_bm(card_bm) {
4226   guarantee(task_queue != NULL, "invariant");
4227   guarantee(task_queues != NULL, "invariant");
4228 
4229   statsOnly( _clock_due_to_scanning = 0;
4230              _clock_due_to_marking  = 0 );
4231 
4232   _marking_step_diffs_ms.add(0.5);
4233 }
4234 
4235 // These are formatting macros that are used below to ensure
4236 // consistent formatting. The *_H_* versions are used to format the
4237 // header for a particular value and should be kept consistent with
4238 // the corresponding value macro. Also note that most of the macros
4239 // add the necessary white space (as a prefix), which makes them a
4240 // bit easier to compose.
4241 
4242 // All the output lines are prefixed with this string to be able to
4243 // identify them easily in a large log file.
4244 #define G1PPRL_LINE_PREFIX            "###"
4245 
4246 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
4247 #ifdef _LP64
4248 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4249 #else // _LP64
4250 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4251 #endif // _LP64
4252 
4253 // For per-region info
4254 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4255 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4256 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
4257 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4258 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4259 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4260 
4261 // For summary info
4262 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
4263 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
4264 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
4265 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
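
// As a rough, made-up illustration of how the per-region macros above
// compose (exact column widths depend on the platform-specific formats),
// a line printed with them might look something like:
//
//   ###   OLD  0x00000000f0000000-0x00000000f0100000    1048576     524288     262144           12.3       2048        128
//
// i.e. the line prefix, then type, address range, used, prev-live,
// next-live, gc-eff, remset and code-roots.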
4266 
4267 G1PrintRegionLivenessInfoClosure::
4268 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4269   : _out(out),
4270     _total_used_bytes(0), _total_capacity_bytes(0),
4271     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4272     _hum_used_bytes(0), _hum_capacity_bytes(0),
4273     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
4274     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
4275   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4276   MemRegion g1_reserved = g1h->g1_reserved();
4277   double now = os::elapsedTime();
4278 
4279   // Print the header of the output.
4280   _out->cr();
4281   _out->print_cr(G1PPRL_LINE_PREFIX " PHASE %s @ %1.3f", phase_name, now);
4282   _out->print_cr(G1PPRL_LINE_PREFIX " HEAP"
4283                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4284                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4285                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4286                  HeapRegion::GrainBytes);
4287   _out->print_cr(G1PPRL_LINE_PREFIX);
4288   _out->print_cr(G1PPRL_LINE_PREFIX
4289                 G1PPRL_TYPE_H_FORMAT
4290                 G1PPRL_ADDR_BASE_H_FORMAT
4291                 G1PPRL_BYTE_H_FORMAT
4292                 G1PPRL_BYTE_H_FORMAT
4293                 G1PPRL_BYTE_H_FORMAT
4294                 G1PPRL_DOUBLE_H_FORMAT
4295                 G1PPRL_BYTE_H_FORMAT
4296                 G1PPRL_BYTE_H_FORMAT,
4297                 "type", "address-range",
4298                 "used", "prev-live", "next-live", "gc-eff",
4299                 "remset", "code-roots");
4300   _out->print_cr(G1PPRL_LINE_PREFIX
4301                 G1PPRL_TYPE_H_FORMAT
4302                 G1PPRL_ADDR_BASE_H_FORMAT
4303                 G1PPRL_BYTE_H_FORMAT
4304                 G1PPRL_BYTE_H_FORMAT
4305                 G1PPRL_BYTE_H_FORMAT
4306                 G1PPRL_DOUBLE_H_FORMAT
4307                 G1PPRL_BYTE_H_FORMAT
4308                 G1PPRL_BYTE_H_FORMAT,
4309                 "", "",
4310                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4311                 "(bytes)", "(bytes)");
4312 }
4313 
4314 // It takes as a parameter a pointer to one of the _hum_* fields,
4315 // deduces the corresponding value for a region in a humongous region
4316 // series (either the region size, or what's left if the _hum_* field
4317 // is < the region size), and updates the _hum_* field accordingly.
4318 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4319   size_t bytes = 0;
4320   // The > 0 check is to deal with the prev and next live bytes which
4321   // could be 0.
4322   if (*hum_bytes > 0) {
4323     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4324     *hum_bytes -= bytes;
4325   }
4326   return bytes;
4327 }
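
// As a minimal illustration (numbers made up, assuming a 1M region size):
// if *hum_bytes starts at 2560K, three successive calls return 1024K,
// 1024K and 512K, leaving *hum_bytes at 0 for the next humongous series.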
4328 
4329 // It deduces the values for a region in a humongous region series
4330 // from the _hum_* fields and updates those accordingly. It assumes
4331 // that the _hum_* fields have already been set up from the "starts
4332 // humongous" region and we visit the regions in address order.
4333 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4334                                                      size_t* capacity_bytes,
4335                                                      size_t* prev_live_bytes,
4336                                                      size_t* next_live_bytes) {
4337   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4338   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4339   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4340   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4341   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4342 }
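
// To illustrate with made-up numbers (again assuming 1M regions): a
// humongous object using 2560K spans three regions. doHeapRegion() below
// seeds the _hum_* fields at the "starts humongous" region and reports
// 1024K of used bytes for it, then reports 1024K and 512K for the two
// "continues humongous" regions that follow, draining _hum_used_bytes
// back to 0 before the next humongous series is encountered.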
4343 
4344 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4345   const char* type       = r->get_type_str();
4346   HeapWord* bottom       = r->bottom();
4347   HeapWord* end          = r->end();
4348   size_t capacity_bytes  = r->capacity();
4349   size_t used_bytes      = r->used();
4350   size_t prev_live_bytes = r->live_bytes();
4351   size_t next_live_bytes = r->next_live_bytes();
4352   double gc_eff          = r->gc_efficiency();
4353   size_t remset_bytes    = r->rem_set()->mem_size();
4354   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4355 
4356   if (r->is_starts_humongous()) {
4357     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4358            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4359            "they should have been zeroed after the last time we used them");
4360     // Set up the _hum_* fields.
4361     _hum_capacity_bytes  = capacity_bytes;
4362     _hum_used_bytes      = used_bytes;
4363     _hum_prev_live_bytes = prev_live_bytes;
4364     _hum_next_live_bytes = next_live_bytes;
4365     get_hum_bytes(&used_bytes, &capacity_bytes,
4366                   &prev_live_bytes, &next_live_bytes);
4367     end = bottom + HeapRegion::GrainWords;
4368   } else if (r->is_continues_humongous()) {
4369     get_hum_bytes(&used_bytes, &capacity_bytes,
4370                   &prev_live_bytes, &next_live_bytes);
4371     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4372   }
4373 
4374   _total_used_bytes      += used_bytes;
4375   _total_capacity_bytes  += capacity_bytes;
4376   _total_prev_live_bytes += prev_live_bytes;
4377   _total_next_live_bytes += next_live_bytes;
4378   _total_remset_bytes    += remset_bytes;
4379   _total_strong_code_roots_bytes += strong_code_roots_bytes;
4380 
4381   // Print a line for this particular region.
4382   _out->print_cr(G1PPRL_LINE_PREFIX
4383                  G1PPRL_TYPE_FORMAT
4384                  G1PPRL_ADDR_BASE_FORMAT
4385                  G1PPRL_BYTE_FORMAT
4386                  G1PPRL_BYTE_FORMAT
4387                  G1PPRL_BYTE_FORMAT
4388                  G1PPRL_DOUBLE_FORMAT
4389                  G1PPRL_BYTE_FORMAT
4390                  G1PPRL_BYTE_FORMAT,
4391                  type, p2i(bottom), p2i(end),
4392                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4393                  remset_bytes, strong_code_roots_bytes);
4394 
4395   return false;
4396 }
4397 
4398 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4399   // Add static memory usage to the remembered set sizes.
4400   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
4401   // Print the footer of the output.
4402   _out->print_cr(G1PPRL_LINE_PREFIX);
4403   _out->print_cr(G1PPRL_LINE_PREFIX
4404                  " SUMMARY"
4405                  G1PPRL_SUM_MB_FORMAT("capacity")
4406                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4407                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4408                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
4409                  G1PPRL_SUM_MB_FORMAT("remset")
4410                  G1PPRL_SUM_MB_FORMAT("code-roots"),
4411                  bytes_to_mb(_total_capacity_bytes),
4412                  bytes_to_mb(_total_used_bytes),
4413                  perc(_total_used_bytes, _total_capacity_bytes),
4414                  bytes_to_mb(_total_prev_live_bytes),
4415                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4416                  bytes_to_mb(_total_next_live_bytes),
4417                  perc(_total_next_live_bytes, _total_capacity_bytes),
4418                  bytes_to_mb(_total_remset_bytes),
4419                  bytes_to_mb(_total_strong_code_roots_bytes));
4420   _out->cr();
4421 }