1 /*
   2  * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMark.inline.hpp"
  30 #include "gc/g1/concurrentMarkThread.inline.hpp"
  31 #include "gc/g1/g1CollectedHeap.inline.hpp"
  32 #include "gc/g1/g1CollectorPolicy.hpp"
  33 #include "gc/g1/g1ErgoVerbose.hpp"
  34 #include "gc/g1/g1Log.hpp"
  35 #include "gc/g1/g1OopClosures.inline.hpp"
  36 #include "gc/g1/g1RemSet.hpp"
  37 #include "gc/g1/g1StringDedup.hpp"
  38 #include "gc/g1/heapRegion.inline.hpp"
  39 #include "gc/g1/heapRegionManager.inline.hpp"
  40 #include "gc/g1/heapRegionRemSet.hpp"
  41 #include "gc/g1/heapRegionSet.inline.hpp"
  42 #include "gc/g1/suspendibleThreadSet.hpp"
  43 #include "gc/shared/gcTimer.hpp"
  44 #include "gc/shared/gcTrace.hpp"
  45 #include "gc/shared/gcTraceTime.hpp"
  46 #include "gc/shared/genOopClosures.inline.hpp"
  47 #include "gc/shared/referencePolicy.hpp"
  48 #include "gc/shared/strongRootsScope.hpp"
  49 #include "gc/shared/taskqueue.inline.hpp"
  50 #include "gc/shared/vmGCOperations.hpp"
  51 #include "memory/allocation.hpp"
  52 #include "memory/resourceArea.hpp"
  53 #include "oops/oop.inline.hpp"
  54 #include "runtime/atomic.inline.hpp"
  55 #include "runtime/handles.inline.hpp"
  56 #include "runtime/java.hpp"
  57 #include "runtime/prefetch.inline.hpp"
  58 #include "services/memTracker.hpp"
  59 
  60 // Concurrent marking bit map wrapper
  61 
  62 CMBitMapRO::CMBitMapRO(int shifter) :
  63   _bm(),
  64   _shifter(shifter) {
  65   _bmStartWord = 0;
  66   _bmWordSize = 0;
  67 }
  68 
  69 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  70                                                const HeapWord* limit) const {
  71   // First we must round addr *up* to a possible object boundary.
  72   addr = (HeapWord*)align_size_up((intptr_t)addr,
  73                                   HeapWordSize << _shifter);
  74   size_t addrOffset = heapWordToOffset(addr);
  75   if (limit == NULL) {
  76     limit = _bmStartWord + _bmWordSize;
  77   }
  78   size_t limitOffset = heapWordToOffset(limit);
  79   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  80   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  81   assert(nextAddr >= addr, "get_next_one postcondition");
  82   assert(nextAddr == limit || isMarked(nextAddr),
  83          "get_next_one postcondition");
  84   return nextAddr;
  85 }
  86 
  87 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
  88                                                  const HeapWord* limit) const {
  89   size_t addrOffset = heapWordToOffset(addr);
  90   if (limit == NULL) {
  91     limit = _bmStartWord + _bmWordSize;
  92   }
  93   size_t limitOffset = heapWordToOffset(limit);
  94   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  95   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
   96   assert(nextAddr >= addr, "get_next_zero postcondition");
   97   assert(nextAddr == limit || !isMarked(nextAddr),
   98          "get_next_zero postcondition");
  99   return nextAddr;
 100 }
 101 
 102 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 103   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 104   return (int) (diff >> _shifter);
 105 }
 106 
 107 #ifndef PRODUCT
 108 bool CMBitMapRO::covers(MemRegion heap_rs) const {
 109   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 110   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
 111          "size inconsistency");
 112   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
 113          _bmWordSize  == heap_rs.word_size();
 114 }
 115 #endif
 116 
 117 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
 118   _bm.print_on_error(st, prefix);
 119 }
 120 
 121 size_t CMBitMap::compute_size(size_t heap_size) {
 122   return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
 123 }
 124 
 125 size_t CMBitMap::mark_distance() {
 126   return MinObjAlignmentInBytes * BitsPerByte;
 127 }
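      // For example, assuming the default ObjectAlignmentInBytes of 8,
      // mark_distance() is 8 * BitsPerByte == 64 bytes of heap per byte of
      // bitmap (one bit per 8-byte heap word), so compute_size() reserves
      // heap_size / 64 bytes for each bitmap - roughly 512 MB for a 32 GB heap.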
 128 
 129 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 130   _bmStartWord = heap.start();
 131   _bmWordSize = heap.word_size();
 132 
 133   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
 134   _bm.set_size(_bmWordSize >> _shifter);
 135 
 136   storage->set_mapping_changed_listener(&_listener);
 137 }
 138 
 139 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 140   if (zero_filled) {
 141     return;
 142   }
 143   // We need to clear the bitmap on commit, removing any existing information.
 144   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 145   _bm->clearRange(mr);
 146 }
 147 
 148 // Closure used for clearing the given mark bitmap.
 149 class ClearBitmapHRClosure : public HeapRegionClosure {
 150  private:
 151   ConcurrentMark* _cm;
 152   CMBitMap* _bitmap;
  153   bool _may_yield;      // The closure may yield during iteration; if marking is aborted after a yield, the iteration is aborted as well.
 154  public:
 155   ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
 156     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
 157   }
 158 
 159   virtual bool doHeapRegion(HeapRegion* r) {
 160     size_t const chunk_size_in_words = M / HeapWordSize;
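          // On a 64-bit VM (HeapWordSize == 8) this is 128K words, so each chunk
          // clears the bitmap bits covering 1 MB of heap and the yield check below
          // runs about once per megabyte of region cleared.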
 161 
 162     HeapWord* cur = r->bottom();
 163     HeapWord* const end = r->end();
 164 
 165     while (cur < end) {
 166       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 167       _bitmap->clearRange(mr);
 168 
 169       cur += chunk_size_in_words;
 170 
 171       // Abort iteration if after yielding the marking has been aborted.
 172       if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
 173         return true;
 174       }
 175       // Repeat the asserts from before the start of the closure. We will do them
 176       // as asserts here to minimize their overhead on the product. However, we
 177       // will have them as guarantees at the beginning / end of the bitmap
 178       // clearing to get some checking in the product.
 179       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
 180       assert(!_may_yield || !G1CollectedHeap::heap()->mark_in_progress(), "invariant");
 181     }
 182 
 183     return false;
 184   }
 185 };
 186 
 187 class ParClearNextMarkBitmapTask : public AbstractGangTask {
 188   ClearBitmapHRClosure* _cl;
 189   HeapRegionClaimer     _hrclaimer;
 190   bool                  _suspendible; // If the task is suspendible, workers must join the STS.
 191 
 192 public:
 193   ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
  194       AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}
 195 
 196   void work(uint worker_id) {
 197     SuspendibleThreadSetJoiner sts_join(_suspendible);
 198     G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
 199   }
 200 };
 201 
 202 void CMBitMap::clearAll() {
 203   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 204   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
 205   uint n_workers = g1h->workers()->active_workers();
 206   ParClearNextMarkBitmapTask task(&cl, n_workers, false);
 207   g1h->workers()->run_task(&task);
 208   guarantee(cl.complete(), "Must have completed iteration.");
 209   return;
 210 }
 211 
 212 void CMBitMap::markRange(MemRegion mr) {
  213   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 214   assert(!mr.is_empty(), "unexpected empty region");
 215   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 216           ((HeapWord *) mr.end())),
 217          "markRange memory region end is not card aligned");
 218   // convert address range into offset range
 219   _bm.at_put_range(heapWordToOffset(mr.start()),
 220                    heapWordToOffset(mr.end()), true);
 221 }
 222 
 223 void CMBitMap::clearRange(MemRegion mr) {
  224   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 225   assert(!mr.is_empty(), "unexpected empty region");
 226   // convert address range into offset range
 227   _bm.at_put_range(heapWordToOffset(mr.start()),
 228                    heapWordToOffset(mr.end()), false);
 229 }
 230 
 231 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 232                                             HeapWord* end_addr) {
 233   HeapWord* start = getNextMarkedWordAddress(addr);
 234   start = MIN2(start, end_addr);
 235   HeapWord* end   = getNextUnmarkedWordAddress(start);
 236   end = MIN2(end, end_addr);
 237   assert(start <= end, "Consistency check");
 238   MemRegion mr(start, end);
 239   if (!mr.is_empty()) {
 240     clearRange(mr);
 241   }
 242   return mr;
 243 }
 244 
 245 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 246   _base(NULL), _cm(cm)
 247 #ifdef ASSERT
 248   , _drain_in_progress(false)
 249   , _drain_in_progress_yields(false)
 250 #endif
 251 {}
 252 
 253 bool CMMarkStack::allocate(size_t capacity) {
 254   // allocate a stack of the requisite depth
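        // (The capacity is a number of entries, not bytes: e.g. a capacity of 4M
        // entries would reserve 4M * sizeof(oop) == 32 MB of backing store on a
        // 64-bit VM.)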
 255   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 256   if (!rs.is_reserved()) {
 257     warning("ConcurrentMark MarkStack allocation failure");
 258     return false;
 259   }
 260   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 261   if (!_virtual_space.initialize(rs, rs.size())) {
 262     warning("ConcurrentMark MarkStack backing store failure");
 263     // Release the virtual memory reserved for the marking stack
 264     rs.release();
 265     return false;
 266   }
 267   assert(_virtual_space.committed_size() == rs.size(),
 268          "Didn't reserve backing store for all of ConcurrentMark stack?");
 269   _base = (oop*) _virtual_space.low();
 270   setEmpty();
 271   _capacity = (jint) capacity;
 272   _saved_index = -1;
 273   _should_expand = false;
 274   return true;
 275 }
 276 
 277 void CMMarkStack::expand() {
  278   // Called during remark if we've overflowed the marking stack during marking.
  279   assert(isEmpty(), "stack should have been emptied while handling overflow");
 280   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 281   // Clear expansion flag
 282   _should_expand = false;
 283   if (_capacity == (jint) MarkStackSizeMax) {
 284     if (PrintGCDetails && Verbose) {
 285       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
 286     }
 287     return;
 288   }
 289   // Double capacity if possible
 290   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  291   // Do not give up the existing stack until we have managed to
  292   // get the doubled capacity that we desire.
 293   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 294                                                            sizeof(oop)));
 295   if (rs.is_reserved()) {
 296     // Release the backing store associated with old stack
 297     _virtual_space.release();
 298     // Reinitialize virtual space for new stack
 299     if (!_virtual_space.initialize(rs, rs.size())) {
 300       fatal("Not enough swap for expanded marking stack capacity");
 301     }
 302     _base = (oop*)(_virtual_space.low());
 303     _index = 0;
 304     _capacity = new_capacity;
 305   } else {
 306     if (PrintGCDetails && Verbose) {
 307       // Failed to double capacity, continue;
 308       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
 309                           SIZE_FORMAT"K to " SIZE_FORMAT"K",
 310                           _capacity / K, new_capacity / K);
 311     }
 312   }
 313 }
 314 
 315 void CMMarkStack::set_should_expand() {
  316   // If we're resetting the marking state because of a
 317   // marking stack overflow, record that we should, if
 318   // possible, expand the stack.
 319   _should_expand = _cm->has_overflown();
 320 }
 321 
 322 CMMarkStack::~CMMarkStack() {
 323   if (_base != NULL) {
 324     _base = NULL;
 325     _virtual_space.release();
 326   }
 327 }
 328 
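      // Bulk-push n entries from ptr_arr onto the global mark stack while holding
      // ParGCRareEvent_lock; if the stack cannot hold all of them, the overflow
      // flag is set and nothing is pushed.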
 329 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 330   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 331   jint start = _index;
 332   jint next_index = start + n;
 333   if (next_index > _capacity) {
 334     _overflow = true;
 335     return;
 336   }
 337   // Otherwise.
 338   _index = next_index;
 339   for (int i = 0; i < n; i++) {
 340     int ind = start + i;
 341     assert(ind < _capacity, "By overflow test above.");
 342     _base[ind] = ptr_arr[i];
 343   }
 344 }
 345 
 346 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 347   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 348   jint index = _index;
 349   if (index == 0) {
 350     *n = 0;
 351     return false;
 352   } else {
 353     int k = MIN2(max, index);
 354     jint  new_ind = index - k;
 355     for (int j = 0; j < k; j++) {
 356       ptr_arr[j] = _base[new_ind + j];
 357     }
 358     _index = new_ind;
 359     *n = k;
 360     return true;
 361   }
 362 }
 363 
 364 template<class OopClosureClass>
 365 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 366   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 367          || SafepointSynchronize::is_at_safepoint(),
 368          "Drain recursion must be yield-safe.");
 369   bool res = true;
 370   debug_only(_drain_in_progress = true);
 371   debug_only(_drain_in_progress_yields = yield_after);
 372   while (!isEmpty()) {
 373     oop newOop = pop();
 374     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 375     assert(newOop->is_oop(), "Expected an oop");
 376     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 377            "only grey objects on this stack");
 378     newOop->oop_iterate(cl);
 379     if (yield_after && _cm->do_yield_check()) {
 380       res = false;
 381       break;
 382     }
 383   }
 384   debug_only(_drain_in_progress = false);
 385   return res;
 386 }
 387 
 388 void CMMarkStack::note_start_of_gc() {
 389   assert(_saved_index == -1,
 390          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 391   _saved_index = _index;
 392 }
 393 
 394 void CMMarkStack::note_end_of_gc() {
 395   // This is intentionally a guarantee, instead of an assert. If we
 396   // accidentally add something to the mark stack during GC, it
  397   // will be a correctness issue so it's better if we crash. We'll
 398   // only check this once per GC anyway, so it won't be a performance
 399   // issue in any way.
 400   guarantee(_saved_index == _index,
 401             err_msg("saved index: %d index: %d", _saved_index, _index));
 402   _saved_index = -1;
 403 }
 404 
 405 void CMMarkStack::oops_do(OopClosure* f) {
 406   assert(_saved_index == _index,
 407          err_msg("saved index: %d index: %d", _saved_index, _index));
 408   for (int i = 0; i < _index; i += 1) {
 409     f->do_oop(&_base[i]);
 410   }
 411 }
 412 
 413 CMRootRegions::CMRootRegions() :
 414   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 415   _should_abort(false),  _next_survivor(NULL) { }
 416 
 417 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 418   _young_list = g1h->young_list();
 419   _cm = cm;
 420 }
 421 
 422 void CMRootRegions::prepare_for_scan() {
 423   assert(!scan_in_progress(), "pre-condition");
 424 
 425   // Currently, only survivors can be root regions.
 426   assert(_next_survivor == NULL, "pre-condition");
 427   _next_survivor = _young_list->first_survivor_region();
 428   _scan_in_progress = (_next_survivor != NULL);
 429   _should_abort = false;
 430 }
 431 
 432 HeapRegion* CMRootRegions::claim_next() {
 433   if (_should_abort) {
 434     // If someone has set the should_abort flag, we return NULL to
 435     // force the caller to bail out of their loop.
 436     return NULL;
 437   }
 438 
 439   // Currently, only survivors can be root regions.
 440   HeapRegion* res = _next_survivor;
 441   if (res != NULL) {
 442     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 443     // Read it again in case it changed while we were waiting for the lock.
 444     res = _next_survivor;
 445     if (res != NULL) {
 446       if (res == _young_list->last_survivor_region()) {
 447         // We just claimed the last survivor so store NULL to indicate
 448         // that we're done.
 449         _next_survivor = NULL;
 450       } else {
 451         _next_survivor = res->get_next_young_region();
 452       }
 453     } else {
 454       // Someone else claimed the last survivor while we were trying
 455       // to take the lock so nothing else to do.
 456     }
 457   }
 458   assert(res == NULL || res->is_survivor(), "post-condition");
 459 
 460   return res;
 461 }
 462 
 463 void CMRootRegions::scan_finished() {
 464   assert(scan_in_progress(), "pre-condition");
 465 
 466   // Currently, only survivors can be root regions.
 467   if (!_should_abort) {
 468     assert(_next_survivor == NULL, "we should have claimed all survivors");
 469   }
 470   _next_survivor = NULL;
 471 
 472   {
 473     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 474     _scan_in_progress = false;
 475     RootRegionScan_lock->notify_all();
 476   }
 477 }
 478 
 479 bool CMRootRegions::wait_until_scan_finished() {
 480   if (!scan_in_progress()) return false;
 481 
 482   {
 483     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 484     while (scan_in_progress()) {
 485       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 486     }
 487   }
 488   return true;
 489 }
 490 
 491 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 492 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 493 #endif // _MSC_VER
 494 
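      // Scale the number of concurrent marking threads off ParallelGCThreads:
      // e.g. 8 parallel GC threads give (8 + 2) / 4 == 2 marking threads, and
      // values from 1 to 5 give the minimum of 1.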
 495 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 496   return MAX2((n_par_threads + 2) / 4, 1U);
 497 }
 498 
 499 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 500   _g1h(g1h),
 501   _markBitMap1(),
 502   _markBitMap2(),
 503   _parallel_marking_threads(0),
 504   _max_parallel_marking_threads(0),
 505   _sleep_factor(0.0),
 506   _marking_task_overhead(1.0),
 507   _cleanup_sleep_factor(0.0),
 508   _cleanup_task_overhead(1.0),
 509   _cleanup_list("Cleanup List"),
 510   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 511   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
 512             CardTableModRefBS::card_shift,
 513             false /* in_resource_area*/),
 514 
 515   _prevMarkBitMap(&_markBitMap1),
 516   _nextMarkBitMap(&_markBitMap2),
 517 
 518   _markStack(this),
 519   // _finger set in set_non_marking_state
 520 
 521   _max_worker_id((uint)ParallelGCThreads),
 522   // _active_tasks set in set_non_marking_state
 523   // _tasks set inside the constructor
 524   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 525   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 526 
 527   _has_overflown(false),
 528   _concurrent(false),
 529   _has_aborted(false),
 530   _aborted_gc_id(GCId::undefined()),
 531   _restart_for_overflow(false),
 532   _concurrent_marking_in_progress(false),
 533 
 534   // _verbose_level set below
 535 
 536   _init_times(),
 537   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 538   _cleanup_times(),
 539   _total_counting_time(0.0),
 540   _total_rs_scrub_time(0.0),
 541 
 542   _parallel_workers(NULL),
 543 
 544   _count_card_bitmaps(NULL),
 545   _count_marked_bytes(NULL),
 546   _completed_initialization(false) {
 547   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 548   if (verbose_level < no_verbose) {
 549     verbose_level = no_verbose;
 550   }
 551   if (verbose_level > high_verbose) {
 552     verbose_level = high_verbose;
 553   }
 554   _verbose_level = verbose_level;
 555 
 556   if (verbose_low()) {
  557     gclog_or_tty->print_cr("[global] init, heap start = " PTR_FORMAT ", "
 558                            "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
 559   }
 560 
 561   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 562   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 563 
 564   // Create & start a ConcurrentMark thread.
 565   _cmThread = new ConcurrentMarkThread(this);
 566   assert(cmThread() != NULL, "CM Thread should have been created");
 567   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 568   if (_cmThread->osthread() == NULL) {
  569     vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 570   }
 571 
 572   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 573   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 574   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 575 
 576   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 577   satb_qs.set_buffer_size(G1SATBBufferSize);
 578 
 579   _root_regions.init(_g1h, this);
 580 
 581   if (ConcGCThreads > ParallelGCThreads) {
 582     warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
 583             "than ParallelGCThreads (" UINTX_FORMAT ").",
 584             ConcGCThreads, ParallelGCThreads);
 585     return;
 586   }
 587   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 588     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 589     // if both are set
 590     _sleep_factor             = 0.0;
 591     _marking_task_overhead    = 1.0;
 592   } else if (G1MarkingOverheadPercent > 0) {
 593     // We will calculate the number of parallel marking threads based
 594     // on a target overhead with respect to the soft real-time goal
 595     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 596     double overall_cm_overhead =
 597       (double) MaxGCPauseMillis * marking_overhead /
 598       (double) GCPauseIntervalMillis;
 599     double cpu_ratio = 1.0 / (double) os::processor_count();
 600     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 601     double marking_task_overhead =
 602       overall_cm_overhead / marking_thread_num *
 603                                               (double) os::processor_count();
 604     double sleep_factor =
 605                        (1.0 - marking_task_overhead) / marking_task_overhead;
 606 
 607     FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
 608     _sleep_factor             = sleep_factor;
 609     _marking_task_overhead    = marking_task_overhead;
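          // Worked example with hypothetical values: G1MarkingOverheadPercent == 10,
          // MaxGCPauseMillis == 200, GCPauseIntervalMillis == 201 and 8 processors
          // give overall_cm_overhead ~= 0.0995 and cpu_ratio == 0.125, so a single
          // marking thread is used (ceil(0.796) == 1) with marking_task_overhead
          // ~= 0.796 and a sleep factor of ~0.26, i.e. the thread sleeps about
          // 0.26 s for every second of marking work.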
 610   } else {
 611     // Calculate the number of parallel marking threads by scaling
 612     // the number of parallel GC threads.
 613     uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
 614     FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
 615     _sleep_factor             = 0.0;
 616     _marking_task_overhead    = 1.0;
 617   }
 618 
 619   assert(ConcGCThreads > 0, "Should have been set");
 620   _parallel_marking_threads = (uint) ConcGCThreads;
 621   _max_parallel_marking_threads = _parallel_marking_threads;
 622 
 623   if (parallel_marking_threads() > 1) {
 624     _cleanup_task_overhead = 1.0;
 625   } else {
 626     _cleanup_task_overhead = marking_task_overhead();
 627   }
 628   _cleanup_sleep_factor =
 629                    (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 630 
 631 #if 0
 632   gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 633   gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 634   gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 635   gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 636   gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 637 #endif
 638 
 639   _parallel_workers = new FlexibleWorkGang("G1 Marker",
 640        _max_parallel_marking_threads, false, true);
 641   if (_parallel_workers == NULL) {
 642     vm_exit_during_initialization("Failed necessary allocation.");
 643   } else {
 644     _parallel_workers->initialize_workers();
 645   }
 646 
 647   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 648     size_t mark_stack_size =
 649       MIN2(MarkStackSizeMax,
 650           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 651     // Verify that the calculated value for MarkStackSize is in range.
 652     // It would be nice to use the private utility routine from Arguments.
 653     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 654       warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 655               "must be between 1 and " SIZE_FORMAT,
 656               mark_stack_size, MarkStackSizeMax);
 657       return;
 658     }
 659     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 660   } else {
 661     // Verify MarkStackSize is in range.
 662     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 663       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 664         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 665           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 666                   "must be between 1 and " SIZE_FORMAT,
 667                   MarkStackSize, MarkStackSizeMax);
 668           return;
 669         }
 670       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 671         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 672           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 673                   " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 674                   MarkStackSize, MarkStackSizeMax);
 675           return;
 676         }
 677       }
 678     }
 679   }
 680 
 681   if (!_markStack.allocate(MarkStackSize)) {
 682     warning("Failed to allocate CM marking stack");
 683     return;
 684   }
 685 
 686   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 687   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 688 
 689   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 690   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 691 
 692   BitMap::idx_t card_bm_size = _card_bm.size();
 693 
 694   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 695   _active_tasks = _max_worker_id;
 696 
 697   uint max_regions = _g1h->max_regions();
 698   for (uint i = 0; i < _max_worker_id; ++i) {
 699     CMTaskQueue* task_queue = new CMTaskQueue();
 700     task_queue->initialize();
 701     _task_queues->register_queue(i, task_queue);
 702 
 703     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 704     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 705 
 706     _tasks[i] = new CMTask(i, this,
 707                            _count_marked_bytes[i],
 708                            &_count_card_bitmaps[i],
 709                            task_queue, _task_queues);
 710 
 711     _accum_task_vtime[i] = 0.0;
 712   }
 713 
 714   // Calculate the card number for the bottom of the heap. Used
 715   // in biasing indexes into the accounting card bitmaps.
 716   _heap_bottom_card_num =
 717     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 718                                 CardTableModRefBS::card_shift);
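        // With the usual 512-byte cards (CardTableModRefBS::card_shift == 9) this
        // is simply the heap's lowest address divided by 512.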
 719 
 720   // Clear all the liveness counting data
 721   clear_all_count_data();
 722 
 723   // so that the call below can read a sensible value
 724   _heap_start = g1h->reserved_region().start();
 725   set_non_marking_state();
 726   _completed_initialization = true;
 727 }
 728 
 729 void ConcurrentMark::reset() {
 730   // Starting values for these two. This should be called in a STW
 731   // phase.
 732   MemRegion reserved = _g1h->g1_reserved();
 733   _heap_start = reserved.start();
 734   _heap_end   = reserved.end();
 735 
 736   // Separated the asserts so that we know which one fires.
 737   assert(_heap_start != NULL, "heap bounds should look ok");
 738   assert(_heap_end != NULL, "heap bounds should look ok");
 739   assert(_heap_start < _heap_end, "heap bounds should look ok");
 740 
 741   // Reset all the marking data structures and any necessary flags
 742   reset_marking_state();
 743 
 744   if (verbose_low()) {
 745     gclog_or_tty->print_cr("[global] resetting");
 746   }
 747 
 748   // We do reset all of them, since different phases will use
  749   // different numbers of active threads. So, it's easiest to have all
 750   // of them ready.
 751   for (uint i = 0; i < _max_worker_id; ++i) {
 752     _tasks[i]->reset(_nextMarkBitMap);
 753   }
 754 
 755   // we need this to make sure that the flag is on during the evac
 756   // pause with initial mark piggy-backed
 757   set_concurrent_marking_in_progress();
 758 }
 759 
 760 
 761 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 762   _markStack.set_should_expand();
 763   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 764   if (clear_overflow) {
 765     clear_has_overflown();
 766   } else {
 767     assert(has_overflown(), "pre-condition");
 768   }
 769   _finger = _heap_start;
 770 
 771   for (uint i = 0; i < _max_worker_id; ++i) {
 772     CMTaskQueue* queue = _task_queues->queue(i);
 773     queue->set_empty();
 774   }
 775 }
 776 
 777 void ConcurrentMark::set_concurrency(uint active_tasks) {
 778   assert(active_tasks <= _max_worker_id, "we should not have more");
 779 
 780   _active_tasks = active_tasks;
 781   // Need to update the three data structures below according to the
 782   // number of active threads for this phase.
 783   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 784   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 785   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 786 }
 787 
 788 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 789   set_concurrency(active_tasks);
 790 
 791   _concurrent = concurrent;
 792   // We propagate this to all tasks, not just the active ones.
 793   for (uint i = 0; i < _max_worker_id; ++i)
 794     _tasks[i]->set_concurrent(concurrent);
 795 
 796   if (concurrent) {
 797     set_concurrent_marking_in_progress();
 798   } else {
 799     // We currently assume that the concurrent flag has been set to
 800     // false before we start remark. At this point we should also be
 801     // in a STW phase.
 802     assert(!concurrent_marking_in_progress(), "invariant");
 803     assert(out_of_regions(),
  804            err_msg("only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 805                    p2i(_finger), p2i(_heap_end)));
 806   }
 807 }
 808 
 809 void ConcurrentMark::set_non_marking_state() {
 810   // We set the global marking state to some default values when we're
 811   // not doing marking.
 812   reset_marking_state();
 813   _active_tasks = 0;
 814   clear_concurrent_marking_in_progress();
 815 }
 816 
 817 ConcurrentMark::~ConcurrentMark() {
 818   // The ConcurrentMark instance is never freed.
 819   ShouldNotReachHere();
 820 }
 821 
 822 void ConcurrentMark::clearNextBitmap() {
 823   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 824 
  825   // Make sure that the concurrent mark thread still appears to be in
 826   // the current cycle.
 827   guarantee(cmThread()->during_cycle(), "invariant");
 828 
 829   // We are finishing up the current cycle by clearing the next
 830   // marking bitmap and getting it ready for the next cycle. During
 831   // this time no other cycle can start. So, let's make sure that this
 832   // is the case.
 833   guarantee(!g1h->mark_in_progress(), "invariant");
 834 
 835   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
 836   ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
 837   _parallel_workers->run_task(&task);
 838 
 839   // Clear the liveness counting data. If the marking has been aborted, the abort()
 840   // call already did that.
 841   if (cl.complete()) {
 842     clear_all_count_data();
 843   }
 844 
 845   // Repeat the asserts from above.
 846   guarantee(cmThread()->during_cycle(), "invariant");
 847   guarantee(!g1h->mark_in_progress(), "invariant");
 848 }
 849 
 850 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 851   CMBitMap* _bitmap;
 852   bool _error;
 853  public:
 854   CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
 855   }
 856 
 857   virtual bool doHeapRegion(HeapRegion* r) {
  858     // This closure can be called concurrently with the mutator, so we must make sure
 859     // that the result of the getNextMarkedWordAddress() call is compared to the
 860     // value passed to it as limit to detect any found bits.
 861     // We can use the region's orig_end() for the limit and the comparison value
 862     // as it always contains the "real" end of the region that never changes and
 863     // has no side effects.
 864     // Due to the latter, there can also be no problem with the compiler generating
 865     // reloads of the orig_end() call.
 866     HeapWord* end = r->orig_end();
 867     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 868   }
 869 };
 870 
 871 bool ConcurrentMark::nextMarkBitmapIsClear() {
 872   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 873   _g1h->heap_region_iterate(&cl);
 874   return cl.complete();
 875 }
 876 
 877 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 878 public:
 879   bool doHeapRegion(HeapRegion* r) {
 880     if (!r->is_continues_humongous()) {
 881       r->note_start_of_marking();
 882     }
 883     return false;
 884   }
 885 };
 886 
 887 void ConcurrentMark::checkpointRootsInitialPre() {
 888   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 889   G1CollectorPolicy* g1p = g1h->g1_policy();
 890 
 891   _has_aborted = false;
 892 
 893   // Initialize marking structures. This has to be done in a STW phase.
 894   reset();
 895 
 896   // For each region note start of marking.
 897   NoteStartOfMarkHRClosure startcl;
 898   g1h->heap_region_iterate(&startcl);
 899 }
 900 
 901 
 902 void ConcurrentMark::checkpointRootsInitialPost() {
 903   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 904 
 905   // If we force an overflow during remark, the remark operation will
 906   // actually abort and we'll restart concurrent marking. If we always
 907   // force an overflow during remark we'll never actually complete the
 908   // marking phase. So, we initialize this here, at the start of the
  909   // cycle, so that the remaining overflow number will decrease at
 910   // every remark and we'll eventually not need to cause one.
 911   force_overflow_stw()->init();
 912 
 913   // Start Concurrent Marking weak-reference discovery.
 914   ReferenceProcessor* rp = g1h->ref_processor_cm();
 915   // enable ("weak") refs discovery
 916   rp->enable_discovery();
 917   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 918 
 919   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  920   // This is the start of the marking cycle; we expect all
  921   // threads to have SATB queues with active set to false.
 922   satb_mq_set.set_active_all_threads(true, /* new active value */
 923                                      false /* expected_active */);
 924 
 925   _root_regions.prepare_for_scan();
 926 
 927   // update_g1_committed() will be called at the end of an evac pause
 928   // when marking is on. So, it's also called at the end of the
 929   // initial-mark pause to update the heap end, if the heap expands
 930   // during it. No need to call it here.
 931 }
 932 
 933 /*
 934  * Notice that in the next two methods, we actually leave the STS
 935  * during the barrier sync and join it immediately afterwards. If we
 936  * do not do this, the following deadlock can occur: one thread could
 937  * be in the barrier sync code, waiting for the other thread to also
 938  * sync up, whereas another one could be trying to yield, while also
 939  * waiting for the other threads to sync up too.
 940  *
 941  * Note, however, that this code is also used during remark and in
 942  * this case we should not attempt to leave / enter the STS, otherwise
 943  * we'll either hit an assert (debug / fastdebug) or deadlock
 944  * (product). So we should only leave / enter the STS if we are
 945  * operating concurrently.
 946  *
 947  * Because the thread that does the sync barrier has left the STS, it
  948  * is possible for it to be suspended while a Full GC or an evacuation
  949  * pause occurs. This is actually safe, since entering the sync
 950  * barrier is one of the last things do_marking_step() does, and it
 951  * doesn't manipulate any data structures afterwards.
 952  */
 953 
 954 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 955   bool barrier_aborted;
 956 
 957   if (verbose_low()) {
 958     gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
 959   }
 960 
 961   {
 962     SuspendibleThreadSetLeaver sts_leave(concurrent());
 963     barrier_aborted = !_first_overflow_barrier_sync.enter();
 964   }
 965 
 966   // at this point everyone should have synced up and not be doing any
 967   // more work
 968 
 969   if (verbose_low()) {
 970     if (barrier_aborted) {
 971       gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
 972     } else {
 973       gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
 974     }
 975   }
 976 
 977   if (barrier_aborted) {
 978     // If the barrier aborted we ignore the overflow condition and
 979     // just abort the whole marking phase as quickly as possible.
 980     return;
 981   }
 982 
 983   // If we're executing the concurrent phase of marking, reset the marking
 984   // state; otherwise the marking state is reset after reference processing,
 985   // during the remark pause.
 986   // If we reset here as a result of an overflow during the remark we will
 987   // see assertion failures from any subsequent set_concurrency_and_phase()
 988   // calls.
 989   if (concurrent()) {
  990     // let the task associated with worker 0 do this
 991     if (worker_id == 0) {
 992       // task 0 is responsible for clearing the global data structures
 993       // We should be here because of an overflow. During STW we should
 994       // not clear the overflow flag since we rely on it being true when
 995       // we exit this method to abort the pause and restart concurrent
 996       // marking.
 997       reset_marking_state(true /* clear_overflow */);
 998       force_overflow()->update();
 999 
1000       if (G1Log::fine()) {
1001         gclog_or_tty->gclog_stamp(concurrent_gc_id());
1002         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
1003       }
1004     }
1005   }
1006 
 1007   // after this, each task should reset its own data structures and
 1008   // then go into the second barrier
1009 }
1010 
1011 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1012   bool barrier_aborted;
1013 
1014   if (verbose_low()) {
1015     gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
1016   }
1017 
1018   {
1019     SuspendibleThreadSetLeaver sts_leave(concurrent());
1020     barrier_aborted = !_second_overflow_barrier_sync.enter();
1021   }
1022 
1023   // at this point everything should be re-initialized and ready to go
1024 
1025   if (verbose_low()) {
1026     if (barrier_aborted) {
1027       gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
1028     } else {
1029       gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
1030     }
1031   }
1032 }
1033 
1034 #ifndef PRODUCT
1035 void ForceOverflowSettings::init() {
1036   _num_remaining = G1ConcMarkForceOverflow;
1037   _force = false;
1038   update();
1039 }
1040 
1041 void ForceOverflowSettings::update() {
1042   if (_num_remaining > 0) {
1043     _num_remaining -= 1;
1044     _force = true;
1045   } else {
1046     _force = false;
1047   }
1048 }
1049 
1050 bool ForceOverflowSettings::should_force() {
1051   if (_force) {
1052     _force = false;
1053     return true;
1054   } else {
1055     return false;
1056   }
1057 }
1058 #endif // !PRODUCT
1059 
1060 class CMConcurrentMarkingTask: public AbstractGangTask {
1061 private:
1062   ConcurrentMark*       _cm;
1063   ConcurrentMarkThread* _cmt;
1064 
1065 public:
1066   void work(uint worker_id) {
1067     assert(Thread::current()->is_ConcurrentGC_thread(),
1068            "this should only be done by a conc GC thread");
1069     ResourceMark rm;
1070 
1071     double start_vtime = os::elapsedVTime();
1072 
1073     {
1074       SuspendibleThreadSetJoiner sts_join;
1075 
1076       assert(worker_id < _cm->active_tasks(), "invariant");
1077       CMTask* the_task = _cm->task(worker_id);
1078       the_task->record_start_time();
1079       if (!_cm->has_aborted()) {
1080         do {
1081           double start_vtime_sec = os::elapsedVTime();
1082           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1083 
1084           the_task->do_marking_step(mark_step_duration_ms,
1085                                     true  /* do_termination */,
1086                                     false /* is_serial*/);
1087 
1088           double end_vtime_sec = os::elapsedVTime();
1089           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1090           _cm->clear_has_overflown();
1091 
1092           _cm->do_yield_check(worker_id);
1093 
1094           jlong sleep_time_ms;
1095           if (!_cm->has_aborted() && the_task->has_aborted()) {
1096             sleep_time_ms =
1097               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1098             {
1099               SuspendibleThreadSetLeaver sts_leave;
1100               os::sleep(Thread::current(), sleep_time_ms, false);
1101             }
1102           }
1103         } while (!_cm->has_aborted() && the_task->has_aborted());
1104       }
1105       the_task->record_end_time();
1106       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1107     }
1108 
1109     double end_vtime = os::elapsedVTime();
1110     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1111   }
1112 
1113   CMConcurrentMarkingTask(ConcurrentMark* cm,
1114                           ConcurrentMarkThread* cmt) :
1115       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1116 
1117   ~CMConcurrentMarkingTask() { }
1118 };
1119 
1120 // Calculates the number of active workers for a concurrent
1121 // phase.
1122 uint ConcurrentMark::calc_parallel_marking_threads() {
1123   uint n_conc_workers = 0;
1124   if (!UseDynamicNumberOfGCThreads ||
1125       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1126        !ForceDynamicNumberOfGCThreads)) {
1127     n_conc_workers = max_parallel_marking_threads();
1128   } else {
1129     n_conc_workers =
1130       AdaptiveSizePolicy::calc_default_active_workers(
1131                                    max_parallel_marking_threads(),
1132                                    1, /* Minimum workers */
1133                                    parallel_marking_threads(),
1134                                    Threads::number_of_non_daemon_threads());
1135     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1136     // that scaling has already gone into "_max_parallel_marking_threads".
1137   }
1138   assert(n_conc_workers > 0, "Always need at least 1");
1139   return n_conc_workers;
1140 }
1141 
1142 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1143   // Currently, only survivors can be root regions.
1144   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1145   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1146 
1147   const uintx interval = PrefetchScanIntervalInBytes;
1148   HeapWord* curr = hr->bottom();
1149   const HeapWord* end = hr->top();
1150   while (curr < end) {
1151     Prefetch::read(curr, interval);
1152     oop obj = oop(curr);
1153     int size = obj->oop_iterate(&cl);
1154     assert(size == obj->size(), "sanity");
1155     curr += size;
1156   }
1157 }
1158 
1159 class CMRootRegionScanTask : public AbstractGangTask {
1160 private:
1161   ConcurrentMark* _cm;
1162 
1163 public:
1164   CMRootRegionScanTask(ConcurrentMark* cm) :
1165     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1166 
1167   void work(uint worker_id) {
1168     assert(Thread::current()->is_ConcurrentGC_thread(),
1169            "this should only be done by a conc GC thread");
1170 
1171     CMRootRegions* root_regions = _cm->root_regions();
1172     HeapRegion* hr = root_regions->claim_next();
1173     while (hr != NULL) {
1174       _cm->scanRootRegion(hr, worker_id);
1175       hr = root_regions->claim_next();
1176     }
1177   }
1178 };
1179 
1180 void ConcurrentMark::scanRootRegions() {
1181   // Start of concurrent marking.
1182   ClassLoaderDataGraph::clear_claimed_marks();
1183 
1184   // scan_in_progress() will have been set to true only if there was
1185   // at least one root region to scan. So, if it's false, we
1186   // should not attempt to do any further work.
1187   if (root_regions()->scan_in_progress()) {
1188     _parallel_marking_threads = calc_parallel_marking_threads();
1189     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1190            "Maximum number of marking threads exceeded");
1191     uint active_workers = MAX2(1U, parallel_marking_threads());
1192 
1193     CMRootRegionScanTask task(this);
1194     _parallel_workers->set_active_workers(active_workers);
1195     _parallel_workers->run_task(&task);
1196 
1197     // It's possible that has_aborted() is true here without actually
1198     // aborting the survivor scan earlier. This is OK as it's
1199     // mainly used for sanity checking.
1200     root_regions()->scan_finished();
1201   }
1202 }
1203 
1204 void ConcurrentMark::markFromRoots() {
1205   // we might be tempted to assert that:
1206   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1207   //        "inconsistent argument?");
1208   // However that wouldn't be right, because it's possible that
1209   // a safepoint is indeed in progress as a younger generation
1210   // stop-the-world GC happens even as we mark in this generation.
1211 
1212   _restart_for_overflow = false;
1213   force_overflow_conc()->init();
1214 
1215   // _g1h has _n_par_threads
1216   _parallel_marking_threads = calc_parallel_marking_threads();
1217   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1218     "Maximum number of marking threads exceeded");
1219 
1220   uint active_workers = MAX2(1U, parallel_marking_threads());
1221   assert(active_workers > 0, "Should have been set");
1222 
1223   // Parallel task terminator is set in "set_concurrency_and_phase()"
1224   set_concurrency_and_phase(active_workers, true /* concurrent */);
1225 
1226   CMConcurrentMarkingTask markingTask(this, cmThread());
1227   _parallel_workers->set_active_workers(active_workers);
1228   _parallel_workers->run_task(&markingTask);
1229   print_stats();
1230 }
1231 
1232 // Helper class to get rid of some boilerplate code.
1233 class G1CMTraceTime : public GCTraceTime {
1234   static bool doit_and_prepend(bool doit) {
1235     if (doit) {
1236       gclog_or_tty->put(' ');
1237     }
1238     return doit;
1239   }
1240 
1241  public:
1242   G1CMTraceTime(const char* title, bool doit)
1243     : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
1244         G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
1245   }
1246 };
1247 
1248 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1249   // world is stopped at this checkpoint
1250   assert(SafepointSynchronize::is_at_safepoint(),
1251          "world should be stopped");
1252 
1253   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1254 
1255   // If a full collection has happened, we shouldn't do this.
1256   if (has_aborted()) {
1257     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1258     return;
1259   }
1260 
1261   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1262 
1263   if (VerifyDuringGC) {
1264     HandleMark hm;  // handle scope
1265     g1h->prepare_for_verify();
1266     Universe::verify(VerifyOption_G1UsePrevMarking,
1267                      " VerifyDuringGC:(before)");
1268   }
1269   g1h->check_bitmaps("Remark Start");
1270 
1271   G1CollectorPolicy* g1p = g1h->g1_policy();
1272   g1p->record_concurrent_mark_remark_start();
1273 
1274   double start = os::elapsedTime();
1275 
1276   checkpointRootsFinalWork();
1277 
1278   double mark_work_end = os::elapsedTime();
1279 
1280   weakRefsWork(clear_all_soft_refs);
1281 
1282   if (has_overflown()) {
1283     // Oops.  We overflowed.  Restart concurrent marking.
1284     _restart_for_overflow = true;
1285     if (G1TraceMarkStackOverflow) {
1286       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1287     }
1288 
1289     // Verify the heap w.r.t. the previous marking bitmap.
1290     if (VerifyDuringGC) {
1291       HandleMark hm;  // handle scope
1292       g1h->prepare_for_verify();
1293       Universe::verify(VerifyOption_G1UsePrevMarking,
1294                        " VerifyDuringGC:(overflow)");
1295     }
1296 
1297     // Clear the marking state because we will be restarting
1298     // marking due to overflowing the global mark stack.
1299     reset_marking_state();
1300   } else {
1301     {
1302       G1CMTraceTime trace("GC aggregate-data", G1Log::finer());
1303 
1304       // Aggregate the per-task counting data that we have accumulated
1305       // while marking.
1306       aggregate_count_data();
1307     }
1308 
1309     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1310     // We're done with marking.
 1311     // This is the end of the marking cycle; we expect all
 1312     // threads to have SATB queues with active set to true.
1313     satb_mq_set.set_active_all_threads(false, /* new active value */
1314                                        true /* expected_active */);
1315 
1316     if (VerifyDuringGC) {
1317       HandleMark hm;  // handle scope
1318       g1h->prepare_for_verify();
1319       Universe::verify(VerifyOption_G1UseNextMarking,
1320                        " VerifyDuringGC:(after)");
1321     }
1322     g1h->check_bitmaps("Remark End");
1323     assert(!restart_for_overflow(), "sanity");
1324     // Completely reset the marking state since marking completed
1325     set_non_marking_state();
1326   }
1327 
1328   // Expand the marking stack, if we have to and if we can.
1329   if (_markStack.should_expand()) {
1330     _markStack.expand();
1331   }
1332 
1333   // Statistics
1334   double now = os::elapsedTime();
1335   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1336   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1337   _remark_times.add((now - start) * 1000.0);
1338 
1339   g1p->record_concurrent_mark_remark_end();
1340 
1341   G1CMIsAliveClosure is_alive(g1h);
1342   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1343 }
1344 
1345 // Base class of the closures that finalize and verify the
1346 // liveness counting data.
1347 class CMCountDataClosureBase: public HeapRegionClosure {
1348 protected:
1349   G1CollectedHeap* _g1h;
1350   ConcurrentMark* _cm;
1351   CardTableModRefBS* _ct_bs;
1352 
1353   BitMap* _region_bm;
1354   BitMap* _card_bm;
1355 
 1356   // Takes a region that's not empty (i.e., it has at least one
 1357   // live object in it) and sets its corresponding bit on the region
1358   // bitmap to 1. If the region is "starts humongous" it will also set
1359   // to 1 the bits on the region bitmap that correspond to its
1360   // associated "continues humongous" regions.
1361   void set_bit_for_region(HeapRegion* hr) {
1362     assert(!hr->is_continues_humongous(), "should have filtered those out");
1363 
1364     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1365     if (!hr->is_starts_humongous()) {
1366       // Normal (non-humongous) case: just set the bit.
1367       _region_bm->par_at_put(index, true);
1368     } else {
1369       // Starts humongous case: calculate how many regions are part of
1370       // this humongous region and then set the bit range.
1371       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1372       _region_bm->par_at_put_range(index, end_index, true);
1373     }
1374   }
1375 
1376 public:
1377   CMCountDataClosureBase(G1CollectedHeap* g1h,
1378                          BitMap* region_bm, BitMap* card_bm):
1379     _g1h(g1h), _cm(g1h->concurrent_mark()),
1380     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
1381     _region_bm(region_bm), _card_bm(card_bm) { }
1382 };
1383 
1384 // Closure that calculates the # live objects per region. Used
1385 // for verification purposes during the cleanup pause.
1386 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1387   CMBitMapRO* _bm;
1388   size_t _region_marked_bytes;
1389 
1390 public:
1391   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1392                          BitMap* region_bm, BitMap* card_bm) :
1393     CMCountDataClosureBase(g1h, region_bm, card_bm),
1394     _bm(bm), _region_marked_bytes(0) { }
1395 
1396   bool doHeapRegion(HeapRegion* hr) {
1397 
1398     if (hr->is_continues_humongous()) {
1399       // We will ignore these here and process them when their
1400       // associated "starts humongous" region is processed (see
1401       // set_bit_for_heap_region()). Note that we cannot rely on their
1402       // associated "starts humongous" region to have their bit set to
1403       // 1 since, due to the region chunking in the parallel region
1404       // iteration, a "continues humongous" region might be visited
1405       // before its associated "starts humongous".
1406       return false;
1407     }
1408 
1409     HeapWord* ntams = hr->next_top_at_mark_start();
1410     HeapWord* start = hr->bottom();
1411 
1412     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1413            err_msg("Preconditions not met - "
 1414                    "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
1415                    p2i(start), p2i(ntams), p2i(hr->end())));
1416 
1417     // Find the first marked object at or after "start".
1418     start = _bm->getNextMarkedWordAddress(start, ntams);
1419 
1420     size_t marked_bytes = 0;
1421 
1422     while (start < ntams) {
1423       oop obj = oop(start);
1424       int obj_sz = obj->size();
1425       HeapWord* obj_end = start + obj_sz;
1426 
1427       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1428       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1429 
1430       // Note: if we're looking at the last region in heap - obj_end
1431       // could be actually just beyond the end of the heap; end_idx
1432       // will then correspond to a (non-existent) card that is also
1433       // just beyond the heap.
1434       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1435         // end of object is not card aligned - increment to cover
1436         // all the cards spanned by the object
1437         end_idx += 1;
1438       }
1439 
1440       // Set the bits in the card BM for the cards spanned by this object.
1441       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1442 
1443       // Add the size of this object to the number of marked bytes.
1444       marked_bytes += (size_t)obj_sz * HeapWordSize;
1445 
1446       // Find the next marked object after this one.
1447       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1448     }
1449 
1450     // Mark the allocated-since-marking portion...
1451     HeapWord* top = hr->top();
1452     if (ntams < top) {
1453       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1454       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1455 
1456       // Note: if we're looking at the last region in heap - top
1457       // could be actually just beyond the end of the heap; end_idx
1458       // will then correspond to a (non-existent) card that is also
1459       // just beyond the heap.
1460       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1461         // top is not card aligned - increment to cover
1462         // all the cards spanned by the allocated-since-marking portion
1463         end_idx += 1;
1464       }
1465       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1466 
1467       // This definitely means the region has live objects.
1468       set_bit_for_region(hr);
1469     }
1470 
1471     // Update the live region bitmap.
1472     if (marked_bytes > 0) {
1473       set_bit_for_region(hr);
1474     }
1475 
1476     // Set the marked bytes for the current region so that
1477     // it can be queried by a calling verification routine
1478     _region_marked_bytes = marked_bytes;
1479 
1480     return false;
1481   }
1482 
1483   size_t region_marked_bytes() const { return _region_marked_bytes; }
1484 };
1485 
1486 // Heap region closure used for verifying the counting data
1487 // that was accumulated concurrently and aggregated during
1488 // the remark pause. This closure is applied to the heap
1489 // regions during the STW cleanup pause.
1490 
1491 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1492   G1CollectedHeap* _g1h;
1493   ConcurrentMark* _cm;
1494   CalcLiveObjectsClosure _calc_cl;
1495   BitMap* _region_bm;   // Region BM to be verified
1496   BitMap* _card_bm;     // Card BM to be verified
1497   bool _verbose;        // verbose output?
1498 
1499   BitMap* _exp_region_bm; // Expected Region BM values
1500   BitMap* _exp_card_bm;   // Expected card BM values
1501 
1502   int _failures;
1503 
1504 public:
1505   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1506                                 BitMap* region_bm,
1507                                 BitMap* card_bm,
1508                                 BitMap* exp_region_bm,
1509                                 BitMap* exp_card_bm,
1510                                 bool verbose) :
1511     _g1h(g1h), _cm(g1h->concurrent_mark()),
1512     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1513     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1514     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1515     _failures(0) { }
1516 
1517   int failures() const { return _failures; }
1518 
1519   bool doHeapRegion(HeapRegion* hr) {
1520     if (hr->is_continues_humongous()) {
1521       // We will ignore these here and process them when their
1522       // associated "starts humongous" region is processed (see
1523       // set_bit_for_region()). Note that we cannot rely on their
1524       // associated "starts humongous" region to have its bit set to
1525       // 1 since, due to the region chunking in the parallel region
1526       // iteration, a "continues humongous" region might be visited
1527       // before its associated "starts humongous".
1528       return false;
1529     }
1530 
1531     int failures = 0;
1532 
1533     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1534     // this region and set the corresponding bits in the expected region
1535     // and card bitmaps.
1536     bool res = _calc_cl.doHeapRegion(hr);
1537     assert(res == false, "should be continuing");
1538 
1539     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1540                     Mutex::_no_safepoint_check_flag);
1541 
1542     // Verify the marked bytes for this region.
1543     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1544     size_t act_marked_bytes = hr->next_marked_bytes();
1545 
1546     // We're not OK if expected marked bytes > actual marked bytes. It means
1547     // we have missed accounting for some objects during the actual marking.
1548     if (exp_marked_bytes > act_marked_bytes) {
1549       if (_verbose) {
1550         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1551                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1552                                hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
1553       }
1554       failures += 1;
1555     }
1556 
1557     // Verify the bit, for this region, in the actual and expected
1558     // (which was just calculated) region bit maps.
1559     // We're not OK if the bit in the calculated expected region
1560     // bitmap is set and the bit in the actual region bitmap is not.
1561     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1562 
1563     bool expected = _exp_region_bm->at(index);
1564     bool actual = _region_bm->at(index);
1565     if (expected && !actual) {
1566       if (_verbose) {
1567         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1568                                "expected: %s, actual: %s",
1569                                hr->hrm_index(),
1570                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1571       }
1572       failures += 1;
1573     }
1574 
1575     // Verify that the card bit maps for the cards spanned by the current
1576     // region match. We have an error if we have a set bit in the expected
1577     // bit map and the corresponding bit in the actual bitmap is not set.
1578 
1579     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1580     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1581 
1582     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1583       expected = _exp_card_bm->at(i);
1584       actual = _card_bm->at(i);
1585 
1586       if (expected && !actual) {
1587         if (_verbose) {
1588           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1589                                  "expected: %s, actual: %s",
1590                                  hr->hrm_index(), i,
1591                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1592         }
1593         failures += 1;
1594       }
1595     }
1596 
1597     if (failures > 0 && _verbose)  {
1598       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1599                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1600                              HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
1601                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1602     }
1603 
1604     _failures += failures;
1605 
1606     // We could stop iteration over the heap when we
1607     // find the first violating region by returning true.
1608     return false;
1609   }
1610 };
1611 
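     // Gang task that runs VerifyLiveObjectDataHRClosure over the heap
     // regions in parallel and accumulates the total number of verification
     // failures. Only used when VerifyDuringGC is set.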
1612 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1613 protected:
1614   G1CollectedHeap* _g1h;
1615   ConcurrentMark* _cm;
1616   BitMap* _actual_region_bm;
1617   BitMap* _actual_card_bm;
1618 
1619   uint    _n_workers;
1620 
1621   BitMap* _expected_region_bm;
1622   BitMap* _expected_card_bm;
1623 
1624   int  _failures;
1625   bool _verbose;
1626 
1627   HeapRegionClaimer _hrclaimer;
1628 
1629 public:
1630   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1631                             BitMap* region_bm, BitMap* card_bm,
1632                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1633     : AbstractGangTask("G1 verify final counting"),
1634       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1635       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1636       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1637       _failures(0), _verbose(false),
1638       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1639     assert(VerifyDuringGC, "don't call this otherwise");
1640     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1641     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1642 
1643     _verbose = _cm->verbose_medium();
1644   }
1645 
1646   void work(uint worker_id) {
1647     assert(worker_id < _n_workers, "invariant");
1648 
1649     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1650                                             _actual_region_bm, _actual_card_bm,
1651                                             _expected_region_bm,
1652                                             _expected_card_bm,
1653                                             _verbose);
1654 
1655     _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
1656 
1657     Atomic::add(verify_cl.failures(), &_failures);
1658   }
1659 
1660   int failures() const { return _failures; }
1661 };
1662 
1663 // Closure that finalizes the liveness counting data.
1664 // Used during the cleanup pause.
1665 // Sets the bits corresponding to the interval [NTAMS, top)
1666 // (which contains the implicitly live objects) in the
1667 // card liveness bitmap. Also sets the bit, in the region
1668 // liveness bitmap, for each region containing live data.
1669 
1670 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1671  public:
1672   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1673                               BitMap* region_bm,
1674                               BitMap* card_bm) :
1675     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1676 
1677   bool doHeapRegion(HeapRegion* hr) {
1678 
1679     if (hr->is_continues_humongous()) {
1680       // We will ignore these here and process them when their
1681       // associated "starts humongous" region is processed (see
1682       // set_bit_for_region()). Note that we cannot rely on their
1683       // associated "starts humongous" region to have its bit set to
1684       // 1 since, due to the region chunking in the parallel region
1685       // iteration, a "continues humongous" region might be visited
1686       // before its associated "starts humongous".
1687       return false;
1688     }
1689 
1690     HeapWord* ntams = hr->next_top_at_mark_start();
1691     HeapWord* top   = hr->top();
1692 
1693     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1694 
1695     // Mark the allocated-since-marking portion...
1696     if (ntams < top) {
1697       // This definitely means the region has live objects.
1698       set_bit_for_region(hr);
1699 
1700       // Now set the bits in the card bitmap for [ntams, top)
1701       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1702       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1703 
1704       // Note: if we're looking at the last region in heap - top
1705       // could be actually just beyond the end of the heap; end_idx
1706       // will then correspond to a (non-existent) card that is also
1707       // just beyond the heap.
1708       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1709         // top is not card aligned - increment to cover
1710         // all the cards spanned by the allocated-since-marking portion
1711         end_idx += 1;
1712       }
1713 
1714       assert(end_idx <= _card_bm->size(),
1715              err_msg("oob: end_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1716                      end_idx, _card_bm->size()));
1717       assert(start_idx < _card_bm->size(),
1718              err_msg("oob: start_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1719                      start_idx, _card_bm->size()));
1720 
1721       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1722     }
1723 
1724     // Set the bit for the region if it contains live data
1725     if (hr->next_marked_bytes() > 0) {
1726       set_bit_for_region(hr);
1727     }
1728 
1729     return false;
1730   }
1731 };
1732 
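     // Gang task that applies FinalCountDataUpdateClosure to the heap
     // regions in parallel during the cleanup pause, finalizing the region
     // and card liveness bitmaps.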
1733 class G1ParFinalCountTask: public AbstractGangTask {
1734 protected:
1735   G1CollectedHeap* _g1h;
1736   ConcurrentMark* _cm;
1737   BitMap* _actual_region_bm;
1738   BitMap* _actual_card_bm;
1739 
1740   uint    _n_workers;
1741   HeapRegionClaimer _hrclaimer;
1742 
1743 public:
1744   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1745     : AbstractGangTask("G1 final counting"),
1746       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1747       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1748       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1749   }
1750 
1751   void work(uint worker_id) {
1752     assert(worker_id < _n_workers, "invariant");
1753 
1754     FinalCountDataUpdateClosure final_update_cl(_g1h,
1755                                                 _actual_region_bm,
1756                                                 _actual_card_bm);
1757 
1758     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1759   }
1760 };
1761 
1762 class G1ParNoteEndTask;
1763 
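     // Per-worker closure applied to each heap region at the end of marking:
     // it records per-region liveness, frees regions that contain no live
     // data (adding them to the local cleanup list), and collects remembered
     // set cleanup work.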
1764 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1765   G1CollectedHeap* _g1;
1766   size_t _max_live_bytes;
1767   uint _regions_claimed;
1768   size_t _freed_bytes;
1769   FreeRegionList* _local_cleanup_list;
1770   HeapRegionSetCount _old_regions_removed;
1771   HeapRegionSetCount _humongous_regions_removed;
1772   HRRSCleanupTask* _hrrs_cleanup_task;
1773   double _claimed_region_time;
1774   double _max_region_time;
1775 
1776 public:
1777   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1778                              FreeRegionList* local_cleanup_list,
1779                              HRRSCleanupTask* hrrs_cleanup_task) :
1780     _g1(g1),
1781     _max_live_bytes(0), _regions_claimed(0),
1782     _freed_bytes(0),
1783     _claimed_region_time(0.0), _max_region_time(0.0),
1784     _local_cleanup_list(local_cleanup_list),
1785     _old_regions_removed(),
1786     _humongous_regions_removed(),
1787     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1788 
1789   size_t freed_bytes() { return _freed_bytes; }
1790   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1791   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1792 
1793   bool doHeapRegion(HeapRegion *hr) {
1794     if (hr->is_continues_humongous()) {
1795       return false;
1796     }
1797     // We use a claim value of zero here because all regions
1798     // were claimed with value 1 in the FinalCount task.
1799     _g1->reset_gc_time_stamps(hr);
1800     double start = os::elapsedTime();
1801     _regions_claimed++;
1802     hr->note_end_of_marking();
1803     _max_live_bytes += hr->max_live_bytes();
1804 
1805     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1806       _freed_bytes += hr->used();
1807       hr->set_containing_set(NULL);
1808       if (hr->is_humongous()) {
1809         assert(hr->is_starts_humongous(), "we should only see starts humongous");
1810         _humongous_regions_removed.increment(1u, hr->capacity());
1811         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1812       } else {
1813         _old_regions_removed.increment(1u, hr->capacity());
1814         _g1->free_region(hr, _local_cleanup_list, true);
1815       }
1816     } else {
1817       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1818     }
1819 
1820     double region_time = (os::elapsedTime() - start);
1821     _claimed_region_time += region_time;
1822     if (region_time > _max_region_time) {
1823       _max_region_time = region_time;
1824     }
1825     return false;
1826   }
1827 
1828   size_t max_live_bytes() { return _max_live_bytes; }
1829   uint regions_claimed() { return _regions_claimed; }
1830   double claimed_region_time_sec() { return _claimed_region_time; }
1831   double max_region_time_sec() { return _max_region_time; }
1832 };
1833 
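     // Gang task that applies G1NoteEndOfConcMarkClosure to the heap regions
     // in parallel and appends each worker's locally freed regions to the
     // global cleanup list.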
1834 class G1ParNoteEndTask: public AbstractGangTask {
1835   friend class G1NoteEndOfConcMarkClosure;
1836 
1837 protected:
1838   G1CollectedHeap* _g1h;
1839   size_t _max_live_bytes;
1840   size_t _freed_bytes;
1841   FreeRegionList* _cleanup_list;
1842   HeapRegionClaimer _hrclaimer;
1843 
1844 public:
1845   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1846       AbstractGangTask("G1 note end"), _g1h(g1h), _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1847   }
1848 
1849   void work(uint worker_id) {
1850     FreeRegionList local_cleanup_list("Local Cleanup List");
1851     HRRSCleanupTask hrrs_cleanup_task;
1852     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1853                                            &hrrs_cleanup_task);
1854     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1855     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1856 
1857     // Now update the lists
1858     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1859     {
1860       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1861       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1862       _max_live_bytes += g1_note_end.max_live_bytes();
1863       _freed_bytes += g1_note_end.freed_bytes();
1864 
1865       // If we iterate over the global cleanup list at the end of
1866       // cleanup to do this printing we cannot guarantee that we only
1867       // generate output for the newly-reclaimed regions (the list
1868       // might not be empty at the beginning of cleanup; we might
1869       // still be working on its previous contents). So we do the
1870       // printing here, before we append the new regions to the global
1871       // cleanup list.
1872 
1873       G1HRPrinter* hr_printer = _g1h->hr_printer();
1874       if (hr_printer->is_active()) {
1875         FreeRegionListIterator iter(&local_cleanup_list);
1876         while (iter.more_available()) {
1877           HeapRegion* hr = iter.get_next();
1878           hr_printer->cleanup(hr);
1879         }
1880       }
1881 
1882       _cleanup_list->add_ordered(&local_cleanup_list);
1883       assert(local_cleanup_list.is_empty(), "post-condition");
1884 
1885       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1886     }
1887   }
1888   size_t max_live_bytes() { return _max_live_bytes; }
1889   size_t freed_bytes() { return _freed_bytes; }
1890 };
1891 
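     // Gang task that scrubs the remembered sets in parallel: each worker
     // passes the region and card liveness bitmaps to G1RemSet::scrub() for
     // the regions it claims.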
1892 class G1ParScrubRemSetTask: public AbstractGangTask {
1893 protected:
1894   G1RemSet* _g1rs;
1895   BitMap* _region_bm;
1896   BitMap* _card_bm;
1897   HeapRegionClaimer _hrclaimer;
1898 
1899 public:
1900   G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
1901       AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) {
1902   }
1903 
1904   void work(uint worker_id) {
1905     _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
1906   }
1907 
1908 };
1909 
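     // The STW cleanup pause. Finalizes the liveness counting data (and
     // optionally verifies it), swaps the marking bitmaps, notes the end of
     // marking in every region (freeing completely empty ones), and
     // optionally scrubs the remembered sets.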
1910 void ConcurrentMark::cleanup() {
1911   // world is stopped at this checkpoint
1912   assert(SafepointSynchronize::is_at_safepoint(),
1913          "world should be stopped");
1914   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1915 
1916   // If a full collection has happened, we shouldn't do this.
1917   if (has_aborted()) {
1918     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1919     return;
1920   }
1921 
1922   g1h->verify_region_sets_optional();
1923 
1924   if (VerifyDuringGC) {
1925     HandleMark hm;  // handle scope
1926     g1h->prepare_for_verify();
1927     Universe::verify(VerifyOption_G1UsePrevMarking,
1928                      " VerifyDuringGC:(before)");
1929   }
1930   g1h->check_bitmaps("Cleanup Start");
1931 
1932   G1CollectorPolicy* g1p = g1h->g1_policy();
1933   g1p->record_concurrent_mark_cleanup_start();
1934 
1935   double start = os::elapsedTime();
1936 
1937   HeapRegionRemSet::reset_for_cleanup_tasks();
1938 
1939   // Do counting once more with the world stopped for good measure.
1940   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1941 
1942   g1h->workers()->run_task(&g1_par_count_task);
1943 
1944   if (VerifyDuringGC) {
1945     // Verify that the counting data accumulated during marking matches
1946     // that calculated by walking the marking bitmap.
1947 
1948     // Bitmaps to hold expected values
1949     BitMap expected_region_bm(_region_bm.size(), true);
1950     BitMap expected_card_bm(_card_bm.size(), true);
1951 
1952     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1953                                                  &_region_bm,
1954                                                  &_card_bm,
1955                                                  &expected_region_bm,
1956                                                  &expected_card_bm);
1957 
1958     g1h->workers()->run_task(&g1_par_verify_task);
1959 
1960     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1961   }
1962 
1963   size_t start_used_bytes = g1h->used();
1964   g1h->set_marking_complete();
1965 
1966   double count_end = os::elapsedTime();
1967   double this_final_counting_time = (count_end - start);
1968   _total_counting_time += this_final_counting_time;
1969 
1970   if (G1PrintRegionLivenessInfo) {
1971     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1972     _g1h->heap_region_iterate(&cl);
1973   }
1974 
1975   // Install newly created mark bitMap as "prev".
1976   swapMarkBitMaps();
1977 
1978   g1h->reset_gc_time_stamp();
1979 
1980   uint n_workers = _g1h->workers()->active_workers();
1981 
1982   // Note end of marking in all heap regions.
1983   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1984   g1h->workers()->run_task(&g1_par_note_end_task);
1985   g1h->check_gc_time_stamps();
1986 
1987   if (!cleanup_list_is_empty()) {
1988     // The cleanup list is not empty, so we'll have to process it
1989     // concurrently. Notify anyone else that might be wanting free
1990     // regions that there will be more free regions coming soon.
1991     g1h->set_free_regions_coming();
1992   }
1993 
1994   // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
1995   // call below, since it affects the metric by which we sort the heap regions.
1996   if (G1ScrubRemSets) {
1997     double rs_scrub_start = os::elapsedTime();
1998     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
1999     g1h->workers()->run_task(&g1_par_scrub_rs_task);
2000 
2001     double rs_scrub_end = os::elapsedTime();
2002     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2003     _total_rs_scrub_time += this_rs_scrub_time;
2004   }
2005 
2006   // this will also free any regions totally full of garbage objects,
2007   // and sort the regions.
2008   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
2009 
2010   // Statistics.
2011   double end = os::elapsedTime();
2012   _cleanup_times.add((end - start) * 1000.0);
2013 
2014   if (G1Log::fine()) {
2015     g1h->g1_policy()->print_heap_transition(start_used_bytes);
2016   }
2017 
2018   // Clean up will have freed any regions completely full of garbage.
2019   // Update the soft reference policy with the new heap occupancy.
2020   Universe::update_heap_info_at_gc();
2021 
2022   if (VerifyDuringGC) {
2023     HandleMark hm;  // handle scope
2024     g1h->prepare_for_verify();
2025     Universe::verify(VerifyOption_G1UsePrevMarking,
2026                      " VerifyDuringGC:(after)");
2027   }
2028 
2029   g1h->check_bitmaps("Cleanup End");
2030 
2031   g1h->verify_region_sets_optional();
2032 
2033   // We need to make this be a "collection" so any collection pause that
2034   // races with it goes around and waits for completeCleanup to finish.
2035   g1h->increment_total_collections();
2036 
2037   // Clean out dead classes and update Metaspace sizes.
2038   if (ClassUnloadingWithConcurrentMark) {
2039     ClassLoaderDataGraph::purge();
2040   }
2041   MetaspaceGC::compute_new_size();
2042 
2043   // We reclaimed old regions so we should calculate the sizes to make
2044   // sure we update the old gen/space data.
2045   g1h->g1mm()->update_sizes();
2046   g1h->allocation_context_stats().update_after_mark();
2047 
2048   g1h->trace_heap_after_concurrent_cycle();
2049 }
2050 
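     // Runs concurrently after the cleanup pause: clears the regions on the
     // cleanup list and hands them back to the secondary free list in
     // batches.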
2051 void ConcurrentMark::completeCleanup() {
2052   if (has_aborted()) return;
2053 
2054   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2055 
2056   _cleanup_list.verify_optional();
2057   FreeRegionList tmp_free_list("Tmp Free List");
2058 
2059   if (G1ConcRegionFreeingVerbose) {
2060     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2061                            "cleanup list has %u entries",
2062                            _cleanup_list.length());
2063   }
2064 
2065   // No one else should be accessing the _cleanup_list at this point,
2066   // so it is not necessary to take any locks
2067   while (!_cleanup_list.is_empty()) {
2068     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
2069     assert(hr != NULL, "Got NULL from a non-empty list");
2070     hr->par_clear();
2071     tmp_free_list.add_ordered(hr);
2072 
2073     // Instead of adding one region at a time to the secondary_free_list,
2074     // we accumulate them in the local list and move them a few at a
2075     // time. This also cuts down on the number of notify_all() calls
2076     // we do during this process. We'll also append the local list when
2077     // _cleanup_list is empty (which means we just removed the last
2078     // region from the _cleanup_list).
2079     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2080         _cleanup_list.is_empty()) {
2081       if (G1ConcRegionFreeingVerbose) {
2082         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2083                                "appending %u entries to the secondary_free_list, "
2084                                "cleanup list still has %u entries",
2085                                tmp_free_list.length(),
2086                                _cleanup_list.length());
2087       }
2088 
2089       {
2090         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2091         g1h->secondary_free_list_add(&tmp_free_list);
2092         SecondaryFreeList_lock->notify_all();
2093       }
2094 #ifndef PRODUCT
2095       if (G1StressConcRegionFreeing) {
2096         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2097           os::sleep(Thread::current(), (jlong) 1, false);
2098         }
2099       }
2100 #endif
2101     }
2102   }
2103   assert(tmp_free_list.is_empty(), "post-condition");
2104 }
2105 
2106 // Supporting Object and Oop closures for reference discovery
2107 // and processing during marking
2108 
2109 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2110   HeapWord* addr = (HeapWord*)obj;
2111   return addr != NULL &&
2112          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2113 }
2114 
2115 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2116 // Uses the CMTask associated with a worker thread (for serial reference
2117 // processing the CMTask for worker 0 is used) to preserve (mark) and
2118 // trace referent objects.
2119 //
2120 // Using the CMTask and embedded local queues avoids having the worker
2121 // threads operating on the global mark stack. This reduces the risk
2122 // of overflowing the stack - which we would rather avoid at this late
2123 // stage. Also using the tasks' local queues removes the potential
2124 // of the workers interfering with each other that could occur if
2125 // operating on the global stack.
2126 
2127 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2128   ConcurrentMark* _cm;
2129   CMTask*         _task;
2130   int             _ref_counter_limit;
2131   int             _ref_counter;
2132   bool            _is_serial;
2133  public:
2134   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2135     _cm(cm), _task(task), _is_serial(is_serial),
2136     _ref_counter_limit(G1RefProcDrainInterval) {
2137     assert(_ref_counter_limit > 0, "sanity");
2138     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2139     _ref_counter = _ref_counter_limit;
2140   }
2141 
2142   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2143   virtual void do_oop(      oop* p) { do_oop_work(p); }
2144 
2145   template <class T> void do_oop_work(T* p) {
2146     if (!_cm->has_overflown()) {
2147       oop obj = oopDesc::load_decode_heap_oop(p);
2148       if (_cm->verbose_high()) {
2149         gclog_or_tty->print_cr("\t[%u] we're looking at location "
2150                                "*"PTR_FORMAT" = "PTR_FORMAT,
2151                                _task->worker_id(), p2i(p), p2i((void*) obj));
2152       }
2153 
2154       _task->deal_with_reference(obj);
2155       _ref_counter--;
2156 
2157       if (_ref_counter == 0) {
2158         // We have dealt with _ref_counter_limit references, pushing them
2159         // and objects reachable from them on to the local stack (and
2160         // possibly the global stack). Call CMTask::do_marking_step() to
2161         // process these entries.
2162         //
2163         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2164         // there's nothing more to do (i.e. we're done with the entries that
2165         // were pushed as a result of the CMTask::deal_with_reference() calls
2166         // above) or we overflow.
2167         //
2168         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2169         // flag while there may still be some work to do. (See the comment at
2170         // the beginning of CMTask::do_marking_step() for those conditions -
2171         // one of which is reaching the specified time target.) It is only
2172         // when CMTask::do_marking_step() returns without setting the
2173         // has_aborted() flag that the marking step has completed.
2174         do {
2175           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2176           _task->do_marking_step(mark_step_duration_ms,
2177                                  false      /* do_termination */,
2178                                  _is_serial);
2179         } while (_task->has_aborted() && !_cm->has_overflown());
2180         _ref_counter = _ref_counter_limit;
2181       }
2182     } else {
2183       if (_cm->verbose_high()) {
2184          gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2185       }
2186     }
2187   }
2188 };
2189 
2190 // 'Drain' oop closure used by both serial and parallel reference processing.
2191 // Uses the CMTask associated with a given worker thread (for serial
2192 // reference processing the CMTask for worker 0 is used). Calls the
2193 // do_marking_step routine, with an unbelievably large timeout value,
2194 // to drain the marking data structures of the remaining entries
2195 // added by the 'keep alive' oop closure above.
2196 
2197 class G1CMDrainMarkingStackClosure: public VoidClosure {
2198   ConcurrentMark* _cm;
2199   CMTask*         _task;
2200   bool            _is_serial;
2201  public:
2202   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2203     _cm(cm), _task(task), _is_serial(is_serial) {
2204     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2205   }
2206 
2207   void do_void() {
2208     do {
2209       if (_cm->verbose_high()) {
2210         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2211                                _task->worker_id(), BOOL_TO_STR(_is_serial));
2212       }
2213 
2214       // We call CMTask::do_marking_step() to completely drain the local
2215       // and global marking stacks of entries pushed by the 'keep alive'
2216       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2217       //
2218       // CMTask::do_marking_step() is called in a loop, which we'll exit
2219       // if there's nothing more to do (i.e. we've completely drained the
2220       // entries that were pushed as a result of applying the 'keep alive'
2221       // closure to the entries on the discovered ref lists) or we overflow
2222       // the global marking stack.
2223       //
2224       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2225       // flag while there may still be some work to do. (See the comment at
2226       // the beginning of CMTask::do_marking_step() for those conditions -
2227       // one of which is reaching the specified time target.) It is only
2228       // when CMTask::do_marking_step() returns without setting the
2229       // has_aborted() flag that the marking step has completed.
2230 
2231       _task->do_marking_step(1000000000.0 /* something very large */,
2232                              true         /* do_termination */,
2233                              _is_serial);
2234     } while (_task->has_aborted() && !_cm->has_overflown());
2235   }
2236 };
2237 
2238 // Implementation of AbstractRefProcTaskExecutor for parallel
2239 // reference processing at the end of G1 concurrent marking
2240 
2241 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2242 private:
2243   G1CollectedHeap* _g1h;
2244   ConcurrentMark*  _cm;
2245   WorkGang*        _workers;
2246   uint             _active_workers;
2247 
2248 public:
2249   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2250                           ConcurrentMark* cm,
2251                           WorkGang* workers,
2252                           uint n_workers) :
2253     _g1h(g1h), _cm(cm),
2254     _workers(workers), _active_workers(n_workers) { }
2255 
2256   // Executes the given task using concurrent marking worker threads.
2257   virtual void execute(ProcessTask& task);
2258   virtual void execute(EnqueueTask& task);
2259 };
2260 
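     // Gang task proxy that runs a reference processing ProcessTask in
     // parallel, giving each worker its own is-alive, keep-alive and drain
     // closures.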
2261 class G1CMRefProcTaskProxy: public AbstractGangTask {
2262   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2263   ProcessTask&     _proc_task;
2264   G1CollectedHeap* _g1h;
2265   ConcurrentMark*  _cm;
2266 
2267 public:
2268   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2269                      G1CollectedHeap* g1h,
2270                      ConcurrentMark* cm) :
2271     AbstractGangTask("Process reference objects in parallel"),
2272     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2273     ReferenceProcessor* rp = _g1h->ref_processor_cm();
2274     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2275   }
2276 
2277   virtual void work(uint worker_id) {
2278     ResourceMark rm;
2279     HandleMark hm;
2280     CMTask* task = _cm->task(worker_id);
2281     G1CMIsAliveClosure g1_is_alive(_g1h);
2282     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2283     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2284 
2285     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2286   }
2287 };
2288 
2289 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2290   assert(_workers != NULL, "Need parallel worker threads.");
2291   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2292 
2293   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2294 
2295   // We need to reset the concurrency level before each
2296   // proxy task execution, so that the termination protocol
2297   // and overflow handling in CMTask::do_marking_step() knows
2298   // how many workers to wait for.
2299   _cm->set_concurrency(_active_workers);
2300   _workers->run_task(&proc_task_proxy);
2301 }
2302 
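     // Gang task proxy that runs a reference EnqueueTask in parallel.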
2303 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2304   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2305   EnqueueTask& _enq_task;
2306 
2307 public:
2308   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2309     AbstractGangTask("Enqueue reference objects in parallel"),
2310     _enq_task(enq_task) { }
2311 
2312   virtual void work(uint worker_id) {
2313     _enq_task.work(worker_id);
2314   }
2315 };
2316 
2317 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2318   assert(_workers != NULL, "Need parallel worker threads.");
2319   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2320 
2321   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2322 
2323   // Not strictly necessary but...
2324   //
2325   // We need to reset the concurrency level before each
2326   // proxy task execution, so that the termination protocol
2327   // and overflow handling in CMTask::do_marking_step() knows
2328   // how many workers to wait for.
2329   _cm->set_concurrency(_active_workers);
2330   _workers->run_task(&enq_task_proxy);
2331 }
2332 
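     // Parallel part of the weak reference work: delegates to
     // G1CollectedHeap::parallel_cleaning() to unlink dead entries from the
     // string and symbol tables and to clean up the code cache and class
     // data, using the marking is-alive closure.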
2333 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2334   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2335 }
2336 
2337 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2338   if (has_overflown()) {
2339     // Skip processing the discovered references if we have
2340     // overflown the global marking stack. Reference objects
2341     // only get discovered once so it is OK to not
2342     // de-populate the discovered reference lists. We could have,
2343     // but the only benefit would be that, when marking restarts,
2344     // fewer reference objects are discovered.
2345     return;
2346   }
2347 
2348   ResourceMark rm;
2349   HandleMark   hm;
2350 
2351   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2352 
2353   // Is alive closure.
2354   G1CMIsAliveClosure g1_is_alive(g1h);
2355 
2356   // Inner scope to exclude the cleaning of the string and symbol
2357   // tables from the displayed time.
2358   {
2359     G1CMTraceTime t("GC ref-proc", G1Log::finer());
2360 
2361     ReferenceProcessor* rp = g1h->ref_processor_cm();
2362 
2363     // See the comment in G1CollectedHeap::ref_processing_init()
2364     // about how reference processing currently works in G1.
2365 
2366     // Set the soft reference policy
2367     rp->setup_policy(clear_all_soft_refs);
2368     assert(_markStack.isEmpty(), "mark stack should be empty");
2369 
2370     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2371     // in serial reference processing. Note these closures are also
2372     // used for serially processing (by the current thread) the
2373     // JNI references during parallel reference processing.
2374     //
2375     // These closures do not need to synchronize with the worker
2376     // threads involved in parallel reference processing as these
2377     // instances are executed serially by the current thread (e.g.
2378     // reference processing is not multi-threaded and is thus
2379     // performed by the current thread instead of a gang worker).
2380     //
2381     // The gang tasks involved in parallel reference processing create
2382     // their own instances of these closures, which do their own
2383     // synchronization among themselves.
2384     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2385     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2386 
2387     // We need at least one active thread. If reference processing
2388     // is not multi-threaded we use the current (VMThread) thread,
2389     // otherwise we use the work gang from the G1CollectedHeap and
2390     // we utilize all the worker threads we can.
2391     bool processing_is_mt = rp->processing_is_mt();
2392     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2393     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2394 
2395     // Parallel processing task executor.
2396     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2397                                               g1h->workers(), active_workers);
2398     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2399 
2400     // Set the concurrency level. The phase was already set prior to
2401     // executing the remark task.
2402     set_concurrency(active_workers);
2403 
2404     // Set the degree of MT processing here.  If the discovery was done MT,
2405     // the number of threads involved during discovery could differ from
2406     // the number of active workers.  This is OK as long as the discovered
2407     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2408     rp->set_active_mt_degree(active_workers);
2409 
2410     // Process the weak references.
2411     const ReferenceProcessorStats& stats =
2412         rp->process_discovered_references(&g1_is_alive,
2413                                           &g1_keep_alive,
2414                                           &g1_drain_mark_stack,
2415                                           executor,
2416                                           g1h->gc_timer_cm(),
2417                                           concurrent_gc_id());
2418     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2419 
2420     // The do_oop work routines of the keep_alive and drain_marking_stack
2421     // oop closures will set the has_overflown flag if we overflow the
2422     // global marking stack.
2423 
2424     assert(_markStack.overflow() || _markStack.isEmpty(),
2425             "mark stack should be empty (unless it overflowed)");
2426 
2427     if (_markStack.overflow()) {
2428       // This should have been done already when we tried to push an
2429       // entry on to the global mark stack. But let's do it again.
2430       set_has_overflown();
2431     }
2432 
2433     assert(rp->num_q() == active_workers, "Number of discovered reference queues should match the number of active workers");
2434 
2435     rp->enqueue_discovered_references(executor);
2436 
2437     rp->verify_no_references_recorded();
2438     assert(!rp->discovery_enabled(), "Post condition");
2439   }
2440 
2441   if (has_overflown()) {
2442     // We can not trust g1_is_alive if the marking stack overflowed
2443     return;
2444   }
2445 
2446   assert(_markStack.isEmpty(), "Marking should have completed");
2447 
2448   // Unload Klasses, String, Symbols, Code Cache, etc.
2449   {
2450     G1CMTraceTime trace("Unloading", G1Log::finer());
2451 
2452     if (ClassUnloadingWithConcurrentMark) {
2453       bool purged_classes;
2454 
2455       {
2456         G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest());
2457         purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2458       }
2459 
2460       {
2461         G1CMTraceTime trace("Parallel Unloading", G1Log::finest());
2462         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2463       }
2464     }
2465 
2466     if (G1StringDedup::is_enabled()) {
2467       G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest());
2468       G1StringDedup::unlink(&g1_is_alive);
2469     }
2470   }
2471 }
2472 
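     // Swap the roles of the two marking bitmaps so that the bitmap just
     // populated by marking becomes the "prev" (complete) bitmap for the
     // next cycle.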
2473 void ConcurrentMark::swapMarkBitMaps() {
2474   CMBitMapRO* temp = _prevMarkBitMap;
2475   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2476   _nextMarkBitMap  = (CMBitMap*)  temp;
2477 }
2478 
2479 // Closure for marking entries in SATB buffers.
2480 class CMSATBBufferClosure : public SATBBufferClosure {
2481 private:
2482   CMTask* _task;
2483   G1CollectedHeap* _g1h;
2484 
2485   // This is very similar to CMTask::deal_with_reference, but with
2486   // more relaxed requirements for the argument, so this must be more
2487   // circumspect about treating the argument as an object.
2488   void do_entry(void* entry) const {
2489     _task->increment_refs_reached();
2490     HeapRegion* hr = _g1h->heap_region_containing_raw(entry);
2491     if (entry < hr->next_top_at_mark_start()) {
2492       // Until we get here, we don't know whether entry refers to a valid
2493       // object; it could instead have been a stale reference.
2494       oop obj = static_cast<oop>(entry);
2495       assert(obj->is_oop(true /* ignore mark word */),
2496              err_msg("Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj)));
2497       _task->make_reference_grey(obj, hr);
2498     }
2499   }
2500 
2501 public:
2502   CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2503     : _task(task), _g1h(g1h) { }
2504 
2505   virtual void do_buffer(void** buffer, size_t size) {
2506     for (size_t i = 0; i < size; ++i) {
2507       do_entry(buffer[i]);
2508     }
2509   }
2510 };
2511 
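     // Thread closure used during remark: claims each thread once, processes
     // the nmethods and SATB buffer of each Java thread, and drains the
     // shared SATB queue for the VM thread.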
2512 class G1RemarkThreadsClosure : public ThreadClosure {
2513   CMSATBBufferClosure _cm_satb_cl;
2514   G1CMOopClosure _cm_cl;
2515   MarkingCodeBlobClosure _code_cl;
2516   int _thread_parity;
2517 
2518  public:
2519   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2520     _cm_satb_cl(task, g1h),
2521     _cm_cl(g1h, g1h->concurrent_mark(), task),
2522     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2523     _thread_parity(Threads::thread_claim_parity()) {}
2524 
2525   void do_thread(Thread* thread) {
2526     if (thread->is_Java_thread()) {
2527       if (thread->claim_oops_do(true, _thread_parity)) {
2528         JavaThread* jt = (JavaThread*)thread;
2529 
2530         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2531         // however, oops reachable from nmethods have very complex lifecycles:
2532         // * Alive if on the stack of an executing method
2533         // * Weakly reachable otherwise
2534         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2535         // live by the SATB invariant but other oops recorded in nmethods may behave differently.
2536         jt->nmethods_do(&_code_cl);
2537 
2538         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2539       }
2540     } else if (thread->is_VM_thread()) {
2541       if (thread->claim_oops_do(true, _thread_parity)) {
2542         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2543       }
2544     }
2545   }
2546 };
2547 
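     // Gang task for the remark pause: each active worker first processes
     // the per-thread SATB buffers and nmethod roots (via
     // G1RemarkThreadsClosure) and then finishes marking by calling
     // do_marking_step() until it terminates or the mark stack overflows.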
2548 class CMRemarkTask: public AbstractGangTask {
2549 private:
2550   ConcurrentMark* _cm;
2551 public:
2552   void work(uint worker_id) {
2553     // Since all available tasks are actually started, we should
2554     // only proceed if we're supposed to be active.
2555     if (worker_id < _cm->active_tasks()) {
2556       CMTask* task = _cm->task(worker_id);
2557       task->record_start_time();
2558       {
2559         ResourceMark rm;
2560         HandleMark hm;
2561 
2562         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2563         Threads::threads_do(&threads_f);
2564       }
2565 
2566       do {
2567         task->do_marking_step(1000000000.0 /* something very large */,
2568                               true         /* do_termination       */,
2569                               false        /* is_serial            */);
2570       } while (task->has_aborted() && !_cm->has_overflown());
2571       // If we overflow, then we do not want to restart. We instead
2572       // want to abort remark and do concurrent marking again.
2573       task->record_end_time();
2574     }
2575   }
2576 
2577   CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2578     AbstractGangTask("Par Remark"), _cm(cm) {
2579     _cm->terminator()->reset_for_reuse(active_workers);
2580   }
2581 };
2582 
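     // The stop-the-world work of the remark pause: runs CMRemarkTask with
     // all active workers and then checks that the SATB buffers have been
     // fully drained (unless marking overflowed).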
2583 void ConcurrentMark::checkpointRootsFinalWork() {
2584   ResourceMark rm;
2585   HandleMark   hm;
2586   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2587 
2588   G1CMTraceTime trace("Finalize Marking", G1Log::finer());
2589 
2590   g1h->ensure_parsability(false);
2591 
2592   // this is remark, so we'll use up all active threads
2593   uint active_workers = g1h->workers()->active_workers();
2594   set_concurrency_and_phase(active_workers, false /* concurrent */);
2595   // Leave _parallel_marking_threads at its
2596   // value originally calculated in the ConcurrentMark
2597   // constructor and pass values of the active workers
2598   // through the gang in the task.
2599 
2600   {
2601     StrongRootsScope srs(active_workers);
2602 
2603     CMRemarkTask remarkTask(this, active_workers);
2604     // We will start all available threads, even if we decide that the
2605     // active_workers will be fewer. The extra ones will just bail out
2606     // immediately.
2607     g1h->workers()->run_task(&remarkTask);
2608   }
2609 
2610   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2611   guarantee(has_overflown() ||
2612             satb_mq_set.completed_buffers_num() == 0,
2613             err_msg("Invariant: has_overflown = %s, num buffers = %d",
2614                     BOOL_TO_STR(has_overflown()),
2615                     satb_mq_set.completed_buffers_num()));
2616 
2617   print_stats();
2618 }
2619 
2620 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2621   // Note we are overriding the read-only view of the prev map here, via
2622   // the cast.
2623   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2624 }
2625 
2626 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2627   _nextMarkBitMap->clearRange(mr);
2628 }
2629 
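     // Claims the next region to be scanned by atomically moving the global
     // finger past it. Returns the claimed region if it has objects that need
     // scanning, or NULL if the claimed region was empty (the caller should
     // retry) or the finger has reached the end of the heap.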
2630 HeapRegion*
2631 ConcurrentMark::claim_region(uint worker_id) {
2632   // "checkpoint" the finger
2633   HeapWord* finger = _finger;
2634 
2635   // _heap_end will not change underneath our feet; it only changes at
2636   // yield points.
2637   while (finger < _heap_end) {
2638     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2639 
2640     // Note on how this code handles humongous regions. In the
2641     // normal case the finger will reach the start of a "starts
2642     // humongous" (SH) region. Its end will either be the end of the
2643     // last "continues humongous" (CH) region in the sequence, or the
2644     // standard end of the SH region (if the SH is the only region in
2645     // the sequence). That way claim_region() will skip over the CH
2646     // regions. However, there is a subtle race between a CM thread
2647     // executing this method and a mutator thread doing a humongous
2648     // object allocation. The two are not mutually exclusive as the CM
2649     // thread does not need to hold the Heap_lock when it gets
2650     // here. So there is a chance that claim_region() will come across
2651     // a free region that's in the process of becoming a SH or a CH
2652     // region. In the former case, it will either
2653     //   a) Miss the update to the region's end, in which case it will
2654     //      visit every subsequent CH region, will find their bitmaps
2655     //      empty, and do nothing, or
2656     //   b) Will observe the update of the region's end (in which case
2657     //      it will skip the subsequent CH regions).
2658     // If it comes across a region that suddenly becomes CH, the
2659     // scenario will be similar to b). So, the race between
2660     // claim_region() and a humongous object allocation might force us
2661     // to do a bit of unnecessary work (due to some unnecessary bitmap
2662     //      iterations) but it should not introduce any correctness issues.
2663     HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2664 
2665     // Above heap_region_containing_raw may return NULL as we always scan claim
2666     // until the end of the heap. In this case, just jump to the next region.
2667     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2668 
2669     // Is the gap between reading the finger and doing the CAS too long?
2670     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2671     if (res == finger && curr_region != NULL) {
2672       // we succeeded
2673       HeapWord*   bottom        = curr_region->bottom();
2674       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2675 
2676       if (verbose_low()) {
2677         gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
2678                                "["PTR_FORMAT", "PTR_FORMAT"), "
2679                                "limit = "PTR_FORMAT,
2680                                worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2681       }
2682 
2683       // notice that _finger == end cannot be guaranteed here since
2684       // someone else might have moved the finger even further
2685       assert(_finger >= end, "the finger should have moved forward");
2686 
2687       if (verbose_low()) {
2688         gclog_or_tty->print_cr("[%u] we were successful with region = "
2689                                PTR_FORMAT, worker_id, p2i(curr_region));
2690       }
2691 
2692       if (limit > bottom) {
2693         if (verbose_low()) {
2694           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
2695                                  "returning it ", worker_id, p2i(curr_region));
2696         }
2697         return curr_region;
2698       } else {
2699         assert(limit == bottom,
2700                "the region limit should be at bottom");
2701         if (verbose_low()) {
2702           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
2703                                  "returning NULL", worker_id, p2i(curr_region));
2704         }
2705         // we return NULL and the caller should try calling
2706         // claim_region() again.
2707         return NULL;
2708       }
2709     } else {
2710       assert(_finger > finger, "the finger should have moved forward");
2711       if (verbose_low()) {
2712         if (curr_region == NULL) {
2713           gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, "
2714                                  "global finger = "PTR_FORMAT", "
2715                                  "our finger = "PTR_FORMAT,
2716                                  worker_id, p2i(_finger), p2i(finger));
2717         } else {
2718           gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
2719                                  "global finger = "PTR_FORMAT", "
2720                                  "our finger = "PTR_FORMAT,
2721                                  worker_id, p2i(_finger), p2i(finger));
2722         }
2723       }
2724 
2725       // read it again
2726       finger = _finger;
2727     }
2728   }
2729 
2730   return NULL;
2731 }
2732 
2733 #ifndef PRODUCT
2734 enum VerifyNoCSetOopsPhase {
2735   VerifyNoCSetOopsStack,
2736   VerifyNoCSetOopsQueues
2737 };
2738 
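     // Debug-only closure that asserts that no entry on the global mark
     // stack or on the per-task queues points into the collection set.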
2739 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
2740 private:
2741   G1CollectedHeap* _g1h;
2742   VerifyNoCSetOopsPhase _phase;
2743   int _info;
2744 
2745   const char* phase_str() {
2746     switch (_phase) {
2747     case VerifyNoCSetOopsStack:         return "Stack";
2748     case VerifyNoCSetOopsQueues:        return "Queue";
2749     default:                            ShouldNotReachHere();
2750     }
2751     return NULL;
2752   }
2753 
2754   void do_object_work(oop obj) {
2755     guarantee(!_g1h->obj_in_cs(obj),
2756               err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2757                       p2i((void*) obj), phase_str(), _info));
2758   }
2759 
2760 public:
2761   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2762 
2763   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2764     _phase = phase;
2765     _info = info;
2766   }
2767 
2768   virtual void do_oop(oop* p) {
2769     oop obj = oopDesc::load_decode_heap_oop(p);
2770     do_object_work(obj);
2771   }
2772 
2773   virtual void do_oop(narrowOop* p) {
2774     // We should not come across narrow oops while scanning marking
2775     // stacks
2776     ShouldNotReachHere();
2777   }
2778 
2779   virtual void do_object(oop obj) {
2780     do_object_work(obj);
2781   }
2782 };
2783 
2784 void ConcurrentMark::verify_no_cset_oops() {
2785   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2786   if (!G1CollectedHeap::heap()->mark_in_progress()) {
2787     return;
2788   }
2789 
2790   VerifyNoCSetOopsClosure cl;
2791 
2792   // Verify entries on the global mark stack
2793   cl.set_phase(VerifyNoCSetOopsStack);
2794   _markStack.oops_do(&cl);
2795 
2796   // Verify entries on the task queues
2797   for (uint i = 0; i < _max_worker_id; i += 1) {
2798     cl.set_phase(VerifyNoCSetOopsQueues, i);
2799     CMTaskQueue* queue = _task_queues->queue(i);
2800     queue->oops_do(&cl);
2801   }
2802 
2803   // Verify the global finger
2804   HeapWord* global_finger = finger();
2805   if (global_finger != NULL && global_finger < _heap_end) {
2806     // The global finger always points to a heap region boundary. We
2807     // use heap_region_containing_raw() to get the containing region
2808     // given that the global finger could be pointing to a free region
2809     // which subsequently becomes continues humongous. If that
2810     // happens, heap_region_containing() will return the bottom of the
2811     // corresponding starts humongous region and the check below will
2812     // not hold any more.
2813     // Since we always iterate over all regions, we might get a NULL HeapRegion
2814     // here.
2815     HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2816     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2817               err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
2818                       p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
2819   }
2820 
2821   // Verify the task fingers
2822   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2823   for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
2824     CMTask* task = _tasks[i];
2825     HeapWord* task_finger = task->finger();
2826     if (task_finger != NULL && task_finger < _heap_end) {
2827       // See above note on the global finger verification.
2828       HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2829       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2830                 !task_hr->in_collection_set(),
2831                 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
2832                         p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
2833     }
2834   }
2835 }
2836 #endif // PRODUCT
2837 
2838 // Aggregate the counting data that was constructed concurrently
2839 // with marking.
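     // Each marking worker builds its own per-region marked-bytes array and
     // its own card bitmap while marking. This closure folds that per-worker
     // data into the global view: for every region it sums the workers'
     // marked-bytes entries and ORs the workers' card bits into the global
     // card bitmap.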
2840 class AggregateCountDataHRClosure: public HeapRegionClosure {
2841   G1CollectedHeap* _g1h;
2842   ConcurrentMark* _cm;
2843   CardTableModRefBS* _ct_bs;
2844   BitMap* _cm_card_bm;
2845   uint _max_worker_id;
2846 
2847  public:
2848   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2849                               BitMap* cm_card_bm,
2850                               uint max_worker_id) :
2851     _g1h(g1h), _cm(g1h->concurrent_mark()),
2852     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2853     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2854 
2855   bool doHeapRegion(HeapRegion* hr) {
2856     if (hr->is_continues_humongous()) {
2857       // We will ignore these here and process them when their
2858       // associated "starts humongous" region is processed.
2859       // Note that we cannot rely on their associated
2860       // "starts humongous" region to have its bit set to 1
2861       // since, due to the region chunking in the parallel region
2862       // iteration, a "continues humongous" region might be visited
2863       // before its associated "starts humongous".
2864       return false;
2865     }
2866 
2867     HeapWord* start = hr->bottom();
2868     HeapWord* limit = hr->next_top_at_mark_start();
2869     HeapWord* end = hr->end();
2870 
2871     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2872            err_msg("Preconditions not met - "
2873                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
2874                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
2875                    p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));
2876 
2877     assert(hr->next_marked_bytes() == 0, "Precondition");
2878 
2879     if (start == limit) {
2880       // NTAMS of this region has not been set so nothing to do.
2881       return false;
2882     }
2883 
2884     // 'start' should be in the heap.
2885     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2886     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2887     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2888 
2889     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2890     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2891     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2892 
2893     // If ntams is not card aligned then we bump the card bitmap index
2894     // for limit so that we get all the cards spanned by
2895     // the object ending at ntams.
2896     // Note: if this is the last region in the heap then ntams
2897     // could actually be just beyond the end of the heap;
2898     // limit_idx will then correspond to a (non-existent) card
2899     // that is also outside the heap.
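         // Illustrative example (assuming the usual 512-byte card size): if
         // limit falls, say, 40 bytes past a card boundary, then
         // card_bitmap_index_for(limit) is the index of the card containing
         // limit itself; bumping it by one makes the half-open range
         // [start_idx, limit_idx) also cover that partially filled card.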
2900     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2901       limit_idx += 1;
2902     }
2903 
2904     assert(limit_idx <= end_idx, "or else use atomics");
2905 
2906     // Aggregate the "stripe" in the count data associated with hr.
2907     uint hrm_index = hr->hrm_index();
2908     size_t marked_bytes = 0;
2909 
2910     for (uint i = 0; i < _max_worker_id; i += 1) {
2911       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2912       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2913 
2914       // Fetch the marked_bytes in this region for task i and
2915       // add it to the running total for this region.
2916       marked_bytes += marked_bytes_array[hrm_index];
2917 
2918       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2919       // into the global card bitmap.
2920       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2921 
2922       while (scan_idx < limit_idx) {
2923         assert(task_card_bm->at(scan_idx) == true, "should be");
2924         _cm_card_bm->set_bit(scan_idx);
2925         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2926 
2927         // BitMap::get_next_one_offset() can handle the case when
2928         // its left_offset parameter is greater than its right_offset
2929         // parameter. It does, however, have an early exit if
2930         // left_offset == right_offset. So let's limit the value
2931         // passed in for left offset here.
2932         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2933         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2934       }
2935     }
2936 
2937     // Update the marked bytes for this region.
2938     hr->add_to_marked_bytes(marked_bytes);
2939 
2940     // Next heap region
2941     return false;
2942   }
2943 };
2944 
2945 class G1AggregateCountDataTask: public AbstractGangTask {
2946 protected:
2947   G1CollectedHeap* _g1h;
2948   ConcurrentMark* _cm;
2949   BitMap* _cm_card_bm;
2950   uint _max_worker_id;
2951   uint _active_workers;
2952   HeapRegionClaimer _hrclaimer;
2953 
2954 public:
2955   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2956                            ConcurrentMark* cm,
2957                            BitMap* cm_card_bm,
2958                            uint max_worker_id,
2959                            uint n_workers) :
2960       AbstractGangTask("Count Aggregation"),
2961       _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2962       _max_worker_id(max_worker_id),
2963       _active_workers(n_workers),
2964       _hrclaimer(_active_workers) {
2965   }
2966 
2967   void work(uint worker_id) {
2968     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
2969 
2970     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
2971   }
2972 };
2973 
2974 
2975 void ConcurrentMark::aggregate_count_data() {
2976   uint n_workers = _g1h->workers()->active_workers();
2977 
2978   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
2979                                            _max_worker_id, n_workers);
2980 
2981   _g1h->workers()->run_task(&g1_par_agg_task);
2982 }
2983 
2984 // Clear the per-worker arrays used to store the per-region counting data
2985 void ConcurrentMark::clear_all_count_data() {
2986   // Clear the global card bitmap - it will be filled during
2987   // liveness count aggregation (during remark) and the
2988   // final counting task.
2989   _card_bm.clear();
2990 
2991   // Clear the global region bitmap - it will be filled as part
2992   // of the final counting task.
2993   _region_bm.clear();
2994 
2995   uint max_regions = _g1h->max_regions();
2996   assert(_max_worker_id > 0, "uninitialized");
2997 
2998   for (uint i = 0; i < _max_worker_id; i += 1) {
2999     BitMap* task_card_bm = count_card_bitmap_for(i);
3000     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3001 
3002     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3003     assert(marked_bytes_array != NULL, "uninitialized");
3004 
3005     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3006     task_card_bm->clear();
3007   }
3008 }
3009 
3010 void ConcurrentMark::print_stats() {
3011   if (verbose_stats()) {
3012     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3013     for (size_t i = 0; i < _active_tasks; ++i) {
3014       _tasks[i]->print_stats();
3015       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3016     }
3017   }
3018 }
3019 
3020 // abandon current marking iteration due to a Full GC
3021 void ConcurrentMark::abort() {
3022   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
3023   // concurrent bitmap clearing.
3024   _nextMarkBitMap->clearAll();
3025 
3026   // Note we cannot clear the previous marking bitmap here
3027   // since VerifyDuringGC verifies the objects marked during
3028   // a full GC against the previous bitmap.
3029 
3030   // Clear the liveness counting data
3031   clear_all_count_data();
3032   // Empty mark stack
3033   reset_marking_state();
3034   for (uint i = 0; i < _max_worker_id; ++i) {
3035     _tasks[i]->clear_region_fields();
3036   }
3037   _first_overflow_barrier_sync.abort();
3038   _second_overflow_barrier_sync.abort();
3039   const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id();
3040   if (!gc_id.is_undefined()) {
3041     // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance
3042     // to detect that it was aborted. Only keep track of the first GC id that we aborted.
3043     _aborted_gc_id = gc_id;
3044   }
3045   _has_aborted = true;
3046 
3047   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3048   satb_mq_set.abandon_partial_marking();
3049   // This can be called either during or outside marking; we'll read
3050   // the expected_active value from the SATB queue set.
3051   satb_mq_set.set_active_all_threads(
3052                                  false, /* new active value */
3053                                  satb_mq_set.is_active() /* expected_active */);
3054 
3055   _g1h->trace_heap_after_concurrent_cycle();
3056   _g1h->register_concurrent_cycle_end();
3057 }
3058 
3059 const GCId& ConcurrentMark::concurrent_gc_id() {
3060   if (has_aborted()) {
3061     return _aborted_gc_id;
3062   }
3063   return _g1h->gc_tracer_cm()->gc_id();
3064 }
3065 
3066 static void print_ms_time_info(const char* prefix, const char* name,
3067                                NumberSeq& ns) {
3068   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3069                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3070   if (ns.num() > 0) {
3071     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3072                            prefix, ns.sd(), ns.maximum());
3073   }
3074 }
3075 
3076 void ConcurrentMark::print_summary_info() {
3077   gclog_or_tty->print_cr(" Concurrent marking:");
3078   print_ms_time_info("  ", "init marks", _init_times);
3079   print_ms_time_info("  ", "remarks", _remark_times);
3080   {
3081     print_ms_time_info("     ", "final marks", _remark_mark_times);
3082     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3083 
3084   }
3085   print_ms_time_info("  ", "cleanups", _cleanup_times);
3086   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3087                          _total_counting_time,
3088                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3089                           (double)_cleanup_times.num()
3090                          : 0.0));
3091   if (G1ScrubRemSets) {
3092     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3093                            _total_rs_scrub_time,
3094                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3095                             (double)_cleanup_times.num()
3096                            : 0.0));
3097   }
3098   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3099                          (_init_times.sum() + _remark_times.sum() +
3100                           _cleanup_times.sum())/1000.0);
3101   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3102                 "(%8.2f s marking).",
3103                 cmThread()->vtime_accum(),
3104                 cmThread()->vtime_mark_accum());
3105 }
3106 
3107 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3108   _parallel_workers->print_worker_threads_on(st);
3109 }
3110 
3111 void ConcurrentMark::print_on_error(outputStream* st) const {
3112   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
3113       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
3114   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
3115   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
3116 }
3117 
3118 // We take a break if someone is trying to stop the world.
3119 bool ConcurrentMark::do_yield_check(uint worker_id) {
3120   if (SuspendibleThreadSet::should_yield()) {
3121     if (worker_id == 0) {
3122       _g1h->g1_policy()->record_concurrent_pause();
3123     }
3124     SuspendibleThreadSet::yield();
3125     return true;
3126   } else {
3127     return false;
3128   }
3129 }
3130 
3131 #ifndef PRODUCT
3132 // for debugging purposes
3133 void ConcurrentMark::print_finger() {
3134   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3135                          p2i(_heap_start), p2i(_heap_end), p2i(_finger));
3136   for (uint i = 0; i < _max_worker_id; ++i) {
3137     gclog_or_tty->print("   %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
3138   }
3139   gclog_or_tty->cr();
3140 }
3141 #endif
3142 
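     // Process one grey object. The "scan" template parameter controls whether
     // the object's reference fields are iterated: the <false> instantiation is
     // only intended for objects that contain no references (type arrays, as
     // the assert below enforces), where updating the scanned-words count is
     // sufficient.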
3143 template<bool scan>
3144 inline void CMTask::process_grey_object(oop obj) {
3145   assert(scan || obj->is_typeArray(), "Skipping scan of grey non-typeArray");
3146   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3147 
3148   if (_cm->verbose_high()) {
3149     gclog_or_tty->print_cr("[%u] processing grey object " PTR_FORMAT,
3150                            _worker_id, p2i((void*) obj));
3151   }
3152 
3153   size_t obj_size = obj->size();
3154   _words_scanned += obj_size;
3155 
3156   if (scan) {
3157     obj->oop_iterate(_cm_oop_closure);
3158   }
3159   statsOnly( ++_objs_scanned );
3160   check_limits();
3161 }
3162 
3163 template void CMTask::process_grey_object<true>(oop);
3164 template void CMTask::process_grey_object<false>(oop);
3165 
3166 // Closure for iteration over bitmaps
3167 class CMBitMapClosure : public BitMapClosure {
3168 private:
3169   // the bitmap that is being iterated over
3170   CMBitMap*                   _nextMarkBitMap;
3171   ConcurrentMark*             _cm;
3172   CMTask*                     _task;
3173 
3174 public:
3175   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3176     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3177 
3178   bool do_bit(size_t offset) {
3179     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3180     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3181     assert( addr < _cm->finger(), "invariant");
3182 
3183     statsOnly( _task->increase_objs_found_on_bitmap() );
3184     assert(addr >= _task->finger(), "invariant");
3185 
3186     // We move that task's local finger along.
3187     _task->move_finger_to(addr);
3188 
3189     _task->scan_object(oop(addr));
3190     // we only partially drain the local queue and global stack
3191     _task->drain_local_queue(true);
3192     _task->drain_global_stack(true);
3193 
3194     // if the has_aborted flag has been raised, we need to bail out of
3195     // the iteration
3196     return !_task->has_aborted();
3197   }
3198 };
3199 
3200 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3201                                ConcurrentMark* cm,
3202                                CMTask* task)
3203   : _g1h(g1h), _cm(cm), _task(task) {
3204   assert(_ref_processor == NULL, "should be initialized to NULL");
3205 
3206   if (G1UseConcMarkReferenceProcessing) {
3207     _ref_processor = g1h->ref_processor_cm();
3208     assert(_ref_processor != NULL, "should not be NULL");
3209   }
3210 }
3211 
3212 void CMTask::setup_for_region(HeapRegion* hr) {
3213   assert(hr != NULL,
3214         "claim_region() should have filtered out NULL regions");
3215   assert(!hr->is_continues_humongous(),
3216         "claim_region() should have filtered out continues humongous regions");
3217 
3218   if (_cm->verbose_low()) {
3219     gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3220                            _worker_id, p2i(hr));
3221   }
3222 
3223   _curr_region  = hr;
3224   _finger       = hr->bottom();
3225   update_region_limit();
3226 }
3227 
3228 void CMTask::update_region_limit() {
3229   HeapRegion* hr            = _curr_region;
3230   HeapWord* bottom          = hr->bottom();
3231   HeapWord* limit           = hr->next_top_at_mark_start();
3232 
3233   if (limit == bottom) {
3234     if (_cm->verbose_low()) {
3235       gclog_or_tty->print_cr("[%u] found an empty region "
3236                              "["PTR_FORMAT", "PTR_FORMAT")",
3237                              _worker_id, p2i(bottom), p2i(limit));
3238     }
3239     // The region was collected underneath our feet.
3240     // We set the finger to bottom to ensure that the bitmap
3241     // iteration that will follow this will not do anything.
3242     // (this is not a condition that holds when we set the region up,
3243     // as the region is not supposed to be empty in the first place)
3244     _finger = bottom;
3245   } else if (limit >= _region_limit) {
3246     assert(limit >= _finger, "peace of mind");
3247   } else {
3248     assert(limit < _region_limit, "only way to get here");
3249     // This can happen under some pretty unusual circumstances.  An
3250     // evacuation pause empties the region underneath our feet (NTAMS
3251     // at bottom). We then do some allocation in the region (NTAMS
3252     // stays at bottom), followed by the region being used as a GC
3253     // alloc region (NTAMS will move to top() and the objects
3254     // originally below it will be grayed). All objects now marked in
3255     // the region are explicitly grayed, if below the global finger,
3256     // and we do not need in fact to scan anything else. So, we simply
3257     // set _finger to be limit to ensure that the bitmap iteration
3258     // doesn't do anything.
3259     _finger = limit;
3260   }
3261 
3262   _region_limit = limit;
3263 }
3264 
3265 void CMTask::giveup_current_region() {
3266   assert(_curr_region != NULL, "invariant");
3267   if (_cm->verbose_low()) {
3268     gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3269                            _worker_id, p2i(_curr_region));
3270   }
3271   clear_region_fields();
3272 }
3273 
3274 void CMTask::clear_region_fields() {
3275   // Values for these three fields that indicate that we're not
3276   // holding on to a region.
3277   _curr_region   = NULL;
3278   _finger        = NULL;
3279   _region_limit  = NULL;
3280 }
3281 
3282 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3283   if (cm_oop_closure == NULL) {
3284     assert(_cm_oop_closure != NULL, "invariant");
3285   } else {
3286     assert(_cm_oop_closure == NULL, "invariant");
3287   }
3288   _cm_oop_closure = cm_oop_closure;
3289 }
3290 
3291 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3292   guarantee(nextMarkBitMap != NULL, "invariant");
3293 
3294   if (_cm->verbose_low()) {
3295     gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3296   }
3297 
3298   _nextMarkBitMap                = nextMarkBitMap;
3299   clear_region_fields();
3300 
3301   _calls                         = 0;
3302   _elapsed_time_ms               = 0.0;
3303   _termination_time_ms           = 0.0;
3304   _termination_start_time_ms     = 0.0;
3305 
3306 #if _MARKING_STATS_
3307   _aborted                       = 0;
3308   _aborted_overflow              = 0;
3309   _aborted_cm_aborted            = 0;
3310   _aborted_yield                 = 0;
3311   _aborted_timed_out             = 0;
3312   _aborted_satb                  = 0;
3313   _aborted_termination           = 0;
3314   _steal_attempts                = 0;
3315   _steals                        = 0;
3316   _local_pushes                  = 0;
3317   _local_pops                    = 0;
3318   _local_max_size                = 0;
3319   _objs_scanned                  = 0;
3320   _global_pushes                 = 0;
3321   _global_pops                   = 0;
3322   _global_max_size               = 0;
3323   _global_transfers_to           = 0;
3324   _global_transfers_from         = 0;
3325   _regions_claimed               = 0;
3326   _objs_found_on_bitmap          = 0;
3327   _satb_buffers_processed        = 0;
3328 #endif // _MARKING_STATS_
3329 }
3330 
3331 bool CMTask::should_exit_termination() {
3332   regular_clock_call();
3333   // This is called when we are in the termination protocol. We should
3334   // quit if, for some reason, this task wants to abort or the global
3335   // stack is not empty (this means that we can get work from it).
3336   return !_cm->mark_stack_empty() || has_aborted();
3337 }
3338 
3339 void CMTask::reached_limit() {
3340   assert(_words_scanned >= _words_scanned_limit ||
3341          _refs_reached >= _refs_reached_limit ,
3342          "shouldn't have been called otherwise");
3343   regular_clock_call();
3344 }
3345 
3346 void CMTask::regular_clock_call() {
3347   if (has_aborted()) return;
3348 
3349   // First, we need to recalculate the words scanned and refs reached
3350   // limits for the next clock call.
3351   recalculate_limits();
3352 
3353   // During the regular clock call we do the following
3354 
3355   // (1) If an overflow has been flagged, then we abort.
3356   if (_cm->has_overflown()) {
3357     set_has_aborted();
3358     return;
3359   }
3360 
3361   // If we are not concurrent (i.e. we're doing remark) we don't need
3362   // to check anything else. The other steps are only needed during
3363   // the concurrent marking phase.
3364   if (!concurrent()) return;
3365 
3366   // (2) If marking has been aborted for Full GC, then we also abort.
3367   if (_cm->has_aborted()) {
3368     set_has_aborted();
3369     statsOnly( ++_aborted_cm_aborted );
3370     return;
3371   }
3372 
3373   double curr_time_ms = os::elapsedVTime() * 1000.0;
3374 
3375   // (3) If marking stats are enabled, then we update the step history.
3376 #if _MARKING_STATS_
3377   if (_words_scanned >= _words_scanned_limit) {
3378     ++_clock_due_to_scanning;
3379   }
3380   if (_refs_reached >= _refs_reached_limit) {
3381     ++_clock_due_to_marking;
3382   }
3383 
3384   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3385   _interval_start_time_ms = curr_time_ms;
3386   _all_clock_intervals_ms.add(last_interval_ms);
3387 
3388   if (_cm->verbose_medium()) {
3389       gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3390                         "scanned = "SIZE_FORMAT"%s, refs reached = "SIZE_FORMAT"%s",
3391                         _worker_id, last_interval_ms,
3392                         _words_scanned,
3393                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3394                         _refs_reached,
3395                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3396   }
3397 #endif // _MARKING_STATS_
3398 
3399   // (4) We check whether we should yield. If we have to, then we abort.
3400   if (SuspendibleThreadSet::should_yield()) {
3401     // We should yield. To do this we abort the task. The caller is
3402     // responsible for yielding.
3403     set_has_aborted();
3404     statsOnly( ++_aborted_yield );
3405     return;
3406   }
3407 
3408   // (5) We check whether we've reached our time quota. If we have,
3409   // then we abort.
3410   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3411   if (elapsed_time_ms > _time_target_ms) {
3412     set_has_aborted();
3413     _has_timed_out = true;
3414     statsOnly( ++_aborted_timed_out );
3415     return;
3416   }
3417 
3418   // (6) Finally, we check whether there are enough completed SATB
3419   // buffers available for processing. If there are, we abort.
3420   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3421   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3422     if (_cm->verbose_low()) {
3423       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3424                              _worker_id);
3425     }
3426     // We do need to process SATB buffers; we'll abort and restart
3427     // the marking task to do so
3428     set_has_aborted();
3429     statsOnly( ++_aborted_satb );
3430     return;
3431   }
3432 }
3433 
3434 void CMTask::recalculate_limits() {
3435   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3436   _words_scanned_limit      = _real_words_scanned_limit;
3437 
3438   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3439   _refs_reached_limit       = _real_refs_reached_limit;
3440 }
3441 
3442 void CMTask::decrease_limits() {
3443   // This is called when we believe that we're going to do an infrequent
3444   // operation which will increase the per-byte scanned cost (i.e. move
3445   // entries to/from the global stack). It basically tries to decrease the
3446   // scanning limit so that the clock is called earlier.
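       // After the adjustment below, at most a quarter of the normal
       // words_scanned_period / refs_reached_period remains before the next
       // clock call: the "real" limits were set by recalculate_limits() to
       // the counts at that point plus one full period.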
3447 
3448   if (_cm->verbose_medium()) {
3449     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3450   }
3451 
3452   _words_scanned_limit = _real_words_scanned_limit -
3453     3 * words_scanned_period / 4;
3454   _refs_reached_limit  = _real_refs_reached_limit -
3455     3 * refs_reached_period / 4;
3456 }
3457 
3458 void CMTask::move_entries_to_global_stack() {
3459   // local array where we'll store the entries that will be popped
3460   // from the local queue
3461   oop buffer[global_stack_transfer_size];
3462 
3463   int n = 0;
3464   oop obj;
3465   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3466     buffer[n] = obj;
3467     ++n;
3468   }
3469 
3470   if (n > 0) {
3471     // we popped at least one entry from the local queue
3472 
3473     statsOnly( ++_global_transfers_to; _local_pops += n );
3474 
3475     if (!_cm->mark_stack_push(buffer, n)) {
3476       if (_cm->verbose_low()) {
3477         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3478                                _worker_id);
3479       }
3480       set_has_aborted();
3481     } else {
3482       // the transfer was successful
3483 
3484       if (_cm->verbose_medium()) {
3485         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3486                                _worker_id, n);
3487       }
3488       statsOnly( size_t tmp_size = _cm->mark_stack_size();
3489                  if (tmp_size > _global_max_size) {
3490                    _global_max_size = tmp_size;
3491                  }
3492                  _global_pushes += n );
3493     }
3494   }
3495 
3496   // this operation was quite expensive, so decrease the limits
3497   decrease_limits();
3498 }
3499 
3500 void CMTask::get_entries_from_global_stack() {
3501   // local array where we'll store the entries that will be popped
3502   // from the global stack.
3503   oop buffer[global_stack_transfer_size];
3504   int n;
3505   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3506   assert(n <= global_stack_transfer_size,
3507          "we should not pop more than the given limit");
3508   if (n > 0) {
3509     // yes, we did actually pop at least one entry
3510 
3511     statsOnly( ++_global_transfers_from; _global_pops += n );
3512     if (_cm->verbose_medium()) {
3513       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3514                              _worker_id, n);
3515     }
3516     for (int i = 0; i < n; ++i) {
3517       bool success = _task_queue->push(buffer[i]);
3518       // We only call this when the local queue is empty or under a
3519       // given target limit. So, we do not expect this push to fail.
3520       assert(success, "invariant");
3521     }
3522 
3523     statsOnly( size_t tmp_size = (size_t)_task_queue->size();
3524                if (tmp_size > _local_max_size) {
3525                  _local_max_size = tmp_size;
3526                }
3527                _local_pushes += n );
3528   }
3529 
3530   // this operation was quite expensive, so decrease the limits
3531   decrease_limits();
3532 }
3533 
3534 void CMTask::drain_local_queue(bool partially) {
3535   if (has_aborted()) return;
3536 
3537   // Decide what the target size is, depending on whether we're going to
3538   // drain it partially (so that other tasks can steal if they run out
3539   // of things to do) or totally (at the very end).
3540   size_t target_size;
3541   if (partially) {
3542     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3543   } else {
3544     target_size = 0;
3545   }
3546 
3547   if (_task_queue->size() > target_size) {
3548     if (_cm->verbose_high()) {
3549       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3550                              _worker_id, target_size);
3551     }
3552 
3553     oop obj;
3554     bool ret = _task_queue->pop_local(obj);
3555     while (ret) {
3556       statsOnly( ++_local_pops );
3557 
3558       if (_cm->verbose_high()) {
3559         gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3560                                p2i((void*) obj));
3561       }
3562 
3563       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3564       assert(!_g1h->is_on_master_free_list(
3565                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3566 
3567       scan_object(obj);
3568 
3569       if (_task_queue->size() <= target_size || has_aborted()) {
3570         ret = false;
3571       } else {
3572         ret = _task_queue->pop_local(obj);
3573       }
3574     }
3575 
3576     if (_cm->verbose_high()) {
3577       gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3578                              _worker_id, _task_queue->size());
3579     }
3580   }
3581 }
3582 
3583 void CMTask::drain_global_stack(bool partially) {
3584   if (has_aborted()) return;
3585 
3586   // We have a policy to drain the local queue before we attempt to
3587   // drain the global stack.
3588   assert(partially || _task_queue->size() == 0, "invariant");
3589 
3590   // Decide what the target size is, depending on whether we're going to
3591   // drain it partially (so that other tasks can steal if they run out
3592   // of things to do) or totally (at the very end).  Notice that,
3593   // because we move entries from the global stack in chunks or
3594   // because another task might be doing the same, we might in fact
3595   // drop below the target. But, this is not a problem.
3596   size_t target_size;
3597   if (partially) {
3598     target_size = _cm->partial_mark_stack_size_target();
3599   } else {
3600     target_size = 0;
3601   }
3602 
3603   if (_cm->mark_stack_size() > target_size) {
3604     if (_cm->verbose_low()) {
3605       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3606                              _worker_id, target_size);
3607     }
3608 
3609     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3610       get_entries_from_global_stack();
3611       drain_local_queue(partially);
3612     }
3613 
3614     if (_cm->verbose_low()) {
3615       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3616                              _worker_id, _cm->mark_stack_size());
3617     }
3618   }
3619 }
3620 
3621 // The SATB queue code makes several assumptions about whether to call the
3622 // par or non-par versions of its methods. This is why some of the code is
3623 // replicated. We should really get rid of the single-threaded version
3624 // of the code to simplify things.
3625 void CMTask::drain_satb_buffers() {
3626   if (has_aborted()) return;
3627 
3628   // We set this so that the regular clock knows that we're in the
3629   // middle of draining buffers and doesn't set the abort flag when it
3630   // notices that SATB buffers are available for draining. It'd be
3631   // very counterproductive if it did that. :-)
3632   _draining_satb_buffers = true;
3633 
3634   CMSATBBufferClosure satb_cl(this, _g1h);
3635   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3636 
3637   // This keeps claiming and applying the closure to completed buffers
3638   // until we run out of buffers or we need to abort.
3639   while (!has_aborted() &&
3640          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3641     if (_cm->verbose_medium()) {
3642       gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3643     }
3644     statsOnly( ++_satb_buffers_processed );
3645     regular_clock_call();
3646   }
3647 
3648   _draining_satb_buffers = false;
3649 
3650   assert(has_aborted() ||
3651          concurrent() ||
3652          satb_mq_set.completed_buffers_num() == 0, "invariant");
3653 
3654   // Again, this was a potentially expensive operation, so decrease the
3655   // limits to get the regular clock call early.
3656   decrease_limits();
3657 }
3658 
3659 void CMTask::print_stats() {
3660   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3661                          _worker_id, _calls);
3662   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3663                          _elapsed_time_ms, _termination_time_ms);
3664   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3665                          _step_times_ms.num(), _step_times_ms.avg(),
3666                          _step_times_ms.sd());
3667   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3668                          _step_times_ms.maximum(), _step_times_ms.sum());
3669 
3670 #if _MARKING_STATS_
3671   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3672                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3673                          _all_clock_intervals_ms.sd());
3674   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3675                          _all_clock_intervals_ms.maximum(),
3676                          _all_clock_intervals_ms.sum());
3677   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = " SIZE_FORMAT ", marking = " SIZE_FORMAT,
3678                          _clock_due_to_scanning, _clock_due_to_marking);
3679   gclog_or_tty->print_cr("  Objects: scanned = " SIZE_FORMAT ", found on the bitmap = " SIZE_FORMAT,
3680                          _objs_scanned, _objs_found_on_bitmap);
3681   gclog_or_tty->print_cr("  Local Queue:  pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT,
3682                          _local_pushes, _local_pops, _local_max_size);
3683   gclog_or_tty->print_cr("  Global Stack: pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT,
3684                          _global_pushes, _global_pops, _global_max_size);
3685   gclog_or_tty->print_cr("                transfers to = " SIZE_FORMAT ", transfers from = " SIZE_FORMAT,
3686                          _global_transfers_to,_global_transfers_from);
3687   gclog_or_tty->print_cr("  Regions: claimed = " SIZE_FORMAT, _regions_claimed);
3688   gclog_or_tty->print_cr("  SATB buffers: processed = " SIZE_FORMAT, _satb_buffers_processed);
3689   gclog_or_tty->print_cr("  Steals: attempts = " SIZE_FORMAT ", successes = " SIZE_FORMAT,
3690                          _steal_attempts, _steals);
3691   gclog_or_tty->print_cr("  Aborted: " SIZE_FORMAT ", due to", _aborted);
3692   gclog_or_tty->print_cr("    overflow: " SIZE_FORMAT ", global abort: " SIZE_FORMAT ", yield: " SIZE_FORMAT,
3693                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3694   gclog_or_tty->print_cr("    time out: " SIZE_FORMAT ", SATB: " SIZE_FORMAT ", termination: " SIZE_FORMAT,
3695                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3696 #endif // _MARKING_STATS_
3697 }
3698 
3699 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3700   return _task_queues->steal(worker_id, hash_seed, obj);
3701 }
3702 
3703 /*****************************************************************************
3704 
3705     The do_marking_step(time_target_ms, ...) method is the building
3706     block of the parallel marking framework. It can be called in parallel
3707     with other invocations of do_marking_step() on different tasks
3708     (but only one per task, obviously) and concurrently with the
3709     mutator threads, or during remark, hence it eliminates the need
3710     for two versions of the code. When called during remark, it will
3711     pick up from where the task left off during the concurrent marking
3712     phase. Interestingly, tasks are also claimable during evacuation
3713     pauses, since do_marking_step() ensures that it aborts before
3714     it needs to yield.
3715 
3716     The data structures that it uses to do marking work are the
3717     following:
3718 
3719       (1) Marking Bitmap. If there are gray objects that appear only
3720       on the bitmap (this happens either when dealing with an overflow
3721       or when the initial marking phase has simply marked the roots
3722       and didn't push them on the stack), then tasks claim heap
3723       regions whose bitmap they then scan to find gray objects. A
3724       global finger indicates where the end of the last claimed region
3725       is. A local finger indicates how far into the region a task has
3726       scanned. The two fingers are used to determine how to gray an
3727       object (i.e. whether simply marking it is OK, as it will be
3728       visited by a task in the future, or whether it also needs to be
3729       pushed on a stack).
3730 
3731       (2) Local Queue. The local queue of the task which is accessed
3732       reasonably efficiently by the task. Other tasks can steal from
3733       it when they run out of work. Throughout the marking phase, a
3734       task attempts to keep its local queue short but not totally
3735       empty, so that entries are available for stealing by other
3736       tasks. Only when there is no more work will a task totally
3737       drain its local queue.
3738 
3739       (3) Global Mark Stack. This handles local queue overflow. During
3740       marking, only sets of entries are moved between it and the local
3741       queues, as access to it requires a mutex and more fine-grained
3742       interaction with it might cause contention. If it
3743       overflows, then the marking phase should restart and iterate
3744       over the bitmap to identify gray objects. Throughout the marking
3745       phase, tasks attempt to keep the global mark stack at a small
3746       length but not totally empty, so that entries are available for
3747       popping by other tasks. Only when there is no more work will tasks
3748       totally drain the global mark stack.
3749 
3750       (4) SATB Buffer Queue. This is where completed SATB buffers are
3751       made available. Buffers are regularly removed from this queue
3752       and scanned for roots, so that the queue doesn't get too
3753       long. During remark, all completed buffers are processed, as
3754       well as the filled in parts of any uncompleted buffers.
3755 
3756     The do_marking_step() method tries to abort when the time target
3757     has been reached. There are a few other cases when the
3758     do_marking_step() method also aborts:
3759 
3760       (1) When the marking phase has been aborted (after a Full GC).
3761 
3762       (2) When a global overflow (on the global stack) has been
3763       triggered. Before the task aborts, it will actually sync up with
3764       the other tasks to ensure that all the marking data structures
3765       (local queues, stacks, fingers etc.)  are re-initialized so that
3766       when do_marking_step() completes, the marking phase can
3767       immediately restart.
3768 
3769       (3) When enough completed SATB buffers are available. The
3770       do_marking_step() method only tries to drain SATB buffers right
3771       at the beginning. So, if enough buffers are available, the
3772       marking step aborts and the SATB buffers are processed at
3773       the beginning of the next invocation.
3774 
3775       (4) To yield. When we have to yield, we abort and yield
3776       right at the end of do_marking_step(). This saves us from a lot
3777       of hassle as, by yielding, we might allow a Full GC. If this
3778       happens then objects will be compacted underneath our feet, the
3779       heap might shrink, etc. We save checking for this by just
3780       aborting and doing the yield right at the end.
3781 
3782     From the above it follows that the do_marking_step() method should
3783     be called in a loop (or, otherwise, regularly) until it completes.
3784 
3785     If a marking step completes without its has_aborted() flag being
3786     true, it means it has completed the current marking phase (and
3787     also all other marking tasks have done so and have all synced up).
3788 
3789     A method called regular_clock_call() is invoked "regularly" (in
3790     sub ms intervals) throughout marking. It is this clock method that
3791     checks all the abort conditions which were mentioned above and
3792     decides when the task should abort. A work-based scheme is used to
3793     trigger this clock method: when the number of object words the
3794     marking phase has scanned or the number of references the marking
3795     phase has visited reach a given limit. Additional invocations of
3796     the clock method have been planted in a few other strategic places
3797     too. The initial reason for the clock method was to avoid calling
3798     vtime too regularly, as it is quite expensive. So, once it was in
3799     place, it was natural to piggy-back all the other conditions on it
3800     too and not constantly check them throughout the code.
3801 
3802     If do_termination is true then do_marking_step will enter its
3803     termination protocol.
3804 
3805     The value of is_serial must be true when do_marking_step is being
3806     called serially (i.e. by the VMThread) and do_marking_step should
3807     skip any synchronization in the termination and overflow code.
3808     Examples include the serial remark code and the serial reference
3809     processing closures.
3810 
3811     The value of is_serial must be false when do_marking_step is
3812     being called by any of the worker threads in a work gang.
3813     Examples include the concurrent marking code (CMMarkingTask),
3814     the MT remark code, and the MT reference processing closures.
3815 
3816  *****************************************************************************/
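     //
     // For illustration only -- a hedged sketch rather than the actual caller
     // code: the marking worker threads are expected to drive do_marking_step()
     // in a retry loop along these lines (target_ms, task and cm are
     // placeholder names), re-invoking it whenever it aborts due to yielding,
     // overflow, pending SATB buffers or the time quota, until it completes:
     //
     //   do {
     //     task->do_marking_step(target_ms,
     //                           true  /* do_termination */,
     //                           false /* is_serial */);
     //     // yield to a pending safepoint / handle overflow between attempts
     //   } while (task->has_aborted() && !cm->has_aborted());
     //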
3817 
3818 void CMTask::do_marking_step(double time_target_ms,
3819                              bool do_termination,
3820                              bool is_serial) {
3821   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3822   assert(concurrent() == _cm->concurrent(), "they should be the same");
3823 
3824   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3825   assert(_task_queues != NULL, "invariant");
3826   assert(_task_queue != NULL, "invariant");
3827   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3828 
3829   assert(!_claimed,
3830          "only one thread should claim this task at any one time");
3831 
3832   // OK, this doesn't safeguard against all possible scenarios, as it is
3833   // possible for two threads to set the _claimed flag at the same
3834   // time. But it is only for debugging purposes anyway and it will
3835   // catch most problems.
3836   _claimed = true;
3837 
3838   _start_time_ms = os::elapsedVTime() * 1000.0;
3839   statsOnly( _interval_start_time_ms = _start_time_ms );
3840 
3841   // If do_stealing is true then do_marking_step will attempt to
3842   // steal work from the other CMTasks. It only makes sense to
3843   // enable stealing when the termination protocol is enabled
3844   // and do_marking_step() is not being called serially.
3845   bool do_stealing = do_termination && !is_serial;
3846 
3847   double diff_prediction_ms =
3848     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3849   _time_target_ms = time_target_ms - diff_prediction_ms;
3850 
3851   // set up the variables that are used in the work-based scheme to
3852   // call the regular clock method
3853   _words_scanned = 0;
3854   _refs_reached  = 0;
3855   recalculate_limits();
3856 
3857   // clear all flags
3858   clear_has_aborted();
3859   _has_timed_out = false;
3860   _draining_satb_buffers = false;
3861 
3862   ++_calls;
3863 
3864   if (_cm->verbose_low()) {
3865     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
3866                            "target = %1.2lfms >>>>>>>>>>",
3867                            _worker_id, _calls, _time_target_ms);
3868   }
3869 
3870   // Set up the bitmap and oop closures. Anything that uses them is
3871   // eventually called from this method, so it is OK to allocate them
3872   // here as local variables.
3873   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3874   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3875   set_cm_oop_closure(&cm_oop_closure);
3876 
3877   if (_cm->has_overflown()) {
3878     // This can happen if the mark stack overflows during a GC pause
3879     // and this task, after a yield point, restarts. We have to abort
3880     // as we need to get into the overflow protocol which happens
3881     // right at the end of this task.
3882     set_has_aborted();
3883   }
3884 
3885   // First drain any available SATB buffers. After this, we will not
3886   // look at SATB buffers before the next invocation of this method.
3887   // If enough completed SATB buffers are queued up, the regular clock
3888   // will abort this task so that it restarts.
3889   drain_satb_buffers();
3890   // ...then partially drain the local queue and the global stack
3891   drain_local_queue(true);
3892   drain_global_stack(true);
3893 
3894   do {
3895     if (!has_aborted() && _curr_region != NULL) {
3896       // This means that we're already holding on to a region.
3897       assert(_finger != NULL, "if region is not NULL, then the finger "
3898              "should not be NULL either");
3899 
3900       // We might have restarted this task after an evacuation pause
3901       // which might have evacuated the region we're holding on to
3902       // underneath our feet. Let's read its limit again to make sure
3903       // that we do not iterate over a region of the heap that
3904       // contains garbage (update_region_limit() will also move
3905       // _finger to the start of the region if it is found empty).
3906       update_region_limit();
3907       // We will start from _finger not from the start of the region,
3908       // as we might be restarting this task after aborting half-way
3909       // through scanning this region. In this case, _finger points to
3910       // the address where we last found a marked object. If this is a
3911       // fresh region, _finger points to start().
3912       MemRegion mr = MemRegion(_finger, _region_limit);
3913 
3914       if (_cm->verbose_low()) {
3915         gclog_or_tty->print_cr("[%u] we're scanning part "
3916                                "["PTR_FORMAT", "PTR_FORMAT") "
3917                                "of region "HR_FORMAT,
3918                                _worker_id, p2i(_finger), p2i(_region_limit),
3919                                HR_FORMAT_PARAMS(_curr_region));
3920       }
3921 
3922       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
3923              "humongous regions should go around loop once only");
3924 
3925       // Some special cases:
3926       // If the memory region is empty, we can just give up the region.
3927       // If the current region is humongous then we only need to check
3928       // the bitmap for the bit associated with the start of the object,
3929       // scan the object if it's live, and give up the region.
3930       // Otherwise, let's iterate over the bitmap of the part of the region
3931       // that is left.
3932       // If the iteration is successful, give up the region.
3933       if (mr.is_empty()) {
3934         giveup_current_region();
3935         regular_clock_call();
3936       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
3937         if (_nextMarkBitMap->isMarked(mr.start())) {
3938           // The object is marked - apply the closure
3939           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
3940           bitmap_closure.do_bit(offset);
3941         }
3942         // Even if this task aborted while scanning the humongous object
3943         // we can (and should) give up the current region.
3944         giveup_current_region();
3945         regular_clock_call();
3946       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3947         giveup_current_region();
3948         regular_clock_call();
3949       } else {
3950         assert(has_aborted(), "currently the only way to do so");
3951         // The only way to abort the bitmap iteration is to return
3952         // false from the do_bit() method. However, inside the
3953         // do_bit() method we move the _finger to point to the
3954         // object currently being looked at. So, if we bail out, we
3955         // have definitely set _finger to something non-null.
3956         assert(_finger != NULL, "invariant");
3957 
3958         // Region iteration was actually aborted. So now _finger
3959         // points to the address of the object we last scanned. If we
3960         // leave it there, when we restart this task, we will rescan
3961         // the object. It is easy to avoid this. We move the finger by
3962         // enough to point to the next possible object header (the
3963         // bitmap knows by how much we need to move it as it knows its
3964         // granularity).
3965         assert(_finger < _region_limit, "invariant");
3966         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
3967         // Check if bitmap iteration was aborted while scanning the last object
3968         if (new_finger >= _region_limit) {
3969           giveup_current_region();
3970         } else {
3971           move_finger_to(new_finger);
3972         }
3973       }
3974     }
3975     // At this point we have either completed iterating over the
3976     // region we were holding on to, or we have aborted.
3977 
3978     // We then partially drain the local queue and the global stack.
3979     // (Do we really need this?)
3980     drain_local_queue(true);
3981     drain_global_stack(true);
3982 
3983     // Read the note on the claim_region() method on why it might
3984     // return NULL with potentially more regions available for
3985     // claiming and why we have to check out_of_regions() to determine
3986     // whether we're done or not.
3987     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3988       // We are going to try to claim a new region. We should have
3989       // given up on the previous one.
3990       // Separated the asserts so that we know which one fires.
3991       assert(_curr_region  == NULL, "invariant");
3992       assert(_finger       == NULL, "invariant");
3993       assert(_region_limit == NULL, "invariant");
3994       if (_cm->verbose_low()) {
3995         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
3996       }
3997       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3998       if (claimed_region != NULL) {
3999         // Yes, we managed to claim one
4000         statsOnly( ++_regions_claimed );
4001 
4002         if (_cm->verbose_low()) {
4003           gclog_or_tty->print_cr("[%u] we successfully claimed "
4004                                  "region "PTR_FORMAT,
4005                                  _worker_id, p2i(claimed_region));
4006         }
4007 
4008         setup_for_region(claimed_region);
4009         assert(_curr_region == claimed_region, "invariant");
4010       }
4011       // It is important to call the regular clock here. It might take
4012       // a while to claim a region if, for example, we hit a large
4013       // block of empty regions. So we need to call the regular clock
4014       // method once round the loop to make sure it's called
4015       // frequently enough.
4016       regular_clock_call();
4017     }
4018 
4019     if (!has_aborted() && _curr_region == NULL) {
4020       assert(_cm->out_of_regions(),
4021              "at this point we should be out of regions");
4022     }
4023   } while ( _curr_region != NULL && !has_aborted());
4024 
4025   if (!has_aborted()) {
4026     // We cannot check whether the global stack is empty, since other
4027     // tasks might be pushing objects to it concurrently.
4028     assert(_cm->out_of_regions(),
4029            "at this point we should be out of regions");
4030 
4031     if (_cm->verbose_low()) {
4032       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4033     }
4034 
4035     // Try to reduce the number of available SATB buffers so that
4036     // remark has less work to do.
4037     drain_satb_buffers();
4038   }
4039 
4040   // Since we've done everything else, we can now totally drain the
4041   // local queue and global stack.
4042   drain_local_queue(false);
4043   drain_global_stack(false);
4044 
4045   // Attempt to steal work from other tasks' queues.
4046   if (do_stealing && !has_aborted()) {
4047     // We have not aborted. This means that we have finished all that
4048     // we could. Let's try to do some stealing...
4049 
4050     // We cannot check whether the global stack is empty, since other
4051     // tasks might be pushing objects to it concurrently.
4052     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4053            "only way to reach here");
4054 
4055     if (_cm->verbose_low()) {
4056       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4057     }
4058 
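    // (Each iteration below asks try_stealing() for an entry from some other
    // task's queue -- presumably picking victims using _hash_seed -- and, if
    // one is found, scans it and drains our own queues again; we stop once
    // no queue yields an entry or we are asked to abort.)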
4059     while (!has_aborted()) {
4060       oop obj;
4061       statsOnly( ++_steal_attempts );
4062 
4063       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4064         if (_cm->verbose_medium()) {
4065           gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4066                                  _worker_id, p2i((void*) obj));
4067         }
4068 
4069         statsOnly( ++_steals );
4070 
4071         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4072                "any stolen object should be marked");
4073         scan_object(obj);
4074 
4075         // And since we're towards the end, let's totally drain the
4076         // local queue and global stack.
4077         drain_local_queue(false);
4078         drain_global_stack(false);
4079       } else {
4080         break;
4081       }
4082     }
4083   }
4084 
4085   // If we are about to wrap up and go into termination, check if we
4086   // should raise the overflow flag.
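  // (force_overflow() is presumably a stress/testing hook that lets us
  // exercise the overflow-and-restart machinery even when the global mark
  // stack has not actually overflown.)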
4087   if (do_termination && !has_aborted()) {
4088     if (_cm->force_overflow()->should_force()) {
4089       _cm->set_has_overflown();
4090       regular_clock_call();
4091     }
4092   }
4093 
4094   // We still haven't aborted. Now, let's try to get into the
4095   // termination protocol.
4096   if (do_termination && !has_aborted()) {
4097     // We cannot check whether the global stack is empty, since other
4098     // tasks might be concurrently pushing objects on it.
4099     // Separated the asserts so that we know which one fires.
4100     assert(_cm->out_of_regions(), "only way to reach here");
4101     assert(_task_queue->size() == 0, "only way to reach here");
4102 
4103     if (_cm->verbose_low()) {
4104       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4105     }
4106 
4107     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4108 
4109     // The CMTask class also extends the TerminatorTerminator class,
4110     // so its should_exit_termination() method will also be consulted
4111     // when deciding whether to exit the termination protocol.
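    // (offer_termination() presumably blocks or spins this worker until
    // either every task has offered termination, in which case it returns
    // true, or should_exit_termination() asks us to leave / more work shows
    // up, in which case it returns false and we abort below.)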
4112     bool finished = (is_serial ||
4113                      _cm->terminator()->offer_termination(this));
4114     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4115     _termination_time_ms +=
4116       termination_end_time_ms - _termination_start_time_ms;
4117 
4118     if (finished) {
4119       // We're all done.
4120 
4121       if (_worker_id == 0) {
4122         // let's allow task 0 to do this
4123         if (concurrent()) {
4124           assert(_cm->concurrent_marking_in_progress(), "invariant");
4125           // we need to set this to false before the next
4126           // safepoint. This way we ensure that the marking phase
4127           // doesn't observe any more heap expansions.
4128           _cm->clear_concurrent_marking_in_progress();
4129         }
4130       }
4131 
4132       // We can now guarantee that the global stack is empty, since
4133       // all other tasks have finished. We separated the guarantees so
4134       // that, if a condition is false, we can immediately find out
4135       // which one.
4136       guarantee(_cm->out_of_regions(), "only way to reach here");
4137       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4138       guarantee(_task_queue->size() == 0, "only way to reach here");
4139       guarantee(!_cm->has_overflown(), "only way to reach here");
4140       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4141 
4142       if (_cm->verbose_low()) {
4143         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4144       }
4145     } else {
4146       // Apparently there's more work to do. Let's abort this task; marking
4147       // will restart it and we can hopefully find more things to do.
4148 
4149       if (_cm->verbose_low()) {
4150         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4151                                _worker_id);
4152       }
4153 
4154       set_has_aborted();
4155       statsOnly( ++_aborted_termination );
4156     }
4157   }
4158 
4159   // Mainly for debugging purposes to make sure that a pointer to the
4160   // closure which was stack-allocated in this frame doesn't
4161   // escape it by accident.
4162   set_cm_oop_closure(NULL);
4163   double end_time_ms = os::elapsedVTime() * 1000.0;
4164   double elapsed_time_ms = end_time_ms - _start_time_ms;
4165   // Update the step history.
4166   _step_times_ms.add(elapsed_time_ms);
4167 
4168   if (has_aborted()) {
4169     // The task was aborted for some reason.
4170 
4171     statsOnly( ++_aborted );
4172 
4173     if (_has_timed_out) {
4174       double diff_ms = elapsed_time_ms - _time_target_ms;
4175       // Keep statistics of how well we did with respect to hitting
4176       // our target only if we actually timed out (if we aborted for
4177       // other reasons, then the results might get skewed).
4178       _marking_step_diffs_ms.add(diff_ms);
4179     }
4180 
4181     if (_cm->has_overflown()) {
4182       // This is the interesting one. We aborted because a global
4183       // overflow was raised. This means we have to restart the
4184       // marking phase and start iterating over regions. However, in
4185       // order to do this we have to make sure that all tasks stop
4186       // what they are doing and re-initialize in a safe manner. We
4187       // will achieve this with the use of two barrier sync points.
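      // (Sketch of the protocol: the first barrier guarantees that every
      // task has stopped marking; each task then clears its local state;
      // the second barrier guarantees that everyone has re-initialized
      // before marking restarts, with task 0 additionally clearing the
      // global data structures as noted below.)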
4188 
4189       if (_cm->verbose_low()) {
4190         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4191       }
4192 
4193       if (!is_serial) {
4194         // We only need to enter the sync barrier if we are being called
4195         // from a parallel context.
4196         _cm->enter_first_sync_barrier(_worker_id);
4197 
4198         // When we exit this sync barrier we know that all tasks have
4199         // stopped doing marking work. So, it's now safe to
4200         // re-initialize our data structures. At the end of this method,
4201         // task 0 will clear the global data structures.
4202       }
4203 
4204       statsOnly( ++_aborted_overflow );
4205 
4206       // We clear the local state of this task...
4207       clear_region_fields();
4208 
4209       if (!is_serial) {
4210         // ...and enter the second barrier.
4211         _cm->enter_second_sync_barrier(_worker_id);
4212       }
4213       // At this point, if we're in the concurrent phase of
4214       // marking, everything has been re-initialized and we're
4215       // ready to restart.
4216     }
4217 
4218     if (_cm->verbose_low()) {
4219       gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4220                              "elapsed = %1.2lfms <<<<<<<<<<",
4221                              _worker_id, _time_target_ms, elapsed_time_ms);
4222       if (_cm->has_aborted()) {
4223         gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4224                                _worker_id);
4225       }
4226     }
4227   } else {
4228     if (_cm->verbose_low()) {
4229       gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4230                              "elapsed = %1.2lfms <<<<<<<<<<",
4231                              _worker_id, _time_target_ms, elapsed_time_ms);
4232     }
4233   }
4234 
4235   _claimed = false;
4236 }
4237 
4238 CMTask::CMTask(uint worker_id,
4239                ConcurrentMark* cm,
4240                size_t* marked_bytes,
4241                BitMap* card_bm,
4242                CMTaskQueue* task_queue,
4243                CMTaskQueueSet* task_queues)
4244   : _g1h(G1CollectedHeap::heap()),
4245     _worker_id(worker_id), _cm(cm),
4246     _claimed(false),
4247     _nextMarkBitMap(NULL), _hash_seed(17),
4248     _task_queue(task_queue),
4249     _task_queues(task_queues),
4250     _cm_oop_closure(NULL),
4251     _marked_bytes_array(marked_bytes),
4252     _card_bm(card_bm) {
4253   guarantee(task_queue != NULL, "invariant");
4254   guarantee(task_queues != NULL, "invariant");
4255 
4256   statsOnly( _clock_due_to_scanning = 0;
4257              _clock_due_to_marking  = 0 );
4258 
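  // Seed the step-diff statistics with a small value, presumably so that
  // the very first time-target predictions are not based on an empty
  // sample set.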
4259   _marking_step_diffs_ms.add(0.5);
4260 }
4261 
4262 // These are formatting macros that are used below to ensure
4263 // consistent output. The *_H_* versions format the header for a
4264 // particular value and should be kept consistent with the
4265 // corresponding value macro. Also note that most of the macros add
4266 // the necessary white space (as a prefix), which makes them a bit
4267 // easier to compose.
4268 
4269 // All the output lines are prefixed with this string to be able to
4270 // identify them easily in a large log file.
4271 #define G1PPRL_LINE_PREFIX            "###"
4272 
4273 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
4274 #ifdef _LP64
4275 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4276 #else // _LP64
4277 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4278 #endif // _LP64
4279 
4280 // For per-region info
4281 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4282 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4283 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
4284 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4285 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4286 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4287 
4288 // For summary info
4289 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
4290 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
4291 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
4292 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
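// For illustration only: using the macros above, doHeapRegion() below prints
// one line per region that looks roughly like
//   ###  <type> <bottom>-<end>  <used>  <prev-live>  <next-live>  <gc-eff>  <remset>  <code-roots>
// with the corresponding headers printed by the constructor.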
4293 
4294 G1PrintRegionLivenessInfoClosure::
4295 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4296   : _out(out),
4297     _total_used_bytes(0), _total_capacity_bytes(0),
4298     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4299     _hum_used_bytes(0), _hum_capacity_bytes(0),
4300     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
4301     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
4302   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4303   MemRegion g1_reserved = g1h->g1_reserved();
4304   double now = os::elapsedTime();
4305 
4306   // Print the header of the output.
4307   _out->cr();
4308   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4309   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4310                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4311                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4312                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4313                  HeapRegion::GrainBytes);
4314   _out->print_cr(G1PPRL_LINE_PREFIX);
4315   _out->print_cr(G1PPRL_LINE_PREFIX
4316                 G1PPRL_TYPE_H_FORMAT
4317                 G1PPRL_ADDR_BASE_H_FORMAT
4318                 G1PPRL_BYTE_H_FORMAT
4319                 G1PPRL_BYTE_H_FORMAT
4320                 G1PPRL_BYTE_H_FORMAT
4321                 G1PPRL_DOUBLE_H_FORMAT
4322                 G1PPRL_BYTE_H_FORMAT
4323                 G1PPRL_BYTE_H_FORMAT,
4324                 "type", "address-range",
4325                 "used", "prev-live", "next-live", "gc-eff",
4326                 "remset", "code-roots");
4327   _out->print_cr(G1PPRL_LINE_PREFIX
4328                 G1PPRL_TYPE_H_FORMAT
4329                 G1PPRL_ADDR_BASE_H_FORMAT
4330                 G1PPRL_BYTE_H_FORMAT
4331                 G1PPRL_BYTE_H_FORMAT
4332                 G1PPRL_BYTE_H_FORMAT
4333                 G1PPRL_DOUBLE_H_FORMAT
4334                 G1PPRL_BYTE_H_FORMAT
4335                 G1PPRL_BYTE_H_FORMAT,
4336                 "", "",
4337                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4338                 "(bytes)", "(bytes)");
4339 }
4340 
4341 // It takes as a parameter a pointer to one of the _hum_* fields. It
4342 // deduces the corresponding value for a region in a humongous region
4343 // series (either the region size, or what's left if the _hum_* field
4344 // is < the region size), and updates the _hum_* field accordingly.
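// For example (illustrative, and assuming a 1 MB region size): if *hum_bytes
// starts at 2.5 MB, three successive calls return 1 MB, 1 MB and 0.5 MB,
// leaving *hum_bytes at 0.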
4345 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4346   size_t bytes = 0;
4347   // The > 0 check is to deal with the prev and next live bytes which
4348   // could be 0.
4349   if (*hum_bytes > 0) {
4350     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4351     *hum_bytes -= bytes;
4352   }
4353   return bytes;
4354 }
4355 
4356 // It deduces the values for a region in a humongous region series
4357 // from the _hum_* fields and updates those accordingly. It assumes
4358 // that the _hum_* fields have already been set up from the "starts
4359 // humongous" region and that we visit the regions in address order.
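// Note that the capacity of the series is a whole number of regions, so each
// region in the series should get a full GrainBytes of capacity, whereas the
// used / prev-live / next-live counts are simply drawn down until they run
// out; the trailing regions of a series may therefore report zero live bytes.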
4360 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4361                                                      size_t* capacity_bytes,
4362                                                      size_t* prev_live_bytes,
4363                                                      size_t* next_live_bytes) {
4364   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4365   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4366   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4367   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4368   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4369 }
4370 
4371 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4372   const char* type       = r->get_type_str();
4373   HeapWord* bottom       = r->bottom();
4374   HeapWord* end          = r->end();
4375   size_t capacity_bytes  = r->capacity();
4376   size_t used_bytes      = r->used();
4377   size_t prev_live_bytes = r->live_bytes();
4378   size_t next_live_bytes = r->next_live_bytes();
4379   double gc_eff          = r->gc_efficiency();
4380   size_t remset_bytes    = r->rem_set()->mem_size();
4381   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4382 
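  // For a humongous series the "starts humongous" region carries the totals
  // for the whole object (note how its end() spans the entire series and is
  // clamped to a single region for printing below), so we stash those totals
  // in the _hum_* fields and dole them out region by region as the series is
  // visited in address order.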
4383   if (r->is_starts_humongous()) {
4384     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4385            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4386            "they should have been zeroed after the last time we used them");
4387     // Set up the _hum_* fields.
4388     _hum_capacity_bytes  = capacity_bytes;
4389     _hum_used_bytes      = used_bytes;
4390     _hum_prev_live_bytes = prev_live_bytes;
4391     _hum_next_live_bytes = next_live_bytes;
4392     get_hum_bytes(&used_bytes, &capacity_bytes,
4393                   &prev_live_bytes, &next_live_bytes);
4394     end = bottom + HeapRegion::GrainWords;
4395   } else if (r->is_continues_humongous()) {
4396     get_hum_bytes(&used_bytes, &capacity_bytes,
4397                   &prev_live_bytes, &next_live_bytes);
4398     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4399   }
4400 
4401   _total_used_bytes      += used_bytes;
4402   _total_capacity_bytes  += capacity_bytes;
4403   _total_prev_live_bytes += prev_live_bytes;
4404   _total_next_live_bytes += next_live_bytes;
4405   _total_remset_bytes    += remset_bytes;
4406   _total_strong_code_roots_bytes += strong_code_roots_bytes;
4407 
4408   // Print a line for this particular region.
4409   _out->print_cr(G1PPRL_LINE_PREFIX
4410                  G1PPRL_TYPE_FORMAT
4411                  G1PPRL_ADDR_BASE_FORMAT
4412                  G1PPRL_BYTE_FORMAT
4413                  G1PPRL_BYTE_FORMAT
4414                  G1PPRL_BYTE_FORMAT
4415                  G1PPRL_DOUBLE_FORMAT
4416                  G1PPRL_BYTE_FORMAT
4417                  G1PPRL_BYTE_FORMAT,
4418                  type, p2i(bottom), p2i(end),
4419                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4420                  remset_bytes, strong_code_roots_bytes);
4421 
4422   return false;
4423 }
4424 
4425 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4426   // Add static memory usage to the remembered set sizes.
4427   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
4428   // Print the footer of the output.
4429   _out->print_cr(G1PPRL_LINE_PREFIX);
4430   _out->print_cr(G1PPRL_LINE_PREFIX
4431                  " SUMMARY"
4432                  G1PPRL_SUM_MB_FORMAT("capacity")
4433                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4434                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4435                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
4436                  G1PPRL_SUM_MB_FORMAT("remset")
4437                  G1PPRL_SUM_MB_FORMAT("code-roots"),
4438                  bytes_to_mb(_total_capacity_bytes),
4439                  bytes_to_mb(_total_used_bytes),
4440                  perc(_total_used_bytes, _total_capacity_bytes),
4441                  bytes_to_mb(_total_prev_live_bytes),
4442                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4443                  bytes_to_mb(_total_next_live_bytes),
4444                  perc(_total_next_live_bytes, _total_capacity_bytes),
4445                  bytes_to_mb(_total_remset_bytes),
4446                  bytes_to_mb(_total_strong_code_roots_bytes));
4447   _out->cr();
4448 }