1 /*
   2  * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMark.inline.hpp"
  30 #include "gc/g1/concurrentMarkThread.inline.hpp"
  31 #include "gc/g1/g1CollectedHeap.inline.hpp"
  32 #include "gc/g1/g1CollectorPolicy.hpp"
  33 #include "gc/g1/g1ErgoVerbose.hpp"
  34 #include "gc/g1/g1Log.hpp"
  35 #include "gc/g1/g1OopClosures.inline.hpp"
  36 #include "gc/g1/g1RemSet.hpp"
  37 #include "gc/g1/g1StringDedup.hpp"
  38 #include "gc/g1/heapRegion.inline.hpp"
  39 #include "gc/g1/heapRegionManager.inline.hpp"
  40 #include "gc/g1/heapRegionRemSet.hpp"
  41 #include "gc/g1/heapRegionSet.inline.hpp"
  42 #include "gc/g1/suspendibleThreadSet.hpp"
  43 #include "gc/shared/gcTimer.hpp"
  44 #include "gc/shared/gcTrace.hpp"
  45 #include "gc/shared/gcTraceTime.hpp"
  46 #include "gc/shared/genOopClosures.inline.hpp"
  47 #include "gc/shared/referencePolicy.hpp"
  48 #include "gc/shared/strongRootsScope.hpp"
  49 #include "gc/shared/taskqueue.inline.hpp"
  50 #include "gc/shared/vmGCOperations.hpp"
  51 #include "memory/allocation.hpp"
  52 #include "memory/resourceArea.hpp"
  53 #include "oops/oop.inline.hpp"
  54 #include "runtime/atomic.inline.hpp"
  55 #include "runtime/handles.inline.hpp"
  56 #include "runtime/java.hpp"
  57 #include "runtime/prefetch.inline.hpp"
  58 #include "services/memTracker.hpp"
  59 
  60 // Concurrent marking bit map wrapper
  61 
  62 CMBitMapRO::CMBitMapRO(int shifter) :
  63   _bm(),
  64   _shifter(shifter) {
  65   _bmStartWord = 0;
  66   _bmWordSize = 0;
  67 }
  68 
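     // Returns the address of the first marked word at or above "addr" and
     // below "limit" (the end of the bitmap if "limit" is NULL). "addr" is
     // first rounded up to a possible object boundary; "limit" is returned
     // if no marked word is found in the range.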
  69 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  70                                                const HeapWord* limit) const {
  71   // First we must round addr *up* to a possible object boundary.
  72   addr = (HeapWord*)align_size_up((intptr_t)addr,
  73                                   HeapWordSize << _shifter);
  74   size_t addrOffset = heapWordToOffset(addr);
  75   if (limit == NULL) {
  76     limit = _bmStartWord + _bmWordSize;
  77   }
  78   size_t limitOffset = heapWordToOffset(limit);
  79   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  80   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  81   assert(nextAddr >= addr, "get_next_one postcondition");
  82   assert(nextAddr == limit || isMarked(nextAddr),
  83          "get_next_one postcondition");
  84   return nextAddr;
  85 }
  86 
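     // Returns the address of the first unmarked word at or above "addr" and
     // below "limit" (the end of the bitmap if "limit" is NULL); "limit" is
     // returned if every word in the range is marked.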
  87 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
  88                                                  const HeapWord* limit) const {
  89   size_t addrOffset = heapWordToOffset(addr);
  90   if (limit == NULL) {
  91     limit = _bmStartWord + _bmWordSize;
  92   }
  93   size_t limitOffset = heapWordToOffset(limit);
  94   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  95   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  96   assert(nextAddr >= addr, "get_next_one postcondition");
  97   assert(nextAddr == limit || !isMarked(nextAddr),
  98          "get_next_one postcondition");
  99   return nextAddr;
 100 }
 101 
 102 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 103   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 104   return (int) (diff >> _shifter);
 105 }
 106 
 107 #ifndef PRODUCT
 108 bool CMBitMapRO::covers(MemRegion heap_rs) const {
 109   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 110   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
 111          "size inconsistency");
 112   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
 113          _bmWordSize  == heap_rs.word_size();
 114 }
 115 #endif
 116 
 117 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
 118   _bm.print_on_error(st, prefix);
 119 }
 120 
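     // Size, in bytes, of the bitmap needed to cover "heap_size" bytes of
     // heap (one bitmap byte per mark_distance() heap bytes), rounded up to
     // the reservation alignment.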
 121 size_t CMBitMap::compute_size(size_t heap_size) {
 122   return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
 123 }
 124 
 125 size_t CMBitMap::mark_distance() {
 126   return MinObjAlignmentInBytes * BitsPerByte;
 127 }
 128 
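     // Binds the bitmap to the heap range "heap", using "storage" as its
     // backing memory, and registers a listener so that bits covering newly
     // committed (and not already zero-filled) backing pages get cleared.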
 129 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 130   _bmStartWord = heap.start();
 131   _bmWordSize = heap.word_size();
 132 
 133   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
 134   _bm.set_size(_bmWordSize >> _shifter);
 135 
 136   storage->set_mapping_changed_listener(&_listener);
 137 }
 138 
 139 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 140   if (zero_filled) {
 141     return;
 142   }
 143   // We need to clear the bitmap on commit, removing any existing information.
 144   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 145   _bm->clearRange(mr);
 146 }
 147 
 148 // Closure used for clearing the given mark bitmap.
 149 class ClearBitmapHRClosure : public HeapRegionClosure {
 150  private:
 151   ConcurrentMark* _cm;
 152   CMBitMap* _bitmap;
 153   bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 154  public:
 155   ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
 156     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
 157   }
 158 
 159   virtual bool doHeapRegion(HeapRegion* r) {
 160     size_t const chunk_size_in_words = M / HeapWordSize;
 161 
 162     HeapWord* cur = r->bottom();
 163     HeapWord* const end = r->end();
 164 
 165     while (cur < end) {
 166       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 167       _bitmap->clearRange(mr);
 168 
 169       cur += chunk_size_in_words;
 170 
 171       // Abort iteration if after yielding the marking has been aborted.
 172       if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
 173         return true;
 174       }
 175       // Repeat the asserts from before the start of the closure. We will do them
 176       // as asserts here to minimize their overhead on the product. However, we
 177       // will have them as guarantees at the beginning / end of the bitmap
 178       // clearing to get some checking in the product.
 179       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
 180       assert(!_may_yield || !G1CollectedHeap::heap()->mark_in_progress(), "invariant");
 181     }
 182 
 183     return false;
 184   }
 185 };
 186 
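     // Gang task that clears a mark bitmap in parallel: each worker applies
     // the given ClearBitmapHRClosure to the heap regions it claims. If the
     // task is suspendible, workers join the suspendible thread set so that
     // the work can yield to safepoints.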
 187 class ParClearNextMarkBitmapTask : public AbstractGangTask {
 188   ClearBitmapHRClosure* _cl;
 189   HeapRegionClaimer     _hrclaimer;
 190   bool                  _suspendible; // If the task is suspendible, workers must join the STS.
 191 
 192 public:
 193   ParClearNextMarkBitmapTask(ClearBitmapHRClosure* cl, uint n_workers, bool suspendible) :
 194       AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}
 195 
 196   void work(uint worker_id) {
 197     SuspendibleThreadSetJoiner sts_join(_suspendible);
 198     G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
 199   }
 200 };
 201 
 202 void CMBitMap::clearAll() {
 203   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 204   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
 205   uint n_workers = g1h->workers()->active_workers();
 206   ParClearNextMarkBitmapTask task(&cl, n_workers, false);
 207   g1h->workers()->run_task(&task);
 208   guarantee(cl.complete(), "Must have completed iteration.");
 209   return;
 210 }
 211 
 212 void CMBitMap::markRange(MemRegion mr) {
 213   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 214   assert(!mr.is_empty(), "unexpected empty region");
 215   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 216           ((HeapWord *) mr.end())),
 217          "markRange memory region end is not card aligned");
 218   // convert address range into offset range
 219   _bm.at_put_range(heapWordToOffset(mr.start()),
 220                    heapWordToOffset(mr.end()), true);
 221 }
 222 
 223 void CMBitMap::clearRange(MemRegion mr) {
 224   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 225   assert(!mr.is_empty(), "unexpected empty region");
 226   // convert address range into offset range
 227   _bm.at_put_range(heapWordToOffset(mr.start()),
 228                    heapWordToOffset(mr.end()), false);
 229 }
 230 
 231 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 232                                             HeapWord* end_addr) {
 233   HeapWord* start = getNextMarkedWordAddress(addr);
 234   start = MIN2(start, end_addr);
 235   HeapWord* end   = getNextUnmarkedWordAddress(start);
 236   end = MIN2(end, end_addr);
 237   assert(start <= end, "Consistency check");
 238   MemRegion mr(start, end);
 239   if (!mr.is_empty()) {
 240     clearRange(mr);
 241   }
 242   return mr;
 243 }
 244 
 245 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 246   _base(NULL), _cm(cm)
 247 #ifdef ASSERT
 248   , _drain_in_progress(false)
 249   , _drain_in_progress_yields(false)
 250 #endif
 251 {}
 252 
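     // Reserves and commits the backing store for a global mark stack that
     // can hold "capacity" oops. Returns false, after printing a warning, if
     // either the reservation or the commit fails.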
 253 bool CMMarkStack::allocate(size_t capacity) {
 254   // allocate a stack of the requisite depth
 255   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 256   if (!rs.is_reserved()) {
 257     warning("ConcurrentMark MarkStack allocation failure");
 258     return false;
 259   }
 260   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 261   if (!_virtual_space.initialize(rs, rs.size())) {
 262     warning("ConcurrentMark MarkStack backing store failure");
 263     // Release the virtual memory reserved for the marking stack
 264     rs.release();
 265     return false;
 266   }
 267   assert(_virtual_space.committed_size() == rs.size(),
 268          "Didn't reserve backing store for all of ConcurrentMark stack?");
 269   _base = (oop*) _virtual_space.low();
 270   setEmpty();
 271   _capacity = (jint) capacity;
 272   _saved_index = -1;
 273   _should_expand = false;
 274   return true;
 275 }
 276 
 277 void CMMarkStack::expand() {
 278   // Called, during remark, if we've overflown the marking stack during marking.
 279   assert(isEmpty(), "stack should have been emptied while handling overflow");
 280   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 281   // Clear expansion flag
 282   _should_expand = false;
 283   if (_capacity == (jint) MarkStackSizeMax) {
 284     if (PrintGCDetails && Verbose) {
 285       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
 286     }
 287     return;
 288   }
 289   // Double capacity if possible
 290   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
 291   // Do not give up the existing stack until we have managed to
 292   // reserve the doubled capacity that we want.
 293   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 294                                                            sizeof(oop)));
 295   if (rs.is_reserved()) {
 296     // Release the backing store associated with old stack
 297     _virtual_space.release();
 298     // Reinitialize virtual space for new stack
 299     if (!_virtual_space.initialize(rs, rs.size())) {
 300       fatal("Not enough swap for expanded marking stack capacity");
 301     }
 302     _base = (oop*)(_virtual_space.low());
 303     _index = 0;
 304     _capacity = new_capacity;
 305   } else {
 306     if (PrintGCDetails && Verbose) {
 307       // Failed to double capacity, continue;
 308       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
 309                           SIZE_FORMAT "K to " SIZE_FORMAT "K",
 310                           _capacity / K, new_capacity / K);
 311     }
 312   }
 313 }
 314 
 315 void CMMarkStack::set_should_expand() {
 316   // If we're resetting the marking state because of a
 317   // marking stack overflow, record that we should, if
 318   // possible, expand the stack.
 319   _should_expand = _cm->has_overflown();
 320 }
 321 
 322 CMMarkStack::~CMMarkStack() {
 323   if (_base != NULL) {
 324     _base = NULL;
 325     _virtual_space.release();
 326   }
 327 }
 328 
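     // Pushes the "n" entries of "ptr_arr" onto the global stack under the
     // ParGCRareEvent_lock. If the push would exceed the stack's capacity,
     // the overflow flag is set and nothing is pushed.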
 329 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 330   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 331   jint start = _index;
 332   jint next_index = start + n;
 333   if (next_index > _capacity) {
 334     _overflow = true;
 335     return;
 336   }
 337   // Otherwise.
 338   _index = next_index;
 339   for (int i = 0; i < n; i++) {
 340     int ind = start + i;
 341     assert(ind < _capacity, "By overflow test above.");
 342     _base[ind] = ptr_arr[i];
 343   }
 344 }
 345 
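     // Pops up to "max" entries from the global stack into "ptr_arr" under
     // the ParGCRareEvent_lock. Returns false (with *n set to 0) if the
     // stack is empty, otherwise stores the number of popped entries in *n
     // and returns true.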
 346 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 347   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 348   jint index = _index;
 349   if (index == 0) {
 350     *n = 0;
 351     return false;
 352   } else {
 353     int k = MIN2(max, index);
 354     jint  new_ind = index - k;
 355     for (int j = 0; j < k; j++) {
 356       ptr_arr[j] = _base[new_ind + j];
 357     }
 358     _index = new_ind;
 359     *n = k;
 360     return true;
 361   }
 362 }
 363 
 364 template<class OopClosureClass>
 365 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 366   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 367          || SafepointSynchronize::is_at_safepoint(),
 368          "Drain recursion must be yield-safe.");
 369   bool res = true;
 370   debug_only(_drain_in_progress = true);
 371   debug_only(_drain_in_progress_yields = yield_after);
 372   while (!isEmpty()) {
 373     oop newOop = pop();
 374     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 375     assert(newOop->is_oop(), "Expected an oop");
 376     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 377            "only grey objects on this stack");
 378     newOop->oop_iterate(cl);
 379     if (yield_after && _cm->do_yield_check()) {
 380       res = false;
 381       break;
 382     }
 383   }
 384   debug_only(_drain_in_progress = false);
 385   return res;
 386 }
 387 
 388 void CMMarkStack::note_start_of_gc() {
 389   assert(_saved_index == -1,
 390          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 391   _saved_index = _index;
 392 }
 393 
 394 void CMMarkStack::note_end_of_gc() {
 395   // This is intentionally a guarantee, instead of an assert. If we
 396   // accidentally add something to the mark stack during GC, it
 397   // will be a correctness issue, so it's better if we crash. We'll
 398   // only check this once per GC anyway, so it won't be a performance
 399   // issue in any way.
 400   guarantee(_saved_index == _index,
 401             err_msg("saved index: %d index: %d", _saved_index, _index));
 402   _saved_index = -1;
 403 }
 404 
 405 void CMMarkStack::oops_do(OopClosure* f) {
 406   assert(_saved_index == _index,
 407          err_msg("saved index: %d index: %d", _saved_index, _index));
 408   for (int i = 0; i < _index; i += 1) {
 409     f->do_oop(&_base[i]);
 410   }
 411 }
 412 
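     // Root region scanning: currently only survivor regions act as root
     // regions. They are handed out to the marking workers one at a time via
     // claim_next(), and callers can wait for the whole scan to complete via
     // wait_until_scan_finished().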
 413 CMRootRegions::CMRootRegions() :
 414   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 415   _should_abort(false),  _next_survivor(NULL) { }
 416 
 417 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 418   _young_list = g1h->young_list();
 419   _cm = cm;
 420 }
 421 
 422 void CMRootRegions::prepare_for_scan() {
 423   assert(!scan_in_progress(), "pre-condition");
 424 
 425   // Currently, only survivors can be root regions.
 426   assert(_next_survivor == NULL, "pre-condition");
 427   _next_survivor = _young_list->first_survivor_region();
 428   _scan_in_progress = (_next_survivor != NULL);
 429   _should_abort = false;
 430 }
 431 
 432 HeapRegion* CMRootRegions::claim_next() {
 433   if (_should_abort) {
 434     // If someone has set the should_abort flag, we return NULL to
 435     // force the caller to bail out of their loop.
 436     return NULL;
 437   }
 438 
 439   // Currently, only survivors can be root regions.
 440   HeapRegion* res = _next_survivor;
 441   if (res != NULL) {
 442     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 443     // Read it again in case it changed while we were waiting for the lock.
 444     res = _next_survivor;
 445     if (res != NULL) {
 446       if (res == _young_list->last_survivor_region()) {
 447         // We just claimed the last survivor so store NULL to indicate
 448         // that we're done.
 449         _next_survivor = NULL;
 450       } else {
 451         _next_survivor = res->get_next_young_region();
 452       }
 453     } else {
 454       // Someone else claimed the last survivor while we were trying
 455       // to take the lock so nothing else to do.
 456     }
 457   }
 458   assert(res == NULL || res->is_survivor(), "post-condition");
 459 
 460   return res;
 461 }
 462 
 463 void CMRootRegions::scan_finished() {
 464   assert(scan_in_progress(), "pre-condition");
 465 
 466   // Currently, only survivors can be root regions.
 467   if (!_should_abort) {
 468     assert(_next_survivor == NULL, "we should have claimed all survivors");
 469   }
 470   _next_survivor = NULL;
 471 
 472   {
 473     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 474     _scan_in_progress = false;
 475     RootRegionScan_lock->notify_all();
 476   }
 477 }
 478 
 479 bool CMRootRegions::wait_until_scan_finished() {
 480   if (!scan_in_progress()) return false;
 481 
 482   {
 483     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 484     while (scan_in_progress()) {
 485       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 486     }
 487   }
 488   return true;
 489 }
 490 
 491 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 492 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 493 #endif // _MSC_VER
 494 
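     // Number of concurrent marking threads to use for "n_par_threads"
     // parallel GC threads: roughly one quarter of them, and at least one.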
 495 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 496   return MAX2((n_par_threads + 2) / 4, 1U);
 497 }
 498 
 499 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 500   _g1h(g1h),
 501   _markBitMap1(),
 502   _markBitMap2(),
 503   _parallel_marking_threads(0),
 504   _max_parallel_marking_threads(0),
 505   _sleep_factor(0.0),
 506   _marking_task_overhead(1.0),
 507   _cleanup_sleep_factor(0.0),
 508   _cleanup_task_overhead(1.0),
 509   _cleanup_list("Cleanup List"),
 510   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 511   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
 512             CardTableModRefBS::card_shift,
 513             false /* in_resource_area*/),
 514 
 515   _prevMarkBitMap(&_markBitMap1),
 516   _nextMarkBitMap(&_markBitMap2),
 517 
 518   _markStack(this),
 519   // _finger set in set_non_marking_state
 520 
 521   _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
 522   // _active_tasks set in set_non_marking_state
 523   // _tasks set inside the constructor
 524   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 525   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 526 
 527   _has_overflown(false),
 528   _concurrent(false),
 529   _has_aborted(false),
 530   _aborted_gc_id(GCId::undefined()),
 531   _restart_for_overflow(false),
 532   _concurrent_marking_in_progress(false),
 533 
 534   // _verbose_level set below
 535 
 536   _init_times(),
 537   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 538   _cleanup_times(),
 539   _total_counting_time(0.0),
 540   _total_rs_scrub_time(0.0),
 541 
 542   _parallel_workers(NULL),
 543 
 544   _count_card_bitmaps(NULL),
 545   _count_marked_bytes(NULL),
 546   _completed_initialization(false) {
 547   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 548   if (verbose_level < no_verbose) {
 549     verbose_level = no_verbose;
 550   }
 551   if (verbose_level > high_verbose) {
 552     verbose_level = high_verbose;
 553   }
 554   _verbose_level = verbose_level;
 555 
 556   if (verbose_low()) {
 557     gclog_or_tty->print_cr("[global] init, heap start = " PTR_FORMAT ", "
 558                            "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
 559   }
 560 
 561   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 562   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 563 
 564   // Create & start a ConcurrentMark thread.
 565   _cmThread = new ConcurrentMarkThread(this);
 566   assert(cmThread() != NULL, "CM Thread should have been created");
 567   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 568   if (_cmThread->osthread() == NULL) {
 569       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 570   }
 571 
 572   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 573   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 574   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 575 
 576   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 577   satb_qs.set_buffer_size(G1SATBBufferSize);
 578 
 579   _root_regions.init(_g1h, this);
 580 
 581   if (ConcGCThreads > ParallelGCThreads) {
 582     warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
 583             "than ParallelGCThreads (" UINTX_FORMAT ").",
 584             ConcGCThreads, ParallelGCThreads);
 585     return;
 586   }
 587   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 588     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 589     // if both are set
 590     _sleep_factor             = 0.0;
 591     _marking_task_overhead    = 1.0;
 592   } else if (G1MarkingOverheadPercent > 0) {
 593     // We will calculate the number of parallel marking threads based
 594     // on a target overhead with respect to the soft real-time goal
 595     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 596     double overall_cm_overhead =
 597       (double) MaxGCPauseMillis * marking_overhead /
 598       (double) GCPauseIntervalMillis;
 599     double cpu_ratio = 1.0 / (double) os::processor_count();
 600     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 601     double marking_task_overhead =
 602       overall_cm_overhead / marking_thread_num *
 603                                               (double) os::processor_count();
 604     double sleep_factor =
 605                        (1.0 - marking_task_overhead) / marking_task_overhead;
 606 
 607     FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
 608     _sleep_factor             = sleep_factor;
 609     _marking_task_overhead    = marking_task_overhead;
 610   } else {
 611     // Calculate the number of parallel marking threads by scaling
 612     // the number of parallel GC threads.
 613     uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
 614     FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
 615     _sleep_factor             = 0.0;
 616     _marking_task_overhead    = 1.0;
 617   }
 618 
 619   assert(ConcGCThreads > 0, "Should have been set");
 620   _parallel_marking_threads = (uint) ConcGCThreads;
 621   _max_parallel_marking_threads = _parallel_marking_threads;
 622 
 623   if (parallel_marking_threads() > 1) {
 624     _cleanup_task_overhead = 1.0;
 625   } else {
 626     _cleanup_task_overhead = marking_task_overhead();
 627   }
 628   _cleanup_sleep_factor =
 629                    (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 630 
 631 #if 0
 632   gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 633   gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 634   gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 635   gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 636   gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 637 #endif
 638 
 639   _parallel_workers = new FlexibleWorkGang("G1 Marker",
 640        _max_parallel_marking_threads, false, true);
 641   if (_parallel_workers == NULL) {
 642     vm_exit_during_initialization("Failed necessary allocation.");
 643   } else {
 644     _parallel_workers->initialize_workers();
 645   }
 646 
 647   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 648     size_t mark_stack_size =
 649       MIN2(MarkStackSizeMax,
 650           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 651     // Verify that the calculated value for MarkStackSize is in range.
 652     // It would be nice to use the private utility routine from Arguments.
 653     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 654       warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 655               "must be between 1 and " SIZE_FORMAT,
 656               mark_stack_size, MarkStackSizeMax);
 657       return;
 658     }
 659     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 660   } else {
 661     // Verify MarkStackSize is in range.
 662     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 663       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 664         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 665           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 666                   "must be between 1 and " SIZE_FORMAT,
 667                   MarkStackSize, MarkStackSizeMax);
 668           return;
 669         }
 670       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 671         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 672           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 673                   " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 674                   MarkStackSize, MarkStackSizeMax);
 675           return;
 676         }
 677       }
 678     }
 679   }
 680 
 681   if (!_markStack.allocate(MarkStackSize)) {
 682     warning("Failed to allocate CM marking stack");
 683     return;
 684   }
 685 
 686   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 687   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 688 
 689   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 690   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 691 
 692   BitMap::idx_t card_bm_size = _card_bm.size();
 693 
 694   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 695   _active_tasks = _max_worker_id;
 696 
 697   uint max_regions = _g1h->max_regions();
 698   for (uint i = 0; i < _max_worker_id; ++i) {
 699     CMTaskQueue* task_queue = new CMTaskQueue();
 700     task_queue->initialize();
 701     _task_queues->register_queue(i, task_queue);
 702 
 703     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 704     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 705 
 706     _tasks[i] = new CMTask(i, this,
 707                            _count_marked_bytes[i],
 708                            &_count_card_bitmaps[i],
 709                            task_queue, _task_queues);
 710 
 711     _accum_task_vtime[i] = 0.0;
 712   }
 713 
 714   // Calculate the card number for the bottom of the heap. Used
 715   // in biasing indexes into the accounting card bitmaps.
 716   _heap_bottom_card_num =
 717     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 718                                 CardTableModRefBS::card_shift);
 719 
 720   // Clear all the liveness counting data
 721   clear_all_count_data();
 722 
 723   // so that the call below can read a sensible value
 724   _heap_start = g1h->reserved_region().start();
 725   set_non_marking_state();
 726   _completed_initialization = true;
 727 }
 728 
 729 void ConcurrentMark::reset() {
 730   // Starting values for these two. This should be called in a STW
 731   // phase.
 732   MemRegion reserved = _g1h->g1_reserved();
 733   _heap_start = reserved.start();
 734   _heap_end   = reserved.end();
 735 
 736   // Separated the asserts so that we know which one fires.
 737   assert(_heap_start != NULL, "heap bounds should look ok");
 738   assert(_heap_end != NULL, "heap bounds should look ok");
 739   assert(_heap_start < _heap_end, "heap bounds should look ok");
 740 
 741   // Reset all the marking data structures and any necessary flags
 742   reset_marking_state();
 743 
 744   if (verbose_low()) {
 745     gclog_or_tty->print_cr("[global] resetting");
 746   }
 747 
 748   // We do reset all of them, since different phases will use a
 749   // different number of active threads. So, it's easiest to have all
 750   // of them ready.
 751   for (uint i = 0; i < _max_worker_id; ++i) {
 752     _tasks[i]->reset(_nextMarkBitMap);
 753   }
 754 
 755   // We need this to make sure that the flag is on during the
 756   // evacuation pause that has the initial mark piggy-backed on it.
 757   set_concurrent_marking_in_progress();
 758 }
 759 
 760 
 761 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 762   _markStack.set_should_expand();
 763   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 764   if (clear_overflow) {
 765     clear_has_overflown();
 766   } else {
 767     assert(has_overflown(), "pre-condition");
 768   }
 769   _finger = _heap_start;
 770 
 771   for (uint i = 0; i < _max_worker_id; ++i) {
 772     CMTaskQueue* queue = _task_queues->queue(i);
 773     queue->set_empty();
 774   }
 775 }
 776 
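     // Updates the number of active tasks and re-sizes the parallel task
     // terminator and the two overflow barrier syncs to match it.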
 777 void ConcurrentMark::set_concurrency(uint active_tasks) {
 778   assert(active_tasks <= _max_worker_id, "we should not have more");
 779 
 780   _active_tasks = active_tasks;
 781   // Need to update the three data structures below according to the
 782   // number of active threads for this phase.
 783   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 784   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 785   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 786 }
 787 
 788 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 789   set_concurrency(active_tasks);
 790 
 791   _concurrent = concurrent;
 792   // We propagate this to all tasks, not just the active ones.
 793   for (uint i = 0; i < _max_worker_id; ++i)
 794     _tasks[i]->set_concurrent(concurrent);
 795 
 796   if (concurrent) {
 797     set_concurrent_marking_in_progress();
 798   } else {
 799     // We currently assume that the concurrent flag has been set to
 800     // false before we start remark. At this point we should also be
 801     // in a STW phase.
 802     assert(!concurrent_marking_in_progress(), "invariant");
 803     assert(out_of_regions(),
 804            err_msg("only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 805                    p2i(_finger), p2i(_heap_end)));
 806   }
 807 }
 808 
 809 void ConcurrentMark::set_non_marking_state() {
 810   // We set the global marking state to some default values when we're
 811   // not doing marking.
 812   reset_marking_state();
 813   _active_tasks = 0;
 814   clear_concurrent_marking_in_progress();
 815 }
 816 
 817 ConcurrentMark::~ConcurrentMark() {
 818   // The ConcurrentMark instance is never freed.
 819   ShouldNotReachHere();
 820 }
 821 
 822 void ConcurrentMark::clearNextBitmap() {
 823   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 824 
 825   // Make sure that the concurrent mark thread still appears to be
 826   // within the current cycle.
 827   guarantee(cmThread()->during_cycle(), "invariant");
 828 
 829   // We are finishing up the current cycle by clearing the next
 830   // marking bitmap and getting it ready for the next cycle. During
 831   // this time no other cycle can start. So, let's make sure that this
 832   // is the case.
 833   guarantee(!g1h->mark_in_progress(), "invariant");
 834 
 835   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
 836   ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
 837   _parallel_workers->run_task(&task);
 838 
 839   // Clear the liveness counting data. If the marking has been aborted, the abort()
 840   // call already did that.
 841   if (cl.complete()) {
 842     clear_all_count_data();
 843   }
 844 
 845   // Repeat the asserts from above.
 846   guarantee(cmThread()->during_cycle(), "invariant");
 847   guarantee(!g1h->mark_in_progress(), "invariant");
 848 }
 849 
 850 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 851   CMBitMap* _bitmap;
 852   bool _error;
 853  public:
 854   CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
 855   }
 856 
 857   virtual bool doHeapRegion(HeapRegion* r) {
 858     // This closure can be called concurrently with the mutator, so we must make sure
 859     // that the result of the getNextMarkedWordAddress() call is compared to the
 860     // value passed to it as limit to detect any found bits.
 861     // We can use the region's orig_end() for the limit and the comparison value
 862     // as it always contains the "real" end of the region that never changes and
 863     // has no side effects.
 864     // Due to the latter, there can also be no problem with the compiler generating
 865     // reloads of the orig_end() call.
 866     HeapWord* end = r->orig_end();
 867     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 868   }
 869 };
 870 
 871 bool ConcurrentMark::nextMarkBitmapIsClear() {
 872   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 873   _g1h->heap_region_iterate(&cl);
 874   return cl.complete();
 875 }
 876 
 877 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 878 public:
 879   bool doHeapRegion(HeapRegion* r) {
 880     if (!r->is_continues_humongous()) {
 881       r->note_start_of_marking();
 882     }
 883     return false;
 884   }
 885 };
 886 
 887 void ConcurrentMark::checkpointRootsInitialPre() {
 888   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 889   G1CollectorPolicy* g1p = g1h->g1_policy();
 890 
 891   _has_aborted = false;
 892 
 893   // Initialize marking structures. This has to be done in a STW phase.
 894   reset();
 895 
 896   // For each region note start of marking.
 897   NoteStartOfMarkHRClosure startcl;
 898   g1h->heap_region_iterate(&startcl);
 899 }
 900 
 901 
 902 void ConcurrentMark::checkpointRootsInitialPost() {
 903   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 904 
 905   // If we force an overflow during remark, the remark operation will
 906   // actually abort and we'll restart concurrent marking. If we always
 907   // force an overflow during remark we'll never actually complete the
 908   // marking phase. So, we initialize this here, at the start of the
 909   // cycle, so that the remaining overflow count will decrease at
 910   // every remark and we'll eventually not need to cause one.
 911   force_overflow_stw()->init();
 912 
 913   // Start Concurrent Marking weak-reference discovery.
 914   ReferenceProcessor* rp = g1h->ref_processor_cm();
 915   // enable ("weak") refs discovery
 916   rp->enable_discovery();
 917   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 918 
 919   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 920   // This is the start of the marking cycle; we expect all
 921   // threads to have SATB queues with active set to false.
 922   satb_mq_set.set_active_all_threads(true, /* new active value */
 923                                      false /* expected_active */);
 924 
 925   _root_regions.prepare_for_scan();
 926 
 927   // update_g1_committed() will be called at the end of an evac pause
 928   // when marking is on. So, it's also called at the end of the
 929   // initial-mark pause to update the heap end, if the heap expands
 930   // during it. No need to call it here.
 931 }
 932 
 933 /*
 934  * Notice that in the next two methods, we actually leave the STS
 935  * during the barrier sync and join it immediately afterwards. If we
 936  * do not do this, the following deadlock can occur: one thread could
 937  * be in the barrier sync code, waiting for the other thread to also
 938  * sync up, whereas another one could be trying to yield, while also
 939  * waiting for the other threads to sync up too.
 940  *
 941  * Note, however, that this code is also used during remark and in
 942  * this case we should not attempt to leave / enter the STS, otherwise
 943  * we'll either hit an assert (debug / fastdebug) or deadlock
 944  * (product). So we should only leave / enter the STS if we are
 945  * operating concurrently.
 946  *
 947  * Because the thread that does the sync barrier has left the STS, it
 948  * is possible for it to be suspended for a Full GC, or for an evacuation
 949  * pause to occur. This is actually safe, since entering the sync
 950  * barrier is one of the last things do_marking_step() does, and it
 951  * doesn't manipulate any data structures afterwards.
 952  */
 953 
 954 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 955   bool barrier_aborted;
 956 
 957   if (verbose_low()) {
 958     gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
 959   }
 960 
 961   {
 962     SuspendibleThreadSetLeaver sts_leave(concurrent());
 963     barrier_aborted = !_first_overflow_barrier_sync.enter();
 964   }
 965 
 966   // at this point everyone should have synced up and not be doing any
 967   // more work
 968 
 969   if (verbose_low()) {
 970     if (barrier_aborted) {
 971       gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
 972     } else {
 973       gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
 974     }
 975   }
 976 
 977   if (barrier_aborted) {
 978     // If the barrier aborted we ignore the overflow condition and
 979     // just abort the whole marking phase as quickly as possible.
 980     return;
 981   }
 982 
 983   // If we're executing the concurrent phase of marking, reset the marking
 984   // state; otherwise the marking state is reset after reference processing,
 985   // during the remark pause.
 986   // If we reset here as a result of an overflow during the remark we will
 987   // see assertion failures from any subsequent set_concurrency_and_phase()
 988   // calls.
 989   if (concurrent()) {
 990     // let the task associated with worker 0 do this
 991     if (worker_id == 0) {
 992       // task 0 is responsible for clearing the global data structures
 993       // We should be here because of an overflow. During STW we should
 994       // not clear the overflow flag since we rely on it being true when
 995       // we exit this method to abort the pause and restart concurrent
 996       // marking.
 997       reset_marking_state(true /* clear_overflow */);
 998       force_overflow()->update();
 999 
1000       if (G1Log::fine()) {
1001         gclog_or_tty->gclog_stamp(concurrent_gc_id());
1002         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
1003       }
1004     }
1005   }
1006 
1007   // after this, each task should reset its own data structures and
1008   // then go into the second barrier
1009 }
1010 
1011 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1012   bool barrier_aborted;
1013 
1014   if (verbose_low()) {
1015     gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
1016   }
1017 
1018   {
1019     SuspendibleThreadSetLeaver sts_leave(concurrent());
1020     barrier_aborted = !_second_overflow_barrier_sync.enter();
1021   }
1022 
1023   // at this point everything should be re-initialized and ready to go
1024 
1025   if (verbose_low()) {
1026     if (barrier_aborted) {
1027       gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
1028     } else {
1029       gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
1030     }
1031   }
1032 }
1033 
1034 #ifndef PRODUCT
1035 void ForceOverflowSettings::init() {
1036   _num_remaining = G1ConcMarkForceOverflow;
1037   _force = false;
1038   update();
1039 }
1040 
1041 void ForceOverflowSettings::update() {
1042   if (_num_remaining > 0) {
1043     _num_remaining -= 1;
1044     _force = true;
1045   } else {
1046     _force = false;
1047   }
1048 }
1049 
1050 bool ForceOverflowSettings::should_force() {
1051   if (_force) {
1052     _force = false;
1053     return true;
1054   } else {
1055     return false;
1056   }
1057 }
1058 #endif // !PRODUCT
1059 
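     // The gang task run by the parallel marking workers during the concurrent
     // phase. Each worker joins the suspendible thread set and repeatedly calls
     // do_marking_step() on its CMTask, sleeping between steps according to the
     // configured sleep factor, until marking completes or is aborted.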
1060 class CMConcurrentMarkingTask: public AbstractGangTask {
1061 private:
1062   ConcurrentMark*       _cm;
1063   ConcurrentMarkThread* _cmt;
1064 
1065 public:
1066   void work(uint worker_id) {
1067     assert(Thread::current()->is_ConcurrentGC_thread(),
1068            "this should only be done by a conc GC thread");
1069     ResourceMark rm;
1070 
1071     double start_vtime = os::elapsedVTime();
1072 
1073     {
1074       SuspendibleThreadSetJoiner sts_join;
1075 
1076       assert(worker_id < _cm->active_tasks(), "invariant");
1077       CMTask* the_task = _cm->task(worker_id);
1078       the_task->record_start_time();
1079       if (!_cm->has_aborted()) {
1080         do {
1081           double start_vtime_sec = os::elapsedVTime();
1082           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1083 
1084           the_task->do_marking_step(mark_step_duration_ms,
1085                                     true  /* do_termination */,
1086                                     false /* is_serial*/);
1087 
1088           double end_vtime_sec = os::elapsedVTime();
1089           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1090           _cm->clear_has_overflown();
1091 
1092           _cm->do_yield_check(worker_id);
1093 
1094           jlong sleep_time_ms;
1095           if (!_cm->has_aborted() && the_task->has_aborted()) {
1096             sleep_time_ms =
1097               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1098             {
1099               SuspendibleThreadSetLeaver sts_leave;
1100               os::sleep(Thread::current(), sleep_time_ms, false);
1101             }
1102           }
1103         } while (!_cm->has_aborted() && the_task->has_aborted());
1104       }
1105       the_task->record_end_time();
1106       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1107     }
1108 
1109     double end_vtime = os::elapsedVTime();
1110     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1111   }
1112 
1113   CMConcurrentMarkingTask(ConcurrentMark* cm,
1114                           ConcurrentMarkThread* cmt) :
1115       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1116 
1117   ~CMConcurrentMarkingTask() { }
1118 };
1119 
1120 // Calculates the number of active workers for a concurrent
1121 // phase.
1122 uint ConcurrentMark::calc_parallel_marking_threads() {
1123   uint n_conc_workers = 0;
1124   if (!UseDynamicNumberOfGCThreads ||
1125       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1126        !ForceDynamicNumberOfGCThreads)) {
1127     n_conc_workers = max_parallel_marking_threads();
1128   } else {
1129     n_conc_workers =
1130       AdaptiveSizePolicy::calc_default_active_workers(
1131                                    max_parallel_marking_threads(),
1132                                    1, /* Minimum workers */
1133                                    parallel_marking_threads(),
1134                                    Threads::number_of_non_daemon_threads());
1135     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1136     // that scaling has already gone into "_max_parallel_marking_threads".
1137   }
1138   assert(n_conc_workers > 0, "Always need at least 1");
1139   return n_conc_workers;
1140 }
1141 
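     // Scans a single root region by applying a G1RootRegionScanClosure to
     // every object between the region's bottom and its top.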
1142 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1143   // Currently, only survivors can be root regions.
1144   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1145   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1146 
1147   const uintx interval = PrefetchScanIntervalInBytes;
1148   HeapWord* curr = hr->bottom();
1149   const HeapWord* end = hr->top();
1150   while (curr < end) {
1151     Prefetch::read(curr, interval);
1152     oop obj = oop(curr);
1153     int size = obj->oop_iterate(&cl);
1154     assert(size == obj->size(), "sanity");
1155     curr += size;
1156   }
1157 }
1158 
1159 class CMRootRegionScanTask : public AbstractGangTask {
1160 private:
1161   ConcurrentMark* _cm;
1162 
1163 public:
1164   CMRootRegionScanTask(ConcurrentMark* cm) :
1165     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1166 
1167   void work(uint worker_id) {
1168     assert(Thread::current()->is_ConcurrentGC_thread(),
1169            "this should only be done by a conc GC thread");
1170 
1171     CMRootRegions* root_regions = _cm->root_regions();
1172     HeapRegion* hr = root_regions->claim_next();
1173     while (hr != NULL) {
1174       _cm->scanRootRegion(hr, worker_id);
1175       hr = root_regions->claim_next();
1176     }
1177   }
1178 };
1179 
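     // Scans all root regions, if there are any, using the parallel marking
     // workers, and then marks the root region scan as finished, notifying
     // anyone waiting in wait_until_scan_finished().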
1180 void ConcurrentMark::scanRootRegions() {
1181   // Start of concurrent marking.
1182   ClassLoaderDataGraph::clear_claimed_marks();
1183 
1184   // scan_in_progress() will have been set to true only if there was
1185   // at least one root region to scan. So, if it's false, we
1186   // should not attempt to do any further work.
1187   if (root_regions()->scan_in_progress()) {
1188     _parallel_marking_threads = calc_parallel_marking_threads();
1189     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1190            "Maximum number of marking threads exceeded");
1191     uint active_workers = MAX2(1U, parallel_marking_threads());
1192 
1193     CMRootRegionScanTask task(this);
1194     _parallel_workers->set_active_workers(active_workers);
1195     _parallel_workers->run_task(&task);
1196 
1197     // It's possible that has_aborted() is true here without actually
1198     // aborting the survivor scan earlier. This is OK as it's
1199     // mainly used for sanity checking.
1200     root_regions()->scan_finished();
1201   }
1202 }
1203 
1204 void ConcurrentMark::markFromRoots() {
1205   // we might be tempted to assert that:
1206   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1207   //        "inconsistent argument?");
1208   // However that wouldn't be right, because it's possible that
1209   // a safepoint is indeed in progress as a younger generation
1210   // stop-the-world GC happens even as we mark in this generation.
1211 
1212   _restart_for_overflow = false;
1213   force_overflow_conc()->init();
1214 
1215   // _g1h has _n_par_threads
1216   _parallel_marking_threads = calc_parallel_marking_threads();
1217   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1218     "Maximum number of marking threads exceeded");
1219 
1220   uint active_workers = MAX2(1U, parallel_marking_threads());
1221 
1222   // Parallel task terminator is set in "set_concurrency_and_phase()"
1223   set_concurrency_and_phase(active_workers, true /* concurrent */);
1224 
1225   CMConcurrentMarkingTask markingTask(this, cmThread());
1226   _parallel_workers->set_active_workers(active_workers);
1227   // Don't set _n_par_threads because it affects MT in process_roots()
1228   // and the decisions on that MT processing are made elsewhere.
1229   assert(_parallel_workers->active_workers() > 0, "Should have been set");
1230   _parallel_workers->run_task(&markingTask);
1231   print_stats();
1232 }
1233 
1234 // Helper class to get rid of some boilerplate code.
1235 class G1CMTraceTime : public GCTraceTime {
1236   static bool doit_and_prepend(bool doit) {
1237     if (doit) {
1238       gclog_or_tty->put(' ');
1239     }
1240     return doit;
1241   }
1242 
1243  public:
1244   G1CMTraceTime(const char* title, bool doit)
1245     : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
1246         G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
1247   }
1248 };
1249 
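     // The Remark pause: finishes the marking work started concurrently,
     // processes weak references and, if the global mark stack overflowed,
     // requests a restart of concurrent marking; otherwise it aggregates the
     // per-task counting data and resets the marking state.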
1250 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1251   // world is stopped at this checkpoint
1252   assert(SafepointSynchronize::is_at_safepoint(),
1253          "world should be stopped");
1254 
1255   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1256 
1257   // If a full collection has happened, we shouldn't do this.
1258   if (has_aborted()) {
1259     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1260     return;
1261   }
1262 
1263   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1264 
1265   if (VerifyDuringGC) {
1266     HandleMark hm;  // handle scope
1267     g1h->prepare_for_verify();
1268     Universe::verify(VerifyOption_G1UsePrevMarking,
1269                      " VerifyDuringGC:(before)");
1270   }
1271   g1h->check_bitmaps("Remark Start");
1272 
1273   G1CollectorPolicy* g1p = g1h->g1_policy();
1274   g1p->record_concurrent_mark_remark_start();
1275 
1276   double start = os::elapsedTime();
1277 
1278   checkpointRootsFinalWork();
1279 
1280   double mark_work_end = os::elapsedTime();
1281 
1282   weakRefsWork(clear_all_soft_refs);
1283 
1284   if (has_overflown()) {
1285     // Oops.  We overflowed.  Restart concurrent marking.
1286     _restart_for_overflow = true;
1287     if (G1TraceMarkStackOverflow) {
1288       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1289     }
1290 
1291     // Verify the heap w.r.t. the previous marking bitmap.
1292     if (VerifyDuringGC) {
1293       HandleMark hm;  // handle scope
1294       g1h->prepare_for_verify();
1295       Universe::verify(VerifyOption_G1UsePrevMarking,
1296                        " VerifyDuringGC:(overflow)");
1297     }
1298 
1299     // Clear the marking state because we will be restarting
1300     // marking due to overflowing the global mark stack.
1301     reset_marking_state();
1302   } else {
1303     {
1304       G1CMTraceTime trace("GC aggregate-data", G1Log::finer());
1305 
1306       // Aggregate the per-task counting data that we have accumulated
1307       // while marking.
1308       aggregate_count_data();
1309     }
1310 
1311     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1312     // We're done with marking.
1313     // This is the end of the marking cycle; we expect all
1314     // threads to have SATB queues with active set to true.
1315     satb_mq_set.set_active_all_threads(false, /* new active value */
1316                                        true /* expected_active */);
1317 
1318     if (VerifyDuringGC) {
1319       HandleMark hm;  // handle scope
1320       g1h->prepare_for_verify();
1321       Universe::verify(VerifyOption_G1UseNextMarking,
1322                        " VerifyDuringGC:(after)");
1323     }
1324     g1h->check_bitmaps("Remark End");
1325     assert(!restart_for_overflow(), "sanity");
1326     // Completely reset the marking state since marking completed
1327     set_non_marking_state();
1328   }
1329 
1330   // Expand the marking stack, if we have to and if we can.
1331   if (_markStack.should_expand()) {
1332     _markStack.expand();
1333   }
1334 
1335   // Statistics
1336   double now = os::elapsedTime();
1337   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1338   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1339   _remark_times.add((now - start) * 1000.0);
1340 
1341   g1p->record_concurrent_mark_remark_end();
1342 
1343   G1CMIsAliveClosure is_alive(g1h);
1344   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1345 }
1346 
1347 // Base class of the closures that finalize and verify the
1348 // liveness counting data.
1349 class CMCountDataClosureBase: public HeapRegionClosure {
1350 protected:
1351   G1CollectedHeap* _g1h;
1352   ConcurrentMark* _cm;
1353   CardTableModRefBS* _ct_bs;
1354 
1355   BitMap* _region_bm;
1356   BitMap* _card_bm;
1357 
1358   // Takes a region that's not empty (i.e., it has at least one
1359   // live object in it) and sets its corresponding bit on the region
1360   // bitmap to 1. If the region is "starts humongous" it will also set
1361   // to 1 the bits on the region bitmap that correspond to its
1362   // associated "continues humongous" regions.
1363   void set_bit_for_region(HeapRegion* hr) {
1364     assert(!hr->is_continues_humongous(), "should have filtered those out");
1365 
1366     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1367     if (!hr->is_starts_humongous()) {
1368       // Normal (non-humongous) case: just set the bit.
1369       _region_bm->par_at_put(index, true);
1370     } else {
1371       // Starts humongous case: calculate how many regions are part of
1372       // this humongous region and then set the bit range.
1373       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1374       _region_bm->par_at_put_range(index, end_index, true);
1375     }
1376   }
1377 
1378 public:
1379   CMCountDataClosureBase(G1CollectedHeap* g1h,
1380                          BitMap* region_bm, BitMap* card_bm):
1381     _g1h(g1h), _cm(g1h->concurrent_mark()),
1382     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
1383     _region_bm(region_bm), _card_bm(card_bm) { }
1384 };
1385 
1386 // Closure that calculates the # live objects per region. Used
1387 // for verification purposes during the cleanup pause.
1388 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1389   CMBitMapRO* _bm;
1390   size_t _region_marked_bytes;
1391 
1392 public:
1393   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1394                          BitMap* region_bm, BitMap* card_bm) :
1395     CMCountDataClosureBase(g1h, region_bm, card_bm),
1396     _bm(bm), _region_marked_bytes(0) { }
1397 
1398   bool doHeapRegion(HeapRegion* hr) {
1399 
1400     if (hr->is_continues_humongous()) {
1401       // We will ignore these here and process them when their
1402       // associated "starts humongous" region is processed (see
1403       // set_bit_for_heap_region()). Note that we cannot rely on their
1404       // associated "starts humongous" region to have their bit set to
1405       // 1 since, due to the region chunking in the parallel region
1406       // iteration, a "continues humongous" region might be visited
1407       // before its associated "starts humongous".
1408       return false;
1409     }
1410 
1411     HeapWord* ntams = hr->next_top_at_mark_start();
1412     HeapWord* start = hr->bottom();
1413 
1414     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1415            err_msg("Preconditions not met - "
1416                    "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
1417                    p2i(start), p2i(ntams), p2i(hr->end())));
1418 
1419     // Find the first marked object at or after "start".
1420     start = _bm->getNextMarkedWordAddress(start, ntams);
1421 
1422     size_t marked_bytes = 0;
1423 
1424     while (start < ntams) {
1425       oop obj = oop(start);
1426       int obj_sz = obj->size();
1427       HeapWord* obj_end = start + obj_sz;
1428 
1429       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1430       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1431 
1432       // Note: if we're looking at the last region in the heap, obj_end
1433       // could actually be just beyond the end of the heap; end_idx
1434       // will then correspond to a (non-existent) card that is also
1435       // just beyond the heap.
1436       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1437         // end of object is not card aligned - increment to cover
1438         // all the cards spanned by the object
1439         end_idx += 1;
1440       }
1441 
1442       // Set the bits in the card BM for the cards spanned by this object.
1443       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1444 
1445       // Add the size of this object to the number of marked bytes.
1446       marked_bytes += (size_t)obj_sz * HeapWordSize;
1447 
1448       // Find the next marked object after this one.
1449       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1450     }
1451 
1452     // Mark the allocated-since-marking portion...
1453     HeapWord* top = hr->top();
1454     if (ntams < top) {
1455       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1456       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1457 
1458       // Note: if we're looking at the last region in the heap, top
1459       // could actually be just beyond the end of the heap; end_idx
1460       // will then correspond to a (non-existent) card that is also
1461       // just beyond the heap.
1462       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1463         // top is not card aligned - increment to cover
1464         // all the cards spanned by [ntams, top)
1465         end_idx += 1;
1466       }
1467       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1468 
1469       // This definitely means the region has live objects.
1470       set_bit_for_region(hr);
1471     }
1472 
1473     // Update the live region bitmap.
1474     if (marked_bytes > 0) {
1475       set_bit_for_region(hr);
1476     }
1477 
1478     // Set the marked bytes for the current region so that
1479     // it can be queried by a calling verification routine
1480     _region_marked_bytes = marked_bytes;
1481 
1482     return false;
1483   }
1484 
1485   size_t region_marked_bytes() const { return _region_marked_bytes; }
1486 };
1487 
1488 // Heap region closure used for verifying the counting data
1489 // that was accumulated concurrently and aggregated during
1490 // the remark pause. This closure is applied to the heap
1491 // regions during the STW cleanup pause.
1492 
1493 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1494   G1CollectedHeap* _g1h;
1495   ConcurrentMark* _cm;
1496   CalcLiveObjectsClosure _calc_cl;
1497   BitMap* _region_bm;   // Region BM to be verified
1498   BitMap* _card_bm;     // Card BM to be verified
1499   bool _verbose;        // verbose output?
1500 
1501   BitMap* _exp_region_bm; // Expected Region BM values
1502   BitMap* _exp_card_bm;   // Expected card BM values
1503 
1504   int _failures;
1505 
1506 public:
1507   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1508                                 BitMap* region_bm,
1509                                 BitMap* card_bm,
1510                                 BitMap* exp_region_bm,
1511                                 BitMap* exp_card_bm,
1512                                 bool verbose) :
1513     _g1h(g1h), _cm(g1h->concurrent_mark()),
1514     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1515     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1516     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1517     _failures(0) { }
1518 
1519   int failures() const { return _failures; }
1520 
1521   bool doHeapRegion(HeapRegion* hr) {
1522     if (hr->is_continues_humongous()) {
1523       // We will ignore these here and process them when their
1524       // associated "starts humongous" region is processed (see
1525     // set_bit_for_region()). Note that we cannot rely on their
1526     // associated "starts humongous" region to have its bit set to
1527       // 1 since, due to the region chunking in the parallel region
1528       // iteration, a "continues humongous" region might be visited
1529       // before its associated "starts humongous".
1530       return false;
1531     }
1532 
1533     int failures = 0;
1534 
1535     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1536     // this region and set the corresponding bits in the expected region
1537     // and card bitmaps.
1538     bool res = _calc_cl.doHeapRegion(hr);
1539     assert(res == false, "should be continuing");
1540 
1541     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1542                     Mutex::_no_safepoint_check_flag);
1543 
1544     // Verify the marked bytes for this region.
1545     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1546     size_t act_marked_bytes = hr->next_marked_bytes();
1547 
1548     // We're not OK if expected marked bytes > actual marked bytes. It means
1549     // we have failed to account for some objects during the actual marking.
1550     if (exp_marked_bytes > act_marked_bytes) {
1551       if (_verbose) {
1552         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1553                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1554                                hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
1555       }
1556       failures += 1;
1557     }
1558 
1559     // Verify the bit, for this region, in the actual and expected
1560     // (which was just calculated) region bit maps.
1561     // We're not OK if the bit in the calculated expected region
1562     // bitmap is set and the bit in the actual region bitmap is not.
1563     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1564 
1565     bool expected = _exp_region_bm->at(index);
1566     bool actual = _region_bm->at(index);
1567     if (expected && !actual) {
1568       if (_verbose) {
1569         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1570                                "expected: %s, actual: %s",
1571                                hr->hrm_index(),
1572                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1573       }
1574       failures += 1;
1575     }
1576 
1577     // Verify that the card bit maps for the cards spanned by the current
1578     // region match. We have an error if we have a set bit in the expected
1579     // bit map and the corresponding bit in the actual bitmap is not set.
1580 
1581     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1582     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1583 
1584     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1585       expected = _exp_card_bm->at(i);
1586       actual = _card_bm->at(i);
1587 
1588       if (expected && !actual) {
1589         if (_verbose) {
1590           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1591                                  "expected: %s, actual: %s",
1592                                  hr->hrm_index(), i,
1593                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1594         }
1595         failures += 1;
1596       }
1597     }
1598 
1599     if (failures > 0 && _verbose)  {
1600       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1601                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1602                              HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
1603                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1604     }
1605 
1606     _failures += failures;
1607 
1608     // We could stop iteration over the heap when we
1609     // find the first violating region by returning true.
1610     return false;
1611   }
1612 };
1613 
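     // Gang task used during the cleanup pause (only when VerifyDuringGC is
     // set) to verify the liveness counting data: each worker applies
     // VerifyLiveObjectDataHRClosure to its share of the heap regions and
     // the per-worker failure counts are summed up.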
1614 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1615 protected:
1616   G1CollectedHeap* _g1h;
1617   ConcurrentMark* _cm;
1618   BitMap* _actual_region_bm;
1619   BitMap* _actual_card_bm;
1620 
1621   uint    _n_workers;
1622 
1623   BitMap* _expected_region_bm;
1624   BitMap* _expected_card_bm;
1625 
1626   int  _failures;
1627   bool _verbose;
1628 
1629   HeapRegionClaimer _hrclaimer;
1630 
1631 public:
1632   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1633                             BitMap* region_bm, BitMap* card_bm,
1634                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1635     : AbstractGangTask("G1 verify final counting"),
1636       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1637       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1638       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1639       _failures(0), _verbose(false),
1640       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1641     assert(VerifyDuringGC, "don't call this otherwise");
1642     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1643     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1644 
1645     _verbose = _cm->verbose_medium();
1646   }
1647 
1648   void work(uint worker_id) {
1649     assert(worker_id < _n_workers, "invariant");
1650 
1651     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1652                                             _actual_region_bm, _actual_card_bm,
1653                                             _expected_region_bm,
1654                                             _expected_card_bm,
1655                                             _verbose);
1656 
1657     _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
1658 
1659     Atomic::add(verify_cl.failures(), &_failures);
1660   }
1661 
1662   int failures() const { return _failures; }
1663 };
1664 
1665 // Closure that finalizes the liveness counting data.
1666 // Used during the cleanup pause.
1667 // Sets the bits corresponding to the interval [NTAMS, top)
1668 // (which contains the implicitly live objects) in the
1669 // card liveness bitmap. Also sets the bit, in the region
1670 // liveness bitmap, for each region containing live data.
1671 
1672 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1673  public:
1674   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1675                               BitMap* region_bm,
1676                               BitMap* card_bm) :
1677     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1678 
1679   bool doHeapRegion(HeapRegion* hr) {
1680 
1681     if (hr->is_continues_humongous()) {
1682       // We will ignore these here and process them when their
1683       // associated "starts humongous" region is processed (see
1684     // set_bit_for_region()). Note that we cannot rely on their
1685     // associated "starts humongous" region to have its bit set to
1686       // 1 since, due to the region chunking in the parallel region
1687       // iteration, a "continues humongous" region might be visited
1688       // before its associated "starts humongous".
1689       return false;
1690     }
1691 
1692     HeapWord* ntams = hr->next_top_at_mark_start();
1693     HeapWord* top   = hr->top();
1694 
1695     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1696 
1697     // Mark the allocated-since-marking portion...
1698     if (ntams < top) {
1699       // This definitely means the region has live objects.
1700       set_bit_for_region(hr);
1701 
1702       // Now set the bits in the card bitmap for [ntams, top)
1703       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1704       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1705 
1706       // Note: if we're looking at the last region in the heap, top
1707       // could actually be just beyond the end of the heap; end_idx
1708       // will then correspond to a (non-existent) card that is also
1709       // just beyond the heap.
1710       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1711         // top is not card aligned - increment to cover
1712         // all the cards spanned by [ntams, top)
1713         end_idx += 1;
1714       }
1715 
1716       assert(end_idx <= _card_bm->size(),
1717              err_msg("oob: end_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1718                      end_idx, _card_bm->size()));
1719       assert(start_idx < _card_bm->size(),
1720              err_msg("oob: start_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1721                      start_idx, _card_bm->size()));
1722 
1723       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1724     }
1725 
1726     // Set the bit for the region if it contains live data
1727     if (hr->next_marked_bytes() > 0) {
1728       set_bit_for_region(hr);
1729     }
1730 
1731     return false;
1732   }
1733 };
1734 
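     // Gang task that finalizes the liveness counting data during the
     // cleanup pause: each worker applies FinalCountDataUpdateClosure to
     // its share of the heap regions.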
1735 class G1ParFinalCountTask: public AbstractGangTask {
1736 protected:
1737   G1CollectedHeap* _g1h;
1738   ConcurrentMark* _cm;
1739   BitMap* _actual_region_bm;
1740   BitMap* _actual_card_bm;
1741 
1742   uint    _n_workers;
1743   HeapRegionClaimer _hrclaimer;
1744 
1745 public:
1746   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1747     : AbstractGangTask("G1 final counting"),
1748       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1749       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1750       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1751   }
1752 
1753   void work(uint worker_id) {
1754     assert(worker_id < _n_workers, "invariant");
1755 
1756     FinalCountDataUpdateClosure final_update_cl(_g1h,
1757                                                 _actual_region_bm,
1758                                                 _actual_card_bm);
1759 
1760     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1761   }
1762 };
1763 
1764 class G1ParNoteEndTask;
1765 
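     // Heap region closure applied at the end of marking. It records the
     // marking information for each region and frees regions found to
     // contain no live data, accumulating them on a worker-local cleanup
     // list.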
1766 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1767   G1CollectedHeap* _g1;
1768   size_t _max_live_bytes;
1769   uint _regions_claimed;
1770   size_t _freed_bytes;
1771   FreeRegionList* _local_cleanup_list;
1772   HeapRegionSetCount _old_regions_removed;
1773   HeapRegionSetCount _humongous_regions_removed;
1774   HRRSCleanupTask* _hrrs_cleanup_task;
1775   double _claimed_region_time;
1776   double _max_region_time;
1777 
1778 public:
1779   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1780                              FreeRegionList* local_cleanup_list,
1781                              HRRSCleanupTask* hrrs_cleanup_task) :
1782     _g1(g1),
1783     _max_live_bytes(0), _regions_claimed(0),
1784     _freed_bytes(0),
1785     _claimed_region_time(0.0), _max_region_time(0.0),
1786     _local_cleanup_list(local_cleanup_list),
1787     _old_regions_removed(),
1788     _humongous_regions_removed(),
1789     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1790 
1791   size_t freed_bytes() { return _freed_bytes; }
1792   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1793   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1794 
1795   bool doHeapRegion(HeapRegion *hr) {
1796     if (hr->is_continues_humongous()) {
1797       return false;
1798     }
1799     // We use a claim value of zero here because all regions
1800     // were claimed with value 1 in the FinalCount task.
1801     _g1->reset_gc_time_stamps(hr);
1802     double start = os::elapsedTime();
1803     _regions_claimed++;
1804     hr->note_end_of_marking();
1805     _max_live_bytes += hr->max_live_bytes();
1806 
1807     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1808       _freed_bytes += hr->used();
1809       hr->set_containing_set(NULL);
1810       if (hr->is_humongous()) {
1811         assert(hr->is_starts_humongous(), "we should only see starts humongous");
1812         _humongous_regions_removed.increment(1u, hr->capacity());
1813         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1814       } else {
1815         _old_regions_removed.increment(1u, hr->capacity());
1816         _g1->free_region(hr, _local_cleanup_list, true);
1817       }
1818     } else {
1819       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1820     }
1821 
1822     double region_time = (os::elapsedTime() - start);
1823     _claimed_region_time += region_time;
1824     if (region_time > _max_region_time) {
1825       _max_region_time = region_time;
1826     }
1827     return false;
1828   }
1829 
1830   size_t max_live_bytes() { return _max_live_bytes; }
1831   uint regions_claimed() { return _regions_claimed; }
1832   double claimed_region_time_sec() { return _claimed_region_time; }
1833   double max_region_time_sec() { return _max_region_time; }
1834 };
1835 
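     // Gang task that applies G1NoteEndOfConcMarkClosure to the heap regions
     // in parallel and then merges the per-worker results (freed bytes,
     // local cleanup lists, remembered set cleanup work) into the global
     // state.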
1836 class G1ParNoteEndTask: public AbstractGangTask {
1837   friend class G1NoteEndOfConcMarkClosure;
1838 
1839 protected:
1840   G1CollectedHeap* _g1h;
1841   size_t _max_live_bytes;
1842   size_t _freed_bytes;
1843   FreeRegionList* _cleanup_list;
1844   HeapRegionClaimer _hrclaimer;
1845 
1846 public:
1847   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1848       AbstractGangTask("G1 note end"), _g1h(g1h), _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1849   }
1850 
1851   void work(uint worker_id) {
1852     FreeRegionList local_cleanup_list("Local Cleanup List");
1853     HRRSCleanupTask hrrs_cleanup_task;
1854     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1855                                            &hrrs_cleanup_task);
1856     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1857     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1858 
1859     // Now update the lists
1860     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1861     {
1862       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1863       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1864       _max_live_bytes += g1_note_end.max_live_bytes();
1865       _freed_bytes += g1_note_end.freed_bytes();
1866 
1867       // If we iterate over the global cleanup list at the end of
1868       // cleanup to do this printing we will not guarantee to only
1869       // generate output for the newly-reclaimed regions (the list
1870       // might not be empty at the beginning of cleanup; we might
1871       // still be working on its previous contents). So we do the
1872       // printing here, before we append the new regions to the global
1873       // cleanup list.
1874 
1875       G1HRPrinter* hr_printer = _g1h->hr_printer();
1876       if (hr_printer->is_active()) {
1877         FreeRegionListIterator iter(&local_cleanup_list);
1878         while (iter.more_available()) {
1879           HeapRegion* hr = iter.get_next();
1880           hr_printer->cleanup(hr);
1881         }
1882       }
1883 
1884       _cleanup_list->add_ordered(&local_cleanup_list);
1885       assert(local_cleanup_list.is_empty(), "post-condition");
1886 
1887       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1888     }
1889   }
1890   size_t max_live_bytes() { return _max_live_bytes; }
1891   size_t freed_bytes() { return _freed_bytes; }
1892 };
1893 
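     // Gang task that scrubs the remembered sets in parallel, using the
     // region and card liveness bitmaps to drop entries that no longer
     // refer to live data.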
1894 class G1ParScrubRemSetTask: public AbstractGangTask {
1895 protected:
1896   G1RemSet* _g1rs;
1897   BitMap* _region_bm;
1898   BitMap* _card_bm;
1899   HeapRegionClaimer _hrclaimer;
1900 
1901 public:
1902   G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
1903       AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) {
1904   }
1905 
1906   void work(uint worker_id) {
1907     _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
1908   }
1909 
1910 };
1911 
1912 void ConcurrentMark::cleanup() {
1913   // world is stopped at this checkpoint
1914   assert(SafepointSynchronize::is_at_safepoint(),
1915          "world should be stopped");
1916   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1917 
1918   // If a full collection has happened, we shouldn't do this.
1919   if (has_aborted()) {
1920     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1921     return;
1922   }
1923 
1924   g1h->verify_region_sets_optional();
1925 
1926   if (VerifyDuringGC) {
1927     HandleMark hm;  // handle scope
1928     g1h->prepare_for_verify();
1929     Universe::verify(VerifyOption_G1UsePrevMarking,
1930                      " VerifyDuringGC:(before)");
1931   }
1932   g1h->check_bitmaps("Cleanup Start");
1933 
1934   G1CollectorPolicy* g1p = g1h->g1_policy();
1935   g1p->record_concurrent_mark_cleanup_start();
1936 
1937   double start = os::elapsedTime();
1938 
1939   HeapRegionRemSet::reset_for_cleanup_tasks();
1940 
1941   uint n_workers;
1942 
1943   // Do counting once more with the world stopped for good measure.
1944   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1945 
1946   g1h->set_par_threads();
1947   n_workers = g1h->n_par_threads();
1948   assert(g1h->n_par_threads() == n_workers,
1949          "Should not have been reset");
1950   g1h->workers()->run_task(&g1_par_count_task);
1951   // Done with the parallel phase so reset to 0.
1952   g1h->set_par_threads(0);
1953 
1954   if (VerifyDuringGC) {
1955     // Verify that the counting data accumulated during marking matches
1956     // that calculated by walking the marking bitmap.
1957 
1958     // Bitmaps to hold expected values
1959     BitMap expected_region_bm(_region_bm.size(), true);
1960     BitMap expected_card_bm(_card_bm.size(), true);
1961 
1962     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1963                                                  &_region_bm,
1964                                                  &_card_bm,
1965                                                  &expected_region_bm,
1966                                                  &expected_card_bm);
1967 
1968     g1h->set_par_threads((int)n_workers);
1969     g1h->workers()->run_task(&g1_par_verify_task);
1970     // Done with the parallel phase so reset to 0.
1971     g1h->set_par_threads(0);
1972 
1973     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1974   }
1975 
1976   size_t start_used_bytes = g1h->used();
1977   g1h->set_marking_complete();
1978 
1979   double count_end = os::elapsedTime();
1980   double this_final_counting_time = (count_end - start);
1981   _total_counting_time += this_final_counting_time;
1982 
1983   if (G1PrintRegionLivenessInfo) {
1984     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1985     _g1h->heap_region_iterate(&cl);
1986   }
1987 
1988   // Install the newly created mark bitmap as "prev".
1989   swapMarkBitMaps();
1990 
1991   g1h->reset_gc_time_stamp();
1992 
1993   // Note end of marking in all heap regions.
1994   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1995   g1h->set_par_threads((int)n_workers);
1996   g1h->workers()->run_task(&g1_par_note_end_task);
1997   g1h->set_par_threads(0);
1998   g1h->check_gc_time_stamps();
1999 
2000   if (!cleanup_list_is_empty()) {
2001     // The cleanup list is not empty, so we'll have to process it
2002     // concurrently. Notify anyone else that might be wanting free
2003     // regions that there will be more free regions coming soon.
2004     g1h->set_free_regions_coming();
2005   }
2006 
2007   // Scrub the remembered sets (if enabled) before the cleanup-end call
2008   // below, since it affects the metric by which we sort the heap regions.
2009   if (G1ScrubRemSets) {
2010     double rs_scrub_start = os::elapsedTime();
2011     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
2012     g1h->set_par_threads((int)n_workers);
2013     g1h->workers()->run_task(&g1_par_scrub_rs_task);
2014     g1h->set_par_threads(0);
2015 
2016     double rs_scrub_end = os::elapsedTime();
2017     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2018     _total_rs_scrub_time += this_rs_scrub_time;
2019   }
2020 
2021   // this will also free any regions totally full of garbage objects,
2022   // and sort the regions.
2023   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2024 
2025   // Statistics.
2026   double end = os::elapsedTime();
2027   _cleanup_times.add((end - start) * 1000.0);
2028 
2029   if (G1Log::fine()) {
2030     g1h->g1_policy()->print_heap_transition(start_used_bytes);
2031   }
2032 
2033   // Clean up will have freed any regions completely full of garbage.
2034   // Update the soft reference policy with the new heap occupancy.
2035   Universe::update_heap_info_at_gc();
2036 
2037   if (VerifyDuringGC) {
2038     HandleMark hm;  // handle scope
2039     g1h->prepare_for_verify();
2040     Universe::verify(VerifyOption_G1UsePrevMarking,
2041                      " VerifyDuringGC:(after)");
2042   }
2043 
2044   g1h->check_bitmaps("Cleanup End");
2045 
2046   g1h->verify_region_sets_optional();
2047 
2048   // We need to make this be a "collection" so any collection pause that
2049   // races with it goes around and waits for completeCleanup to finish.
2050   g1h->increment_total_collections();
2051 
2052   // Clean out dead classes and update Metaspace sizes.
2053   if (ClassUnloadingWithConcurrentMark) {
2054     ClassLoaderDataGraph::purge();
2055   }
2056   MetaspaceGC::compute_new_size();
2057 
2058   // We reclaimed old regions so we should calculate the sizes to make
2059   // sure we update the old gen/space data.
2060   g1h->g1mm()->update_sizes();
2061   g1h->allocation_context_stats().update_after_mark();
2062 
2063   g1h->trace_heap_after_concurrent_cycle();
2064 }
2065 
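     // Executed concurrently after the cleanup pause: clears the regions on
     // the cleanup list and appends them, in batches, to the secondary free
     // list.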
2066 void ConcurrentMark::completeCleanup() {
2067   if (has_aborted()) return;
2068 
2069   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2070 
2071   _cleanup_list.verify_optional();
2072   FreeRegionList tmp_free_list("Tmp Free List");
2073 
2074   if (G1ConcRegionFreeingVerbose) {
2075     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2076                            "cleanup list has %u entries",
2077                            _cleanup_list.length());
2078   }
2079 
2080   // No one else should be accessing the _cleanup_list at this point,
2081   // so it is not necessary to take any locks
2082   while (!_cleanup_list.is_empty()) {
2083     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
2084     assert(hr != NULL, "Got NULL from a non-empty list");
2085     hr->par_clear();
2086     tmp_free_list.add_ordered(hr);
2087 
2088     // Instead of adding one region at a time to the secondary_free_list,
2089     // we accumulate them in the local list and move them a few at a
2090     // time. This also cuts down on the number of notify_all() calls
2091     // we do during this process. We'll also append the local list when
2092     // _cleanup_list is empty (which means we just removed the last
2093     // region from the _cleanup_list).
2094     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2095         _cleanup_list.is_empty()) {
2096       if (G1ConcRegionFreeingVerbose) {
2097         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2098                                "appending %u entries to the secondary_free_list, "
2099                                "cleanup list still has %u entries",
2100                                tmp_free_list.length(),
2101                                _cleanup_list.length());
2102       }
2103 
2104       {
2105         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2106         g1h->secondary_free_list_add(&tmp_free_list);
2107         SecondaryFreeList_lock->notify_all();
2108       }
2109 #ifndef PRODUCT
2110       if (G1StressConcRegionFreeing) {
2111         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2112           os::sleep(Thread::current(), (jlong) 1, false);
2113         }
2114       }
2115 #endif
2116     }
2117   }
2118   assert(tmp_free_list.is_empty(), "post-condition");
2119 }
2120 
2121 // Supporting Object and Oop closures for reference discovery
2122 // and processing during marking.
2123 
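     // An object is considered alive if it lies outside the G1 reserved
     // heap or if concurrent marking has not determined it to be dead
     // (i.e., !is_obj_ill()).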
2124 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2125   HeapWord* addr = (HeapWord*)obj;
2126   return addr != NULL &&
2127          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2128 }
2129 
2130 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2131 // Uses the CMTask associated with a worker thread (for serial reference
2132 // processing the CMTask for worker 0 is used) to preserve (mark) and
2133 // trace referent objects.
2134 //
2135 // Using the CMTask and embedded local queues avoids having the worker
2136 // threads operating on the global mark stack. This reduces the risk
2137 // of overflowing the stack - which we would rather avoid at this late
2138 // stage. Also, using the tasks' local queues removes the potential
2139 // for the workers to interfere with each other, which could occur if
2140 // they were operating on the global stack.
2141 
2142 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2143   ConcurrentMark* _cm;
2144   CMTask*         _task;
2145   int             _ref_counter_limit;
2146   int             _ref_counter;
2147   bool            _is_serial;
2148  public:
2149   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2150     _cm(cm), _task(task), _is_serial(is_serial),
2151     _ref_counter_limit(G1RefProcDrainInterval) {
2152     assert(_ref_counter_limit > 0, "sanity");
2153     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2154     _ref_counter = _ref_counter_limit;
2155   }
2156 
2157   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2158   virtual void do_oop(      oop* p) { do_oop_work(p); }
2159 
2160   template <class T> void do_oop_work(T* p) {
2161     if (!_cm->has_overflown()) {
2162       oop obj = oopDesc::load_decode_heap_oop(p);
2163       if (_cm->verbose_high()) {
2164         gclog_or_tty->print_cr("\t[%u] we're looking at location "
2165                                "*" PTR_FORMAT " = " PTR_FORMAT,
2166                                _task->worker_id(), p2i(p), p2i((void*) obj));
2167       }
2168 
2169       _task->deal_with_reference(obj);
2170       _ref_counter--;
2171 
2172       if (_ref_counter == 0) {
2173         // We have dealt with _ref_counter_limit references, pushing them
2174         // and objects reachable from them on to the local stack (and
2175         // possibly the global stack). Call CMTask::do_marking_step() to
2176         // process these entries.
2177         //
2178         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2179         // there's nothing more to do (i.e. we're done with the entries that
2180         // were pushed as a result of the CMTask::deal_with_reference() calls
2181         // above) or we overflow.
2182         //
2183         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2184         // flag while there may still be some work to do. (See the comment at
2185         // the beginning of CMTask::do_marking_step() for those conditions -
2186         // one of which is reaching the specified time target.) It is only
2187         // when CMTask::do_marking_step() returns without setting the
2188         // has_aborted() flag that the marking step has completed.
2189         do {
2190           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2191           _task->do_marking_step(mark_step_duration_ms,
2192                                  false      /* do_termination */,
2193                                  _is_serial);
2194         } while (_task->has_aborted() && !_cm->has_overflown());
2195         _ref_counter = _ref_counter_limit;
2196       }
2197     } else {
2198       if (_cm->verbose_high()) {
2199          gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2200       }
2201     }
2202   }
2203 };
2204 
2205 // 'Drain' oop closure used by both serial and parallel reference processing.
2206 // Uses the CMTask associated with a given worker thread (for serial
2207 // reference processing the CMTask for worker 0 is used). Calls the
2208 // do_marking_step routine, with an unbelievably large timeout value,
2209 // to drain the marking data structures of the remaining entries
2210 // added by the 'keep alive' oop closure above.
2211 
2212 class G1CMDrainMarkingStackClosure: public VoidClosure {
2213   ConcurrentMark* _cm;
2214   CMTask*         _task;
2215   bool            _is_serial;
2216  public:
2217   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2218     _cm(cm), _task(task), _is_serial(is_serial) {
2219     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2220   }
2221 
2222   void do_void() {
2223     do {
2224       if (_cm->verbose_high()) {
2225         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2226                                _task->worker_id(), BOOL_TO_STR(_is_serial));
2227       }
2228 
2229       // We call CMTask::do_marking_step() to completely drain the local
2230       // and global marking stacks of entries pushed by the 'keep alive'
2231       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2232       //
2233       // CMTask::do_marking_step() is called in a loop, which we'll exit
2234       // if there's nothing more to do (i.e. we've completely drained the
2235       // entries that were pushed as a result of applying the 'keep alive'
2236       // closure to the entries on the discovered ref lists) or we overflow
2237       // the global marking stack.
2238       //
2239       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2240       // flag while there may still be some work to do. (See the comment at
2241       // the beginning of CMTask::do_marking_step() for those conditions -
2242       // one of which is reaching the specified time target.) It is only
2243       // when CMTask::do_marking_step() returns without setting the
2244       // has_aborted() flag that the marking step has completed.
2245 
2246       _task->do_marking_step(1000000000.0 /* something very large */,
2247                              true         /* do_termination */,
2248                              _is_serial);
2249     } while (_task->has_aborted() && !_cm->has_overflown());
2250   }
2251 };
2252 
2253 // Implementation of AbstractRefProcTaskExecutor for parallel
2254 // reference processing at the end of G1 concurrent marking
2255 
2256 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2257 private:
2258   G1CollectedHeap* _g1h;
2259   ConcurrentMark*  _cm;
2260   WorkGang*        _workers;
2261   uint             _active_workers;
2262 
2263 public:
2264   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2265                           ConcurrentMark* cm,
2266                           WorkGang* workers,
2267                           uint n_workers) :
2268     _g1h(g1h), _cm(cm),
2269     _workers(workers), _active_workers(n_workers) { }
2270 
2271   // Executes the given task using concurrent marking worker threads.
2272   virtual void execute(ProcessTask& task);
2273   virtual void execute(EnqueueTask& task);
2274 };
2275 
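     // Gang task proxy that runs a reference processing ProcessTask using
     // the per-worker, CMTask-based 'keep alive' and 'drain' closures
     // defined above.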
2276 class G1CMRefProcTaskProxy: public AbstractGangTask {
2277   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2278   ProcessTask&     _proc_task;
2279   G1CollectedHeap* _g1h;
2280   ConcurrentMark*  _cm;
2281 
2282 public:
2283   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2284                      G1CollectedHeap* g1h,
2285                      ConcurrentMark* cm) :
2286     AbstractGangTask("Process reference objects in parallel"),
2287     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2288     ReferenceProcessor* rp = _g1h->ref_processor_cm();
2289     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2290   }
2291 
2292   virtual void work(uint worker_id) {
2293     ResourceMark rm;
2294     HandleMark hm;
2295     CMTask* task = _cm->task(worker_id);
2296     G1CMIsAliveClosure g1_is_alive(_g1h);
2297     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2298     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2299 
2300     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2301   }
2302 };
2303 
2304 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2305   assert(_workers != NULL, "Need parallel worker threads.");
2306   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2307 
2308   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2309 
2310   // We need to reset the concurrency level before each
2311   // proxy task execution, so that the termination protocol
2312   // and overflow handling in CMTask::do_marking_step() know
2313   // how many workers to wait for.
2314   _cm->set_concurrency(_active_workers);
2315   _g1h->set_par_threads(_active_workers);
2316   _workers->run_task(&proc_task_proxy);
2317   _g1h->set_par_threads(0);
2318 }
2319 
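     // Gang task proxy that runs a reference processing EnqueueTask in
     // parallel.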
2320 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2321   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2322   EnqueueTask& _enq_task;
2323 
2324 public:
2325   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2326     AbstractGangTask("Enqueue reference objects in parallel"),
2327     _enq_task(enq_task) { }
2328 
2329   virtual void work(uint worker_id) {
2330     _enq_task.work(worker_id);
2331   }
2332 };
2333 
2334 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2335   assert(_workers != NULL, "Need parallel worker threads.");
2336   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2337 
2338   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2339 
2340   // Not strictly necessary but...
2341   //
2342   // We need to reset the concurrency level before each
2343   // proxy task execution, so that the termination protocol
2344   // and overflow handling in CMTask::do_marking_step() know
2345   // how many workers to wait for.
2346   _cm->set_concurrency(_active_workers);
2347   _g1h->set_par_threads(_active_workers);
2348   _workers->run_task(&enq_task_proxy);
2349   _g1h->set_par_threads(0);
2350 }
2351 
2352 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2353   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2354 }
2355 
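     // Processes the references discovered during marking and, depending on
     // the flags, unloads classes and cleans the string/symbol tables and
     // the code cache. Performed as part of the remark pause.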
2356 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2357   if (has_overflown()) {
2358     // Skip processing the discovered references if we have
2359     // overflown the global marking stack. Reference objects
2360     // only get discovered once so it is OK to not
2361     // de-populate the discovered reference lists. We could have,
2362     // but the only benefit would be that, when marking restarts,
2363     // less reference objects are discovered.
2364     return;
2365   }
2366 
2367   ResourceMark rm;
2368   HandleMark   hm;
2369 
2370   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2371 
2372   // Is alive closure.
2373   G1CMIsAliveClosure g1_is_alive(g1h);
2374 
2375   // Inner scope to exclude the cleaning of the string and symbol
2376   // tables from the displayed time.
2377   {
2378     G1CMTraceTime t("GC ref-proc", G1Log::finer());
2379 
2380     ReferenceProcessor* rp = g1h->ref_processor_cm();
2381 
2382     // See the comment in G1CollectedHeap::ref_processing_init()
2383     // about how reference processing currently works in G1.
2384 
2385     // Set the soft reference policy
2386     rp->setup_policy(clear_all_soft_refs);
2387     assert(_markStack.isEmpty(), "mark stack should be empty");
2388 
2389     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2390     // in serial reference processing. Note these closures are also
2391     // used for serially processing (by the current thread) the
2392     // JNI references during parallel reference processing.
2393     //
2394     // These closures do not need to synchronize with the worker
2395     // threads involved in parallel reference processing as these
2396     // instances are executed serially by the current thread (e.g.
2397     // reference processing is not multi-threaded and is thus
2398     // performed by the current thread instead of a gang worker).
2399     //
2400     // The gang tasks involved in parallel reference processing create
2401     // their own instances of these closures, which do their own
2402     // synchronization among themselves.
2403     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2404     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2405 
2406     // We need at least one active thread. If reference processing
2407     // is not multi-threaded we use the current (VMThread) thread,
2408     // otherwise we use the work gang from the G1CollectedHeap and
2409     // we utilize all the worker threads we can.
2410     bool processing_is_mt = rp->processing_is_mt();
2411     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2412     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2413 
2414     // Parallel processing task executor.
2415     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2416                                               g1h->workers(), active_workers);
2417     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2418 
2419     // Set the concurrency level. The phase was already set prior to
2420     // executing the remark task.
2421     set_concurrency(active_workers);
2422 
2423     // Set the degree of MT processing here.  If the discovery was done MT,
2424     // the number of threads involved during discovery could differ from
2425     // the number of active workers.  This is OK as long as the discovered
2426     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2427     rp->set_active_mt_degree(active_workers);
2428 
2429     // Process the weak references.
2430     const ReferenceProcessorStats& stats =
2431         rp->process_discovered_references(&g1_is_alive,
2432                                           &g1_keep_alive,
2433                                           &g1_drain_mark_stack,
2434                                           executor,
2435                                           g1h->gc_timer_cm(),
2436                                           concurrent_gc_id());
2437     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2438 
2439     // The do_oop work routines of the keep_alive and drain_marking_stack
2440     // oop closures will set the has_overflown flag if we overflow the
2441     // global marking stack.
2442 
2443     assert(_markStack.overflow() || _markStack.isEmpty(),
2444             "mark stack should be empty (unless it overflowed)");
2445 
2446     if (_markStack.overflow()) {
2447       // This should have been done already when we tried to push an
2448       // entry on to the global mark stack. But let's do it again.
2449       set_has_overflown();
2450     }
2451 
2452     assert(rp->num_q() == active_workers, "why not");
2453 
2454     rp->enqueue_discovered_references(executor);
2455 
2456     rp->verify_no_references_recorded();
2457     assert(!rp->discovery_enabled(), "Post condition");
2458   }
2459 
2460   if (has_overflown()) {
2461     // We can not trust g1_is_alive if the marking stack overflowed
2462     return;
2463   }
2464 
2465   assert(_markStack.isEmpty(), "Marking should have completed");
2466 
2467   // Unload Klasses, String, Symbols, Code Cache, etc.
2468   {
2469     G1CMTraceTime trace("Unloading", G1Log::finer());
2470 
2471     if (ClassUnloadingWithConcurrentMark) {
2472       bool purged_classes;
2473 
2474       {
2475         G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest());
2476         purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2477       }
2478 
2479       {
2480         G1CMTraceTime trace("Parallel Unloading", G1Log::finest());
2481         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2482       }
2483     }
2484 
2485     if (G1StringDedup::is_enabled()) {
2486       G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest());
2487       G1StringDedup::unlink(&g1_is_alive);
2488     }
2489   }
2490 }
2491 
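     // Once marking has completed, the fully-built "next" bitmap becomes
     // the "prev" bitmap; the old "prev" bitmap will be cleared and reused
     // as the "next" bitmap for the following marking cycle.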
2492 void ConcurrentMark::swapMarkBitMaps() {
2493   CMBitMapRO* temp = _prevMarkBitMap;
2494   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2495   _nextMarkBitMap  = (CMBitMap*)  temp;
2496 }
2497 
2498 // Closure for marking entries in SATB buffers.
2499 class CMSATBBufferClosure : public SATBBufferClosure {
2500 private:
2501   CMTask* _task;
2502   G1CollectedHeap* _g1h;
2503 
2504   // This is very similar to CMTask::deal_with_reference, but with
2505   // more relaxed requirements for the argument, so this must be more
2506   // circumspect about treating the argument as an object.
2507   void do_entry(void* entry) const {
2508     _task->increment_refs_reached();
2509     HeapRegion* hr = _g1h->heap_region_containing_raw(entry);
2510     if (entry < hr->next_top_at_mark_start()) {
2511       // Until we get here, we don't know whether entry refers to a valid
2512       // object; it could instead have been a stale reference.
2513       oop obj = static_cast<oop>(entry);
2514       assert(obj->is_oop(true /* ignore mark word */),
2515              err_msg("Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj)));
2516       _task->make_reference_grey(obj, hr);
2517     }
2518   }
2519 
2520 public:
2521   CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2522     : _task(task), _g1h(g1h) { }
2523 
2524   virtual void do_buffer(void** buffer, size_t size) {
2525     for (size_t i = 0; i < size; ++i) {
2526       do_entry(buffer[i]);
2527     }
2528   }
2529 };
2530 
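     // Thread closure used during remark: for each claimed Java thread it
     // walks the thread's nmethods and flushes its SATB buffer; for the VM
     // thread it flushes the shared SATB queue.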
2531 class G1RemarkThreadsClosure : public ThreadClosure {
2532   CMSATBBufferClosure _cm_satb_cl;
2533   G1CMOopClosure _cm_cl;
2534   MarkingCodeBlobClosure _code_cl;
2535   int _thread_parity;
2536 
2537  public:
2538   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2539     _cm_satb_cl(task, g1h),
2540     _cm_cl(g1h, g1h->concurrent_mark(), task),
2541     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2542     _thread_parity(Threads::thread_claim_parity()) {}
2543 
2544   void do_thread(Thread* thread) {
2545     if (thread->is_Java_thread()) {
2546       if (thread->claim_oops_do(true, _thread_parity)) {
2547         JavaThread* jt = (JavaThread*)thread;
2548 
2549         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2550         // however, oops reachable from nmethods have very complex lifecycles:
2551         // * Alive if on the stack of an executing method
2552         // * Weakly reachable otherwise
2553         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2554         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2555         jt->nmethods_do(&_code_cl);
2556 
2557         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2558       }
2559     } else if (thread->is_VM_thread()) {
2560       if (thread->claim_oops_do(true, _thread_parity)) {
2561         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2562       }
2563     }
2564   }
2565 };
2566 
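     // Gang task for the parallel part of the remark pause: each active
     // worker first scans the threads it claims (see G1RemarkThreadsClosure)
     // and then finishes the marking work on its CMTask.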
2567 class CMRemarkTask: public AbstractGangTask {
2568 private:
2569   ConcurrentMark* _cm;
2570 public:
2571   void work(uint worker_id) {
2572     // Since all available tasks are actually started, we should
2573     // only proceed if we're supposed to be active.
2574     if (worker_id < _cm->active_tasks()) {
2575       CMTask* task = _cm->task(worker_id);
2576       task->record_start_time();
2577       {
2578         ResourceMark rm;
2579         HandleMark hm;
2580 
2581         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2582         Threads::threads_do(&threads_f);
2583       }
2584 
2585       do {
2586         task->do_marking_step(1000000000.0 /* something very large */,
2587                               true         /* do_termination       */,
2588                               false        /* is_serial            */);
2589       } while (task->has_aborted() && !_cm->has_overflown());
2590       // If we overflow, then we do not want to restart. We instead
2591       // want to abort remark and do concurrent marking again.
2592       task->record_end_time();
2593     }
2594   }
2595 
2596   CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2597     AbstractGangTask("Par Remark"), _cm(cm) {
2598     _cm->terminator()->reset_for_reuse(active_workers);
2599   }
2600 };
2601 
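     // The STW work of the remark pause: runs CMRemarkTask with all active
     // workers and then checks that, unless marking has overflowed, all
     // SATB buffers have been processed.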
2602 void ConcurrentMark::checkpointRootsFinalWork() {
2603   ResourceMark rm;
2604   HandleMark   hm;
2605   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2606 
2607   G1CMTraceTime trace("Finalize Marking", G1Log::finer());
2608 
2609   g1h->ensure_parsability(false);
2610 
2611   StrongRootsScope srs;
2612   // this is remark, so we'll use up all active threads
2613   uint active_workers = g1h->workers()->active_workers();
2614   if (active_workers == 0) {
2615     assert(active_workers > 0, "Should have been set earlier");
2616     active_workers = (uint) ParallelGCThreads;
2617     g1h->workers()->set_active_workers(active_workers);
2618   }
2619   set_concurrency_and_phase(active_workers, false /* concurrent */);
2620   // Leave _parallel_marking_threads at its
2621   // value originally calculated in the ConcurrentMark
2622   // constructor and pass values of the active workers
2623   // through the gang in the task.
2624 
2625   CMRemarkTask remarkTask(this, active_workers);
2626   // We will start all available threads, even if we decide that the
2627   // active_workers will be fewer. The extra ones will just bail out
2628   // immediately.
2629   g1h->set_par_threads(active_workers);
2630   g1h->workers()->run_task(&remarkTask);
2631   g1h->set_par_threads(0);
2632 
2633   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2634   guarantee(has_overflown() ||
2635             satb_mq_set.completed_buffers_num() == 0,
2636             err_msg("Invariant: has_overflown = %s, num buffers = %d",
2637                     BOOL_TO_STR(has_overflown()),
2638                     satb_mq_set.completed_buffers_num()));
2639 
2640   print_stats();
2641 }
2642 
2643 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2644   // Note we are overriding the read-only view of the prev map here, via
2645   // the cast.
2646   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2647 }
2648 
2649 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2650   _nextMarkBitMap->clearRange(mr);
2651 }
2652 
2653 HeapRegion*
2654 ConcurrentMark::claim_region(uint worker_id) {
2655   // "checkpoint" the finger
2656   HeapWord* finger = _finger;
2657 
2658   // _heap_end will not change underneath our feet; it only changes at
2659   // yield points.
2660   while (finger < _heap_end) {
2661     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2662 
2663     // Note on how this code handles humongous regions. In the
2664     // normal case the finger will reach the start of a "starts
2665     // humongous" (SH) region. Its end will either be the end of the
2666     // last "continues humongous" (CH) region in the sequence, or the
2667     // standard end of the SH region (if the SH is the only region in
2668     // the sequence). That way claim_region() will skip over the CH
2669     // regions. However, there is a subtle race between a CM thread
2670     // executing this method and a mutator thread doing a humongous
2671     // object allocation. The two are not mutually exclusive as the CM
2672     // thread does not need to hold the Heap_lock when it gets
2673     // here. So there is a chance that claim_region() will come across
2674     // a free region that's in the process of becoming a SH or a CH
2675     // region. In the former case, it will either
2676     //   a) Miss the update to the region's end, in which case it will
2677     //      visit every subsequent CH region, will find their bitmaps
2678     //      empty, and do nothing, or
2679     //   b) Will observe the update of the region's end (in which case
2680     //      it will skip the subsequent CH regions).
2681     // If it comes across a region that suddenly becomes CH, the
2682     // scenario will be similar to b). So, the race between
2683     // claim_region() and a humongous object allocation might force us
2684     // to do a bit of unnecessary work (due to some unnecessary bitmap
2685     // iterations) but it should not introduce any correctness issues.
2686     HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2687 
2688     // The above heap_region_containing_raw() may return NULL as we always
2689     // claim until the end of the heap. In this case, just jump to the next region.
2690     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2691 
2692     // Is the gap between reading the finger and doing the CAS too long?
2693     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2694     if (res == finger && curr_region != NULL) {
2695       // we succeeded
2696       HeapWord*   bottom        = curr_region->bottom();
2697       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2698 
2699       if (verbose_low()) {
2700         gclog_or_tty->print_cr("[%u] curr_region = " PTR_FORMAT " "
2701                                "[" PTR_FORMAT ", " PTR_FORMAT "), "
2702                                "limit = " PTR_FORMAT,
2703                                worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2704       }
2705 
2706       // notice that _finger == end cannot be guaranteed here since
2707       // someone else might have moved the finger even further
2708       assert(_finger >= end, "the finger should have moved forward");
2709 
2710       if (verbose_low()) {
2711         gclog_or_tty->print_cr("[%u] we were successful with region = "
2712                                PTR_FORMAT, worker_id, p2i(curr_region));
2713       }
2714 
2715       if (limit > bottom) {
2716         if (verbose_low()) {
2717           gclog_or_tty->print_cr("[%u] region " PTR_FORMAT " is not empty, "
2718                                  "returning it ", worker_id, p2i(curr_region));
2719         }
2720         return curr_region;
2721       } else {
2722         assert(limit == bottom,
2723                "the region limit should be at bottom");
2724         if (verbose_low()) {
2725           gclog_or_tty->print_cr("[%u] region " PTR_FORMAT " is empty, "
2726                                  "returning NULL", worker_id, p2i(curr_region));
2727         }
2728         // we return NULL and the caller should try calling
2729         // claim_region() again.
2730         return NULL;
2731       }
2732     } else {
2733       assert(_finger > finger, "the finger should have moved forward");
2734       if (verbose_low()) {
2735         if (curr_region == NULL) {
2736           gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, "
2737                                  "global finger = " PTR_FORMAT ", "
2738                                  "our finger = " PTR_FORMAT,
2739                                  worker_id, p2i(_finger), p2i(finger));
2740         } else {
2741           gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
2742                                  "global finger = " PTR_FORMAT ", "
2743                                  "our finger = " PTR_FORMAT,
2744                                  worker_id, p2i(_finger), p2i(finger));
2745         }
2746       }
2747 
2748       // read it again
2749       finger = _finger;
2750     }
2751   }
2752 
2753   return NULL;
2754 }
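// Illustrative note (not part of the implementation): claim_region() may
// return NULL even though unclaimed regions remain (for instance when it
// claims an empty region), so callers are expected to retry and to use
// out_of_regions() to decide when to stop. A minimal sketch of the expected
// caller pattern:
//
//   HeapRegion* hr = NULL;
//   while (hr == NULL && !has_aborted() && !out_of_regions()) {
//     hr = claim_region(worker_id);
//   }
//
// do_marking_step() below follows this pattern when it runs out of work in
// its current region.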
2755 
2756 #ifndef PRODUCT
2757 enum VerifyNoCSetOopsPhase {
2758   VerifyNoCSetOopsStack,
2759   VerifyNoCSetOopsQueues
2760 };
2761 
2762 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
2763 private:
2764   G1CollectedHeap* _g1h;
2765   VerifyNoCSetOopsPhase _phase;
2766   int _info;
2767 
2768   const char* phase_str() {
2769     switch (_phase) {
2770     case VerifyNoCSetOopsStack:         return "Stack";
2771     case VerifyNoCSetOopsQueues:        return "Queue";
2772     default:                            ShouldNotReachHere();
2773     }
2774     return NULL;
2775   }
2776 
2777   void do_object_work(oop obj) {
2778     guarantee(!_g1h->obj_in_cs(obj),
2779               err_msg("obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2780                       p2i((void*) obj), phase_str(), _info));
2781   }
2782 
2783 public:
2784   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2785 
2786   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2787     _phase = phase;
2788     _info = info;
2789   }
2790 
2791   virtual void do_oop(oop* p) {
2792     oop obj = oopDesc::load_decode_heap_oop(p);
2793     do_object_work(obj);
2794   }
2795 
2796   virtual void do_oop(narrowOop* p) {
2797     // We should not come across narrow oops while scanning marking
2798     // stacks
2799     ShouldNotReachHere();
2800   }
2801 
2802   virtual void do_object(oop obj) {
2803     do_object_work(obj);
2804   }
2805 };
2806 
2807 void ConcurrentMark::verify_no_cset_oops() {
2808   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2809   if (!G1CollectedHeap::heap()->mark_in_progress()) {
2810     return;
2811   }
2812 
2813   VerifyNoCSetOopsClosure cl;
2814 
2815   // Verify entries on the global mark stack
2816   cl.set_phase(VerifyNoCSetOopsStack);
2817   _markStack.oops_do(&cl);
2818 
2819   // Verify entries on the task queues
2820   for (uint i = 0; i < _max_worker_id; i += 1) {
2821     cl.set_phase(VerifyNoCSetOopsQueues, i);
2822     CMTaskQueue* queue = _task_queues->queue(i);
2823     queue->oops_do(&cl);
2824   }
2825 
2826   // Verify the global finger
2827   HeapWord* global_finger = finger();
2828   if (global_finger != NULL && global_finger < _heap_end) {
2829     // The global finger always points to a heap region boundary. We
2830     // use heap_region_containing_raw() to get the containing region
2831     // given that the global finger could be pointing to a free region
2832     // which subsequently becomes continues humongous. If that
2833     // happens, heap_region_containing() would return the corresponding
2834     // starts humongous region, whose bottom() no longer matches the
2835     // finger, and the check below would not hold any more.
2836     // Since we always iterate over all regions, we might get a NULL HeapRegion
2837     // here.
2838     HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2839     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2840               err_msg("global finger: " PTR_FORMAT " region: " HR_FORMAT,
2841                       p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
2842   }
2843 
2844   // Verify the task fingers
2845   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2846   for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
2847     CMTask* task = _tasks[i];
2848     HeapWord* task_finger = task->finger();
2849     if (task_finger != NULL && task_finger < _heap_end) {
2850       // See above note on the global finger verification.
2851       HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2852       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2853                 !task_hr->in_collection_set(),
2854                 err_msg("task finger: " PTR_FORMAT " region: " HR_FORMAT,
2855                         p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
2856     }
2857   }
2858 }
2859 #endif // PRODUCT
2860 
2861 // Aggregate the counting data that was constructed concurrently
2862 // with marking.
2863 class AggregateCountDataHRClosure: public HeapRegionClosure {
2864   G1CollectedHeap* _g1h;
2865   ConcurrentMark* _cm;
2866   CardTableModRefBS* _ct_bs;
2867   BitMap* _cm_card_bm;
2868   uint _max_worker_id;
2869 
2870  public:
2871   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2872                               BitMap* cm_card_bm,
2873                               uint max_worker_id) :
2874     _g1h(g1h), _cm(g1h->concurrent_mark()),
2875     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2876     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2877 
2878   bool doHeapRegion(HeapRegion* hr) {
2879     if (hr->is_continues_humongous()) {
2880       // We will ignore these here and process them when their
2881       // associated "starts humongous" region is processed.
2882       // Note that we cannot rely on their associated
2883     // "starts humongous" region to have its bit set to 1
2884       // since, due to the region chunking in the parallel region
2885       // iteration, a "continues humongous" region might be visited
2886       // before its associated "starts humongous".
2887       return false;
2888     }
2889 
2890     HeapWord* start = hr->bottom();
2891     HeapWord* limit = hr->next_top_at_mark_start();
2892     HeapWord* end = hr->end();
2893 
2894     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2895            err_msg("Preconditions not met - "
2896                    "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2897                    "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2898                    p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));
2899 
2900     assert(hr->next_marked_bytes() == 0, "Precondition");
2901 
2902     if (start == limit) {
2903       // NTAMS of this region has not been set so nothing to do.
2904       return false;
2905     }
2906 
2907     // 'start' should be in the heap.
2908     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2909     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2910     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2911 
2912     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2913     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2914     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2915 
2916     // If ntams is not card aligned then we bump the card bitmap index
2917     // for limit so that we get all the cards spanned by
2918     // the object ending at ntams.
2919     // Note: if this is the last region in the heap then ntams
2920     // could actually be just beyond the end of the heap;
2921     // limit_idx will then correspond to a (non-existent) card
2922     // that is also outside the heap.
2923     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2924       limit_idx += 1;
2925     }
2926 
2927     assert(limit_idx <= end_idx, "or else use atomics");
2928 
2929     // Aggregate the "stripe" in the count data associated with hr.
2930     uint hrm_index = hr->hrm_index();
2931     size_t marked_bytes = 0;
2932 
2933     for (uint i = 0; i < _max_worker_id; i += 1) {
2934       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2935       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2936 
2937       // Fetch the marked_bytes in this region for task i and
2938       // add it to the running total for this region.
2939       marked_bytes += marked_bytes_array[hrm_index];
2940 
2941       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2942       // into the global card bitmap.
2943       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2944 
2945       while (scan_idx < limit_idx) {
2946         assert(task_card_bm->at(scan_idx) == true, "should be");
2947         _cm_card_bm->set_bit(scan_idx);
2948         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2949 
2950         // BitMap::get_next_one_offset() can handle the case when
2951         // its left_offset parameter is greater than its right_offset
2952         // parameter. It does, however, have an early exit if
2953         // left_offset == right_offset. So let's limit the value
2954         // passed in for left offset here.
2955         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2956         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2957       }
2958     }
2959 
2960     // Update the marked bytes for this region.
2961     hr->add_to_marked_bytes(marked_bytes);
2962 
2963     // Next heap region
2964     return false;
2965   }
2966 };
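// In summary (illustrative pseudo-formulae, not code), for each non-empty,
// non-"continues humongous" region the closure above computes:
//
//   marked_bytes(hr)  =  sum over workers w of
//                          count_marked_bytes_array_for(w)[hr->hrm_index()]
//   _cm_card_bm[c]   |=  count_card_bitmap_for(w)[c]
//                        for every worker w and card index c in [start_idx, limit_idx)
//
// i.e. the per-worker liveness "stripes" built during marking are folded into
// the per-region marked-bytes total and the single global card bitmap.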
2967 
2968 class G1AggregateCountDataTask: public AbstractGangTask {
2969 protected:
2970   G1CollectedHeap* _g1h;
2971   ConcurrentMark* _cm;
2972   BitMap* _cm_card_bm;
2973   uint _max_worker_id;
2974   uint _active_workers;
2975   HeapRegionClaimer _hrclaimer;
2976 
2977 public:
2978   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2979                            ConcurrentMark* cm,
2980                            BitMap* cm_card_bm,
2981                            uint max_worker_id,
2982                            uint n_workers) :
2983       AbstractGangTask("Count Aggregation"),
2984       _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2985       _max_worker_id(max_worker_id),
2986       _active_workers(n_workers),
2987       _hrclaimer(_active_workers) {
2988   }
2989 
2990   void work(uint worker_id) {
2991     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
2992 
2993     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
2994   }
2995 };
2996 
2997 
2998 void ConcurrentMark::aggregate_count_data() {
2999   uint n_workers = _g1h->workers()->active_workers();
3000 
3001   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3002                                            _max_worker_id, n_workers);
3003 
3004   _g1h->set_par_threads(n_workers);
3005   _g1h->workers()->run_task(&g1_par_agg_task);
3006   _g1h->set_par_threads(0);
3007 }
3008 
3009 // Clear the per-worker arrays used to store the per-region counting data
3010 void ConcurrentMark::clear_all_count_data() {
3011   // Clear the global card bitmap - it will be filled during
3012   // liveness count aggregation (during remark) and the
3013   // final counting task.
3014   _card_bm.clear();
3015 
3016   // Clear the global region bitmap - it will be filled as part
3017   // of the final counting task.
3018   _region_bm.clear();
3019 
3020   uint max_regions = _g1h->max_regions();
3021   assert(_max_worker_id > 0, "uninitialized");
3022 
3023   for (uint i = 0; i < _max_worker_id; i += 1) {
3024     BitMap* task_card_bm = count_card_bitmap_for(i);
3025     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3026 
3027     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3028     assert(marked_bytes_array != NULL, "uninitialized");
3029 
3030     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3031     task_card_bm->clear();
3032   }
3033 }
3034 
3035 void ConcurrentMark::print_stats() {
3036   if (verbose_stats()) {
3037     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3038     for (size_t i = 0; i < _active_tasks; ++i) {
3039       _tasks[i]->print_stats();
3040       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3041     }
3042   }
3043 }
3044 
3045 // abandon current marking iteration due to a Full GC
3046 void ConcurrentMark::abort() {
3047   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
3048   // concurrent bitmap clearing.
3049   _nextMarkBitMap->clearAll();
3050 
3051   // Note we cannot clear the previous marking bitmap here
3052   // since VerifyDuringGC verifies the objects marked during
3053   // a full GC against the previous bitmap.
3054 
3055   // Clear the liveness counting data
3056   clear_all_count_data();
3057   // Empty mark stack
3058   reset_marking_state();
3059   for (uint i = 0; i < _max_worker_id; ++i) {
3060     _tasks[i]->clear_region_fields();
3061   }
3062   _first_overflow_barrier_sync.abort();
3063   _second_overflow_barrier_sync.abort();
3064   const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id();
3065   if (!gc_id.is_undefined()) {
3066     // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance
3067     // to detect that it was aborted. Only keep track of the first GC id that we aborted.
3068     _aborted_gc_id = gc_id;
3069    }
3070   _has_aborted = true;
3071 
3072   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3073   satb_mq_set.abandon_partial_marking();
3074   // This can be called either during or outside marking, we'll read
3075   // the expected_active value from the SATB queue set.
3076   satb_mq_set.set_active_all_threads(
3077                                  false, /* new active value */
3078                                  satb_mq_set.is_active() /* expected_active */);
3079 
3080   _g1h->trace_heap_after_concurrent_cycle();
3081   _g1h->register_concurrent_cycle_end();
3082 }
3083 
3084 const GCId& ConcurrentMark::concurrent_gc_id() {
3085   if (has_aborted()) {
3086     return _aborted_gc_id;
3087   }
3088   return _g1h->gc_tracer_cm()->gc_id();
3089 }
3090 
3091 static void print_ms_time_info(const char* prefix, const char* name,
3092                                NumberSeq& ns) {
3093   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3094                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3095   if (ns.num() > 0) {
3096     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3097                            prefix, ns.sd(), ns.maximum());
3098   }
3099 }
3100 
3101 void ConcurrentMark::print_summary_info() {
3102   gclog_or_tty->print_cr(" Concurrent marking:");
3103   print_ms_time_info("  ", "init marks", _init_times);
3104   print_ms_time_info("  ", "remarks", _remark_times);
3105   {
3106     print_ms_time_info("     ", "final marks", _remark_mark_times);
3107     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3108   }
3110   print_ms_time_info("  ", "cleanups", _cleanup_times);
3111   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3112                          _total_counting_time,
3113                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3114                           (double)_cleanup_times.num()
3115                          : 0.0));
3116   if (G1ScrubRemSets) {
3117     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3118                            _total_rs_scrub_time,
3119                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3120                             (double)_cleanup_times.num()
3121                            : 0.0));
3122   }
3123   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3124                          (_init_times.sum() + _remark_times.sum() +
3125                           _cleanup_times.sum())/1000.0);
3126   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3127                 "(%8.2f s marking).",
3128                 cmThread()->vtime_accum(),
3129                 cmThread()->vtime_mark_accum());
3130 }
3131 
3132 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3133   _parallel_workers->print_worker_threads_on(st);
3134 }
3135 
3136 void ConcurrentMark::print_on_error(outputStream* st) const {
3137   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
3138       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
3139   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
3140   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
3141 }
3142 
3143 // We take a break if someone is trying to stop the world.
3144 bool ConcurrentMark::do_yield_check(uint worker_id) {
3145   if (SuspendibleThreadSet::should_yield()) {
3146     if (worker_id == 0) {
3147       _g1h->g1_policy()->record_concurrent_pause();
3148     }
3149     SuspendibleThreadSet::yield();
3150     return true;
3151   } else {
3152     return false;
3153   }
3154 }
3155 
3156 #ifndef PRODUCT
3157 // for debugging purposes
3158 void ConcurrentMark::print_finger() {
3159   gclog_or_tty->print_cr("heap [" PTR_FORMAT ", " PTR_FORMAT "), global finger = " PTR_FORMAT,
3160                          p2i(_heap_start), p2i(_heap_end), p2i(_finger));
3161   for (uint i = 0; i < _max_worker_id; ++i) {
3162     gclog_or_tty->print("   %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
3163   }
3164   gclog_or_tty->cr();
3165 }
3166 #endif
3167 
3168 template<bool scan>
3169 inline void CMTask::process_grey_object(oop obj) {
3170   assert(scan || obj->is_typeArray(), "Skipping scan of grey non-typeArray");
3171   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3172 
3173   if (_cm->verbose_high()) {
3174     gclog_or_tty->print_cr("[%u] processing grey object " PTR_FORMAT,
3175                            _worker_id, p2i((void*) obj));
3176   }
3177 
3178   size_t obj_size = obj->size();
3179   _words_scanned += obj_size;
3180 
3181   if (scan) {
3182     obj->oop_iterate(_cm_oop_closure);
3183   }
3184   statsOnly( ++_objs_scanned );
3185   check_limits();
3186 }
3187 
3188 template void CMTask::process_grey_object<true>(oop);
3189 template void CMTask::process_grey_object<false>(oop);
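// The two explicit instantiations above correspond to the two ways a grey
// object is processed: process_grey_object<true> iterates over the object's
// fields with _cm_oop_closure, while process_grey_object<false> only accounts
// for the object's size. The latter is only legal for typeArrays, which
// contain no references (see the assert at the top of the template).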
3190 
3191 // Closure for iteration over bitmaps
3192 class CMBitMapClosure : public BitMapClosure {
3193 private:
3194   // the bitmap that is being iterated over
3195   CMBitMap*                   _nextMarkBitMap;
3196   ConcurrentMark*             _cm;
3197   CMTask*                     _task;
3198 
3199 public:
3200   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3201     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3202 
3203   bool do_bit(size_t offset) {
3204     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3205     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3206     assert( addr < _cm->finger(), "invariant");
3207 
3208     statsOnly( _task->increase_objs_found_on_bitmap() );
3209     assert(addr >= _task->finger(), "invariant");
3210 
3211     // We move that task's local finger along.
3212     _task->move_finger_to(addr);
3213 
3214     _task->scan_object(oop(addr));
3215     // we only partially drain the local queue and global stack
3216     _task->drain_local_queue(true);
3217     _task->drain_global_stack(true);
3218 
3219     // if the has_aborted flag has been raised, we need to bail out of
3220     // the iteration
3221     return !_task->has_aborted();
3222   }
3223 };
3224 
3225 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3226                                ConcurrentMark* cm,
3227                                CMTask* task)
3228   : _g1h(g1h), _cm(cm), _task(task) {
3229   assert(_ref_processor == NULL, "should be initialized to NULL");
3230 
3231   if (G1UseConcMarkReferenceProcessing) {
3232     _ref_processor = g1h->ref_processor_cm();
3233     assert(_ref_processor != NULL, "should not be NULL");
3234   }
3235 }
3236 
3237 void CMTask::setup_for_region(HeapRegion* hr) {
3238   assert(hr != NULL,
3239         "claim_region() should have filtered out NULL regions");
3240   assert(!hr->is_continues_humongous(),
3241         "claim_region() should have filtered out continues humongous regions");
3242 
3243   if (_cm->verbose_low()) {
3244     gclog_or_tty->print_cr("[%u] setting up for region " PTR_FORMAT,
3245                            _worker_id, p2i(hr));
3246   }
3247 
3248   _curr_region  = hr;
3249   _finger       = hr->bottom();
3250   update_region_limit();
3251 }
3252 
3253 void CMTask::update_region_limit() {
3254   HeapRegion* hr            = _curr_region;
3255   HeapWord* bottom          = hr->bottom();
3256   HeapWord* limit           = hr->next_top_at_mark_start();
3257 
3258   if (limit == bottom) {
3259     if (_cm->verbose_low()) {
3260       gclog_or_tty->print_cr("[%u] found an empty region "
3261                              "[" PTR_FORMAT ", " PTR_FORMAT ")",
3262                              _worker_id, p2i(bottom), p2i(limit));
3263     }
3264     // The region was collected underneath our feet.
3265     // We set the finger to bottom to ensure that the bitmap
3266     // iteration that will follow this will not do anything.
3267     // (this is not a condition that holds when we set the region up,
3268     // as the region is not supposed to be empty in the first place)
3269     _finger = bottom;
3270   } else if (limit >= _region_limit) {
3271     assert(limit >= _finger, "peace of mind");
3272   } else {
3273     assert(limit < _region_limit, "only way to get here");
3274     // This can happen under some pretty unusual circumstances.  An
3275     // evacuation pause empties the region underneath our feet (NTAMS
3276     // at bottom). We then do some allocation in the region (NTAMS
3277     // stays at bottom), followed by the region being used as a GC
3278     // alloc region (NTAMS will move to top() and the objects
3279     // originally below it will be grayed). All objects now marked in
3280     // the region are explicitly grayed, if below the global finger,
3281     // and we do not need in fact to scan anything else. So, we simply
3282     // set _finger to be limit to ensure that the bitmap iteration
3283     // doesn't do anything.
3284     _finger = limit;
3285   }
3286 
3287   _region_limit = limit;
3288 }
3289 
3290 void CMTask::giveup_current_region() {
3291   assert(_curr_region != NULL, "invariant");
3292   if (_cm->verbose_low()) {
3293     gclog_or_tty->print_cr("[%u] giving up region " PTR_FORMAT,
3294                            _worker_id, p2i(_curr_region));
3295   }
3296   clear_region_fields();
3297 }
3298 
3299 void CMTask::clear_region_fields() {
3300   // Values for these three fields that indicate that we're not
3301   // holding on to a region.
3302   _curr_region   = NULL;
3303   _finger        = NULL;
3304   _region_limit  = NULL;
3305 }
3306 
3307 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3308   if (cm_oop_closure == NULL) {
3309     assert(_cm_oop_closure != NULL, "invariant");
3310   } else {
3311     assert(_cm_oop_closure == NULL, "invariant");
3312   }
3313   _cm_oop_closure = cm_oop_closure;
3314 }
3315 
3316 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3317   guarantee(nextMarkBitMap != NULL, "invariant");
3318 
3319   if (_cm->verbose_low()) {
3320     gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3321   }
3322 
3323   _nextMarkBitMap                = nextMarkBitMap;
3324   clear_region_fields();
3325 
3326   _calls                         = 0;
3327   _elapsed_time_ms               = 0.0;
3328   _termination_time_ms           = 0.0;
3329   _termination_start_time_ms     = 0.0;
3330 
3331 #if _MARKING_STATS_
3332   _aborted                       = 0;
3333   _aborted_overflow              = 0;
3334   _aborted_cm_aborted            = 0;
3335   _aborted_yield                 = 0;
3336   _aborted_timed_out             = 0;
3337   _aborted_satb                  = 0;
3338   _aborted_termination           = 0;
3339   _steal_attempts                = 0;
3340   _steals                        = 0;
3341   _local_pushes                  = 0;
3342   _local_pops                    = 0;
3343   _local_max_size                = 0;
3344   _objs_scanned                  = 0;
3345   _global_pushes                 = 0;
3346   _global_pops                   = 0;
3347   _global_max_size               = 0;
3348   _global_transfers_to           = 0;
3349   _global_transfers_from         = 0;
3350   _regions_claimed               = 0;
3351   _objs_found_on_bitmap          = 0;
3352   _satb_buffers_processed        = 0;
3353 #endif // _MARKING_STATS_
3354 }
3355 
3356 bool CMTask::should_exit_termination() {
3357   regular_clock_call();
3358   // This is called when we are in the termination protocol. We should
3359   // quit if, for some reason, this task wants to abort or the global
3360   // stack is not empty (this means that we can get work from it).
3361   return !_cm->mark_stack_empty() || has_aborted();
3362 }
3363 
3364 void CMTask::reached_limit() {
3365   assert(_words_scanned >= _words_scanned_limit ||
3366          _refs_reached >= _refs_reached_limit ,
3367          "shouldn't have been called otherwise");
3368   regular_clock_call();
3369 }
3370 
3371 void CMTask::regular_clock_call() {
3372   if (has_aborted()) return;
3373 
3374   // First, we need to recalculate the words scanned and refs reached
3375   // limits for the next clock call.
3376   recalculate_limits();
3377 
3378   // During the regular clock call we do the following
3379 
3380   // (1) If an overflow has been flagged, then we abort.
3381   if (_cm->has_overflown()) {
3382     set_has_aborted();
3383     return;
3384   }
3385 
3386   // If we are not concurrent (i.e. we're doing remark) we don't need
3387   // to check anything else. The other steps are only needed during
3388   // the concurrent marking phase.
3389   if (!concurrent()) return;
3390 
3391   // (2) If marking has been aborted for Full GC, then we also abort.
3392   if (_cm->has_aborted()) {
3393     set_has_aborted();
3394     statsOnly( ++_aborted_cm_aborted );
3395     return;
3396   }
3397 
3398   double curr_time_ms = os::elapsedVTime() * 1000.0;
3399 
3400   // (3) If marking stats are enabled, then we update the step history.
3401 #if _MARKING_STATS_
3402   if (_words_scanned >= _words_scanned_limit) {
3403     ++_clock_due_to_scanning;
3404   }
3405   if (_refs_reached >= _refs_reached_limit) {
3406     ++_clock_due_to_marking;
3407   }
3408 
3409   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3410   _interval_start_time_ms = curr_time_ms;
3411   _all_clock_intervals_ms.add(last_interval_ms);
3412 
3413   if (_cm->verbose_medium()) {
3414       gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3415                         "scanned = " SIZE_FORMAT "%s, refs reached = " SIZE_FORMAT "%s",
3416                         _worker_id, last_interval_ms,
3417                         _words_scanned,
3418                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3419                         _refs_reached,
3420                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3421   }
3422 #endif // _MARKING_STATS_
3423 
3424   // (4) We check whether we should yield. If we have to, then we abort.
3425   if (SuspendibleThreadSet::should_yield()) {
3426     // We should yield. To do this we abort the task. The caller is
3427     // responsible for yielding.
3428     set_has_aborted();
3429     statsOnly( ++_aborted_yield );
3430     return;
3431   }
3432 
3433   // (5) We check whether we've reached our time quota. If we have,
3434   // then we abort.
3435   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3436   if (elapsed_time_ms > _time_target_ms) {
3437     set_has_aborted();
3438     _has_timed_out = true;
3439     statsOnly( ++_aborted_timed_out );
3440     return;
3441   }
3442 
3443   // (6) Finally, we check whether there are enough completed SATB
3444   // buffers available for processing. If there are, we abort.
3445   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3446   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3447     if (_cm->verbose_low()) {
3448       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3449                              _worker_id);
3450     }
3451     // we do need to process SATB buffers, we'll abort and restart
3452     // the marking task to do so
3453     set_has_aborted();
3454     statsOnly( ++_aborted_satb );
3455     return;
3456   }
3457 }
3458 
3459 void CMTask::recalculate_limits() {
3460   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3461   _words_scanned_limit      = _real_words_scanned_limit;
3462 
3463   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3464   _refs_reached_limit       = _real_refs_reached_limit;
3465 }
3466 
3467 void CMTask::decrease_limits() {
3468   // This is called when we believe that we're going to do an infrequent
3469   // operation which will increase the per byte scanned cost (i.e. move
3470   // entries to/from the global stack). It basically tries to decrease the
3471   // scanning limit so that the clock is called earlier.
3472 
3473   if (_cm->verbose_medium()) {
3474     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3475   }
3476 
3477   _words_scanned_limit = _real_words_scanned_limit -
3478     3 * words_scanned_period / 4;
3479   _refs_reached_limit  = _real_refs_reached_limit -
3480     3 * refs_reached_period / 4;
3481 }
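// Worked example (hypothetical numbers, for illustration only): if
// words_scanned_period were 1000, then immediately after recalculate_limits()
// the limit sits a full period (1000 words) ahead of the scan count at that
// point; decrease_limits() pulls it back to only 250 words ahead, so the next
// regular_clock_call() fires after roughly a quarter of the usual amount of
// scanning work. The refs-reached limit is reduced in the same proportion.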
3482 
3483 void CMTask::move_entries_to_global_stack() {
3484   // local array where we'll store the entries that will be popped
3485   // from the local queue
3486   oop buffer[global_stack_transfer_size];
3487 
3488   int n = 0;
3489   oop obj;
3490   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3491     buffer[n] = obj;
3492     ++n;
3493   }
3494 
3495   if (n > 0) {
3496     // we popped at least one entry from the local queue
3497 
3498     statsOnly( ++_global_transfers_to; _local_pops += n );
3499 
3500     if (!_cm->mark_stack_push(buffer, n)) {
3501       if (_cm->verbose_low()) {
3502         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3503                                _worker_id);
3504       }
3505       set_has_aborted();
3506     } else {
3507       // the transfer was successful
3508 
3509       if (_cm->verbose_medium()) {
3510         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3511                                _worker_id, n);
3512       }
3513       statsOnly( size_t tmp_size = _cm->mark_stack_size();
3514                  if (tmp_size > _global_max_size) {
3515                    _global_max_size = tmp_size;
3516                  }
3517                  _global_pushes += n );
3518     }
3519   }
3520 
3521   // this operation was quite expensive, so decrease the limits
3522   decrease_limits();
3523 }
3524 
3525 void CMTask::get_entries_from_global_stack() {
3526   // local array where we'll store the entries that will be popped
3527   // from the global stack.
3528   oop buffer[global_stack_transfer_size];
3529   int n;
3530   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3531   assert(n <= global_stack_transfer_size,
3532          "we should not pop more than the given limit");
3533   if (n > 0) {
3534     // yes, we did actually pop at least one entry
3535 
3536     statsOnly( ++_global_transfers_from; _global_pops += n );
3537     if (_cm->verbose_medium()) {
3538       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3539                              _worker_id, n);
3540     }
3541     for (int i = 0; i < n; ++i) {
3542       bool success = _task_queue->push(buffer[i]);
3543       // We only call this when the local queue is empty or under a
3544       // given target limit. So, we do not expect this push to fail.
3545       assert(success, "invariant");
3546     }
3547 
3548     statsOnly( size_t tmp_size = (size_t)_task_queue->size();
3549                if (tmp_size > _local_max_size) {
3550                  _local_max_size = tmp_size;
3551                }
3552                _local_pushes += n );
3553   }
3554 
3555   // this operation was quite expensive, so decrease the limits
3556   decrease_limits();
3557 }
3558 
3559 void CMTask::drain_local_queue(bool partially) {
3560   if (has_aborted()) return;
3561 
3562   // Decide what the target size is, depending on whether we're going to
3563   // drain it partially (so that other tasks can steal if they run out
3564   // of things to do) or totally (at the very end).
3565   size_t target_size;
3566   if (partially) {
3567     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3568   } else {
3569     target_size = 0;
3570   }
3571 
3572   if (_task_queue->size() > target_size) {
3573     if (_cm->verbose_high()) {
3574       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3575                              _worker_id, target_size);
3576     }
3577 
3578     oop obj;
3579     bool ret = _task_queue->pop_local(obj);
3580     while (ret) {
3581       statsOnly( ++_local_pops );
3582 
3583       if (_cm->verbose_high()) {
3584         gclog_or_tty->print_cr("[%u] popped " PTR_FORMAT, _worker_id,
3585                                p2i((void*) obj));
3586       }
3587 
3588       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3589       assert(!_g1h->is_on_master_free_list(
3590                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3591 
3592       scan_object(obj);
3593 
3594       if (_task_queue->size() <= target_size || has_aborted()) {
3595         ret = false;
3596       } else {
3597         ret = _task_queue->pop_local(obj);
3598       }
3599     }
3600 
3601     if (_cm->verbose_high()) {
3602       gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3603                              _worker_id, _task_queue->size());
3604     }
3605   }
3606 }
3607 
3608 void CMTask::drain_global_stack(bool partially) {
3609   if (has_aborted()) return;
3610 
3611   // We have a policy to drain the local queue before we attempt to
3612   // drain the global stack.
3613   assert(partially || _task_queue->size() == 0, "invariant");
3614 
3615   // Decide what the target size is, depending on whether we're going to
3616   // drain it partially (so that other tasks can steal if they run out
3617   // of things to do) or totally (at the very end).  Notice that,
3618   // because we move entries from the global stack in chunks or
3619   // because another task might be doing the same, we might in fact
3620   // drop below the target. But, this is not a problem.
3621   size_t target_size;
3622   if (partially) {
3623     target_size = _cm->partial_mark_stack_size_target();
3624   } else {
3625     target_size = 0;
3626   }
3627 
3628   if (_cm->mark_stack_size() > target_size) {
3629     if (_cm->verbose_low()) {
3630       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3631                              _worker_id, target_size);
3632     }
3633 
3634     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3635       get_entries_from_global_stack();
3636       drain_local_queue(partially);
3637     }
3638 
3639     if (_cm->verbose_low()) {
3640       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3641                              _worker_id, _cm->mark_stack_size());
3642     }
3643   }
3644 }
3645 
3646 // The SATB queue makes several assumptions about whether to call the par or
3647 // non-par versions of its methods. This is why some of the code is
3648 // replicated. We should really get rid of the single-threaded version
3649 // of the code to simplify things.
3650 void CMTask::drain_satb_buffers() {
3651   if (has_aborted()) return;
3652 
3653   // We set this so that the regular clock knows that we're in the
3654   // middle of draining buffers and doesn't set the abort flag when it
3655   // notices that SATB buffers are available for draining. It'd be
3656   // very counterproductive if it did that. :-)
3657   _draining_satb_buffers = true;
3658 
3659   CMSATBBufferClosure satb_cl(this, _g1h);
3660   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3661 
3662   // This keeps claiming and applying the closure to completed buffers
3663   // until we run out of buffers or we need to abort.
3664   while (!has_aborted() &&
3665          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3666     if (_cm->verbose_medium()) {
3667       gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3668     }
3669     statsOnly( ++_satb_buffers_processed );
3670     regular_clock_call();
3671   }
3672 
3673   _draining_satb_buffers = false;
3674 
3675   assert(has_aborted() ||
3676          concurrent() ||
3677          satb_mq_set.completed_buffers_num() == 0, "invariant");
3678 
3679   // again, this was a potentially expensive operation, decrease the
3680   // limits to get the regular clock call early
3681   decrease_limits();
3682 }
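// Note: setting _draining_satb_buffers above is what stops
// regular_clock_call() (check (6) there) from immediately re-aborting the
// task because completed SATB buffers are available while we are already in
// the middle of draining them.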
3683 
3684 void CMTask::print_stats() {
3685   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3686                          _worker_id, _calls);
3687   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3688                          _elapsed_time_ms, _termination_time_ms);
3689   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3690                          _step_times_ms.num(), _step_times_ms.avg(),
3691                          _step_times_ms.sd());
3692   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3693                          _step_times_ms.maximum(), _step_times_ms.sum());
3694 
3695 #if _MARKING_STATS_
3696   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3697                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3698                          _all_clock_intervals_ms.sd());
3699   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3700                          _all_clock_intervals_ms.maximum(),
3701                          _all_clock_intervals_ms.sum());
3702   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = " SIZE_FORMAT ", marking = " SIZE_FORMAT,
3703                          _clock_due_to_scanning, _clock_due_to_marking);
3704   gclog_or_tty->print_cr("  Objects: scanned = " SIZE_FORMAT ", found on the bitmap = " SIZE_FORMAT,
3705                          _objs_scanned, _objs_found_on_bitmap);
3706   gclog_or_tty->print_cr("  Local Queue:  pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT,
3707                          _local_pushes, _local_pops, _local_max_size);
3708   gclog_or_tty->print_cr("  Global Stack: pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT,
3709                          _global_pushes, _global_pops, _global_max_size);
3710   gclog_or_tty->print_cr("                transfers to = " SIZE_FORMAT ", transfers from = " SIZE_FORMAT,
3711                          _global_transfers_to,_global_transfers_from);
3712   gclog_or_tty->print_cr("  Regions: claimed = " SIZE_FORMAT, _regions_claimed);
3713   gclog_or_tty->print_cr("  SATB buffers: processed = " SIZE_FORMAT, _satb_buffers_processed);
3714   gclog_or_tty->print_cr("  Steals: attempts = " SIZE_FORMAT ", successes = " SIZE_FORMAT,
3715                          _steal_attempts, _steals);
3716   gclog_or_tty->print_cr("  Aborted: " SIZE_FORMAT ", due to", _aborted);
3717   gclog_or_tty->print_cr("    overflow: " SIZE_FORMAT ", global abort: " SIZE_FORMAT ", yield: " SIZE_FORMAT,
3718                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3719   gclog_or_tty->print_cr("    time out: " SIZE_FORMAT ", SATB: " SIZE_FORMAT ", termination: " SIZE_FORMAT,
3720                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3721 #endif // _MARKING_STATS_
3722 }
3723 
3724 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3725   return _task_queues->steal(worker_id, hash_seed, obj);
3726 }
3727 
3728 /*****************************************************************************
3729 
3730     The do_marking_step(time_target_ms, ...) method is the building
3731     block of the parallel marking framework. It can be called in parallel
3732     with other invocations of do_marking_step() on different tasks
3733     (but only one per task, obviously) and concurrently with the
3734     mutator threads, or during remark, hence it eliminates the need
3735     for two versions of the code. When called during remark, it will
3736     pick up from where the task left off during the concurrent marking
3737     phase. Interestingly, tasks are also claimable during evacuation
3738     pauses, since do_marking_step() ensures that it aborts before
3739     it needs to yield.
3740 
3741     The data structures that it uses to do marking work are the
3742     following:
3743 
3744       (1) Marking Bitmap. If there are gray objects that appear only
3745       on the bitmap (this happens either when dealing with an overflow
3746       or when the initial marking phase has simply marked the roots
3747       and didn't push them on the stack), then tasks claim heap
3748       regions whose bitmap they then scan to find gray objects. A
3749       global finger indicates where the end of the last claimed region
3750       is. A local finger indicates how far into the region a task has
3751       scanned. The two fingers are used to determine how to gray an
3752       object (i.e. whether simply marking it is OK, as it will be
3753       visited by a task in the future, or whether it needs to be also
3754       pushed on a stack).
3755 
3756       (2) Local Queue. The local queue of the task which is accessed
3757       reasonably efficiently by the task. Other tasks can steal from
3758       it when they run out of work. Throughout the marking phase, a
3759       task attempts to keep its local queue short but not totally
3760       empty, so that entries are available for stealing by other
3761       tasks. Only when there is no more work, a task will totally
3762       drain its local queue.
3763 
3764       (3) Global Mark Stack. This handles local queue overflow. During
3765     marking, only sets of entries are moved between it and the local
3766     queues, as access to it requires a mutex, and more fine-grained
3767     interaction with it might cause contention. If it
3768       overflows, then the marking phase should restart and iterate
3769       over the bitmap to identify gray objects. Throughout the marking
3770       phase, tasks attempt to keep the global mark stack at a small
3771       length but not totally empty, so that entries are available for
3772       popping by other tasks. Only when there is no more work, tasks
3773       will totally drain the global mark stack.
3774 
3775       (4) SATB Buffer Queue. This is where completed SATB buffers are
3776       made available. Buffers are regularly removed from this queue
3777       and scanned for roots, so that the queue doesn't get too
3778       long. During remark, all completed buffers are processed, as
3779       well as the filled in parts of any uncompleted buffers.
3780 
3781     The do_marking_step() method tries to abort when the time target
3782     has been reached. There are a few other cases when the
3783     do_marking_step() method also aborts:
3784 
3785       (1) When the marking phase has been aborted (after a Full GC).
3786 
3787       (2) When a global overflow (on the global stack) has been
3788       triggered. Before the task aborts, it will actually sync up with
3789       the other tasks to ensure that all the marking data structures
3790       (local queues, stacks, fingers etc.)  are re-initialized so that
3791       when do_marking_step() completes, the marking phase can
3792       immediately restart.
3793 
3794       (3) When enough completed SATB buffers are available. The
3795       do_marking_step() method only tries to drain SATB buffers right
3796       at the beginning. So, if enough buffers are available, the
3797       marking step aborts and the SATB buffers are processed at
3798       the beginning of the next invocation.
3799 
3800       (4) To yield. When we have to yield, we abort and yield
3801       right at the end of do_marking_step(). This saves us from a lot
3802       of hassle because, by yielding, we might allow a Full GC. If this
3803       happens then objects will be compacted underneath our feet, the
3804       heap might shrink, etc. We save checking for this by just
3805       aborting and doing the yield right at the end.
3806 
3807     From the above it follows that the do_marking_step() method should
3808     be called in a loop (or, otherwise, regularly) until it completes.
3809 
3810     If a marking step completes without its has_aborted() flag being
3811     true, it means it has completed the current marking phase (and
3812     also all other marking tasks have done so and have all synced up).
3813 
3814     A method called regular_clock_call() is invoked "regularly" (in
3815     sub ms intervals) throughout marking. It is this clock method that
3816     checks all the abort conditions which were mentioned above and
3817     decides when the task should abort. A work-based scheme is used to
3818     trigger this clock method: when the number of object words the
3819     marking phase has scanned or the number of references the marking
3820     phase has visited reach a given limit. Additional invocations of
3821     the clock method have been planted in a few other strategic places
3822     too. The initial reason for the clock method was to avoid calling
3823     vtime too regularly, as it is quite expensive. So, once it was in
3824     place, it was natural to piggy-back all the other conditions on it
3825     too and not constantly check them throughout the code.
3826 
3827     If do_termination is true then do_marking_step will enter its
3828     termination protocol.
3829 
3830     The value of is_serial must be true when do_marking_step is being
3831     called serially (i.e. by the VMThread) and do_marking_step should
3832     skip any synchronization in the termination and overflow code.
3833     Examples include the serial remark code and the serial reference
3834     processing closures.
3835 
3836     The value of is_serial must be false when do_marking_step is
3837     being called by any of the worker threads in a work gang.
3838     Examples include the concurrent marking code (CMMarkingTask),
3839     the MT remark code, and the MT reference processing closures.
3840 
3841  *****************************************************************************/
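// Illustrative driver sketch (an assumption about typical usage, not actual
// caller code): a non-serial caller keeps invoking do_marking_step() until
// the task completes without aborting, yielding between attempts as needed:
//
//   do {
//     task->do_marking_step(target_ms,
//                           true  /* do_termination */,
//                           false /* is_serial */);
//     // if the task aborted in order to yield, let the safepoint proceed
//     // before retrying
//   } while (task->has_aborted() && !cm->has_aborted());
//
// The real callers mentioned above (e.g. the concurrent marking code and the
// MT remark code) wrap this loop with their own yield, overflow and SATB
// handling.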
3842 
3843 void CMTask::do_marking_step(double time_target_ms,
3844                              bool do_termination,
3845                              bool is_serial) {
3846   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3847   assert(concurrent() == _cm->concurrent(), "they should be the same");
3848 
3849   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3850   assert(_task_queues != NULL, "invariant");
3851   assert(_task_queue != NULL, "invariant");
3852   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3853 
3854   assert(!_claimed,
3855          "only one thread should claim this task at any one time");
3856 
3857   // OK, this doesn't safeguard against all possible scenarios, as it is
3858   // possible for two threads to set the _claimed flag at the same
3859   // time. But it is only for debugging purposes anyway and it will
3860   // catch most problems.
3861   _claimed = true;
3862 
3863   _start_time_ms = os::elapsedVTime() * 1000.0;
3864   statsOnly( _interval_start_time_ms = _start_time_ms );
3865 
3866   // If do_stealing is true then do_marking_step will attempt to
3867   // steal work from the other CMTasks. It only makes sense to
3868   // enable stealing when the termination protocol is enabled
3869   // and do_marking_step() is not being called serially.
3870   bool do_stealing = do_termination && !is_serial;
3871 
3872   double diff_prediction_ms =
3873     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3874   _time_target_ms = time_target_ms - diff_prediction_ms;
3875 
3876   // set up the variables that are used in the work-based scheme to
3877   // call the regular clock method
3878   _words_scanned = 0;
3879   _refs_reached  = 0;
3880   recalculate_limits();
3881 
3882   // clear all flags
3883   clear_has_aborted();
3884   _has_timed_out = false;
3885   _draining_satb_buffers = false;
3886 
3887   ++_calls;
3888 
3889   if (_cm->verbose_low()) {
3890     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
3891                            "target = %1.2lfms >>>>>>>>>>",
3892                            _worker_id, _calls, _time_target_ms);
3893   }
3894 
3895   // Set up the bitmap and oop closures. Anything that uses them is
3896   // eventually called from this method, so it is OK to allocate these
3897   // statically.
3898   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3899   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3900   set_cm_oop_closure(&cm_oop_closure);
3901 
3902   if (_cm->has_overflown()) {
3903     // This can happen if the mark stack overflows during a GC pause
3904     // and this task, after a yield point, restarts. We have to abort
3905     // as we need to get into the overflow protocol which happens
3906     // right at the end of this task.
3907     set_has_aborted();
3908   }
3909 
3910   // First drain any available SATB buffers. After this, we will not
3911   // look at SATB buffers before the next invocation of this method.
3912   // If enough completed SATB buffers are queued up, the regular clock
3913   // will abort this task so that it restarts.
3914   drain_satb_buffers();
3915   // ...then partially drain the local queue and the global stack
3916   drain_local_queue(true);
3917   drain_global_stack(true);
3918 
3919   do {
3920     if (!has_aborted() && _curr_region != NULL) {
3921       // This means that we're already holding on to a region.
3922       assert(_finger != NULL, "if region is not NULL, then the finger "
3923              "should not be NULL either");
3924 
3925       // We might have restarted this task after an evacuation pause
3926       // which might have evacuated the region we're holding on to
3927       // underneath our feet. Let's read its limit again to make sure
3928       // that we do not iterate over a region of the heap that
3929       // contains garbage (update_region_limit() will also move
3930       // _finger to the start of the region if it is found empty).
3931       update_region_limit();
3932       // We will start from _finger not from the start of the region,
3933       // as we might be restarting this task after aborting half-way
3934       // through scanning this region. In this case, _finger points to
3935       // the address where we last found a marked object. If this is a
3936       // fresh region, _finger points to start().
3937       MemRegion mr = MemRegion(_finger, _region_limit);
3938 
3939       if (_cm->verbose_low()) {
3940         gclog_or_tty->print_cr("[%u] we're scanning part "
3941                                "[" PTR_FORMAT ", " PTR_FORMAT ") "
3942                                "of region " HR_FORMAT,
3943                                _worker_id, p2i(_finger), p2i(_region_limit),
3944                                HR_FORMAT_PARAMS(_curr_region));
3945       }
3946 
3947       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
3948              "humongous regions should go around loop once only");
3949 
3950       // Some special cases:
3951       // If the memory region is empty, we can just give up the region.
3952       // If the current region is humongous then we only need to check
3953       // the bitmap for the bit associated with the start of the object,
3954       // scan the object if it's live, and give up the region.
3955       // Otherwise, let's iterate over the bitmap of the part of the region
3956       // that is left.
3957       // If the iteration is successful, give up the region.
3958       if (mr.is_empty()) {
3959         giveup_current_region();
3960         regular_clock_call();
3961       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
3962         if (_nextMarkBitMap->isMarked(mr.start())) {
3963           // The object is marked - apply the closure
3964           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
3965           bitmap_closure.do_bit(offset);
3966         }
3967         // Even if this task aborted while scanning the humongous object
3968         // we can (and should) give up the current region.
3969         giveup_current_region();
3970         regular_clock_call();
3971       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3972         giveup_current_region();
3973         regular_clock_call();
3974       } else {
3975         assert(has_aborted(), "currently the only way to do so");
3976         // The only way to abort the bitmap iteration is to return
3977         // false from the do_bit() method. However, inside the
3978         // do_bit() method we move the _finger to point to the
3979         // object currently being looked at. So, if we bail out, we
3980         // have definitely set _finger to something non-null.
3981         assert(_finger != NULL, "invariant");
3982 
3983         // Region iteration was actually aborted. So now _finger
3984         // points to the address of the object we last scanned. If we
3985         // leave it there, when we restart this task, we will rescan
3986         // the object. It is easy to avoid this. We move the finger by
3987         // enough to point to the next possible object header (the
3988         // bitmap knows by how much we need to move it as it knows its
3989         // granularity).
3990         assert(_finger < _region_limit, "invariant");
3991         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
3992         // Check if bitmap iteration was aborted while scanning the last object
3993         if (new_finger >= _region_limit) {
3994           giveup_current_region();
3995         } else {
3996           move_finger_to(new_finger);
3997         }
3998       }
3999     }
4000     // At this point we have either completed iterating over the
4001     // region we were holding on to, or we have aborted.
4002 
4003     // We then partially drain the local queue and the global stack.
4004     // (Do we really need this?)
4005     drain_local_queue(true);
4006     drain_global_stack(true);
4007 
4008     // Read the note on the claim_region() method on why it might
4009     // return NULL with potentially more regions available for
4010     // claiming and why we have to check out_of_regions() to determine
4011     // whether we're done or not.
4012     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4013       // We are going to try to claim a new region. We should have
4014       // given up on the previous one.
4015       // Separated the asserts so that we know which one fires.
4016       assert(_curr_region  == NULL, "invariant");
4017       assert(_finger       == NULL, "invariant");
4018       assert(_region_limit == NULL, "invariant");
4019       if (_cm->verbose_low()) {
4020         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4021       }
4022       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4023       if (claimed_region != NULL) {
4024         // Yes, we managed to claim one
4025         statsOnly( ++_regions_claimed );
4026 
4027         if (_cm->verbose_low()) {
4028           gclog_or_tty->print_cr("[%u] we successfully claimed "
4029                                  "region " PTR_FORMAT,
4030                                  _worker_id, p2i(claimed_region));
4031         }
4032 
4033         setup_for_region(claimed_region);
4034         assert(_curr_region == claimed_region, "invariant");
4035       }
4036       // It is important to call the regular clock here. It might take
4037       // a while to claim a region if, for example, we hit a large
4038       // block of empty regions. So we need to call the regular clock
4039       // method once per iteration of this loop to make sure it's
4040       // called frequently enough.
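           //
           // (regular_clock_call() is what enforces the time budget: among
           // other things it checks the elapsed time against the target and,
           // if we have overrun it, flags the task as timed out and aborted;
           // see the has_aborted() / _has_timed_out handling further down.)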
4041       regular_clock_call();
4042     }
4043 
4044     if (!has_aborted() && _curr_region == NULL) {
4045       assert(_cm->out_of_regions(),
4046              "at this point we should be out of regions");
4047     }
4048   } while ( _curr_region != NULL && !has_aborted());
4049 
4050   if (!has_aborted()) {
4051     // We cannot check whether the global stack is empty, since other
4052     // tasks might be pushing objects to it concurrently.
4053     assert(_cm->out_of_regions(),
4054            "at this point we should be out of regions");
4055 
4056     if (_cm->verbose_low()) {
4057       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4058     }
4059 
4060     // Try to reduce the number of available SATB buffers so that
4061     // remark has less work to do.
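         // (The SATB buffers hold references recorded by the write barrier
         // since marking started; whatever is processed here no longer has
         // to be processed during the remark pause.)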
4062     drain_satb_buffers();
4063   }
4064 
4065   // Since we've done everything else, we can now totally drain the
4066   // local queue and global stack.
4067   drain_local_queue(false);
4068   drain_global_stack(false);
4069 
4070   // Attempt to steal work from other tasks' queues.
4071   if (do_stealing && !has_aborted()) {
4072     // We have not aborted. This means that we have finished all that
4073     // we could. Let's try to do some stealing...
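         //
         // try_stealing() below picks another task's queue (with _hash_seed
         // used to randomize the choice of victim) and attempts to pop an
         // entry from it; a stolen object is then scanned just like one
         // popped from our own queue.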
4074 
4075     // We cannot check whether the global stack is empty, since other
4076     // tasks might be pushing objects to it concurrently.
4077     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4078            "only way to reach here");
4079 
4080     if (_cm->verbose_low()) {
4081       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4082     }
4083 
4084     while (!has_aborted()) {
4085       oop obj;
4086       statsOnly( ++_steal_attempts );
4087 
4088       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4089         if (_cm->verbose_medium()) {
4090           gclog_or_tty->print_cr("[%u] stolen " PTR_FORMAT " successfully",
4091                                  _worker_id, p2i((void*) obj));
4092         }
4093 
4094         statsOnly( ++_steals );
4095 
4096         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4097                "any stolen object should be marked");
4098         scan_object(obj);
4099 
4100         // And since we're towards the end, let's totally drain the
4101         // local queue and global stack.
4102         drain_local_queue(false);
4103         drain_global_stack(false);
4104       } else {
4105         break;
4106       }
4107     }
4108   }
4109 
4110   // If we are about to wrap up and go into termination, check if we
4111   // should raise the overflow flag.
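       // (force_overflow() is presumably a stress/diagnostic hook: when it
       // asks for it we pretend the global mark stack has overflowed, so
       // that the overflow / restart path below gets exercised even without
       // a real overflow.)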
4112   if (do_termination && !has_aborted()) {
4113     if (_cm->force_overflow()->should_force()) {
4114       _cm->set_has_overflown();
4115       regular_clock_call();
4116     }
4117   }
4118 
4119   // We still haven't aborted. Now, let's try to get into the
4120   // termination protocol.
4121   if (do_termination && !has_aborted()) {
4122     // We cannot check whether the global stack is empty, since other
4123     // tasks might be concurrently pushing objects on it.
4124     // Separated the asserts so that we know which one fires.
4125     assert(_cm->out_of_regions(), "only way to reach here");
4126     assert(_task_queue->size() == 0, "only way to reach here");
4127 
4128     if (_cm->verbose_low()) {
4129       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4130     }
4131 
4132     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4133 
4134     // The CMTask class also extends the TerminatorTerminator class,
4135     // so its should_exit_termination() method also decides whether to
4136     // exit the termination protocol.
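         //
         // offer_termination() returns true once every task has offered
         // termination (i.e. marking is globally finished); it returns
         // false if should_exit_termination() asks this task to leave the
         // protocol early, in which case we abort below and the step will
         // be retried.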
4137     bool finished = (is_serial ||
4138                      _cm->terminator()->offer_termination(this));
4139     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4140     _termination_time_ms +=
4141       termination_end_time_ms - _termination_start_time_ms;
4142 
4143     if (finished) {
4144       // We're all done.
4145 
4146       if (_worker_id == 0) {
4147         // let's allow task 0 to do this
4148         if (concurrent()) {
4149           assert(_cm->concurrent_marking_in_progress(), "invariant");
4150           // we need to set this to false before the next
4151           // safepoint. This way we ensure that the marking phase
4152           // doesn't observe any more heap expansions.
4153           _cm->clear_concurrent_marking_in_progress();
4154         }
4155       }
4156 
4157       // We can now guarantee that the global stack is empty, since
4158       // all other tasks have finished. We separated the guarantees so
4159       // that, if a condition is false, we can immediately find out
4160       // which one.
4161       guarantee(_cm->out_of_regions(), "only way to reach here");
4162       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4163       guarantee(_task_queue->size() == 0, "only way to reach here");
4164       guarantee(!_cm->has_overflown(), "only way to reach here");
4165       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4166 
4167       if (_cm->verbose_low()) {
4168         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4169       }
4170     } else {
4171       // Apparently there's more work to do. Let's abort this task;
4172       // it will be restarted and we can hopefully find more things to do.
4173 
4174       if (_cm->verbose_low()) {
4175         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4176                                _worker_id);
4177       }
4178 
4179       set_has_aborted();
4180       statsOnly( ++_aborted_termination );
4181     }
4182   }
4183 
4184   // Mainly for debugging purposes to make sure that a pointer to the
4185   // closure which was statically allocated in this frame doesn't
4186   // escape it by accident.
4187   set_cm_oop_closure(NULL);
4188   double end_time_ms = os::elapsedVTime() * 1000.0;
4189   double elapsed_time_ms = end_time_ms - _start_time_ms;
4190   // Update the step history.
4191   _step_times_ms.add(elapsed_time_ms);
4192 
4193   if (has_aborted()) {
4194     // The task was aborted for some reason.
4195 
4196     statsOnly( ++_aborted );
4197 
4198     if (_has_timed_out) {
4199       double diff_ms = elapsed_time_ms - _time_target_ms;
4200       // Keep statistics of how well we did with respect to hitting
4201       // our target only if we actually timed out (if we aborted for
4202       // other reasons, then the results might get skewed).
4203       _marking_step_diffs_ms.add(diff_ms);
4204     }
4205 
4206     if (_cm->has_overflown()) {
4207       // This is the interesting one. We aborted because a global
4208       // overflow was raised. This means we have to restart the
4209       // marking phase and start iterating over regions. However, in
4210       // order to do this we have to make sure that all tasks stop
4211       // what they are doing and re-initialize in a safe manner. We
4212       // will achieve this with the use of two barrier sync points.
4213 
4214       if (_cm->verbose_low()) {
4215         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4216       }
4217 
4218       if (!is_serial) {
4219         // We only need to enter the sync barrier if being called
4220         // from a parallel context
4221         _cm->enter_first_sync_barrier(_worker_id);
4222 
4223         // When we exit this sync barrier we know that all tasks have
4224         // stopped doing marking work. So, it's now safe to
4225         // re-initialize our data structures. At the end of this method,
4226         // task 0 will clear the global data structures.
4227       }
4228 
4229       statsOnly( ++_aborted_overflow );
4230 
4231       // We clear the local state of this task...
4232       clear_region_fields();
4233 
4234       if (!is_serial) {
4235         // ...and enter the second barrier.
4236         _cm->enter_second_sync_barrier(_worker_id);
4237       }
4238       // At this point, if we're during the concurrent phase of
4239       // marking, everything has been re-initialized and we're
4240       // ready to restart.
4241     }
4242 
4243     if (_cm->verbose_low()) {
4244       gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4245                              "elapsed = %1.2lfms <<<<<<<<<<",
4246                              _worker_id, _time_target_ms, elapsed_time_ms);
4247       if (_cm->has_aborted()) {
4248         gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4249                                _worker_id);
4250       }
4251     }
4252   } else {
4253     if (_cm->verbose_low()) {
4254       gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4255                              "elapsed = %1.2lfms <<<<<<<<<<",
4256                              _worker_id, _time_target_ms, elapsed_time_ms);
4257     }
4258   }
4259 
4260   _claimed = false;
4261 }
4262 
4263 CMTask::CMTask(uint worker_id,
4264                ConcurrentMark* cm,
4265                size_t* marked_bytes,
4266                BitMap* card_bm,
4267                CMTaskQueue* task_queue,
4268                CMTaskQueueSet* task_queues)
4269   : _g1h(G1CollectedHeap::heap()),
4270     _worker_id(worker_id), _cm(cm),
4271     _claimed(false),
4272     _nextMarkBitMap(NULL), _hash_seed(17),
4273     _task_queue(task_queue),
4274     _task_queues(task_queues),
4275     _cm_oop_closure(NULL),
4276     _marked_bytes_array(marked_bytes),
4277     _card_bm(card_bm) {
4278   guarantee(task_queue != NULL, "invariant");
4279   guarantee(task_queues != NULL, "invariant");
4280 
4281   statsOnly( _clock_due_to_scanning = 0;
4282              _clock_due_to_marking  = 0 );
4283 
4284   _marking_step_diffs_ms.add(0.5);
4285 }
4286 
4287 // These are formatting macros that are used below to ensure
4288 // consistent formatting. The *_H_* versions are used to format the
4289 // header for a particular value and they should be kept consistent
4290 // with the corresponding macro. Also note that most of the macros add
4291 // the necessary white space (as a prefix) which makes them a bit
4292 // easier to compose.
4293 
4294 // All the output lines are prefixed with this string to be able to
4295 // identify them easily in a large log file.
4296 #define G1PPRL_LINE_PREFIX            "###"
4297 
4298 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
4299 #ifdef _LP64
4300 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4301 #else // _LP64
4302 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4303 #endif // _LP64
4304 
4305 // For per-region info
4306 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4307 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4308 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
4309 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4310 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4311 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4312 
4313 // For summary info
4314 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
4315 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
4316 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
4317 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
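     // As an illustration of how these compose: G1PPRL_SUM_MB_PERC_FORMAT("used")
     // expands to the format string "  used: %1.2f MB / %1.2f %%", which is
     // how the "used" entry of the SUMMARY line in the destructor below is
     // produced.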
4318 
4319 G1PrintRegionLivenessInfoClosure::
4320 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4321   : _out(out),
4322     _total_used_bytes(0), _total_capacity_bytes(0),
4323     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4324     _hum_used_bytes(0), _hum_capacity_bytes(0),
4325     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
4326     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
4327   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4328   MemRegion g1_reserved = g1h->g1_reserved();
4329   double now = os::elapsedTime();
4330 
4331   // Print the header of the output.
4332   _out->cr();
4333   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4334   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4335                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4336                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4337                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4338                  HeapRegion::GrainBytes);
4339   _out->print_cr(G1PPRL_LINE_PREFIX);
4340   _out->print_cr(G1PPRL_LINE_PREFIX
4341                 G1PPRL_TYPE_H_FORMAT
4342                 G1PPRL_ADDR_BASE_H_FORMAT
4343                 G1PPRL_BYTE_H_FORMAT
4344                 G1PPRL_BYTE_H_FORMAT
4345                 G1PPRL_BYTE_H_FORMAT
4346                 G1PPRL_DOUBLE_H_FORMAT
4347                 G1PPRL_BYTE_H_FORMAT
4348                 G1PPRL_BYTE_H_FORMAT,
4349                 "type", "address-range",
4350                 "used", "prev-live", "next-live", "gc-eff",
4351                 "remset", "code-roots");
4352   _out->print_cr(G1PPRL_LINE_PREFIX
4353                 G1PPRL_TYPE_H_FORMAT
4354                 G1PPRL_ADDR_BASE_H_FORMAT
4355                 G1PPRL_BYTE_H_FORMAT
4356                 G1PPRL_BYTE_H_FORMAT
4357                 G1PPRL_BYTE_H_FORMAT
4358                 G1PPRL_DOUBLE_H_FORMAT
4359                 G1PPRL_BYTE_H_FORMAT
4360                 G1PPRL_BYTE_H_FORMAT,
4361                 "", "",
4362                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4363                 "(bytes)", "(bytes)");
4364 }
4365 
4366 // Takes as a parameter a reference to one of the _hum_* fields, deduces
4367 // the corresponding value for a region in a humongous region series
4368 // (either the region size, or whatever is left if the _hum_* field is
4369 // less than the region size), and updates the _hum_* field accordingly.
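     //
     // For example, assuming a 1 MB region size (HeapRegion::GrainBytes) and
     // a humongous object that set _hum_used_bytes to 2.5 MB, three
     // consecutive calls would return 1 MB, 1 MB and 0.5 MB, leaving the
     // field at zero.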
4370 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4371   size_t bytes = 0;
4372   // The > 0 check is to deal with the prev and next live bytes which
4373   // could be 0.
4374   if (*hum_bytes > 0) {
4375     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4376     *hum_bytes -= bytes;
4377   }
4378   return bytes;
4379 }
4380 
4381 // Deduces the values for a region in a humongous region series from
4382 // the _hum_* fields and updates those accordingly. It assumes that the
4383 // _hum_* fields have already been set up from the "starts humongous"
4384 // region and that we visit the regions in address order.
4385 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4386                                                      size_t* capacity_bytes,
4387                                                      size_t* prev_live_bytes,
4388                                                      size_t* next_live_bytes) {
4389   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4390   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4391   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4392   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4393   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4394 }
4395 
4396 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4397   const char* type       = r->get_type_str();
4398   HeapWord* bottom       = r->bottom();
4399   HeapWord* end          = r->end();
4400   size_t capacity_bytes  = r->capacity();
4401   size_t used_bytes      = r->used();
4402   size_t prev_live_bytes = r->live_bytes();
4403   size_t next_live_bytes = r->next_live_bytes();
4404   double gc_eff          = r->gc_efficiency();
4405   size_t remset_bytes    = r->rem_set()->mem_size();
4406   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4407 
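       // The "starts humongous" region carries the statistics for the whole
       // humongous series, so below we stash them in the _hum_* fields and
       // hand them back out one region's worth at a time; overriding 'end'
       // makes the printed address range cover just this region rather than
       // the entire series.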
4408   if (r->is_starts_humongous()) {
4409     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4410            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4411            "they should have been zeroed after the last time we used them");
4412     // Set up the _hum_* fields.
4413     _hum_capacity_bytes  = capacity_bytes;
4414     _hum_used_bytes      = used_bytes;
4415     _hum_prev_live_bytes = prev_live_bytes;
4416     _hum_next_live_bytes = next_live_bytes;
4417     get_hum_bytes(&used_bytes, &capacity_bytes,
4418                   &prev_live_bytes, &next_live_bytes);
4419     end = bottom + HeapRegion::GrainWords;
4420   } else if (r->is_continues_humongous()) {
4421     get_hum_bytes(&used_bytes, &capacity_bytes,
4422                   &prev_live_bytes, &next_live_bytes);
4423     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4424   }
4425 
4426   _total_used_bytes      += used_bytes;
4427   _total_capacity_bytes  += capacity_bytes;
4428   _total_prev_live_bytes += prev_live_bytes;
4429   _total_next_live_bytes += next_live_bytes;
4430   _total_remset_bytes    += remset_bytes;
4431   _total_strong_code_roots_bytes += strong_code_roots_bytes;
4432 
4433   // Print a line for this particular region.
4434   _out->print_cr(G1PPRL_LINE_PREFIX
4435                  G1PPRL_TYPE_FORMAT
4436                  G1PPRL_ADDR_BASE_FORMAT
4437                  G1PPRL_BYTE_FORMAT
4438                  G1PPRL_BYTE_FORMAT
4439                  G1PPRL_BYTE_FORMAT
4440                  G1PPRL_DOUBLE_FORMAT
4441                  G1PPRL_BYTE_FORMAT
4442                  G1PPRL_BYTE_FORMAT,
4443                  type, p2i(bottom), p2i(end),
4444                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4445                  remset_bytes, strong_code_roots_bytes);
4446 
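       // Returning false keeps the heap region iteration going, so that
       // every region gets its own line in the output.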
4447   return false;
4448 }
4449 
4450 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4451   // Add the static memory usage to the remembered set sizes.
4452   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
4453   // Print the footer of the output.
4454   _out->print_cr(G1PPRL_LINE_PREFIX);
4455   _out->print_cr(G1PPRL_LINE_PREFIX
4456                  " SUMMARY"
4457                  G1PPRL_SUM_MB_FORMAT("capacity")
4458                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4459                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4460                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
4461                  G1PPRL_SUM_MB_FORMAT("remset")
4462                  G1PPRL_SUM_MB_FORMAT("code-roots"),
4463                  bytes_to_mb(_total_capacity_bytes),
4464                  bytes_to_mb(_total_used_bytes),
4465                  perc(_total_used_bytes, _total_capacity_bytes),
4466                  bytes_to_mb(_total_prev_live_bytes),
4467                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4468                  bytes_to_mb(_total_next_live_bytes),
4469                  perc(_total_next_live_bytes, _total_capacity_bytes),
4470                  bytes_to_mb(_total_remset_bytes),
4471                  bytes_to_mb(_total_strong_code_roots_bytes));
4472   _out->cr();
4473 }