1 /*
   2  * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMark.inline.hpp"
  30 #include "gc/g1/concurrentMarkThread.inline.hpp"
  31 #include "gc/g1/g1CollectedHeap.inline.hpp"
  32 #include "gc/g1/g1CollectorPolicy.hpp"
  33 #include "gc/g1/g1CollectorState.hpp"
  34 #include "gc/g1/g1ErgoVerbose.hpp"
  35 #include "gc/g1/g1Log.hpp"
  36 #include "gc/g1/g1OopClosures.inline.hpp"
  37 #include "gc/g1/g1RemSet.hpp"
  38 #include "gc/g1/g1StringDedup.hpp"
  39 #include "gc/g1/heapRegion.inline.hpp"
  40 #include "gc/g1/heapRegionManager.inline.hpp"
  41 #include "gc/g1/heapRegionRemSet.hpp"
  42 #include "gc/g1/heapRegionSet.inline.hpp"
  43 #include "gc/g1/suspendibleThreadSet.hpp"
  44 #include "gc/shared/gcTimer.hpp"
  45 #include "gc/shared/gcTrace.hpp"
  46 #include "gc/shared/gcTraceTime.hpp"
  47 #include "gc/shared/genOopClosures.inline.hpp"
  48 #include "gc/shared/referencePolicy.hpp"
  49 #include "gc/shared/strongRootsScope.hpp"
  50 #include "gc/shared/taskqueue.inline.hpp"
  51 #include "gc/shared/vmGCOperations.hpp"
  52 #include "memory/allocation.hpp"
  53 #include "memory/resourceArea.hpp"
  54 #include "oops/oop.inline.hpp"
  55 #include "runtime/atomic.inline.hpp"
  56 #include "runtime/handles.inline.hpp"
  57 #include "runtime/java.hpp"
  58 #include "runtime/prefetch.inline.hpp"
  59 #include "services/memTracker.hpp"
  60 
  61 // Concurrent marking bit map wrapper
  62 
  63 CMBitMapRO::CMBitMapRO(int shifter) :
  64   _bm(),
  65   _shifter(shifter) {
  66   _bmStartWord = 0;
  67   _bmWordSize = 0;
  68 }
  69 
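// Return the address of the first marked word at or after addr (rounded up
// to the bitmap's granularity) and before limit; returns limit if no marked
// word is found. A NULL limit means the end of the covered heap range.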
  70 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  71                                                const HeapWord* limit) const {
  72   // First we must round addr *up* to a possible object boundary.
  73   addr = (HeapWord*)align_size_up((intptr_t)addr,
  74                                   HeapWordSize << _shifter);
  75   size_t addrOffset = heapWordToOffset(addr);
  76   if (limit == NULL) {
  77     limit = _bmStartWord + _bmWordSize;
  78   }
  79   size_t limitOffset = heapWordToOffset(limit);
  80   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  81   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  82   assert(nextAddr >= addr, "get_next_one postcondition");
  83   assert(nextAddr == limit || isMarked(nextAddr),
  84          "get_next_one postcondition");
  85   return nextAddr;
  86 }
  87 
  88 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
  89                                                  const HeapWord* limit) const {
  90   size_t addrOffset = heapWordToOffset(addr);
  91   if (limit == NULL) {
  92     limit = _bmStartWord + _bmWordSize;
  93   }
  94   size_t limitOffset = heapWordToOffset(limit);
  95   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  96   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
 100   return nextAddr;
 101 }
 102 
 103 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 104   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 105   return (int) (diff >> _shifter);
 106 }
 107 
 108 #ifndef PRODUCT
 109 bool CMBitMapRO::covers(MemRegion heap_rs) const {
 110   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 111   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
 112          "size inconsistency");
 113   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
 114          _bmWordSize  == heap_rs.word_size();
 115 }
 116 #endif
 117 
 118 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
 119   _bm.print_on_error(st, prefix);
 120 }
 121 
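// Each bit in the mark bitmap covers MinObjAlignmentInBytes bytes of heap,
// so one bitmap byte covers mark_distance() = MinObjAlignmentInBytes *
// BitsPerByte heap bytes. compute_size() returns the number of bitmap bytes
// needed to cover a heap of the given size, rounded up to the reservation
// granularity.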
 122 size_t CMBitMap::compute_size(size_t heap_size) {
 123   return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
 124 }
 125 
 126 size_t CMBitMap::mark_distance() {
 127   return MinObjAlignmentInBytes * BitsPerByte;
 128 }
 129 
 130 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 131   _bmStartWord = heap.start();
 132   _bmWordSize = heap.word_size();
 133 
 134   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
 135   _bm.set_size(_bmWordSize >> _shifter);
 136 
 137   storage->set_mapping_changed_listener(&_listener);
 138 }
 139 
 140 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 141   if (zero_filled) {
 142     return;
 143   }
 144   // We need to clear the bitmap on commit, removing any existing information.
 145   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 146   _bm->clearRange(mr);
 147 }
 148 
 149 // Closure used for clearing the given mark bitmap.
 150 class ClearBitmapHRClosure : public HeapRegionClosure {
 151  private:
 152   ConcurrentMark* _cm;
 153   CMBitMap* _bitmap;
 154   bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 155  public:
 156   ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
 157     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
 158   }
 159 
 160   virtual bool doHeapRegion(HeapRegion* r) {
 161     size_t const chunk_size_in_words = M / HeapWordSize;
 162 
 163     HeapWord* cur = r->bottom();
 164     HeapWord* const end = r->end();
 165 
 166     while (cur < end) {
 167       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 168       _bitmap->clearRange(mr);
 169 
 170       cur += chunk_size_in_words;
 171 
      // Abort the iteration if, after yielding, marking has been aborted.
 173       if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
 174         return true;
 175       }
 176       // Repeat the asserts from before the start of the closure. We will do them
 177       // as asserts here to minimize their overhead on the product. However, we
 178       // will have them as guarantees at the beginning / end of the bitmap
 179       // clearing to get some checking in the product.
 180       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
 181       assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
 182     }
 183 
 184     return false;
 185   }
 186 };
 187 
 188 class ParClearNextMarkBitmapTask : public AbstractGangTask {
 189   ClearBitmapHRClosure* _cl;
 190   HeapRegionClaimer     _hrclaimer;
 191   bool                  _suspendible; // If the task is suspendible, workers must join the STS.
 192 
 193 public:
 194   ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
 195       _cl(cl), _suspendible(suspendible), AbstractGangTask("Parallel Clear Bitmap Task"), _hrclaimer(n_workers) {}
 196 
 197   void work(uint worker_id) {
 198     SuspendibleThreadSetJoiner sts_join(_suspendible);
 199     G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
 200   }
 201 };
 202 
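// Clear the entire bitmap by iterating over all heap regions, distributing
// the work across the active GC worker threads. This variant never yields,
// so the iteration is guaranteed to complete.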
 203 void CMBitMap::clearAll() {
 204   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 205   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
 206   uint n_workers = g1h->workers()->active_workers();
 207   ParClearNextMarkBitmapTask task(&cl, n_workers, false);
 208   g1h->workers()->run_task(&task);
 209   guarantee(cl.complete(), "Must have completed iteration.");
 210   return;
 211 }
 212 
void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 215   assert(!mr.is_empty(), "unexpected empty region");
 216   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 217           ((HeapWord *) mr.end())),
 218          "markRange memory region end is not card aligned");
 219   // convert address range into offset range
 220   _bm.at_put_range(heapWordToOffset(mr.start()),
 221                    heapWordToOffset(mr.end()), true);
 222 }
 223 
void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 226   assert(!mr.is_empty(), "unexpected empty region");
 227   // convert address range into offset range
 228   _bm.at_put_range(heapWordToOffset(mr.start()),
 229                    heapWordToOffset(mr.end()), false);
 230 }
 231 
 232 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 233                                             HeapWord* end_addr) {
 234   HeapWord* start = getNextMarkedWordAddress(addr);
 235   start = MIN2(start, end_addr);
 236   HeapWord* end   = getNextUnmarkedWordAddress(start);
 237   end = MIN2(end, end_addr);
 238   assert(start <= end, "Consistency check");
 239   MemRegion mr(start, end);
 240   if (!mr.is_empty()) {
 241     clearRange(mr);
 242   }
 243   return mr;
 244 }
 245 
 246 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 247   _base(NULL), _cm(cm)
 248 #ifdef ASSERT
 249   , _drain_in_progress(false)
 250   , _drain_in_progress_yields(false)
 251 #endif
 252 {}
 253 
 254 bool CMMarkStack::allocate(size_t capacity) {
 255   // allocate a stack of the requisite depth
 256   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 257   if (!rs.is_reserved()) {
 258     warning("ConcurrentMark MarkStack allocation failure");
 259     return false;
 260   }
 261   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 262   if (!_virtual_space.initialize(rs, rs.size())) {
 263     warning("ConcurrentMark MarkStack backing store failure");
 264     // Release the virtual memory reserved for the marking stack
 265     rs.release();
 266     return false;
 267   }
 268   assert(_virtual_space.committed_size() == rs.size(),
 269          "Didn't reserve backing store for all of ConcurrentMark stack?");
 270   _base = (oop*) _virtual_space.low();
 271   setEmpty();
 272   _capacity = (jint) capacity;
 273   _saved_index = -1;
 274   _should_expand = false;
 275   return true;
 276 }
 277 
 278 void CMMarkStack::expand() {
  // Called during remark if we've overflown the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
 281   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 282   // Clear expansion flag
 283   _should_expand = false;
 284   if (_capacity == (jint) MarkStackSizeMax) {
 285     if (PrintGCDetails && Verbose) {
 286       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
 287     }
 288     return;
 289   }
 290   // Double capacity if possible
 291   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up the existing stack until we have managed to
  // reserve the doubled capacity that we want.
 294   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 295                                                            sizeof(oop)));
 296   if (rs.is_reserved()) {
 297     // Release the backing store associated with old stack
 298     _virtual_space.release();
 299     // Reinitialize virtual space for new stack
 300     if (!_virtual_space.initialize(rs, rs.size())) {
 301       fatal("Not enough swap for expanded marking stack capacity");
 302     }
 303     _base = (oop*)(_virtual_space.low());
 304     _index = 0;
 305     _capacity = new_capacity;
 306   } else {
 307     if (PrintGCDetails && Verbose) {
 308       // Failed to double capacity, continue;
 309       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
 310                           SIZE_FORMAT"K to " SIZE_FORMAT"K",
 311                           _capacity / K, new_capacity / K);
 312     }
 313   }
 314 }
 315 
 316 void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a marking stack
  // overflow, record that we should, if possible, expand the stack.
 320   _should_expand = _cm->has_overflown();
 321 }
 322 
 323 CMMarkStack::~CMMarkStack() {
 324   if (_base != NULL) {
 325     _base = NULL;
 326     _virtual_space.release();
 327   }
 328 }
 329 
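// Push n entries from ptr_arr onto the global mark stack as a single block,
// under ParGCRareEvent_lock. If the entries do not fit, the overflow flag is
// set and nothing is pushed.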
 330 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 331   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 332   jint start = _index;
 333   jint next_index = start + n;
 334   if (next_index > _capacity) {
 335     _overflow = true;
 336     return;
 337   }
 338   // Otherwise.
 339   _index = next_index;
 340   for (int i = 0; i < n; i++) {
 341     int ind = start + i;
 342     assert(ind < _capacity, "By overflow test above.");
 343     _base[ind] = ptr_arr[i];
 344   }
 345 }
 346 
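// Pop up to max entries from the global mark stack into ptr_arr, under
// ParGCRareEvent_lock. Returns false (with *n == 0) if the stack is empty,
// otherwise stores the number of entries popped in *n and returns true.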
 347 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 348   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 349   jint index = _index;
 350   if (index == 0) {
 351     *n = 0;
 352     return false;
 353   } else {
 354     int k = MIN2(max, index);
 355     jint  new_ind = index - k;
 356     for (int j = 0; j < k; j++) {
 357       ptr_arr[j] = _base[new_ind + j];
 358     }
 359     _index = new_ind;
 360     *n = k;
 361     return true;
 362   }
 363 }
 364 
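// Pop entries off the stack and apply the given closure to each one until the
// stack is empty. If yield_after is true, a yield check is performed after
// each entry; returns false if the drain stopped early because of a yield,
// true if the stack was fully drained.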
 365 template<class OopClosureClass>
 366 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 367   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 368          || SafepointSynchronize::is_at_safepoint(),
 369          "Drain recursion must be yield-safe.");
 370   bool res = true;
 371   debug_only(_drain_in_progress = true);
 372   debug_only(_drain_in_progress_yields = yield_after);
 373   while (!isEmpty()) {
 374     oop newOop = pop();
 375     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 376     assert(newOop->is_oop(), "Expected an oop");
 377     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 378            "only grey objects on this stack");
 379     newOop->oop_iterate(cl);
 380     if (yield_after && _cm->do_yield_check()) {
 381       res = false;
 382       break;
 383     }
 384   }
 385   debug_only(_drain_in_progress = false);
 386   return res;
 387 }
 388 
 389 void CMMarkStack::note_start_of_gc() {
 390   assert(_saved_index == -1,
 391          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 392   _saved_index = _index;
 393 }
 394 
 395 void CMMarkStack::note_end_of_gc() {
 396   // This is intentionally a guarantee, instead of an assert. If we
 397   // accidentally add something to the mark stack during GC, it
  // will be a correctness issue, so it's better if we crash. We'll
 399   // only check this once per GC anyway, so it won't be a performance
 400   // issue in any way.
 401   guarantee(_saved_index == _index,
 402             err_msg("saved index: %d index: %d", _saved_index, _index));
 403   _saved_index = -1;
 404 }
 405 
 406 void CMMarkStack::oops_do(OopClosure* f) {
 407   assert(_saved_index == _index,
 408          err_msg("saved index: %d index: %d", _saved_index, _index));
 409   for (int i = 0; i < _index; i += 1) {
 410     f->do_oop(&_base[i]);
 411   }
 412 }
 413 
 414 CMRootRegions::CMRootRegions() :
 415   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 416   _should_abort(false),  _next_survivor(NULL) { }
 417 
 418 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 419   _young_list = g1h->young_list();
 420   _cm = cm;
 421 }
 422 
 423 void CMRootRegions::prepare_for_scan() {
 424   assert(!scan_in_progress(), "pre-condition");
 425 
 426   // Currently, only survivors can be root regions.
 427   assert(_next_survivor == NULL, "pre-condition");
 428   _next_survivor = _young_list->first_survivor_region();
 429   _scan_in_progress = (_next_survivor != NULL);
 430   _should_abort = false;
 431 }
 432 
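// Claim the next root region (currently always a survivor region) for
// scanning, under RootRegionScan_lock. Returns NULL once all root regions
// have been claimed or if the scan has been flagged to abort.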
 433 HeapRegion* CMRootRegions::claim_next() {
 434   if (_should_abort) {
 435     // If someone has set the should_abort flag, we return NULL to
 436     // force the caller to bail out of their loop.
 437     return NULL;
 438   }
 439 
 440   // Currently, only survivors can be root regions.
 441   HeapRegion* res = _next_survivor;
 442   if (res != NULL) {
 443     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 444     // Read it again in case it changed while we were waiting for the lock.
 445     res = _next_survivor;
 446     if (res != NULL) {
 447       if (res == _young_list->last_survivor_region()) {
 448         // We just claimed the last survivor so store NULL to indicate
 449         // that we're done.
 450         _next_survivor = NULL;
 451       } else {
 452         _next_survivor = res->get_next_young_region();
 453       }
 454     } else {
 455       // Someone else claimed the last survivor while we were trying
 456       // to take the lock so nothing else to do.
 457     }
 458   }
 459   assert(res == NULL || res->is_survivor(), "post-condition");
 460 
 461   return res;
 462 }
 463 
 464 void CMRootRegions::scan_finished() {
 465   assert(scan_in_progress(), "pre-condition");
 466 
 467   // Currently, only survivors can be root regions.
 468   if (!_should_abort) {
 469     assert(_next_survivor == NULL, "we should have claimed all survivors");
 470   }
 471   _next_survivor = NULL;
 472 
 473   {
 474     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 475     _scan_in_progress = false;
 476     RootRegionScan_lock->notify_all();
 477   }
 478 }
 479 
 480 bool CMRootRegions::wait_until_scan_finished() {
 481   if (!scan_in_progress()) return false;
 482 
 483   {
 484     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 485     while (scan_in_progress()) {
 486       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 487     }
 488   }
 489   return true;
 490 }
 491 
 492 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 493 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 494 #endif // _MSC_VER
 495 
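// Derive a default number of concurrent marking threads from the number of
// parallel GC threads: roughly one quarter of n_par_threads, and at least one.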
 496 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 497   return MAX2((n_par_threads + 2) / 4, 1U);
 498 }
 499 
 500 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 501   _g1h(g1h),
 502   _markBitMap1(),
 503   _markBitMap2(),
 504   _parallel_marking_threads(0),
 505   _max_parallel_marking_threads(0),
 506   _sleep_factor(0.0),
 507   _marking_task_overhead(1.0),
 508   _cleanup_sleep_factor(0.0),
 509   _cleanup_task_overhead(1.0),
 510   _cleanup_list("Cleanup List"),
 511   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 512   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
 513             CardTableModRefBS::card_shift,
 514             false /* in_resource_area*/),
 515 
 516   _prevMarkBitMap(&_markBitMap1),
 517   _nextMarkBitMap(&_markBitMap2),
 518 
 519   _markStack(this),
 520   // _finger set in set_non_marking_state
 521 
 522   _max_worker_id(ParallelGCThreads),
 523   // _active_tasks set in set_non_marking_state
 524   // _tasks set inside the constructor
 525   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 526   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 527 
 528   _has_overflown(false),
 529   _concurrent(false),
 530   _has_aborted(false),
 531   _aborted_gc_id(GCId::undefined()),
 532   _restart_for_overflow(false),
 533   _concurrent_marking_in_progress(false),
 534 
 535   // _verbose_level set below
 536 
 537   _init_times(),
 538   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 539   _cleanup_times(),
 540   _total_counting_time(0.0),
 541   _total_rs_scrub_time(0.0),
 542 
 543   _parallel_workers(NULL),
 544 
 545   _count_card_bitmaps(NULL),
 546   _count_marked_bytes(NULL),
 547   _completed_initialization(false) {
 548   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 549   if (verbose_level < no_verbose) {
 550     verbose_level = no_verbose;
 551   }
 552   if (verbose_level > high_verbose) {
 553     verbose_level = high_verbose;
 554   }
 555   _verbose_level = verbose_level;
 556 
 557   if (verbose_low()) {
 558     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 559                            "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
 560   }
 561 
 562   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 563   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 564 
 565   // Create & start a ConcurrentMark thread.
 566   _cmThread = new ConcurrentMarkThread(this);
 567   assert(cmThread() != NULL, "CM Thread should have been created");
 568   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 569   if (_cmThread->osthread() == NULL) {
 570       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 571   }
 572 
 573   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 574   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 575   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 576 
 577   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 578   satb_qs.set_buffer_size(G1SATBBufferSize);
 579 
 580   _root_regions.init(_g1h, this);
 581 
 582   if (ConcGCThreads > ParallelGCThreads) {
 583     warning("Can't have more ConcGCThreads (%u) "
 584             "than ParallelGCThreads (%u).",
 585             ConcGCThreads, ParallelGCThreads);
 586     return;
 587   }
 588   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 589     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 590     // if both are set
 591     _sleep_factor             = 0.0;
 592     _marking_task_overhead    = 1.0;
 593   } else if (G1MarkingOverheadPercent > 0) {
 594     // We will calculate the number of parallel marking threads based
 595     // on a target overhead with respect to the soft real-time goal
 596     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 597     double overall_cm_overhead =
 598       (double) MaxGCPauseMillis * marking_overhead /
 599       (double) GCPauseIntervalMillis;
 600     double cpu_ratio = 1.0 / (double) os::processor_count();
 601     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 602     double marking_task_overhead =
 603       overall_cm_overhead / marking_thread_num *
 604                                               (double) os::processor_count();
 605     double sleep_factor =
 606                        (1.0 - marking_task_overhead) / marking_task_overhead;
 607 
 608     FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
 609     _sleep_factor             = sleep_factor;
 610     _marking_task_overhead    = marking_task_overhead;
 611   } else {
 612     // Calculate the number of parallel marking threads by scaling
 613     // the number of parallel GC threads.
 614     uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
 615     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 616     _sleep_factor             = 0.0;
 617     _marking_task_overhead    = 1.0;
 618   }
 619 
 620   assert(ConcGCThreads > 0, "Should have been set");
 621   _parallel_marking_threads = ConcGCThreads;
 622   _max_parallel_marking_threads = _parallel_marking_threads;
 623 
 624   if (parallel_marking_threads() > 1) {
 625     _cleanup_task_overhead = 1.0;
 626   } else {
 627     _cleanup_task_overhead = marking_task_overhead();
 628   }
 629   _cleanup_sleep_factor =
 630                    (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 631 
 632 #if 0
 633   gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 634   gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 635   gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 636   gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 637   gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 638 #endif
 639 
 640   _parallel_workers = new FlexibleWorkGang("G1 Marker",
 641        _max_parallel_marking_threads, false, true);
 642   if (_parallel_workers == NULL) {
 643     vm_exit_during_initialization("Failed necessary allocation.");
 644   } else {
 645     _parallel_workers->initialize_workers();
 646   }
 647 
 648   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 649     size_t mark_stack_size =
 650       MIN2(MarkStackSizeMax,
 651           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 652     // Verify that the calculated value for MarkStackSize is in range.
 653     // It would be nice to use the private utility routine from Arguments.
 654     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 655       warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 656               "must be between 1 and " SIZE_FORMAT,
 657               mark_stack_size, MarkStackSizeMax);
 658       return;
 659     }
 660     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 661   } else {
 662     // Verify MarkStackSize is in range.
 663     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 664       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 665         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 666           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 667                   "must be between 1 and " SIZE_FORMAT,
 668                   MarkStackSize, MarkStackSizeMax);
 669           return;
 670         }
 671       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 672         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 673           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 674                   " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 675                   MarkStackSize, MarkStackSizeMax);
 676           return;
 677         }
 678       }
 679     }
 680   }
 681 
 682   if (!_markStack.allocate(MarkStackSize)) {
 683     warning("Failed to allocate CM marking stack");
 684     return;
 685   }
 686 
 687   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 688   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 689 
 690   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 691   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 692 
 693   BitMap::idx_t card_bm_size = _card_bm.size();
 694 
 695   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 696   _active_tasks = _max_worker_id;
 697 
 698   uint max_regions = _g1h->max_regions();
 699   for (uint i = 0; i < _max_worker_id; ++i) {
 700     CMTaskQueue* task_queue = new CMTaskQueue();
 701     task_queue->initialize();
 702     _task_queues->register_queue(i, task_queue);
 703 
 704     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 705     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 706 
 707     _tasks[i] = new CMTask(i, this,
 708                            _count_marked_bytes[i],
 709                            &_count_card_bitmaps[i],
 710                            task_queue, _task_queues);
 711 
 712     _accum_task_vtime[i] = 0.0;
 713   }
 714 
 715   // Calculate the card number for the bottom of the heap. Used
 716   // in biasing indexes into the accounting card bitmaps.
 717   _heap_bottom_card_num =
 718     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 719                                 CardTableModRefBS::card_shift);
 720 
 721   // Clear all the liveness counting data
 722   clear_all_count_data();
 723 
 724   // so that the call below can read a sensible value
 725   _heap_start = g1h->reserved_region().start();
 726   set_non_marking_state();
 727   _completed_initialization = true;
 728 }
 729 
 730 void ConcurrentMark::reset() {
 731   // Starting values for these two. This should be called in a STW
 732   // phase.
 733   MemRegion reserved = _g1h->g1_reserved();
 734   _heap_start = reserved.start();
 735   _heap_end   = reserved.end();
 736 
 737   // Separated the asserts so that we know which one fires.
 738   assert(_heap_start != NULL, "heap bounds should look ok");
 739   assert(_heap_end != NULL, "heap bounds should look ok");
 740   assert(_heap_start < _heap_end, "heap bounds should look ok");
 741 
 742   // Reset all the marking data structures and any necessary flags
 743   reset_marking_state();
 744 
 745   if (verbose_low()) {
 746     gclog_or_tty->print_cr("[global] resetting");
 747   }
 748 
  // We reset all of them, since different phases will use a
  // different number of active threads. So, it's easiest to have all
 751   // of them ready.
 752   for (uint i = 0; i < _max_worker_id; ++i) {
 753     _tasks[i]->reset(_nextMarkBitMap);
 754   }
 755 
  // We need this to make sure that the flag is on during the evacuation
  // pause that has the initial mark piggy-backed on it.
 758   set_concurrent_marking_in_progress();
 759 }
 760 
 761 
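// Empty the global mark stack and all the task queues, and reset the global
// finger to the bottom of the heap. Also records whether the mark stack
// should be expanded (if it has overflown). The overflow flag is cleared only
// when clear_overflow is true; otherwise it is expected to already be set.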
 762 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 763   _markStack.set_should_expand();
 764   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 765   if (clear_overflow) {
 766     clear_has_overflown();
 767   } else {
 768     assert(has_overflown(), "pre-condition");
 769   }
 770   _finger = _heap_start;
 771 
 772   for (uint i = 0; i < _max_worker_id; ++i) {
 773     CMTaskQueue* queue = _task_queues->queue(i);
 774     queue->set_empty();
 775   }
 776 }
 777 
 778 void ConcurrentMark::set_concurrency(uint active_tasks) {
 779   assert(active_tasks <= _max_worker_id, "we should not have more");
 780 
 781   _active_tasks = active_tasks;
 782   // Need to update the three data structures below according to the
 783   // number of active threads for this phase.
 784   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 785   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 786   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 787 }
 788 
 789 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 790   set_concurrency(active_tasks);
 791 
 792   _concurrent = concurrent;
 793   // We propagate this to all tasks, not just the active ones.
 794   for (uint i = 0; i < _max_worker_id; ++i)
 795     _tasks[i]->set_concurrent(concurrent);
 796 
 797   if (concurrent) {
 798     set_concurrent_marking_in_progress();
 799   } else {
 800     // We currently assume that the concurrent flag has been set to
 801     // false before we start remark. At this point we should also be
 802     // in a STW phase.
 803     assert(!concurrent_marking_in_progress(), "invariant");
 804     assert(out_of_regions(),
 805            err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
 806                    p2i(_finger), p2i(_heap_end)));
 807   }
 808 }
 809 
 810 void ConcurrentMark::set_non_marking_state() {
 811   // We set the global marking state to some default values when we're
 812   // not doing marking.
 813   reset_marking_state();
 814   _active_tasks = 0;
 815   clear_concurrent_marking_in_progress();
 816 }
 817 
 818 ConcurrentMark::~ConcurrentMark() {
 819   // The ConcurrentMark instance is never freed.
 820   ShouldNotReachHere();
 821 }
 822 
 823 void ConcurrentMark::clearNextBitmap() {
 824   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 825 
  // Make sure that the concurrent mark thread still appears to be
  // within the current cycle.
 828   guarantee(cmThread()->during_cycle(), "invariant");
 829 
 830   // We are finishing up the current cycle by clearing the next
 831   // marking bitmap and getting it ready for the next cycle. During
 832   // this time no other cycle can start. So, let's make sure that this
 833   // is the case.
 834   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 835 
 836   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
 837   ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
 838   _parallel_workers->run_task(&task);
 839 
 840   // Clear the liveness counting data. If the marking has been aborted, the abort()
 841   // call already did that.
 842   if (cl.complete()) {
 843     clear_all_count_data();
 844   }
 845 
 846   // Repeat the asserts from above.
 847   guarantee(cmThread()->during_cycle(), "invariant");
 848   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 849 }
 850 
 851 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 852   CMBitMap* _bitmap;
 853   bool _error;
 854  public:
 855   CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
 856   }
 857 
 858   virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently with the mutator, so we must make sure
 860     // that the result of the getNextMarkedWordAddress() call is compared to the
 861     // value passed to it as limit to detect any found bits.
 862     // We can use the region's orig_end() for the limit and the comparison value
 863     // as it always contains the "real" end of the region that never changes and
 864     // has no side effects.
 865     // Due to the latter, there can also be no problem with the compiler generating
 866     // reloads of the orig_end() call.
 867     HeapWord* end = r->orig_end();
 868     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 869   }
 870 };
 871 
 872 bool ConcurrentMark::nextMarkBitmapIsClear() {
 873   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 874   _g1h->heap_region_iterate(&cl);
 875   return cl.complete();
 876 }
 877 
 878 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 879 public:
 880   bool doHeapRegion(HeapRegion* r) {
 881     if (!r->is_continues_humongous()) {
 882       r->note_start_of_marking();
 883     }
 884     return false;
 885   }
 886 };
 887 
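// Pre-work for the initial-mark checkpoint: clear the abort flag, reset the
// global marking state, and note the start of marking in every region.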
 888 void ConcurrentMark::checkpointRootsInitialPre() {
 889   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 890   G1CollectorPolicy* g1p = g1h->g1_policy();
 891 
 892   _has_aborted = false;
 893 
 894   // Initialize marking structures. This has to be done in a STW phase.
 895   reset();
 896 
 897   // For each region note start of marking.
 898   NoteStartOfMarkHRClosure startcl;
 899   g1h->heap_region_iterate(&startcl);
 900 }
 901 
 902 
 903 void ConcurrentMark::checkpointRootsInitialPost() {
 904   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 905 
 906   // If we force an overflow during remark, the remark operation will
 907   // actually abort and we'll restart concurrent marking. If we always
 908   // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow count decreases at every
  // remark and we eventually stop needing to force one.
 912   force_overflow_stw()->init();
 913 
 914   // Start Concurrent Marking weak-reference discovery.
 915   ReferenceProcessor* rp = g1h->ref_processor_cm();
 916   // enable ("weak") refs discovery
 917   rp->enable_discovery();
 918   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 919 
 920   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
 923   satb_mq_set.set_active_all_threads(true, /* new active value */
 924                                      false /* expected_active */);
 925 
 926   _root_regions.prepare_for_scan();
 927 
 928   // update_g1_committed() will be called at the end of an evac pause
 929   // when marking is on. So, it's also called at the end of the
 930   // initial-mark pause to update the heap end, if the heap expands
 931   // during it. No need to call it here.
 932 }
 933 
 934 /*
 935  * Notice that in the next two methods, we actually leave the STS
 936  * during the barrier sync and join it immediately afterwards. If we
 937  * do not do this, the following deadlock can occur: one thread could
 938  * be in the barrier sync code, waiting for the other thread to also
 939  * sync up, whereas another one could be trying to yield, while also
 940  * waiting for the other threads to sync up too.
 941  *
 942  * Note, however, that this code is also used during remark and in
 943  * this case we should not attempt to leave / enter the STS, otherwise
 944  * we'll either hit an assert (debug / fastdebug) or deadlock
 945  * (product). So we should only leave / enter the STS if we are
 946  * operating concurrently.
 947  *
 948  * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 951  * barrier is one of the last things do_marking_step() does, and it
 952  * doesn't manipulate any data structures afterwards.
 953  */
 954 
 955 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 956   bool barrier_aborted;
 957 
 958   if (verbose_low()) {
 959     gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
 960   }
 961 
 962   {
 963     SuspendibleThreadSetLeaver sts_leave(concurrent());
 964     barrier_aborted = !_first_overflow_barrier_sync.enter();
 965   }
 966 
 967   // at this point everyone should have synced up and not be doing any
 968   // more work
 969 
 970   if (verbose_low()) {
 971     if (barrier_aborted) {
 972       gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
 973     } else {
 974       gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
 975     }
 976   }
 977 
 978   if (barrier_aborted) {
 979     // If the barrier aborted we ignore the overflow condition and
 980     // just abort the whole marking phase as quickly as possible.
 981     return;
 982   }
 983 
 984   // If we're executing the concurrent phase of marking, reset the marking
 985   // state; otherwise the marking state is reset after reference processing,
 986   // during the remark pause.
 987   // If we reset here as a result of an overflow during the remark we will
 988   // see assertion failures from any subsequent set_concurrency_and_phase()
 989   // calls.
 990   if (concurrent()) {
    // let the task associated with worker 0 do this
 992     if (worker_id == 0) {
 993       // task 0 is responsible for clearing the global data structures
 994       // We should be here because of an overflow. During STW we should
 995       // not clear the overflow flag since we rely on it being true when
 996       // we exit this method to abort the pause and restart concurrent
 997       // marking.
 998       reset_marking_state(true /* clear_overflow */);
 999       force_overflow()->update();
1000 
1001       if (G1Log::fine()) {
1002         gclog_or_tty->gclog_stamp(concurrent_gc_id());
1003         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
1004       }
1005     }
1006   }
1007 
  // after this, each task should reset its own data structures and
  // then go into the second barrier
1010 }
1011 
1012 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1013   bool barrier_aborted;
1014 
1015   if (verbose_low()) {
1016     gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
1017   }
1018 
1019   {
1020     SuspendibleThreadSetLeaver sts_leave(concurrent());
1021     barrier_aborted = !_second_overflow_barrier_sync.enter();
1022   }
1023 
1024   // at this point everything should be re-initialized and ready to go
1025 
1026   if (verbose_low()) {
1027     if (barrier_aborted) {
1028       gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
1029     } else {
1030       gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
1031     }
1032   }
1033 }
1034 
1035 #ifndef PRODUCT
1036 void ForceOverflowSettings::init() {
1037   _num_remaining = G1ConcMarkForceOverflow;
1038   _force = false;
1039   update();
1040 }
1041 
1042 void ForceOverflowSettings::update() {
1043   if (_num_remaining > 0) {
1044     _num_remaining -= 1;
1045     _force = true;
1046   } else {
1047     _force = false;
1048   }
1049 }
1050 
1051 bool ForceOverflowSettings::should_force() {
1052   if (_force) {
1053     _force = false;
1054     return true;
1055   } else {
1056     return false;
1057   }
1058 }
1059 #endif // !PRODUCT
1060 
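// The gang task that runs the concurrent phase of marking. Each worker joins
// the suspendible thread set and repeatedly calls do_marking_step() on its
// CMTask, checking for yield requests between steps and optionally sleeping
// according to the sleep factor, until marking either completes or is aborted.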
1061 class CMConcurrentMarkingTask: public AbstractGangTask {
1062 private:
1063   ConcurrentMark*       _cm;
1064   ConcurrentMarkThread* _cmt;
1065 
1066 public:
1067   void work(uint worker_id) {
1068     assert(Thread::current()->is_ConcurrentGC_thread(),
1069            "this should only be done by a conc GC thread");
1070     ResourceMark rm;
1071 
1072     double start_vtime = os::elapsedVTime();
1073 
1074     {
1075       SuspendibleThreadSetJoiner sts_join;
1076 
1077       assert(worker_id < _cm->active_tasks(), "invariant");
1078       CMTask* the_task = _cm->task(worker_id);
1079       the_task->record_start_time();
1080       if (!_cm->has_aborted()) {
1081         do {
1082           double start_vtime_sec = os::elapsedVTime();
1083           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1084 
1085           the_task->do_marking_step(mark_step_duration_ms,
1086                                     true  /* do_termination */,
1087                                     false /* is_serial*/);
1088 
1089           double end_vtime_sec = os::elapsedVTime();
1090           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1091           _cm->clear_has_overflown();
1092 
1093           _cm->do_yield_check(worker_id);
1094 
1095           jlong sleep_time_ms;
1096           if (!_cm->has_aborted() && the_task->has_aborted()) {
1097             sleep_time_ms =
1098               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1099             {
1100               SuspendibleThreadSetLeaver sts_leave;
1101               os::sleep(Thread::current(), sleep_time_ms, false);
1102             }
1103           }
1104         } while (!_cm->has_aborted() && the_task->has_aborted());
1105       }
1106       the_task->record_end_time();
1107       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1108     }
1109 
1110     double end_vtime = os::elapsedVTime();
1111     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1112   }
1113 
1114   CMConcurrentMarkingTask(ConcurrentMark* cm,
1115                           ConcurrentMarkThread* cmt) :
1116       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1117 
1118   ~CMConcurrentMarkingTask() { }
1119 };
1120 
1121 // Calculates the number of active workers for a concurrent
1122 // phase.
1123 uint ConcurrentMark::calc_parallel_marking_threads() {
1124   uint n_conc_workers = 0;
1125   if (!UseDynamicNumberOfGCThreads ||
1126       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1127        !ForceDynamicNumberOfGCThreads)) {
1128     n_conc_workers = max_parallel_marking_threads();
1129   } else {
1130     n_conc_workers =
1131       AdaptiveSizePolicy::calc_default_active_workers(
1132                                    max_parallel_marking_threads(),
1133                                    1, /* Minimum workers */
1134                                    parallel_marking_threads(),
1135                                    Threads::number_of_non_daemon_threads());
1136     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1137     // that scaling has already gone into "_max_parallel_marking_threads".
1138   }
1139   assert(n_conc_workers > 0, "Always need at least 1");
1140   return n_conc_workers;
1141 }
1142 
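// Scan a single root region: iterate over all objects between bottom() and
// top(), applying a G1RootRegionScanClosure to each reference, prefetching
// PrefetchScanIntervalInBytes ahead.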
1143 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1144   // Currently, only survivors can be root regions.
1145   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1146   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1147 
1148   const uintx interval = PrefetchScanIntervalInBytes;
1149   HeapWord* curr = hr->bottom();
1150   const HeapWord* end = hr->top();
1151   while (curr < end) {
1152     Prefetch::read(curr, interval);
1153     oop obj = oop(curr);
1154     int size = obj->oop_iterate(&cl);
1155     assert(size == obj->size(), "sanity");
1156     curr += size;
1157   }
1158 }
1159 
1160 class CMRootRegionScanTask : public AbstractGangTask {
1161 private:
1162   ConcurrentMark* _cm;
1163 
1164 public:
1165   CMRootRegionScanTask(ConcurrentMark* cm) :
1166     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1167 
1168   void work(uint worker_id) {
1169     assert(Thread::current()->is_ConcurrentGC_thread(),
1170            "this should only be done by a conc GC thread");
1171 
1172     CMRootRegions* root_regions = _cm->root_regions();
1173     HeapRegion* hr = root_regions->claim_next();
1174     while (hr != NULL) {
1175       _cm->scanRootRegion(hr, worker_id);
1176       hr = root_regions->claim_next();
1177     }
1178   }
1179 };
1180 
1181 void ConcurrentMark::scanRootRegions() {
1182   // Start of concurrent marking.
1183   ClassLoaderDataGraph::clear_claimed_marks();
1184 
1185   // scan_in_progress() will have been set to true only if there was
1186   // at least one root region to scan. So, if it's false, we
1187   // should not attempt to do any further work.
1188   if (root_regions()->scan_in_progress()) {
1189     _parallel_marking_threads = calc_parallel_marking_threads();
1190     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1191            "Maximum number of marking threads exceeded");
1192     uint active_workers = MAX2(1U, parallel_marking_threads());
1193 
1194     CMRootRegionScanTask task(this);
1195     _parallel_workers->set_active_workers(active_workers);
1196     _parallel_workers->run_task(&task);
1197 
1198     // It's possible that has_aborted() is true here without actually
1199     // aborting the survivor scan earlier. This is OK as it's
1200     // mainly used for sanity checking.
1201     root_regions()->scan_finished();
1202   }
1203 }
1204 
1205 void ConcurrentMark::markFromRoots() {
1206   // we might be tempted to assert that:
1207   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1208   //        "inconsistent argument?");
1209   // However that wouldn't be right, because it's possible that
1210   // a safepoint is indeed in progress as a younger generation
1211   // stop-the-world GC happens even as we mark in this generation.
1212 
1213   _restart_for_overflow = false;
1214   force_overflow_conc()->init();
1215 
1216   // _g1h has _n_par_threads
1217   _parallel_marking_threads = calc_parallel_marking_threads();
1218   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1219     "Maximum number of marking threads exceeded");
1220 
1221   uint active_workers = MAX2(1U, parallel_marking_threads());
1222   assert(active_workers > 0, "Should have been set");
1223 
1224   // Parallel task terminator is set in "set_concurrency_and_phase()"
1225   set_concurrency_and_phase(active_workers, true /* concurrent */);
1226 
1227   CMConcurrentMarkingTask markingTask(this, cmThread());
1228   _parallel_workers->set_active_workers(active_workers);
1229   _parallel_workers->run_task(&markingTask);
1230   print_stats();
1231 }
1232 
1233 // Helper class to get rid of some boilerplate code.
1234 class G1CMTraceTime : public GCTraceTime {
1235   static bool doit_and_prepend(bool doit) {
1236     if (doit) {
1237       gclog_or_tty->put(' ');
1238     }
1239     return doit;
1240   }
1241 
1242  public:
1243   G1CMTraceTime(const char* title, bool doit)
1244     : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
1245         G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
1246   }
1247 };
1248 
1249 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1250   // world is stopped at this checkpoint
1251   assert(SafepointSynchronize::is_at_safepoint(),
1252          "world should be stopped");
1253 
1254   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1255 
1256   // If a full collection has happened, we shouldn't do this.
1257   if (has_aborted()) {
1258     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1259     return;
1260   }
1261 
1262   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1263 
1264   if (VerifyDuringGC) {
1265     HandleMark hm;  // handle scope
1266     g1h->prepare_for_verify();
1267     Universe::verify(VerifyOption_G1UsePrevMarking,
1268                      " VerifyDuringGC:(before)");
1269   }
1270   g1h->check_bitmaps("Remark Start");
1271 
1272   G1CollectorPolicy* g1p = g1h->g1_policy();
1273   g1p->record_concurrent_mark_remark_start();
1274 
1275   double start = os::elapsedTime();
1276 
1277   checkpointRootsFinalWork();
1278 
1279   double mark_work_end = os::elapsedTime();
1280 
1281   weakRefsWork(clear_all_soft_refs);
1282 
1283   if (has_overflown()) {
1284     // Oops.  We overflowed.  Restart concurrent marking.
1285     _restart_for_overflow = true;
1286     if (G1TraceMarkStackOverflow) {
1287       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1288     }
1289 
1290     // Verify the heap w.r.t. the previous marking bitmap.
1291     if (VerifyDuringGC) {
1292       HandleMark hm;  // handle scope
1293       g1h->prepare_for_verify();
1294       Universe::verify(VerifyOption_G1UsePrevMarking,
1295                        " VerifyDuringGC:(overflow)");
1296     }
1297 
1298     // Clear the marking state because we will be restarting
1299     // marking due to overflowing the global mark stack.
1300     reset_marking_state();
1301   } else {
1302     {
1303       G1CMTraceTime trace("GC aggregate-data", G1Log::finer());
1304 
1305       // Aggregate the per-task counting data that we have accumulated
1306       // while marking.
1307       aggregate_count_data();
1308     }
1309 
1310     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1311     // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
1314     satb_mq_set.set_active_all_threads(false, /* new active value */
1315                                        true /* expected_active */);
1316 
1317     if (VerifyDuringGC) {
1318       HandleMark hm;  // handle scope
1319       g1h->prepare_for_verify();
1320       Universe::verify(VerifyOption_G1UseNextMarking,
1321                        " VerifyDuringGC:(after)");
1322     }
1323     g1h->check_bitmaps("Remark End");
1324     assert(!restart_for_overflow(), "sanity");
1325     // Completely reset the marking state since marking completed
1326     set_non_marking_state();
1327   }
1328 
1329   // Expand the marking stack, if we have to and if we can.
1330   if (_markStack.should_expand()) {
1331     _markStack.expand();
1332   }
1333 
1334   // Statistics
1335   double now = os::elapsedTime();
1336   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1337   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1338   _remark_times.add((now - start) * 1000.0);
1339 
1340   g1p->record_concurrent_mark_remark_end();
1341 
1342   G1CMIsAliveClosure is_alive(g1h);
1343   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1344 }
1345 
1346 // Base class of the closures that finalize and verify the
1347 // liveness counting data.
1348 class CMCountDataClosureBase: public HeapRegionClosure {
1349 protected:
1350   G1CollectedHeap* _g1h;
1351   ConcurrentMark* _cm;
1352   CardTableModRefBS* _ct_bs;
1353 
1354   BitMap* _region_bm;
1355   BitMap* _card_bm;
1356 
  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
1359   // bitmap to 1. If the region is "starts humongous" it will also set
1360   // to 1 the bits on the region bitmap that correspond to its
1361   // associated "continues humongous" regions.
1362   void set_bit_for_region(HeapRegion* hr) {
1363     assert(!hr->is_continues_humongous(), "should have filtered those out");
1364 
1365     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1366     if (!hr->is_starts_humongous()) {
1367       // Normal (non-humongous) case: just set the bit.
1368       _region_bm->par_at_put(index, true);
1369     } else {
1370       // Starts humongous case: calculate how many regions are part of
1371       // this humongous region and then set the bit range.
1372       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1373       _region_bm->par_at_put_range(index, end_index, true);
1374     }
1375   }
1376 
1377 public:
1378   CMCountDataClosureBase(G1CollectedHeap* g1h,
1379                          BitMap* region_bm, BitMap* card_bm):
1380     _g1h(g1h), _cm(g1h->concurrent_mark()),
1381     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
1382     _region_bm(region_bm), _card_bm(card_bm) { }
1383 };
1384 
1385 // Closure that calculates the # live objects per region. Used
1386 // for verification purposes during the cleanup pause.
1387 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1388   CMBitMapRO* _bm;
1389   size_t _region_marked_bytes;
1390 
1391 public:
1392   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1393                          BitMap* region_bm, BitMap* card_bm) :
1394     CMCountDataClosureBase(g1h, region_bm, card_bm),
1395     _bm(bm), _region_marked_bytes(0) { }
1396 
1397   bool doHeapRegion(HeapRegion* hr) {
1398 
1399     if (hr->is_continues_humongous()) {
1400       // We will ignore these here and process them when their
1401       // associated "starts humongous" region is processed (see
1402       // set_bit_for_heap_region()). Note that we cannot rely on their
1403       // associated "starts humongous" region to have their bit set to
1404       // 1 since, due to the region chunking in the parallel region
1405       // iteration, a "continues humongous" region might be visited
1406       // before its associated "starts humongous".
1407       return false;
1408     }
1409 
1410     HeapWord* ntams = hr->next_top_at_mark_start();
1411     HeapWord* start = hr->bottom();
1412 
1413     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1414            err_msg("Preconditions not met - "
1415                    "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1416                    p2i(start), p2i(ntams), p2i(hr->end())));
1417 
1418     // Find the first marked object at or after "start".
1419     start = _bm->getNextMarkedWordAddress(start, ntams);
1420 
1421     size_t marked_bytes = 0;
1422 
1423     while (start < ntams) {
1424       oop obj = oop(start);
1425       int obj_sz = obj->size();
1426       HeapWord* obj_end = start + obj_sz;
1427 
1428       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1429       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1430 
1431       // Note: if we're looking at the last region in the heap, obj_end
1432       // could actually be just beyond the end of the heap; end_idx
1433       // will then correspond to a (non-existent) card that is also
1434       // just beyond the heap.
1435       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1436         // end of object is not card aligned - increment to cover
1437         // all the cards spanned by the object
1438         end_idx += 1;
1439       }
1440 
1441       // Set the bits in the card BM for the cards spanned by this object.
1442       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1443 
1444       // Add the size of this object to the number of marked bytes.
1445       marked_bytes += (size_t)obj_sz * HeapWordSize;
1446 
1447       // Find the next marked object after this one.
1448       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1449     }
1450 
1451     // Mark the allocated-since-marking portion...
1452     HeapWord* top = hr->top();
1453     if (ntams < top) {
1454       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1455       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1456 
1457       // Note: if we're looking at the last region in the heap, top
1458       // could actually be just beyond the end of the heap; end_idx
1459       // will then correspond to a (non-existent) card that is also
1460       // just beyond the heap.
1461       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1462         // top is not card aligned - increment to cover
1463         // all the cards spanned by [ntams, top)
1464         end_idx += 1;
1465       }
1466       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1467 
1468       // This definitely means the region has live objects.
1469       set_bit_for_region(hr);
1470     }
1471 
1472     // Update the live region bitmap.
1473     if (marked_bytes > 0) {
1474       set_bit_for_region(hr);
1475     }
1476 
1477     // Set the marked bytes for the current region so that
1478     // it can be queried by a calling verification routine
1479     _region_marked_bytes = marked_bytes;
1480 
1481     return false;
1482   }
1483 
1484   size_t region_marked_bytes() const { return _region_marked_bytes; }
1485 };
1486 
1487 // Heap region closure used for verifying the counting data
1488 // that was accumulated concurrently and aggregated during
1489 // the remark pause. This closure is applied to the heap
1490 // regions during the STW cleanup pause.
1491 
1492 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1493   G1CollectedHeap* _g1h;
1494   ConcurrentMark* _cm;
1495   CalcLiveObjectsClosure _calc_cl;
1496   BitMap* _region_bm;   // Region BM to be verified
1497   BitMap* _card_bm;     // Card BM to be verified
1498   bool _verbose;        // verbose output?
1499 
1500   BitMap* _exp_region_bm; // Expected Region BM values
1501   BitMap* _exp_card_bm;   // Expected card BM values
1502 
1503   int _failures;
1504 
1505 public:
1506   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1507                                 BitMap* region_bm,
1508                                 BitMap* card_bm,
1509                                 BitMap* exp_region_bm,
1510                                 BitMap* exp_card_bm,
1511                                 bool verbose) :
1512     _g1h(g1h), _cm(g1h->concurrent_mark()),
1513     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1514     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1515     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1516     _failures(0) { }
1517 
1518   int failures() const { return _failures; }
1519 
1520   bool doHeapRegion(HeapRegion* hr) {
1521     if (hr->is_continues_humongous()) {
1522       // We will ignore these here and process them when their
1523       // associated "starts humongous" region is processed (see
1524     // set_bit_for_region()). Note that we cannot rely on their
1525     // associated "starts humongous" region to have its bit set to
1526       // 1 since, due to the region chunking in the parallel region
1527       // iteration, a "continues humongous" region might be visited
1528       // before its associated "starts humongous".
1529       return false;
1530     }
1531 
1532     int failures = 0;
1533 
1534     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1535     // this region and set the corresponding bits in the expected region
1536     // and card bitmaps.
1537     bool res = _calc_cl.doHeapRegion(hr);
1538     assert(res == false, "should be continuing");
1539 
1540     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1541                     Mutex::_no_safepoint_check_flag);
1542 
1543     // Verify the marked bytes for this region.
1544     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1545     size_t act_marked_bytes = hr->next_marked_bytes();
1546 
1547     // We're not OK if expected marked bytes > actual marked bytes. It means
1548     // we have failed to account for some objects during the actual marking.
1549     if (exp_marked_bytes > act_marked_bytes) {
1550       if (_verbose) {
1551         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1552                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1553                                hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
1554       }
1555       failures += 1;
1556     }
1557 
1558     // Verify this region's bit in both the actual region bitmap and the
1559     // expected region bitmap that was just calculated.
1560     // We're not OK if the bit in the calculated expected region
1561     // bitmap is set and the bit in the actual region bitmap is not.
1562     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1563 
1564     bool expected = _exp_region_bm->at(index);
1565     bool actual = _region_bm->at(index);
1566     if (expected && !actual) {
1567       if (_verbose) {
1568         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1569                                "expected: %s, actual: %s",
1570                                hr->hrm_index(),
1571                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1572       }
1573       failures += 1;
1574     }
1575 
1576     // Verify that the card bit maps for the cards spanned by the current
1577     // region match. We have an error if we have a set bit in the expected
1578     // bit map and the corresponding bit in the actual bitmap is not set.
1579 
1580     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1581     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1582 
1583     for (BitMap::idx_t i = start_idx; i < end_idx; i += 1) {
1584       expected = _exp_card_bm->at(i);
1585       actual = _card_bm->at(i);
1586 
1587       if (expected && !actual) {
1588         if (_verbose) {
1589           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1590                                  "expected: %s, actual: %s",
1591                                  hr->hrm_index(), i,
1592                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1593         }
1594         failures += 1;
1595       }
1596     }
1597 
1598     if (failures > 0 && _verbose)  {
1599       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1600                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1601                              HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
1602                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1603     }
1604 
1605     _failures += failures;
1606 
1607     // We could stop iteration over the heap when we
1608     // find the first violating region by returning true.
1609     return false;
1610   }
1611 };
1612 
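// Gang task that applies VerifyLiveObjectDataHRClosure to the heap regions in
// parallel and accumulates the number of verification failures found by the
// workers.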
1613 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1614 protected:
1615   G1CollectedHeap* _g1h;
1616   ConcurrentMark* _cm;
1617   BitMap* _actual_region_bm;
1618   BitMap* _actual_card_bm;
1619 
1620   uint    _n_workers;
1621 
1622   BitMap* _expected_region_bm;
1623   BitMap* _expected_card_bm;
1624 
1625   int  _failures;
1626   bool _verbose;
1627 
1628   HeapRegionClaimer _hrclaimer;
1629 
1630 public:
1631   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1632                             BitMap* region_bm, BitMap* card_bm,
1633                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1634     : AbstractGangTask("G1 verify final counting"),
1635       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1636       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1637       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1638       _failures(0), _verbose(false),
1639       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1640     assert(VerifyDuringGC, "don't call this otherwise");
1641     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1642     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1643 
1644     _verbose = _cm->verbose_medium();
1645   }
1646 
1647   void work(uint worker_id) {
1648     assert(worker_id < _n_workers, "invariant");
1649 
1650     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1651                                             _actual_region_bm, _actual_card_bm,
1652                                             _expected_region_bm,
1653                                             _expected_card_bm,
1654                                             _verbose);
1655 
1656     _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
1657 
1658     Atomic::add(verify_cl.failures(), &_failures);
1659   }
1660 
1661   int failures() const { return _failures; }
1662 };
1663 
1664 // Closure that finalizes the liveness counting data.
1665 // Used during the cleanup pause.
1666 // Sets the bits corresponding to the interval [NTAMS, top)
1667 // (which contains the implicitly live objects) in the
1668 // card liveness bitmap. Also sets, in the region liveness
1669 // bitmap, the bit for each region containing live data.
1670 
1671 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1672  public:
1673   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1674                               BitMap* region_bm,
1675                               BitMap* card_bm) :
1676     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1677 
1678   bool doHeapRegion(HeapRegion* hr) {
1679 
1680     if (hr->is_continues_humongous()) {
1681       // We will ignore these here and process them when their
1682       // associated "starts humongous" region is processed (see
1683     // set_bit_for_region()). Note that we cannot rely on their
1684     // associated "starts humongous" region to have its bit set to
1685       // 1 since, due to the region chunking in the parallel region
1686       // iteration, a "continues humongous" region might be visited
1687       // before its associated "starts humongous".
1688       return false;
1689     }
1690 
1691     HeapWord* ntams = hr->next_top_at_mark_start();
1692     HeapWord* top   = hr->top();
1693 
1694     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1695 
1696     // Mark the allocated-since-marking portion...
1697     if (ntams < top) {
1698       // This definitely means the region has live objects.
1699       set_bit_for_region(hr);
1700 
1701       // Now set the bits in the card bitmap for [ntams, top)
1702       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1703       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1704 
1705       // Note: if we're looking at the last region in the heap, top
1706       // could actually be just beyond the end of the heap; end_idx
1707       // will then correspond to a (non-existent) card that is also
1708       // just beyond the heap.
1709       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1710         // top is not card aligned - increment to cover
1711         // all the cards spanned by [ntams, top)
1712         end_idx += 1;
1713       }
1714 
1715       assert(end_idx <= _card_bm->size(),
1716              err_msg("oob: end_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1717                      end_idx, _card_bm->size()));
1718       assert(start_idx < _card_bm->size(),
1719              err_msg("oob: start_idx= " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1720                      start_idx, _card_bm->size()));
1721 
1722       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1723     }
1724 
1725     // Set the bit for the region if it contains live data
1726     if (hr->next_marked_bytes() > 0) {
1727       set_bit_for_region(hr);
1728     }
1729 
1730     return false;
1731   }
1732 };
1733 
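// Gang task that runs FinalCountDataUpdateClosure over the heap regions in
// parallel, splitting the regions among the workers via a HeapRegionClaimer.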
1734 class G1ParFinalCountTask: public AbstractGangTask {
1735 protected:
1736   G1CollectedHeap* _g1h;
1737   ConcurrentMark* _cm;
1738   BitMap* _actual_region_bm;
1739   BitMap* _actual_card_bm;
1740 
1741   uint    _n_workers;
1742   HeapRegionClaimer _hrclaimer;
1743 
1744 public:
1745   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1746     : AbstractGangTask("G1 final counting"),
1747       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1748       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1749       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1750   }
1751 
1752   void work(uint worker_id) {
1753     assert(worker_id < _n_workers, "invariant");
1754 
1755     FinalCountDataUpdateClosure final_update_cl(_g1h,
1756                                                 _actual_region_bm,
1757                                                 _actual_card_bm);
1758 
1759     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1760   }
1761 };
1762 
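// Region closure applied at the end of marking. It resets the GC time stamps,
// records the marking results for each region, frees non-young regions that
// turned out to contain no live data (adding them to the local cleanup list),
// and schedules remembered set cleanup work for the remaining regions.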
1763 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1764   G1CollectedHeap* _g1;
1765   size_t _freed_bytes;
1766   FreeRegionList* _local_cleanup_list;
1767   HeapRegionSetCount _old_regions_removed;
1768   HeapRegionSetCount _humongous_regions_removed;
1769   HRRSCleanupTask* _hrrs_cleanup_task;
1770 
1771 public:
1772   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1773                              FreeRegionList* local_cleanup_list,
1774                              HRRSCleanupTask* hrrs_cleanup_task) :
1775     _g1(g1),
1776     _freed_bytes(0),
1777     _local_cleanup_list(local_cleanup_list),
1778     _old_regions_removed(),
1779     _humongous_regions_removed(),
1780     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1781 
1782   size_t freed_bytes() { return _freed_bytes; }
1783   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1784   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1785 
1786   bool doHeapRegion(HeapRegion *hr) {
1787     if (hr->is_continues_humongous() || hr->is_archive()) {
1788       return false;
1789     }
1792     _g1->reset_gc_time_stamps(hr);
1793     hr->note_end_of_marking();
1794 
1795     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1796       _freed_bytes += hr->used();
1797       hr->set_containing_set(NULL);
1798       if (hr->is_humongous()) {
1799         assert(hr->is_starts_humongous(), "we should only see starts humongous");
1800         _humongous_regions_removed.increment(1u, hr->capacity());
1801         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1802       } else {
1803         _old_regions_removed.increment(1u, hr->capacity());
1804         _g1->free_region(hr, _local_cleanup_list, true);
1805       }
1806     } else {
1807       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1808     }
1809 
1810     return false;
1811   }
1812 };
1813 
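// Gang task that applies G1NoteEndOfConcMarkClosure to the heap regions in
// parallel and then appends each worker's local cleanup list to the global
// cleanup list.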
1814 class G1ParNoteEndTask: public AbstractGangTask {
1815   friend class G1NoteEndOfConcMarkClosure;
1816 
1817 protected:
1818   G1CollectedHeap* _g1h;
1819   FreeRegionList* _cleanup_list;
1820   HeapRegionClaimer _hrclaimer;
1821 
1822 public:
1823   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1824       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1825   }
1826 
1827   void work(uint worker_id) {
1828     FreeRegionList local_cleanup_list("Local Cleanup List");
1829     HRRSCleanupTask hrrs_cleanup_task;
1830     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1831                                            &hrrs_cleanup_task);
1832     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1833     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1834 
1835     // Now update the lists
1836     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1837     {
1838       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1839       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1840 
1841       // If we iterate over the global cleanup list at the end of
1842       // cleanup to do this printing we will not guarantee to only
1843       // generate output for the newly-reclaimed regions (the list
1844       // might not be empty at the beginning of cleanup; we might
1845       // still be working on its previous contents). So we do the
1846       // printing here, before we append the new regions to the global
1847       // cleanup list.
1848 
1849       G1HRPrinter* hr_printer = _g1h->hr_printer();
1850       if (hr_printer->is_active()) {
1851         FreeRegionListIterator iter(&local_cleanup_list);
1852         while (iter.more_available()) {
1853           HeapRegion* hr = iter.get_next();
1854           hr_printer->cleanup(hr);
1855         }
1856       }
1857 
1858       _cleanup_list->add_ordered(&local_cleanup_list);
1859       assert(local_cleanup_list.is_empty(), "post-condition");
1860 
1861       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1862     }
1863   }
1864 };
1865 
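// Gang task that scrubs the remembered sets in parallel, using the region and
// card liveness bitmaps produced during marking.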
1866 class G1ParScrubRemSetTask: public AbstractGangTask {
1867 protected:
1868   G1RemSet* _g1rs;
1869   BitMap* _region_bm;
1870   BitMap* _card_bm;
1871   HeapRegionClaimer _hrclaimer;
1872 
1873 public:
1874   G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
1875       AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) {
1876   }
1877 
1878   void work(uint worker_id) {
1879     _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
1880   }
1881 
1882 };
1883 
1884 void ConcurrentMark::cleanup() {
1885   // world is stopped at this checkpoint
1886   assert(SafepointSynchronize::is_at_safepoint(),
1887          "world should be stopped");
1888   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1889 
1890   // If a full collection has happened, we shouldn't do this.
1891   if (has_aborted()) {
1892     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1893     return;
1894   }
1895 
1896   g1h->verify_region_sets_optional();
1897 
1898   if (VerifyDuringGC) {
1899     HandleMark hm;  // handle scope
1900     g1h->prepare_for_verify();
1901     Universe::verify(VerifyOption_G1UsePrevMarking,
1902                      " VerifyDuringGC:(before)");
1903   }
1904   g1h->check_bitmaps("Cleanup Start");
1905 
1906   G1CollectorPolicy* g1p = g1h->g1_policy();
1907   g1p->record_concurrent_mark_cleanup_start();
1908 
1909   double start = os::elapsedTime();
1910 
1911   HeapRegionRemSet::reset_for_cleanup_tasks();
1912 
1913   // Do counting once more with the world stopped for good measure.
1914   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1915 
1916   g1h->workers()->run_task(&g1_par_count_task);
1917 
1918   if (VerifyDuringGC) {
1919     // Verify that the counting data accumulated during marking matches
1920     // that calculated by walking the marking bitmap.
1921 
1922     // Bitmaps to hold expected values
1923     BitMap expected_region_bm(_region_bm.size(), true);
1924     BitMap expected_card_bm(_card_bm.size(), true);
1925 
1926     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1927                                                  &_region_bm,
1928                                                  &_card_bm,
1929                                                  &expected_region_bm,
1930                                                  &expected_card_bm);
1931 
1932     g1h->workers()->run_task(&g1_par_verify_task);
1933 
1934     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1935   }
1936 
1937   size_t start_used_bytes = g1h->used();
1938   g1h->collector_state()->set_mark_in_progress(false);
1939 
1940   double count_end = os::elapsedTime();
1941   double this_final_counting_time = (count_end - start);
1942   _total_counting_time += this_final_counting_time;
1943 
1944   if (G1PrintRegionLivenessInfo) {
1945     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1946     _g1h->heap_region_iterate(&cl);
1947   }
1948 
1949   // Install newly created mark bitMap as "prev".
1950   swapMarkBitMaps();
1951 
1952   g1h->reset_gc_time_stamp();
1953 
1954   uint n_workers = _g1h->workers()->active_workers();
1955 
1956   // Note end of marking in all heap regions.
1957   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1958   g1h->workers()->run_task(&g1_par_note_end_task);
1959   g1h->check_gc_time_stamps();
1960 
1961   if (!cleanup_list_is_empty()) {
1962     // The cleanup list is not empty, so we'll have to process it
1963     // concurrently. Notify anyone else that might be wanting free
1964     // regions that there will be more free regions coming soon.
1965     g1h->set_free_regions_coming();
1966   }
1967 
1968   // Scrub the remembered sets (below) before record_concurrent_mark_cleanup_end(),
1969   // since it affects the metric by which we sort the heap regions.
1970   if (G1ScrubRemSets) {
1971     double rs_scrub_start = os::elapsedTime();
1972     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
1973     g1h->workers()->run_task(&g1_par_scrub_rs_task);
1974 
1975     double rs_scrub_end = os::elapsedTime();
1976     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1977     _total_rs_scrub_time += this_rs_scrub_time;
1978   }
1979 
1980   // this will also free any regions totally full of garbage objects,
1981   // and sort the regions.
1982   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1983 
1984   // Statistics.
1985   double end = os::elapsedTime();
1986   _cleanup_times.add((end - start) * 1000.0);
1987 
1988   if (G1Log::fine()) {
1989     g1h->g1_policy()->print_heap_transition(start_used_bytes);
1990   }
1991 
1992   // Clean up will have freed any regions completely full of garbage.
1993   // Update the soft reference policy with the new heap occupancy.
1994   Universe::update_heap_info_at_gc();
1995 
1996   if (VerifyDuringGC) {
1997     HandleMark hm;  // handle scope
1998     g1h->prepare_for_verify();
1999     Universe::verify(VerifyOption_G1UsePrevMarking,
2000                      " VerifyDuringGC:(after)");
2001   }
2002 
2003   g1h->check_bitmaps("Cleanup End");
2004 
2005   g1h->verify_region_sets_optional();
2006 
2007   // We need to make this be a "collection" so any collection pause that
2008   // races with it goes around and waits for completeCleanup to finish.
2009   g1h->increment_total_collections();
2010 
2011   // Clean out dead classes and update Metaspace sizes.
2012   if (ClassUnloadingWithConcurrentMark) {
2013     ClassLoaderDataGraph::purge();
2014   }
2015   MetaspaceGC::compute_new_size();
2016 
2017   // We reclaimed old regions so we should calculate the sizes to make
2018   // sure we update the old gen/space data.
2019   g1h->g1mm()->update_sizes();
2020   g1h->allocation_context_stats().update_after_mark();
2021 
2022   g1h->trace_heap_after_concurrent_cycle();
2023 }
2024 
2025 void ConcurrentMark::completeCleanup() {
2026   if (has_aborted()) return;
2027 
2028   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2029 
2030   _cleanup_list.verify_optional();
2031   FreeRegionList tmp_free_list("Tmp Free List");
2032 
2033   if (G1ConcRegionFreeingVerbose) {
2034     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2035                            "cleanup list has %u entries",
2036                            _cleanup_list.length());
2037   }
2038 
2039   // No one else should be accessing the _cleanup_list at this point,
2040   // so it is not necessary to take any locks
2041   while (!_cleanup_list.is_empty()) {
2042     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
2043     assert(hr != NULL, "Got NULL from a non-empty list");
2044     hr->par_clear();
2045     tmp_free_list.add_ordered(hr);
2046 
2047     // Instead of adding one region at a time to the secondary_free_list,
2048     // we accumulate them in the local list and move them a few at a
2049     // time. This also cuts down on the number of notify_all() calls
2050     // we do during this process. We'll also append the local list when
2051     // _cleanup_list is empty (which means we just removed the last
2052     // region from the _cleanup_list).
2053     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2054         _cleanup_list.is_empty()) {
2055       if (G1ConcRegionFreeingVerbose) {
2056         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2057                                "appending %u entries to the secondary_free_list, "
2058                                "cleanup list still has %u entries",
2059                                tmp_free_list.length(),
2060                                _cleanup_list.length());
2061       }
2062 
2063       {
2064         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2065         g1h->secondary_free_list_add(&tmp_free_list);
2066         SecondaryFreeList_lock->notify_all();
2067       }
2068 #ifndef PRODUCT
2069       if (G1StressConcRegionFreeing) {
2070         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2071           os::sleep(Thread::current(), (jlong) 1, false);
2072         }
2073       }
2074 #endif
2075     }
2076   }
2077   assert(tmp_free_list.is_empty(), "post-condition");
2078 }
2079 
2080 // Supporting Object and Oop closures for reference discovery
2081 // and processing during marking
2082 
2083 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2084   HeapWord* addr = (HeapWord*)obj;
2085   return addr != NULL &&
2086          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2087 }
2088 
2089 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2090 // Uses the CMTask associated with a worker thread (for serial reference
2091 // processing the CMTask for worker 0 is used) to preserve (mark) and
2092 // trace referent objects.
2093 //
2094 // Using the CMTask and embedded local queues avoids having the worker
2095 // threads operating on the global mark stack. This reduces the risk
2096 // of overflowing the stack - which we would rather avoid at this late
2097 // stage. Also, using the tasks' local queues removes the potential
2098 // for the workers to interfere with each other, which could occur if
2099 // they operated on the global stack directly.
2100 
2101 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2102   ConcurrentMark* _cm;
2103   CMTask*         _task;
2104   int             _ref_counter_limit;
2105   int             _ref_counter;
2106   bool            _is_serial;
2107  public:
2108   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2109     _cm(cm), _task(task), _is_serial(is_serial),
2110     _ref_counter_limit(G1RefProcDrainInterval) {
2111     assert(_ref_counter_limit > 0, "sanity");
2112     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2113     _ref_counter = _ref_counter_limit;
2114   }
2115 
2116   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2117   virtual void do_oop(      oop* p) { do_oop_work(p); }
2118 
2119   template <class T> void do_oop_work(T* p) {
2120     if (!_cm->has_overflown()) {
2121       oop obj = oopDesc::load_decode_heap_oop(p);
2122       if (_cm->verbose_high()) {
2123         gclog_or_tty->print_cr("\t[%u] we're looking at location "
2124                                "*" PTR_FORMAT " = " PTR_FORMAT,
2125                                _task->worker_id(), p2i(p), p2i((void*) obj));
2126       }
2127 
2128       _task->deal_with_reference(obj);
2129       _ref_counter--;
2130 
2131       if (_ref_counter == 0) {
2132         // We have dealt with _ref_counter_limit references, pushing them
2133         // and objects reachable from them on to the local stack (and
2134         // possibly the global stack). Call CMTask::do_marking_step() to
2135         // process these entries.
2136         //
2137         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2138         // there's nothing more to do (i.e. we're done with the entries that
2139         // were pushed as a result of the CMTask::deal_with_reference() calls
2140         // above) or we overflow.
2141         //
2142         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2143         // flag while there may still be some work to do. (See the comment at
2144         // the beginning of CMTask::do_marking_step() for those conditions -
2145         // one of which is reaching the specified time target.) It is only
2146         // when CMTask::do_marking_step() returns without setting the
2147         // has_aborted() flag that the marking step has completed.
2148         do {
2149           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2150           _task->do_marking_step(mark_step_duration_ms,
2151                                  false      /* do_termination */,
2152                                  _is_serial);
2153         } while (_task->has_aborted() && !_cm->has_overflown());
2154         _ref_counter = _ref_counter_limit;
2155       }
2156     } else {
2157       if (_cm->verbose_high()) {
2158          gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2159       }
2160     }
2161   }
2162 };
2163 
2164 // 'Drain' oop closure used by both serial and parallel reference processing.
2165 // Uses the CMTask associated with a given worker thread (for serial
2166 // reference processing the CMTask for worker 0 is used). Calls the
2167 // do_marking_step routine, with an unbelievably large timeout value,
2168 // to drain the marking data structures of the remaining entries
2169 // added by the 'keep alive' oop closure above.
2170 
2171 class G1CMDrainMarkingStackClosure: public VoidClosure {
2172   ConcurrentMark* _cm;
2173   CMTask*         _task;
2174   bool            _is_serial;
2175  public:
2176   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2177     _cm(cm), _task(task), _is_serial(is_serial) {
2178     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2179   }
2180 
2181   void do_void() {
2182     do {
2183       if (_cm->verbose_high()) {
2184         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2185                                _task->worker_id(), BOOL_TO_STR(_is_serial));
2186       }
2187 
2188       // We call CMTask::do_marking_step() to completely drain the local
2189       // and global marking stacks of entries pushed by the 'keep alive'
2190       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2191       //
2192       // CMTask::do_marking_step() is called in a loop, which we'll exit
2193       // if there's nothing more to do (i.e. we've completely drained the
2194       // entries that were pushed as a result of applying the 'keep alive'
2195       // closure to the entries on the discovered ref lists) or we overflow
2196       // the global marking stack.
2197       //
2198       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2199       // flag while there may still be some work to do. (See the comment at
2200       // the beginning of CMTask::do_marking_step() for those conditions -
2201       // one of which is reaching the specified time target.) It is only
2202       // when CMTask::do_marking_step() returns without setting the
2203       // has_aborted() flag that the marking step has completed.
2204 
2205       _task->do_marking_step(1000000000.0 /* something very large */,
2206                              true         /* do_termination */,
2207                              _is_serial);
2208     } while (_task->has_aborted() && !_cm->has_overflown());
2209   }
2210 };
2211 
2212 // Implementation of AbstractRefProcTaskExecutor for parallel
2213 // reference processing at the end of G1 concurrent marking
2214 
2215 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2216 private:
2217   G1CollectedHeap* _g1h;
2218   ConcurrentMark*  _cm;
2219   WorkGang*        _workers;
2220   uint             _active_workers;
2221 
2222 public:
2223   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2224                           ConcurrentMark* cm,
2225                           WorkGang* workers,
2226                           uint n_workers) :
2227     _g1h(g1h), _cm(cm),
2228     _workers(workers), _active_workers(n_workers) { }
2229 
2230   // Executes the given task using concurrent marking worker threads.
2231   virtual void execute(ProcessTask& task);
2232   virtual void execute(EnqueueTask& task);
2233 };
2234 
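// Gang task proxy that runs a reference processing ProcessTask in parallel;
// each worker uses its own CMTask-based 'is alive', 'keep alive' and 'drain'
// closures.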
2235 class G1CMRefProcTaskProxy: public AbstractGangTask {
2236   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2237   ProcessTask&     _proc_task;
2238   G1CollectedHeap* _g1h;
2239   ConcurrentMark*  _cm;
2240 
2241 public:
2242   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2243                      G1CollectedHeap* g1h,
2244                      ConcurrentMark* cm) :
2245     AbstractGangTask("Process reference objects in parallel"),
2246     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2247     ReferenceProcessor* rp = _g1h->ref_processor_cm();
2248     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2249   }
2250 
2251   virtual void work(uint worker_id) {
2252     ResourceMark rm;
2253     HandleMark hm;
2254     CMTask* task = _cm->task(worker_id);
2255     G1CMIsAliveClosure g1_is_alive(_g1h);
2256     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2257     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2258 
2259     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2260   }
2261 };
2262 
2263 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2264   assert(_workers != NULL, "Need parallel worker threads.");
2265   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2266 
2267   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2268 
2269   // We need to reset the concurrency level before each
2270   // proxy task execution, so that the termination protocol
2271   // and overflow handling in CMTask::do_marking_step() knows
2272   // how many workers to wait for.
2273   _cm->set_concurrency(_active_workers);
2274   _workers->run_task(&proc_task_proxy);
2275 }
2276 
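// Gang task proxy that runs a reference EnqueueTask in parallel across the
// worker threads.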
2277 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2278   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2279   EnqueueTask& _enq_task;
2280 
2281 public:
2282   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2283     AbstractGangTask("Enqueue reference objects in parallel"),
2284     _enq_task(enq_task) { }
2285 
2286   virtual void work(uint worker_id) {
2287     _enq_task.work(worker_id);
2288   }
2289 };
2290 
2291 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2292   assert(_workers != NULL, "Need parallel worker threads.");
2293   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2294 
2295   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2296 
2297   // Not strictly necessary but...
2298   //
2299   // We need to reset the concurrency level before each
2300   // proxy task execution, so that the termination protocol
2301   // and overflow handling in CMTask::do_marking_step() knows
2302   // how many workers to wait for.
2303   _cm->set_concurrency(_active_workers);
2304   _workers->run_task(&enq_task_proxy);
2305 }
2306 
2307 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2308   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2309 }
2310 
2311 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2312   if (has_overflown()) {
2313     // Skip processing the discovered references if we have
2314     // overflown the global marking stack. Reference objects
2315     // only get discovered once so it is OK to not
2316     // de-populate the discovered reference lists. We could have,
2317     // but the only benefit would be that, when marking restarts,
2318     // less reference objects are discovered.
2319     return;
2320   }
2321 
2322   ResourceMark rm;
2323   HandleMark   hm;
2324 
2325   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2326 
2327   // Is alive closure.
2328   G1CMIsAliveClosure g1_is_alive(g1h);
2329 
2330   // Inner scope to exclude the cleaning of the string and symbol
2331   // tables from the displayed time.
2332   {
2333     G1CMTraceTime t("GC ref-proc", G1Log::finer());
2334 
2335     ReferenceProcessor* rp = g1h->ref_processor_cm();
2336 
2337     // See the comment in G1CollectedHeap::ref_processing_init()
2338     // about how reference processing currently works in G1.
2339 
2340     // Set the soft reference policy
2341     rp->setup_policy(clear_all_soft_refs);
2342     assert(_markStack.isEmpty(), "mark stack should be empty");
2343 
2344     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2345     // in serial reference processing. Note these closures are also
2346     // used for serially processing (by the current thread) the
2347     // JNI references during parallel reference processing.
2348     //
2349     // These closures do not need to synchronize with the worker
2350     // threads involved in parallel reference processing as these
2351     // instances are executed serially by the current thread (e.g.
2352     // reference processing is not multi-threaded and is thus
2353     // performed by the current thread instead of a gang worker).
2354     //
2355     // The gang tasks involved in parallel reference processing create
2356     // their own instances of these closures, which do their own
2357     // synchronization among themselves.
2358     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2359     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2360 
2361     // We need at least one active thread. If reference processing
2362     // is not multi-threaded we use the current (VMThread) thread,
2363     // otherwise we use the work gang from the G1CollectedHeap and
2364     // we utilize all the worker threads we can.
2365     bool processing_is_mt = rp->processing_is_mt();
2366     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2367     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2368 
2369     // Parallel processing task executor.
2370     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2371                                               g1h->workers(), active_workers);
2372     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2373 
2374     // Set the concurrency level. The phase was already set prior to
2375     // executing the remark task.
2376     set_concurrency(active_workers);
2377 
2378     // Set the degree of MT processing here.  If the discovery was done MT,
2379     // the number of threads involved during discovery could differ from
2380     // the number of active workers.  This is OK as long as the discovered
2381     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2382     rp->set_active_mt_degree(active_workers);
2383 
2384     // Process the weak references.
2385     const ReferenceProcessorStats& stats =
2386         rp->process_discovered_references(&g1_is_alive,
2387                                           &g1_keep_alive,
2388                                           &g1_drain_mark_stack,
2389                                           executor,
2390                                           g1h->gc_timer_cm(),
2391                                           concurrent_gc_id());
2392     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2393 
2394     // The do_oop work routines of the keep_alive and drain_marking_stack
2395     // oop closures will set the has_overflown flag if we overflow the
2396     // global marking stack.
2397 
2398     assert(_markStack.overflow() || _markStack.isEmpty(),
2399             "mark stack should be empty (unless it overflowed)");
2400 
2401     if (_markStack.overflow()) {
2402       // This should have been done already when we tried to push an
2403       // entry on to the global mark stack. But let's do it again.
2404       set_has_overflown();
2405     }
2406 
2407     assert(rp->num_q() == active_workers, "why not");
2408 
2409     rp->enqueue_discovered_references(executor);
2410 
2411     rp->verify_no_references_recorded();
2412     assert(!rp->discovery_enabled(), "Post condition");
2413   }
2414 
2415   if (has_overflown()) {
2416     // We cannot trust g1_is_alive if the marking stack overflowed.
2417     return;
2418   }
2419 
2420   assert(_markStack.isEmpty(), "Marking should have completed");
2421 
2422   // Unload Klasses, String, Symbols, Code Cache, etc.
2423   {
2424     G1CMTraceTime trace("Unloading", G1Log::finer());
2425 
2426     if (ClassUnloadingWithConcurrentMark) {
2427       bool purged_classes;
2428 
2429       {
2430         G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest());
2431         purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2432       }
2433 
2434       {
2435         G1CMTraceTime trace("Parallel Unloading", G1Log::finest());
2436         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2437       }
2438     }
2439 
2440     if (G1StringDedup::is_enabled()) {
2441       G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest());
2442       G1StringDedup::unlink(&g1_is_alive);
2443     }
2444   }
2445 }
2446 
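// Swap the roles of the prev and next marking bitmaps. Called during cleanup
// so that the bitmap just completed by marking becomes the new prev bitmap.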
2447 void ConcurrentMark::swapMarkBitMaps() {
2448   CMBitMapRO* temp = _prevMarkBitMap;
2449   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2450   _nextMarkBitMap  = (CMBitMap*)  temp;
2451 }
2452 
2453 // Closure for marking entries in SATB buffers.
2454 class CMSATBBufferClosure : public SATBBufferClosure {
2455 private:
2456   CMTask* _task;
2457   G1CollectedHeap* _g1h;
2458 
2459   // This is very similar to CMTask::deal_with_reference, but with
2460   // more relaxed requirements for the argument, so this must be more
2461   // circumspect about treating the argument as an object.
2462   void do_entry(void* entry) const {
2463     _task->increment_refs_reached();
2464     HeapRegion* hr = _g1h->heap_region_containing_raw(entry);
2465     if (entry < hr->next_top_at_mark_start()) {
2466       // Until we get here, we don't know whether entry refers to a valid
2467       // object; it could instead have been a stale reference.
2468       oop obj = static_cast<oop>(entry);
2469       assert(obj->is_oop(true /* ignore mark word */),
2470              err_msg("Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj)));
2471       _task->make_reference_grey(obj, hr);
2472     }
2473   }
2474 
2475 public:
2476   CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2477     : _task(task), _g1h(g1h) { }
2478 
2479   virtual void do_buffer(void** buffer, size_t size) {
2480     for (size_t i = 0; i < size; ++i) {
2481       do_entry(buffer[i]);
2482     }
2483   }
2484 };
2485 
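// Thread closure used during remark. For each Java thread it claims, it walks
// the nmethods on the thread's stack and drains the thread's SATB buffer into
// the marking task; for the VM thread it drains the shared SATB queue.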
2486 class G1RemarkThreadsClosure : public ThreadClosure {
2487   CMSATBBufferClosure _cm_satb_cl;
2488   G1CMOopClosure _cm_cl;
2489   MarkingCodeBlobClosure _code_cl;
2490   int _thread_parity;
2491 
2492  public:
2493   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2494     _cm_satb_cl(task, g1h),
2495     _cm_cl(g1h, g1h->concurrent_mark(), task),
2496     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2497     _thread_parity(Threads::thread_claim_parity()) {}
2498 
2499   void do_thread(Thread* thread) {
2500     if (thread->is_Java_thread()) {
2501       if (thread->claim_oops_do(true, _thread_parity)) {
2502         JavaThread* jt = (JavaThread*)thread;
2503 
2504         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2505         // however, oops reachable from nmethods have very complex lifecycles:
2506         // * Alive if on the stack of an executing method
2507         // * Weakly reachable otherwise
2508         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2509         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2510         jt->nmethods_do(&_code_cl);
2511 
2512         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2513       }
2514     } else if (thread->is_VM_thread()) {
2515       if (thread->claim_oops_do(true, _thread_parity)) {
2516         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2517       }
2518     }
2519   }
2520 };
2521 
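// Parallel remark task. Each active worker first scans the threads (SATB
// buffers and nmethod roots) via G1RemarkThreadsClosure and then drains its
// marking data structures with do_marking_step() until no work remains or the
// global mark stack overflows.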
2522 class CMRemarkTask: public AbstractGangTask {
2523 private:
2524   ConcurrentMark* _cm;
2525 public:
2526   void work(uint worker_id) {
2527     // Since all available tasks are actually started, we should
2528     // only proceed if we're supposed to be active.
2529     if (worker_id < _cm->active_tasks()) {
2530       CMTask* task = _cm->task(worker_id);
2531       task->record_start_time();
2532       {
2533         ResourceMark rm;
2534         HandleMark hm;
2535 
2536         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2537         Threads::threads_do(&threads_f);
2538       }
2539 
2540       do {
2541         task->do_marking_step(1000000000.0 /* something very large */,
2542                               true         /* do_termination       */,
2543                               false        /* is_serial            */);
2544       } while (task->has_aborted() && !_cm->has_overflown());
2545       // If we overflow, then we do not want to restart. We instead
2546       // want to abort remark and do concurrent marking again.
2547       task->record_end_time();
2548     }
2549   }
2550 
2551   CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2552     AbstractGangTask("Par Remark"), _cm(cm) {
2553     _cm->terminator()->reset_for_reuse(active_workers);
2554   }
2555 };
2556 
2557 void ConcurrentMark::checkpointRootsFinalWork() {
2558   ResourceMark rm;
2559   HandleMark   hm;
2560   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2561 
2562   G1CMTraceTime trace("Finalize Marking", G1Log::finer());
2563 
2564   g1h->ensure_parsability(false);
2565 
2566   // this is remark, so we'll use up all active threads
2567   uint active_workers = g1h->workers()->active_workers();
2568   set_concurrency_and_phase(active_workers, false /* concurrent */);
2569   // Leave _parallel_marking_threads at its
2570   // value originally calculated in the ConcurrentMark
2571   // constructor and pass values of the active workers
2572   // through the gang in the task.
2573 
2574   {
2575     StrongRootsScope srs(active_workers);
2576 
2577     CMRemarkTask remarkTask(this, active_workers);
2578     // We will start all available threads, even if we decide that the
2579     // active_workers will be fewer. The extra ones will just bail out
2580     // immediately.
2581     g1h->workers()->run_task(&remarkTask);
2582   }
2583 
2584   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2585   guarantee(has_overflown() ||
2586             satb_mq_set.completed_buffers_num() == 0,
2587             err_msg("Invariant: has_overflown = %s, num buffers = %d",
2588                     BOOL_TO_STR(has_overflown()),
2589                     satb_mq_set.completed_buffers_num()));
2590 
2591   print_stats();
2592 }
2593 
2594 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2595   // Note we are overriding the read-only view of the prev map here, via
2596   // the cast.
2597   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2598 }
2599 
2600 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2601   _nextMarkBitMap->clearRange(mr);
2602 }
2603 
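// Claims a region for the given worker by using a CAS to move the global
// finger past it. Returns the claimed region if it may contain objects to
// scan, or NULL if the claimed region is empty (in which case the caller
// should retry) or the finger has reached the end of the heap.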
2604 HeapRegion*
2605 ConcurrentMark::claim_region(uint worker_id) {
2606   // "checkpoint" the finger
2607   HeapWord* finger = _finger;
2608 
2609   // _heap_end will not change underneath our feet; it only changes at
2610   // yield points.
2611   while (finger < _heap_end) {
2612     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2613 
2614     // Note on how this code handles humongous regions. In the
2615     // normal case the finger will reach the start of a "starts
2616     // humongous" (SH) region. Its end will either be the end of the
2617     // last "continues humongous" (CH) region in the sequence, or the
2618     // standard end of the SH region (if the SH is the only region in
2619     // the sequence). That way claim_region() will skip over the CH
2620     // regions. However, there is a subtle race between a CM thread
2621     // executing this method and a mutator thread doing a humongous
2622     // object allocation. The two are not mutually exclusive as the CM
2623     // thread does not need to hold the Heap_lock when it gets
2624     // here. So there is a chance that claim_region() will come across
2625     // a free region that's in the process of becoming a SH or a CH
2626     // region. In the former case, it will either
2627     //   a) Miss the update to the region's end, in which case it will
2628     //      visit every subsequent CH region, will find their bitmaps
2629     //      empty, and do nothing, or
2630     //   b) Will observe the update of the region's end (in which case
2631     //      it will skip the subsequent CH regions).
2632     // If it comes across a region that suddenly becomes CH, the
2633     // scenario will be similar to b). So, the race between
2634     // claim_region() and a humongous object allocation might force us
2635     // to do a bit of unnecessary work (due to some unnecessary bitmap
2636     // iterations) but it should not introduce any correctness issues.
2637     HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2638 
2639     // The heap_region_containing_raw() call above may return NULL, as we always
2640     // scan/claim up to the end of the heap. In this case, just jump to the next region.
2641     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2642 
2643     // Is the gap between reading the finger and doing the CAS too long?
2644     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2645     if (res == finger && curr_region != NULL) {
2646       // we succeeded
2647       HeapWord*   bottom        = curr_region->bottom();
2648       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2649 
2650       if (verbose_low()) {
2651         gclog_or_tty->print_cr("[%u] curr_region = " PTR_FORMAT " "
2652                                "[" PTR_FORMAT ", " PTR_FORMAT "), "
2653                                "limit = " PTR_FORMAT,
2654                                worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2655       }
2656 
2657       // notice that _finger == end cannot be guaranteed here since
2658       // someone else might have moved the finger even further
2659       assert(_finger >= end, "the finger should have moved forward");
2660 
2661       if (verbose_low()) {
2662         gclog_or_tty->print_cr("[%u] we were successful with region = "
2663                                PTR_FORMAT, worker_id, p2i(curr_region));
2664       }
2665 
2666       if (limit > bottom) {
2667         if (verbose_low()) {
2668           gclog_or_tty->print_cr("[%u] region " PTR_FORMAT " is not empty, "
2669                                  "returning it ", worker_id, p2i(curr_region));
2670         }
2671         return curr_region;
2672       } else {
2673         assert(limit == bottom,
2674                "the region limit should be at bottom");
2675         if (verbose_low()) {
2676           gclog_or_tty->print_cr("[%u] region " PTR_FORMAT " is empty, "
2677                                  "returning NULL", worker_id, p2i(curr_region));
2678         }
2679         // we return NULL and the caller should try calling
2680         // claim_region() again.
2681         return NULL;
2682       }
2683     } else {
2684       assert(_finger > finger, "the finger should have moved forward");
2685       if (verbose_low()) {
2686         if (curr_region == NULL) {
2687           gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, "
2688                                  "global finger = " PTR_FORMAT ", "
2689                                  "our finger = " PTR_FORMAT,
2690                                  worker_id, p2i(_finger), p2i(finger));
2691         } else {
2692           gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
2693                                  "global finger = " PTR_FORMAT ", "
2694                                  "our finger = " PTR_FORMAT,
2695                                  worker_id, p2i(_finger), p2i(finger));
2696         }
2697       }
2698 
2699       // read it again
2700       finger = _finger;
2701     }
2702   }
2703 
2704   return NULL;
2705 }
2706 
2707 #ifndef PRODUCT
2708 enum VerifyNoCSetOopsPhase {
2709   VerifyNoCSetOopsStack,
2710   VerifyNoCSetOopsQueues
2711 };
2712 
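// Debug-only closure that verifies that no entry on the global mark stack or
// on the per-worker task queues refers to an object in the collection set.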
2713 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
2714 private:
2715   G1CollectedHeap* _g1h;
2716   VerifyNoCSetOopsPhase _phase;
2717   int _info;
2718 
2719   const char* phase_str() {
2720     switch (_phase) {
2721     case VerifyNoCSetOopsStack:         return "Stack";
2722     case VerifyNoCSetOopsQueues:        return "Queue";
2723     default:                            ShouldNotReachHere();
2724     }
2725     return NULL;
2726   }
2727 
2728   void do_object_work(oop obj) {
2729     guarantee(!_g1h->obj_in_cs(obj),
2730               err_msg("obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2731                       p2i((void*) obj), phase_str(), _info));
2732   }
2733 
2734 public:
2735   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2736 
2737   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2738     _phase = phase;
2739     _info = info;
2740   }
2741 
2742   virtual void do_oop(oop* p) {
2743     oop obj = oopDesc::load_decode_heap_oop(p);
2744     do_object_work(obj);
2745   }
2746 
2747   virtual void do_oop(narrowOop* p) {
2748     // We should not come across narrow oops while scanning marking
2749     // stacks
2750     ShouldNotReachHere();
2751   }
2752 
2753   virtual void do_object(oop obj) {
2754     do_object_work(obj);
2755   }
2756 };
2757 
2758 void ConcurrentMark::verify_no_cset_oops() {
2759   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2760   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2761     return;
2762   }
2763 
2764   VerifyNoCSetOopsClosure cl;
2765 
2766   // Verify entries on the global mark stack
2767   cl.set_phase(VerifyNoCSetOopsStack);
2768   _markStack.oops_do(&cl);
2769 
2770   // Verify entries on the task queues
2771   for (uint i = 0; i < _max_worker_id; i += 1) {
2772     cl.set_phase(VerifyNoCSetOopsQueues, i);
2773     CMTaskQueue* queue = _task_queues->queue(i);
2774     queue->oops_do(&cl);
2775   }
2776 
2777   // Verify the global finger
2778   HeapWord* global_finger = finger();
2779   if (global_finger != NULL && global_finger < _heap_end) {
2780     // The global finger always points to a heap region boundary. We
2781     // use heap_region_containing_raw() to get the containing region
2782     // given that the global finger could be pointing to a free region
2783     // which subsequently becomes a continues humongous region. If that
2784     // happened, heap_region_containing() would return the corresponding
2785     // starts humongous region, whose bottom() differs from the finger,
2786     // and the check below would no longer hold.
2787     // Since we always iterate over all regions, we might get a NULL HeapRegion
2788     // here.
2789     HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2790     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2791               err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
2792                       p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
2793   }
2794 
2795   // Verify the task fingers
2796   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2797   for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
2798     CMTask* task = _tasks[i];
2799     HeapWord* task_finger = task->finger();
2800     if (task_finger != NULL && task_finger < _heap_end) {
2801       // See above note on the global finger verification.
2802       HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2803       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2804                 !task_hr->in_collection_set(),
2805                 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
2806                         p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
2807     }
2808   }
2809 }
2810 #endif // PRODUCT
2811 
2812 // Aggregate the counting data that was constructed concurrently
2813 // with marking.
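     // During marking each worker records, per region, the number of bytes
     // it marked (count_marked_bytes_array_for()) and, per card, whether it
     // marked anything on that card (count_card_bitmap_for()). This closure
     // folds those per-worker "stripes" together: it sums the marked bytes
     // into the region and ORs the workers' card bits into the single
     // global card bitmap.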
2814 class AggregateCountDataHRClosure: public HeapRegionClosure {
2815   G1CollectedHeap* _g1h;
2816   ConcurrentMark* _cm;
2817   CardTableModRefBS* _ct_bs;
2818   BitMap* _cm_card_bm;
2819   uint _max_worker_id;
2820 
2821  public:
2822   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2823                               BitMap* cm_card_bm,
2824                               uint max_worker_id) :
2825     _g1h(g1h), _cm(g1h->concurrent_mark()),
2826     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2827     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2828 
2829   bool doHeapRegion(HeapRegion* hr) {
2830     if (hr->is_continues_humongous()) {
2831       // We will ignore these here and process them when their
2832       // associated "starts humongous" region is processed.
2833       // Note that we cannot rely on their associated
2834       // "starts humongous" region to have their bit set to 1
2835       // since, due to the region chunking in the parallel region
2836       // iteration, a "continues humongous" region might be visited
2837       // before its associated "starts humongous".
2838       return false;
2839     }
2840 
2841     HeapWord* start = hr->bottom();
2842     HeapWord* limit = hr->next_top_at_mark_start();
2843     HeapWord* end = hr->end();
2844 
2845     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2846            err_msg("Preconditions not met - "
2847                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
2848                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
2849                    p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));
2850 
2851     assert(hr->next_marked_bytes() == 0, "Precondition");
2852 
2853     if (start == limit) {
2854       // NTAMS of this region has not been set so nothing to do.
2855       return false;
2856     }
2857 
2858     // 'start' should be in the heap.
2859     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2860     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2861     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2862 
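         // card_bitmap_index_for() maps a heap address to the index of the
         // card containing that address in the counting card bitmap.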
2863     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2864     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2865     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2866 
2867     // If ntams is not card aligned then we bump the card bitmap index
2868     // for limit so that we get all the cards spanned by
2869     // the object ending at ntams.
2870     // Note: if this is the last region in the heap then ntams
2871     // could actually be just beyond the end of the heap;
2872     // limit_idx will then correspond to a (non-existent) card
2873     // that is also outside the heap.
2874     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2875       limit_idx += 1;
2876     }
2877 
2878     assert(limit_idx <= end_idx, "or else use atomics");
2879 
2880     // Aggregate the "stripe" in the count data associated with hr.
2881     uint hrm_index = hr->hrm_index();
2882     size_t marked_bytes = 0;
2883 
2884     for (uint i = 0; i < _max_worker_id; i += 1) {
2885       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2886       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2887 
2888       // Fetch the marked_bytes in this region for task i and
2889       // add it to the running total for this region.
2890       marked_bytes += marked_bytes_array[hrm_index];
2891 
2892       // Now union this task's card bits over [start_idx, limit_idx)
2893       // into the global card bitmap.
2894       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2895 
2896       while (scan_idx < limit_idx) {
2897         assert(task_card_bm->at(scan_idx) == true, "should be");
2898         _cm_card_bm->set_bit(scan_idx);
2899         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2900 
2901         // BitMap::get_next_one_offset() can handle the case when
2902         // its left_offset parameter is greater than its right_offset
2903         // parameter. It does, however, have an early exit if
2904         // left_offset == right_offset. So let's limit the value
2905         // passed in for left offset here.
2906         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2907         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2908       }
2909     }
2910 
2911     // Update the marked bytes for this region.
2912     hr->add_to_marked_bytes(marked_bytes);
2913 
2914     // Next heap region
2915     return false;
2916   }
2917 };
2918 
2919 class G1AggregateCountDataTask: public AbstractGangTask {
2920 protected:
2921   G1CollectedHeap* _g1h;
2922   ConcurrentMark* _cm;
2923   BitMap* _cm_card_bm;
2924   uint _max_worker_id;
2925   uint _active_workers;
2926   HeapRegionClaimer _hrclaimer;
2927 
2928 public:
2929   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2930                            ConcurrentMark* cm,
2931                            BitMap* cm_card_bm,
2932                            uint max_worker_id,
2933                            uint n_workers) :
2934       AbstractGangTask("Count Aggregation"),
2935       _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2936       _max_worker_id(max_worker_id),
2937       _active_workers(n_workers),
2938       _hrclaimer(_active_workers) {
2939   }
2940 
2941   void work(uint worker_id) {
2942     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
2943 
2944     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
2945   }
2946 };
2947 
2948 
2949 void ConcurrentMark::aggregate_count_data() {
2950   uint n_workers = _g1h->workers()->active_workers();
2951 
2952   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
2953                                            _max_worker_id, n_workers);
2954 
2955   _g1h->workers()->run_task(&g1_par_agg_task);
2956 }
2957 
2958 // Clear the per-worker arrays used to store the per-region counting data
2959 void ConcurrentMark::clear_all_count_data() {
2960   // Clear the global card bitmap - it will be filled during
2961   // liveness count aggregation (during remark) and the
2962   // final counting task.
2963   _card_bm.clear();
2964 
2965   // Clear the global region bitmap - it will be filled as part
2966   // of the final counting task.
2967   _region_bm.clear();
2968 
2969   uint max_regions = _g1h->max_regions();
2970   assert(_max_worker_id > 0, "uninitialized");
2971 
2972   for (uint i = 0; i < _max_worker_id; i += 1) {
2973     BitMap* task_card_bm = count_card_bitmap_for(i);
2974     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
2975 
2976     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
2977     assert(marked_bytes_array != NULL, "uninitialized");
2978 
2979     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
2980     task_card_bm->clear();
2981   }
2982 }
2983 
2984 void ConcurrentMark::print_stats() {
2985   if (verbose_stats()) {
2986     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2987     for (size_t i = 0; i < _active_tasks; ++i) {
2988       _tasks[i]->print_stats();
2989       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2990     }
2991   }
2992 }
2993 
2994 // abandon current marking iteration due to a Full GC
2995 void ConcurrentMark::abort() {
2996   if (!cmThread()->during_cycle() || _has_aborted) {
2997     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2998     return;
2999   }
3000 
3001   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
3002   // concurrent bitmap clearing.
3003   _nextMarkBitMap->clearAll();
3004 
3005   // Note we cannot clear the previous marking bitmap here
3006   // since VerifyDuringGC verifies the objects marked during
3007   // a full GC against the previous bitmap.
3008 
3009   // Clear the liveness counting data
3010   clear_all_count_data();
3011   // Empty mark stack
3012   reset_marking_state();
3013   for (uint i = 0; i < _max_worker_id; ++i) {
3014     _tasks[i]->clear_region_fields();
3015   }
3016   _first_overflow_barrier_sync.abort();
3017   _second_overflow_barrier_sync.abort();
3018   _aborted_gc_id = _g1h->gc_tracer_cm()->gc_id();
3019   assert(!_aborted_gc_id.is_undefined(), "abort() executed more than once?");
3020   _has_aborted = true;
3021 
3022   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3023   satb_mq_set.abandon_partial_marking();
3024   // This can be called either during or outside marking, we'll read
3025   // the expected_active value from the SATB queue set.
3026   satb_mq_set.set_active_all_threads(
3027                                  false, /* new active value */
3028                                  satb_mq_set.is_active() /* expected_active */);
3029 
3030   _g1h->trace_heap_after_concurrent_cycle();
3031   _g1h->register_concurrent_cycle_end();
3032 }
3033 
3034 const GCId& ConcurrentMark::concurrent_gc_id() {
3035   if (has_aborted()) {
3036     return _aborted_gc_id;
3037   }
3038   return _g1h->gc_tracer_cm()->gc_id();
3039 }
3040 
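     // Helper for print_summary_info(): the NumberSeq holds individual pause
     // times in ms, so the total is reported in seconds (sum()/1000) while
     // the average, standard deviation and maximum are reported in ms.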
3041 static void print_ms_time_info(const char* prefix, const char* name,
3042                                NumberSeq& ns) {
3043   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3044                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3045   if (ns.num() > 0) {
3046     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3047                            prefix, ns.sd(), ns.maximum());
3048   }
3049 }
3050 
3051 void ConcurrentMark::print_summary_info() {
3052   gclog_or_tty->print_cr(" Concurrent marking:");
3053   print_ms_time_info("  ", "init marks", _init_times);
3054   print_ms_time_info("  ", "remarks", _remark_times);
3055   {
3056     print_ms_time_info("     ", "final marks", _remark_mark_times);
3057     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3058 
3059   }
3060   print_ms_time_info("  ", "cleanups", _cleanup_times);
3061   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3062                          _total_counting_time,
3063                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3064                           (double)_cleanup_times.num()
3065                          : 0.0));
3066   if (G1ScrubRemSets) {
3067     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3068                            _total_rs_scrub_time,
3069                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3070                             (double)_cleanup_times.num()
3071                            : 0.0));
3072   }
3073   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3074                          (_init_times.sum() + _remark_times.sum() +
3075                           _cleanup_times.sum())/1000.0);
3076   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3077                 "(%8.2f s marking).",
3078                 cmThread()->vtime_accum(),
3079                 cmThread()->vtime_mark_accum());
3080 }
3081 
3082 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3083   _parallel_workers->print_worker_threads_on(st);
3084 }
3085 
3086 void ConcurrentMark::print_on_error(outputStream* st) const {
3087   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
3088       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
3089   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
3090   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
3091 }
3092 
3093 // We take a break if someone is trying to stop the world.
3094 bool ConcurrentMark::do_yield_check(uint worker_id) {
3095   if (SuspendibleThreadSet::should_yield()) {
3096     if (worker_id == 0) {
3097       _g1h->g1_policy()->record_concurrent_pause();
3098     }
3099     SuspendibleThreadSet::yield();
3100     return true;
3101   } else {
3102     return false;
3103   }
3104 }
3105 
3106 #ifndef PRODUCT
3107 // for debugging purposes
3108 void ConcurrentMark::print_finger() {
3109   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3110                          p2i(_heap_start), p2i(_heap_end), p2i(_finger));
3111   for (uint i = 0; i < _max_worker_id; ++i) {
3112     gclog_or_tty->print("   %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
3113   }
3114   gclog_or_tty->cr();
3115 }
3116 #endif
3117 
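     // Process a grey object: account for its size in _words_scanned and,
     // when <scan> is true, visit its fields with the CM oop closure. The
     // <scan> == false instantiation is used for typeArrays, which contain
     // no references and therefore only need to be counted.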
3118 template<bool scan>
3119 inline void CMTask::process_grey_object(oop obj) {
3120   assert(scan || obj->is_typeArray(), "Skipping scan of grey non-typeArray");
3121   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3122 
3123   if (_cm->verbose_high()) {
3124     gclog_or_tty->print_cr("[%u] processing grey object " PTR_FORMAT,
3125                            _worker_id, p2i((void*) obj));
3126   }
3127 
3128   size_t obj_size = obj->size();
3129   _words_scanned += obj_size;
3130 
3131   if (scan) {
3132     obj->oop_iterate(_cm_oop_closure);
3133   }
3134   statsOnly( ++_objs_scanned );
3135   check_limits();
3136 }
3137 
3138 template void CMTask::process_grey_object<true>(oop);
3139 template void CMTask::process_grey_object<false>(oop);
3140 
3141 // Closure applied to marked bits while iterating over the next marking bitmap
3142 class CMBitMapClosure : public BitMapClosure {
3143 private:
3144   // the bitmap that is being iterated over
3145   CMBitMap*                   _nextMarkBitMap;
3146   ConcurrentMark*             _cm;
3147   CMTask*                     _task;
3148 
3149 public:
3150   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3151     _nextMarkBitMap(nextMarkBitMap), _cm(cm), _task(task) { }
3152 
3153   bool do_bit(size_t offset) {
3154     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3155     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3156     assert( addr < _cm->finger(), "invariant");
3157 
3158     statsOnly( _task->increase_objs_found_on_bitmap() );
3159     assert(addr >= _task->finger(), "invariant");
3160 
3161     // We move that task's local finger along.
3162     _task->move_finger_to(addr);
3163 
3164     _task->scan_object(oop(addr));
3165     // we only partially drain the local queue and global stack
3166     _task->drain_local_queue(true);
3167     _task->drain_global_stack(true);
3168 
3169     // if the has_aborted flag has been raised, we need to bail out of
3170     // the iteration
3171     return !_task->has_aborted();
3172   }
3173 };
3174 
3175 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3176                                ConcurrentMark* cm,
3177                                CMTask* task)
3178   : _g1h(g1h), _cm(cm), _task(task) {
3179   assert(_ref_processor == NULL, "should be initialized to NULL");
3180 
3181   if (G1UseConcMarkReferenceProcessing) {
3182     _ref_processor = g1h->ref_processor_cm();
3183     assert(_ref_processor != NULL, "should not be NULL");
3184   }
3185 }
3186 
3187 void CMTask::setup_for_region(HeapRegion* hr) {
3188   assert(hr != NULL,
3189         "claim_region() should have filtered out NULL regions");
3190   assert(!hr->is_continues_humongous(),
3191         "claim_region() should have filtered out continues humongous regions");
3192 
3193   if (_cm->verbose_low()) {
3194     gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3195                            _worker_id, p2i(hr));
3196   }
3197 
3198   _curr_region  = hr;
3199   _finger       = hr->bottom();
3200   update_region_limit();
3201 }
3202 
3203 void CMTask::update_region_limit() {
3204   HeapRegion* hr            = _curr_region;
3205   HeapWord* bottom          = hr->bottom();
3206   HeapWord* limit           = hr->next_top_at_mark_start();
3207 
3208   if (limit == bottom) {
3209     if (_cm->verbose_low()) {
3210       gclog_or_tty->print_cr("[%u] found an empty region "
3211                              "["PTR_FORMAT", "PTR_FORMAT")",
3212                              _worker_id, p2i(bottom), p2i(limit));
3213     }
3214     // The region was collected underneath our feet.
3215     // We set the finger to bottom to ensure that the bitmap
3216     // iteration that will follow this will not do anything.
3217     // (this is not a condition that holds when we set the region up,
3218     // as the region is not supposed to be empty in the first place)
3219     _finger = bottom;
3220   } else if (limit >= _region_limit) {
3221     assert(limit >= _finger, "peace of mind");
3222   } else {
3223     assert(limit < _region_limit, "only way to get here");
3224     // This can happen under some pretty unusual circumstances.  An
3225     // evacuation pause empties the region underneath our feet (NTAMS
3226     // at bottom). We then do some allocation in the region (NTAMS
3227     // stays at bottom), followed by the region being used as a GC
3228     // alloc region (NTAMS will move to top() and the objects
3229     // originally below it will be grayed). All objects now marked in
3230     // the region are explicitly grayed, if below the global finger,
3231     // and in fact we do not need to scan anything else. So, we simply
3232     // set _finger to be limit to ensure that the bitmap iteration
3233     // doesn't do anything.
3234     _finger = limit;
3235   }
3236 
3237   _region_limit = limit;
3238 }
3239 
3240 void CMTask::giveup_current_region() {
3241   assert(_curr_region != NULL, "invariant");
3242   if (_cm->verbose_low()) {
3243     gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3244                            _worker_id, p2i(_curr_region));
3245   }
3246   clear_region_fields();
3247 }
3248 
3249 void CMTask::clear_region_fields() {
3250   // Values for these three fields that indicate that we're not
3251   // holding on to a region.
3252   _curr_region   = NULL;
3253   _finger        = NULL;
3254   _region_limit  = NULL;
3255 }
3256 
3257 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3258   if (cm_oop_closure == NULL) {
3259     assert(_cm_oop_closure != NULL, "invariant");
3260   } else {
3261     assert(_cm_oop_closure == NULL, "invariant");
3262   }
3263   _cm_oop_closure = cm_oop_closure;
3264 }
3265 
3266 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3267   guarantee(nextMarkBitMap != NULL, "invariant");
3268 
3269   if (_cm->verbose_low()) {
3270     gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3271   }
3272 
3273   _nextMarkBitMap                = nextMarkBitMap;
3274   clear_region_fields();
3275 
3276   _calls                         = 0;
3277   _elapsed_time_ms               = 0.0;
3278   _termination_time_ms           = 0.0;
3279   _termination_start_time_ms     = 0.0;
3280 
3281 #if _MARKING_STATS_
3282   _aborted                       = 0;
3283   _aborted_overflow              = 0;
3284   _aborted_cm_aborted            = 0;
3285   _aborted_yield                 = 0;
3286   _aborted_timed_out             = 0;
3287   _aborted_satb                  = 0;
3288   _aborted_termination           = 0;
3289   _steal_attempts                = 0;
3290   _steals                        = 0;
3291   _local_pushes                  = 0;
3292   _local_pops                    = 0;
3293   _local_max_size                = 0;
3294   _objs_scanned                  = 0;
3295   _global_pushes                 = 0;
3296   _global_pops                   = 0;
3297   _global_max_size               = 0;
3298   _global_transfers_to           = 0;
3299   _global_transfers_from         = 0;
3300   _regions_claimed               = 0;
3301   _objs_found_on_bitmap          = 0;
3302   _satb_buffers_processed        = 0;
3303 #endif // _MARKING_STATS_
3304 }
3305 
3306 bool CMTask::should_exit_termination() {
3307   regular_clock_call();
3308   // This is called when we are in the termination protocol. We should
3309   // quit if, for some reason, this task wants to abort or the global
3310   // stack is not empty (this means that we can get work from it).
3311   return !_cm->mark_stack_empty() || has_aborted();
3312 }
3313 
3314 void CMTask::reached_limit() {
3315   assert(_words_scanned >= _words_scanned_limit ||
3316          _refs_reached >= _refs_reached_limit ,
3317          "shouldn't have been called otherwise");
3318   regular_clock_call();
3319 }
3320 
3321 void CMTask::regular_clock_call() {
3322   if (has_aborted()) return;
3323 
3324   // First, we need to recalculate the words scanned and refs reached
3325   // limits for the next clock call.
3326   recalculate_limits();
3327 
3328   // During the regular clock call we do the following
3329 
3330   // (1) If an overflow has been flagged, then we abort.
3331   if (_cm->has_overflown()) {
3332     set_has_aborted();
3333     return;
3334   }
3335 
3336   // If we are not concurrent (i.e. we're doing remark) we don't need
3337   // to check anything else. The other steps are only needed during
3338   // the concurrent marking phase.
3339   if (!concurrent()) return;
3340 
3341   // (2) If marking has been aborted for Full GC, then we also abort.
3342   if (_cm->has_aborted()) {
3343     set_has_aborted();
3344     statsOnly( ++_aborted_cm_aborted );
3345     return;
3346   }
3347 
3348   double curr_time_ms = os::elapsedVTime() * 1000.0;
3349 
3350   // (3) If marking stats are enabled, then we update the step history.
3351 #if _MARKING_STATS_
3352   if (_words_scanned >= _words_scanned_limit) {
3353     ++_clock_due_to_scanning;
3354   }
3355   if (_refs_reached >= _refs_reached_limit) {
3356     ++_clock_due_to_marking;
3357   }
3358 
3359   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3360   _interval_start_time_ms = curr_time_ms;
3361   _all_clock_intervals_ms.add(last_interval_ms);
3362 
3363   if (_cm->verbose_medium()) {
3364       gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3365                         "scanned = "SIZE_FORMAT"%s, refs reached = "SIZE_FORMAT"%s",
3366                         _worker_id, last_interval_ms,
3367                         _words_scanned,
3368                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3369                         _refs_reached,
3370                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3371   }
3372 #endif // _MARKING_STATS_
3373 
3374   // (4) We check whether we should yield. If we have to, then we abort.
3375   if (SuspendibleThreadSet::should_yield()) {
3376     // We should yield. To do this we abort the task. The caller is
3377     // responsible for yielding.
3378     set_has_aborted();
3379     statsOnly( ++_aborted_yield );
3380     return;
3381   }
3382 
3383   // (5) We check whether we've reached our time quota. If we have,
3384   // then we abort.
3385   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3386   if (elapsed_time_ms > _time_target_ms) {
3387     set_has_aborted();
3388     _has_timed_out = true;
3389     statsOnly( ++_aborted_timed_out );
3390     return;
3391   }
3392 
3393   // (6) Finally, we check whether there are enough completed SATB
3394   // buffers available for processing. If there are, we abort.
3395   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3396   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3397     if (_cm->verbose_low()) {
3398       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3399                              _worker_id);
3400     }
3401     // we do need to process SATB buffers, we'll abort and restart
3402     // the marking task to do so
3403     set_has_aborted();
3404     statsOnly( ++_aborted_satb );
3405     return;
3406   }
3407 }
3408 
3409 void CMTask::recalculate_limits() {
3410   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3411   _words_scanned_limit      = _real_words_scanned_limit;
3412 
3413   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3414   _refs_reached_limit       = _real_refs_reached_limit;
3415 }
3416 
3417 void CMTask::decrease_limits() {
3418   // This is called when we believe that we're going to do an infrequent
3419   // operation which will increase the per byte scanned cost (i.e. move
3420   // entries to/from the global stack). It basically tries to decrease the
3421   // scanning limit so that the clock is called earlier.
3422 
3423   if (_cm->verbose_medium()) {
3424     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3425   }
3426 
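       // The real limit is the count at the last recalculation plus a full
       // period, so the lowered limit is only a quarter of a period beyond
       // that count and the next clock call comes roughly four times sooner.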
3427   _words_scanned_limit = _real_words_scanned_limit -
3428     3 * words_scanned_period / 4;
3429   _refs_reached_limit  = _real_refs_reached_limit -
3430     3 * refs_reached_period / 4;
3431 }
3432 
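     // Move up to global_stack_transfer_size entries from the task's local
     // queue to the global mark stack in one bulk push, so that the
     // (coarser-grained, potentially contended) global stack is touched
     // infrequently.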
3433 void CMTask::move_entries_to_global_stack() {
3434   // local array where we'll store the entries that will be popped
3435   // from the local queue
3436   oop buffer[global_stack_transfer_size];
3437 
3438   int n = 0;
3439   oop obj;
3440   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3441     buffer[n] = obj;
3442     ++n;
3443   }
3444 
3445   if (n > 0) {
3446     // we popped at least one entry from the local queue
3447 
3448     statsOnly( ++_global_transfers_to; _local_pops += n );
3449 
3450     if (!_cm->mark_stack_push(buffer, n)) {
3451       if (_cm->verbose_low()) {
3452         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3453                                _worker_id);
3454       }
3455       set_has_aborted();
3456     } else {
3457       // the transfer was successful
3458 
3459       if (_cm->verbose_medium()) {
3460         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3461                                _worker_id, n);
3462       }
3463       statsOnly( size_t tmp_size = _cm->mark_stack_size();
3464                  if (tmp_size > _global_max_size) {
3465                    _global_max_size = tmp_size;
3466                  }
3467                  _global_pushes += n );
3468     }
3469   }
3470 
3471   // this operation was quite expensive, so decrease the limits
3472   decrease_limits();
3473 }
3474 
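     // Pop up to global_stack_transfer_size entries from the global mark
     // stack in one bulk operation and push them onto the task's local queue.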
3475 void CMTask::get_entries_from_global_stack() {
3476   // local array where we'll store the entries that will be popped
3477   // from the global stack.
3478   oop buffer[global_stack_transfer_size];
3479   int n;
3480   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3481   assert(n <= global_stack_transfer_size,
3482          "we should not pop more than the given limit");
3483   if (n > 0) {
3484     // yes, we did actually pop at least one entry
3485 
3486     statsOnly( ++_global_transfers_from; _global_pops += n );
3487     if (_cm->verbose_medium()) {
3488       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3489                              _worker_id, n);
3490     }
3491     for (int i = 0; i < n; ++i) {
3492       bool success = _task_queue->push(buffer[i]);
3493       // We only call this when the local queue is empty or under a
3494       // given target limit. So, we do not expect this push to fail.
3495       assert(success, "invariant");
3496     }
3497 
3498     statsOnly( size_t tmp_size = (size_t)_task_queue->size();
3499                if (tmp_size > _local_max_size) {
3500                  _local_max_size = tmp_size;
3501                }
3502                _local_pushes += n );
3503   }
3504 
3505   // this operation was quite expensive, so decrease the limits
3506   decrease_limits();
3507 }
3508 
3509 void CMTask::drain_local_queue(bool partially) {
3510   if (has_aborted()) return;
3511 
3512   // Decide what the target size is, depending whether we're going to
3513   // drain it partially (so that other tasks can steal if they run out
3514   // of things to do) or totally (at the very end).
3515   size_t target_size;
3516   if (partially) {
3517     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3518   } else {
3519     target_size = 0;
3520   }
3521 
3522   if (_task_queue->size() > target_size) {
3523     if (_cm->verbose_high()) {
3524       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3525                              _worker_id, target_size);
3526     }
3527 
3528     oop obj;
3529     bool ret = _task_queue->pop_local(obj);
3530     while (ret) {
3531       statsOnly( ++_local_pops );
3532 
3533       if (_cm->verbose_high()) {
3534         gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3535                                p2i((void*) obj));
3536       }
3537 
3538       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3539       assert(!_g1h->is_on_master_free_list(
3540                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3541 
3542       scan_object(obj);
3543 
3544       if (_task_queue->size() <= target_size || has_aborted()) {
3545         ret = false;
3546       } else {
3547         ret = _task_queue->pop_local(obj);
3548       }
3549     }
3550 
3551     if (_cm->verbose_high()) {
3552       gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3553                              _worker_id, _task_queue->size());
3554     }
3555   }
3556 }
3557 
3558 void CMTask::drain_global_stack(bool partially) {
3559   if (has_aborted()) return;
3560 
3561   // We have a policy to drain the local queue before we attempt to
3562   // drain the global stack.
3563   assert(partially || _task_queue->size() == 0, "invariant");
3564 
3565   // Decide what the target size is, depending whether we're going to
3566   // drain it partially (so that other tasks can steal if they run out
3567   // of things to do) or totally (at the very end).  Notice that,
3568   // because we move entries from the global stack in chunks or
3569   // because another task might be doing the same, we might in fact
3570   // drop below the target. But, this is not a problem.
3571   size_t target_size;
3572   if (partially) {
3573     target_size = _cm->partial_mark_stack_size_target();
3574   } else {
3575     target_size = 0;
3576   }
3577 
3578   if (_cm->mark_stack_size() > target_size) {
3579     if (_cm->verbose_low()) {
3580       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3581                              _worker_id, target_size);
3582     }
3583 
3584     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3585       get_entries_from_global_stack();
3586       drain_local_queue(partially);
3587     }
3588 
3589     if (_cm->verbose_low()) {
3590       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3591                              _worker_id, _cm->mark_stack_size());
3592     }
3593   }
3594 }
3595 
3596 // The SATB queue code makes several assumptions about whether the par or
3597 // non-par versions of the methods are called. This is why some of the code is
3598 // replicated. We should really get rid of the single-threaded version
3599 // of the code to simplify things.
3600 void CMTask::drain_satb_buffers() {
3601   if (has_aborted()) return;
3602 
3603   // We set this so that the regular clock knows that we're in the
3604   // middle of draining buffers and doesn't set the abort flag when it
3605   // notices that SATB buffers are available for draining. It'd be
3606   // very counterproductive if it did that. :-)
3607   _draining_satb_buffers = true;
3608 
3609   CMSATBBufferClosure satb_cl(this, _g1h);
3610   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3611 
3612   // This keeps claiming and applying the closure to completed buffers
3613   // until we run out of buffers or we need to abort.
3614   while (!has_aborted() &&
3615          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3616     if (_cm->verbose_medium()) {
3617       gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3618     }
3619     statsOnly( ++_satb_buffers_processed );
3620     regular_clock_call();
3621   }
3622 
3623   _draining_satb_buffers = false;
3624 
3625   assert(has_aborted() ||
3626          concurrent() ||
3627          satb_mq_set.completed_buffers_num() == 0, "invariant");
3628 
3629   // again, this was a potentially expensive operation, decrease the
3630   // limits to get the regular clock call early
3631   decrease_limits();
3632 }
3633 
3634 void CMTask::print_stats() {
3635   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3636                          _worker_id, _calls);
3637   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3638                          _elapsed_time_ms, _termination_time_ms);
3639   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3640                          _step_times_ms.num(), _step_times_ms.avg(),
3641                          _step_times_ms.sd());
3642   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3643                          _step_times_ms.maximum(), _step_times_ms.sum());
3644 
3645 #if _MARKING_STATS_
3646   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3647                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3648                          _all_clock_intervals_ms.sd());
3649   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3650                          _all_clock_intervals_ms.maximum(),
3651                          _all_clock_intervals_ms.sum());
3652   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = " SIZE_FORMAT ", marking = " SIZE_FORMAT,
3653                          _clock_due_to_scanning, _clock_due_to_marking);
3654   gclog_or_tty->print_cr("  Objects: scanned = " SIZE_FORMAT ", found on the bitmap = " SIZE_FORMAT,
3655                          _objs_scanned, _objs_found_on_bitmap);
3656   gclog_or_tty->print_cr("  Local Queue:  pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT,
3657                          _local_pushes, _local_pops, _local_max_size);
3658   gclog_or_tty->print_cr("  Global Stack: pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT,
3659                          _global_pushes, _global_pops, _global_max_size);
3660   gclog_or_tty->print_cr("                transfers to = " SIZE_FORMAT ", transfers from = " SIZE_FORMAT,
3661                          _global_transfers_to,_global_transfers_from);
3662   gclog_or_tty->print_cr("  Regions: claimed = " SIZE_FORMAT, _regions_claimed);
3663   gclog_or_tty->print_cr("  SATB buffers: processed = " SIZE_FORMAT, _satb_buffers_processed);
3664   gclog_or_tty->print_cr("  Steals: attempts = " SIZE_FORMAT ", successes = " SIZE_FORMAT,
3665                          _steal_attempts, _steals);
3666   gclog_or_tty->print_cr("  Aborted: " SIZE_FORMAT ", due to", _aborted);
3667   gclog_or_tty->print_cr("    overflow: " SIZE_FORMAT ", global abort: " SIZE_FORMAT ", yield: " SIZE_FORMAT,
3668                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3669   gclog_or_tty->print_cr("    time out: " SIZE_FORMAT ", SATB: " SIZE_FORMAT ", termination: " SIZE_FORMAT,
3670                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3671 #endif // _MARKING_STATS_
3672 }
3673 
3674 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3675   return _task_queues->steal(worker_id, hash_seed, obj);
3676 }
3677 
3678 /*****************************************************************************
3679 
3680     The do_marking_step(time_target_ms, ...) method is the building
3681     block of the parallel marking framework. It can be called in parallel
3682     with other invocations of do_marking_step() on different tasks
3683     (but only one per task, obviously) and concurrently with the
3684     mutator threads, or during remark, hence it eliminates the need
3685     for two versions of the code. When called during remark, it will
3686     pick up from where the task left off during the concurrent marking
3687     phase. Interestingly, tasks are also claimable during evacuation
3688     pauses, since do_marking_step() ensures that it aborts before
3689     it needs to yield.
3690 
3691     The data structures that it uses to do marking work are the
3692     following:
3693 
3694       (1) Marking Bitmap. If there are gray objects that appear only
3695       on the bitmap (this happens either when dealing with an overflow
3696       or when the initial marking phase has simply marked the roots
3697       and didn't push them on the stack), then tasks claim heap
3698       regions whose bitmap they then scan to find gray objects. A
3699       global finger indicates where the end of the last claimed region
3700       is. A local finger indicates how far into the region a task has
3701       scanned. The two fingers are used to determine how to gray an
3702       object (i.e. whether simply marking it is OK, as it will be
3703       visited by a task in the future, or whether it needs to be also
3704       pushed on a stack).
3705 
3706       (2) Local Queue. The local queue of the task which is accessed
3707       reasonably efficiently by the task. Other tasks can steal from
3708       it when they run out of work. Throughout the marking phase, a
3709       task attempts to keep its local queue short but not totally
3710       empty, so that entries are available for stealing by other
3711       tasks. Only when there is no more work, a task will totally
3712       drain its local queue.
3713 
3714       (3) Global Mark Stack. This handles local queue overflow. During
3715       marking only sets of entries are moved between it and the local
3716       queues, as access to it requires a mutex and more fine-grain
3717       interaction with it which might cause contention. If it
3718       overflows, then the marking phase should restart and iterate
3719       over the bitmap to identify gray objects. Throughout the marking
3720       phase, tasks attempt to keep the global mark stack at a small
3721       length but not totally empty, so that entries are available for
3722       popping by other tasks. Only when there is no more work, tasks
3723       will totally drain the global mark stack.
3724 
3725       (4) SATB Buffer Queue. This is where completed SATB buffers are
3726       made available. Buffers are regularly removed from this queue
3727       and scanned for roots, so that the queue doesn't get too
3728       long. During remark, all completed buffers are processed, as
3729       well as the filled in parts of any uncompleted buffers.
3730 
3731     The do_marking_step() method tries to abort when the time target
3732     has been reached. There are a few other cases when the
3733     do_marking_step() method also aborts:
3734 
3735       (1) When the marking phase has been aborted (after a Full GC).
3736 
3737       (2) When a global overflow (on the global stack) has been
3738       triggered. Before the task aborts, it will actually sync up with
3739       the other tasks to ensure that all the marking data structures
3740       (local queues, stacks, fingers etc.)  are re-initialized so that
3741       when do_marking_step() completes, the marking phase can
3742       immediately restart.
3743 
3744       (3) When enough completed SATB buffers are available. The
3745       do_marking_step() method only tries to drain SATB buffers right
3746       at the beginning. So, if enough buffers are available, the
3747       marking step aborts and the SATB buffers are processed at
3748       the beginning of the next invocation.
3749 
3750       (4) To yield. When we have to yield, we abort and yield
3751       right at the end of do_marking_step(). This saves us from a lot
3752       of hassle as, by yielding we might allow a Full GC. If this
3753       happens then objects will be compacted underneath our feet, the
3754       heap might shrink, etc. We save checking for this by just
3755       aborting and doing the yield right at the end.
3756 
3757     From the above it follows that the do_marking_step() method should
3758     be called in a loop (or, otherwise, regularly) until it completes.
3759 
3760     If a marking step completes without its has_aborted() flag being
3761     true, it means it has completed the current marking phase (and
3762     also all other marking tasks have done so and have all synced up).
3763 
3764     A method called regular_clock_call() is invoked "regularly" (in
3765     sub ms intervals) throughout marking. It is this clock method that
3766     checks all the abort conditions which were mentioned above and
3767     decides when the task should abort. A work-based scheme is used to
3768     trigger this clock method: when the number of object words the
3769     marking phase has scanned or the number of references the marking
3770     phase has visited reach a given limit. Additional invocations of
3771     the clock method have been planted in a few other strategic places
3772     too. The initial reason for the clock method was to avoid calling
3773     vtime too regularly, as it is quite expensive. So, once it was in
3774     place, it was natural to piggy-back all the other conditions on it
3775     too and not constantly check them throughout the code.
3776 
3777     If do_termination is true then do_marking_step will enter its
3778     termination protocol.
3779 
3780     The value of is_serial must be true when do_marking_step is being
3781     called serially (i.e. by the VMThread) and do_marking_step should
3782     skip any synchronization in the termination and overflow code.
3783     Examples include the serial remark code and the serial reference
3784     processing closures.
3785 
3786     The value of is_serial must be false when do_marking_step is
3787     being called by any of the worker threads in a work gang.
3788     Examples include the concurrent marking code (CMMarkingTask),
3789     the MT remark code, and the MT reference processing closures.
3790 
3791  *****************************************************************************/
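     // A caller is therefore expected to drive do_marking_step() in a loop,
     // roughly as in this illustrative sketch (the concurrent marking
     // worker, the remark task and the reference processing closures each
     // supply their own time target and flags):
     //
     //   do {
     //     task->do_marking_step(G1ConcMarkStepDurationMillis,
     //                           true  /* do_termination */,
     //                           false /* is_serial */);
     //     ... yield to the suspendible thread set if requested ...
     //   } while (task->has_aborted() && !cm->has_aborted());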
3792 
3793 void CMTask::do_marking_step(double time_target_ms,
3794                              bool do_termination,
3795                              bool is_serial) {
3796   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3797   assert(concurrent() == _cm->concurrent(), "they should be the same");
3798 
3799   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3800   assert(_task_queues != NULL, "invariant");
3801   assert(_task_queue != NULL, "invariant");
3802   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3803 
3804   assert(!_claimed,
3805          "only one thread should claim this task at any one time");
3806 
3807   // OK, this doesn't safeguard against all possible scenarios, as it is
3808   // possible for two threads to set the _claimed flag at the same
3809   // time. But it is only for debugging purposes anyway and it will
3810   // catch most problems.
3811   _claimed = true;
3812 
3813   _start_time_ms = os::elapsedVTime() * 1000.0;
3814   statsOnly( _interval_start_time_ms = _start_time_ms );
3815 
3816   // If do_stealing is true then do_marking_step will attempt to
3817   // steal work from the other CMTasks. It only makes sense to
3818   // enable stealing when the termination protocol is enabled
3819   // and do_marking_step() is not being called serially.
3820   bool do_stealing = do_termination && !is_serial;
3821 
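       // Shorten our own time target by a prediction of how much this step
       // will overshoot it, based on the recorded differences between target
       // and actual durations of previous steps (_marking_step_diffs_ms).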
3822   double diff_prediction_ms =
3823     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3824   _time_target_ms = time_target_ms - diff_prediction_ms;
3825 
3826   // set up the variables that are used in the work-based scheme to
3827   // call the regular clock method
3828   _words_scanned = 0;
3829   _refs_reached  = 0;
3830   recalculate_limits();
3831 
3832   // clear all flags
3833   clear_has_aborted();
3834   _has_timed_out = false;
3835   _draining_satb_buffers = false;
3836 
3837   ++_calls;
3838 
3839   if (_cm->verbose_low()) {
3840     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
3841                            "target = %1.2lfms >>>>>>>>>>",
3842                            _worker_id, _calls, _time_target_ms);
3843   }
3844 
3845   // Set up the bitmap and oop closures. Anything that uses them is
3846   // eventually called from this method, so it is OK to allocate these
3847   // statically.
3848   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3849   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3850   set_cm_oop_closure(&cm_oop_closure);
3851 
3852   if (_cm->has_overflown()) {
3853     // This can happen if the mark stack overflows during a GC pause
3854     // and this task, after a yield point, restarts. We have to abort
3855     // as we need to get into the overflow protocol which happens
3856     // right at the end of this task.
3857     set_has_aborted();
3858   }
3859 
3860   // First drain any available SATB buffers. After this, we will not
3861   // look at SATB buffers before the next invocation of this method.
3862   // If enough completed SATB buffers are queued up, the regular clock
3863   // will abort this task so that it restarts.
3864   drain_satb_buffers();
3865   // ...then partially drain the local queue and the global stack
3866   drain_local_queue(true);
3867   drain_global_stack(true);
3868 
3869   do {
3870     if (!has_aborted() && _curr_region != NULL) {
3871       // This means that we're already holding on to a region.
3872       assert(_finger != NULL, "if region is not NULL, then the finger "
3873              "should not be NULL either");
3874 
3875       // We might have restarted this task after an evacuation pause
3876       // which might have evacuated the region we're holding on to
3877       // underneath our feet. Let's read its limit again to make sure
3878       // that we do not iterate over a region of the heap that
3879       // contains garbage (update_region_limit() will also move
3880       // _finger to the start of the region if it is found empty).
3881       update_region_limit();
3882       // We will start from _finger not from the start of the region,
3883       // as we might be restarting this task after aborting half-way
3884       // through scanning this region. In this case, _finger points to
3885       // the address where we last found a marked object. If this is a
3886       // fresh region, _finger points to start().
3887       MemRegion mr = MemRegion(_finger, _region_limit);
3888 
3889       if (_cm->verbose_low()) {
3890         gclog_or_tty->print_cr("[%u] we're scanning part "
3891                                "["PTR_FORMAT", "PTR_FORMAT") "
3892                                "of region "HR_FORMAT,
3893                                _worker_id, p2i(_finger), p2i(_region_limit),
3894                                HR_FORMAT_PARAMS(_curr_region));
3895       }
3896 
3897       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
3898              "humongous regions should go around loop once only");
3899 
3900       // Some special cases:
3901       // If the memory region is empty, we can just give up the region.
3902       // If the current region is humongous then we only need to check
3903       // the bitmap for the bit associated with the start of the object,
3904       // scan the object if it's live, and give up the region.
3905       // Otherwise, let's iterate over the bitmap of the part of the region
3906       // that is left.
3907       // If the iteration is successful, give up the region.
3908       if (mr.is_empty()) {
3909         giveup_current_region();
3910         regular_clock_call();
3911       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
3912         if (_nextMarkBitMap->isMarked(mr.start())) {
3913           // The object is marked - apply the closure
3914           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
3915           bitmap_closure.do_bit(offset);
3916         }
3917         // Even if this task aborted while scanning the humongous object
3918         // we can (and should) give up the current region.
3919         giveup_current_region();
3920         regular_clock_call();
3921       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3922         giveup_current_region();
3923         regular_clock_call();
3924       } else {
3925         assert(has_aborted(), "currently the only way to do so");
3926         // The only way to abort the bitmap iteration is to return
3927         // false from the do_bit() method. However, inside the
3928         // do_bit() method we move the _finger to point to the
3929         // object currently being looked at. So, if we bail out, we
3930         // have definitely set _finger to something non-null.
3931         assert(_finger != NULL, "invariant");
3932 
3933         // Region iteration was actually aborted. So now _finger
3934         // points to the address of the object we last scanned. If we
3935         // leave it there, when we restart this task, we will rescan
3936         // the object. It is easy to avoid this. We move the finger by
3937         // enough to point to the next possible object header (the
3938         // bitmap knows by how much we need to move it as it knows its
3939         // granularity).
3940         assert(_finger < _region_limit, "invariant");
3941         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
3942         // Check if bitmap iteration was aborted while scanning the last object
3943         if (new_finger >= _region_limit) {
3944           giveup_current_region();
3945         } else {
3946           move_finger_to(new_finger);
3947         }
3948       }
3949     }
3950     // At this point we have either completed iterating over the
3951     // region we were holding on to, or we have aborted.
3952 
3953     // We then partially drain the local queue and the global stack.
3954     // (Do we really need this?)
3955     drain_local_queue(true);
3956     drain_global_stack(true);
3957 
3958     // Read the note on the claim_region() method on why it might
3959     // return NULL with potentially more regions available for
3960     // claiming and why we have to check out_of_regions() to determine
3961     // whether we're done or not.
3962     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3963       // We are going to try to claim a new region. We should have
3964       // given up on the previous one.
3965       // Separated the asserts so that we know which one fires.
3966       assert(_curr_region  == NULL, "invariant");
3967       assert(_finger       == NULL, "invariant");
3968       assert(_region_limit == NULL, "invariant");
3969       if (_cm->verbose_low()) {
3970         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
3971       }
3972       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3973       if (claimed_region != NULL) {
3974         // Yes, we managed to claim one
3975         statsOnly( ++_regions_claimed );
3976 
3977         if (_cm->verbose_low()) {
3978           gclog_or_tty->print_cr("[%u] we successfully claimed "
3979                                  "region "PTR_FORMAT,
3980                                  _worker_id, p2i(claimed_region));
3981         }
3982 
3983         setup_for_region(claimed_region);
3984         assert(_curr_region == claimed_region, "invariant");
3985       }
3986       // It is important to call the regular clock here. It might take
3987       // a while to claim a region if, for example, we hit a large
3988       // block of empty regions. So we need to call the regular clock
3989       // method once round the loop to make sure it's called
3990       // frequently enough.
3991       regular_clock_call();
3992     }
3993 
3994     if (!has_aborted() && _curr_region == NULL) {
3995       assert(_cm->out_of_regions(),
3996              "at this point we should be out of regions");
3997     }
3998   } while ( _curr_region != NULL && !has_aborted());
3999 
4000   if (!has_aborted()) {
4001     // We cannot check whether the global stack is empty, since other
4002     // tasks might be pushing objects to it concurrently.
4003     assert(_cm->out_of_regions(),
4004            "at this point we should be out of regions");
4005 
4006     if (_cm->verbose_low()) {
4007       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4008     }
4009 
4010     // Try to reduce the number of available SATB buffers so that
4011     // remark has less work to do.
4012     drain_satb_buffers();
4013   }
4014 
4015   // Since we've done everything else, we can now totally drain the
4016   // local queue and global stack.
4017   drain_local_queue(false);
4018   drain_global_stack(false);
4019 
4020   // Attempt at work stealing from other tasks' queues.
4021   if (do_stealing && !has_aborted()) {
4022     // We have not aborted. This means that we have finished all that
4023     // we could. Let's try to do some stealing...
4024 
4025     // We cannot check whether the global stack is empty, since other
4026     // tasks might be pushing objects to it concurrently.
4027     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4028            "only way to reach here");
4029 
4030     if (_cm->verbose_low()) {
4031       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4032     }
4033 
4034     while (!has_aborted()) {
4035       oop obj;
4036       statsOnly( ++_steal_attempts );
4037 
4038       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4039         if (_cm->verbose_medium()) {
4040           gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4041                                  _worker_id, p2i((void*) obj));
4042         }
4043 
4044         statsOnly( ++_steals );
4045 
4046         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4047                "any stolen object should be marked");
4048         scan_object(obj);
4049 
4050         // And since we're towards the end, let's totally drain the
4051         // local queue and global stack.
4052         drain_local_queue(false);
4053         drain_global_stack(false);
4054       } else {
4055         break;
4056       }
4057     }
4058   }
4059 
4060   // If we are about to wrap up and go into termination, check if we
4061   // should raise the overflow flag.
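  // (force_overflow() is a testing hook: when enabled it makes us pretend
  // that an overflow happened so that the restart-after-overflow path
  // below gets exercised even without a real mark stack overflow.)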
4062   if (do_termination && !has_aborted()) {
4063     if (_cm->force_overflow()->should_force()) {
4064       _cm->set_has_overflown();
4065       regular_clock_call();
4066     }
4067   }
4068 
4069   // We still haven't aborted. Now, let's try to get into the
4070   // termination protocol.
4071   if (do_termination && !has_aborted()) {
4072     // We cannot check whether the global stack is empty, since other
4073     // tasks might be concurrently pushing objects on it.
4074     // Separated the asserts so that we know which one fires.
4075     assert(_cm->out_of_regions(), "only way to reach here");
4076     assert(_task_queue->size() == 0, "only way to reach here");
4077 
4078     if (_cm->verbose_low()) {
4079       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4080     }
4081 
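    // os::elapsedVTime() reports seconds, so multiply by 1000.0 to get ms.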
4082     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4083 
4084     // The CMTask class also extends the TerminatorTerminator class,
4085     // hence its should_exit_termination() method will also decide
4086     // whether to exit the termination protocol or not.
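    // offer_termination() returns true only once every task has offered
    // termination; if should_exit_termination() decides we should leave
    // early (e.g. because the regular clock aborted this task), it returns
    // false and we abort and retry the marking step.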
4087     bool finished = (is_serial ||
4088                      _cm->terminator()->offer_termination(this));
4089     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4090     _termination_time_ms +=
4091       termination_end_time_ms - _termination_start_time_ms;
4092 
4093     if (finished) {
4094       // We're all done.
4095 
4096       if (_worker_id == 0) {
4097         // let's allow task 0 to do this
4098         if (concurrent()) {
4099           assert(_cm->concurrent_marking_in_progress(), "invariant");
4100           // we need to set this to false before the next
4101           // safepoint. This way we ensure that the marking phase
4102           // doesn't observe any more heap expansions.
4103           _cm->clear_concurrent_marking_in_progress();
4104         }
4105       }
4106 
4107       // We can now guarantee that the global stack is empty, since
4108       // all other tasks have finished. We separated the guarantees so
4109       // that, if a condition is false, we can immediately find out
4110       // which one.
4111       guarantee(_cm->out_of_regions(), "only way to reach here");
4112       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4113       guarantee(_task_queue->size() == 0, "only way to reach here");
4114       guarantee(!_cm->has_overflown(), "only way to reach here");
4115       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4116 
4117       if (_cm->verbose_low()) {
4118         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4119       }
4120     } else {
4121       // Apparently there's more work to do. Let's abort this task;
4122       // it will be restarted and we can hopefully find more things to do.
4123 
4124       if (_cm->verbose_low()) {
4125         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4126                                _worker_id);
4127       }
4128 
4129       set_has_aborted();
4130       statsOnly( ++_aborted_termination );
4131     }
4132   }
4133 
4134   // Mainly for debugging purposes: make sure that a pointer to the
4135   // closure, which was stack-allocated in this frame, doesn't
4136   // escape it by accident.
4137   set_cm_oop_closure(NULL);
4138   double end_time_ms = os::elapsedVTime() * 1000.0;
4139   double elapsed_time_ms = end_time_ms - _start_time_ms;
4140   // Update the step history.
4141   _step_times_ms.add(elapsed_time_ms);
4142 
4143   if (has_aborted()) {
4144     // The task was aborted for some reason.
4145 
4146     statsOnly( ++_aborted );
4147 
4148     if (_has_timed_out) {
4149       double diff_ms = elapsed_time_ms - _time_target_ms;
4150       // Keep statistics of how well we did with respect to hitting
4151       // our target only if we actually timed out (if we aborted for
4152       // other reasons, then the results might get skewed).
4153       _marking_step_diffs_ms.add(diff_ms);
4154     }
4155 
4156     if (_cm->has_overflown()) {
4157       // This is the interesting one. We aborted because a global
4158       // overflow was raised. This means we have to restart the
4159       // marking phase and start iterating over regions. However, in
4160       // order to do this we have to make sure that all tasks stop
4161       // what they are doing and re-initialize in a safe manner. We
4162       // will achieve this with the use of two barrier sync points.
4163 
4164       if (_cm->verbose_low()) {
4165         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4166       }
4167 
4168       if (!is_serial) {
4169         // We only need to enter the sync barrier if being called
4170         // from a parallel context
4171         _cm->enter_first_sync_barrier(_worker_id);
4172 
4173         // When we exit this sync barrier we know that all tasks have
4174         // stopped doing marking work. So, it's now safe to
4175         // re-initialize our data structures. At the end of this method,
4176         // task 0 will clear the global data structures.
4177       }
4178 
4179       statsOnly( ++_aborted_overflow );
4180 
4181       // We clear the local state of this task...
4182       clear_region_fields();
4183 
4184       if (!is_serial) {
4185         // ...and enter the second barrier.
4186         _cm->enter_second_sync_barrier(_worker_id);
4187       }
4188       // At this point, if we're in the concurrent phase of
4189       // marking, everything has been re-initialized and we're
4190       // ready to restart.
4191     }
4192 
4193     if (_cm->verbose_low()) {
4194       gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4195                              "elapsed = %1.2lfms <<<<<<<<<<",
4196                              _worker_id, _time_target_ms, elapsed_time_ms);
4197       if (_cm->has_aborted()) {
4198         gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4199                                _worker_id);
4200       }
4201     }
4202   } else {
4203     if (_cm->verbose_low()) {
4204       gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4205                              "elapsed = %1.2lfms <<<<<<<<<<",
4206                              _worker_id, _time_target_ms, elapsed_time_ms);
4207     }
4208   }
4209 
4210   _claimed = false;
4211 }
4212 
4213 CMTask::CMTask(uint worker_id,
4214                ConcurrentMark* cm,
4215                size_t* marked_bytes,
4216                BitMap* card_bm,
4217                CMTaskQueue* task_queue,
4218                CMTaskQueueSet* task_queues)
4219   : _g1h(G1CollectedHeap::heap()),
4220     _worker_id(worker_id), _cm(cm),
4221     _claimed(false),
4222     _nextMarkBitMap(NULL), _hash_seed(17),
4223     _task_queue(task_queue),
4224     _task_queues(task_queues),
4225     _cm_oop_closure(NULL),
4226     _marked_bytes_array(marked_bytes),
4227     _card_bm(card_bm) {
4228   guarantee(task_queue != NULL, "invariant");
4229   guarantee(task_queues != NULL, "invariant");
4230 
4231   statsOnly( _clock_due_to_scanning = 0;
4232              _clock_due_to_marking  = 0 );
4233 
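  // Seed the step-time difference statistics so that the very first
  // prediction is not based on an empty sequence (0.5ms is just a seed
  // value).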
4234   _marking_step_diffs_ms.add(0.5);
4235 }
4236 
4237 // These are formatting macros that are used below to ensure
4238 // consistent formatting. The *_H_* versions are used to format the
4239 // header for a particular value and they should be kept consistent
4240 // with the corresponding value macro. Also note that most of the macros
4241 // add the necessary white space (as a prefix), which makes them a bit
4242 // easier to compose.
4243 
4244 // All the output lines are prefixed with this string so that they can
4245 // be identified easily in a large log file.
4246 #define G1PPRL_LINE_PREFIX            "###"
4247 
4248 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
4249 #ifdef _LP64
4250 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4251 #else // _LP64
4252 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4253 #endif // _LP64
4254 
4255 // For per-region info
4256 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4257 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4258 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
4259 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4260 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4261 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4262 
4263 // For summary info
4264 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
4265 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
4266 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
4267 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
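
// Taken together these produce a "###"-prefixed header followed by one
// line per region, roughly:
//   ### <type> <address-range> <used> <prev-live> <next-live> <gc-eff> <remset> <code-roots>
// (the placeholders above stand for the actual formatted values).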
4268 
4269 G1PrintRegionLivenessInfoClosure::
4270 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4271   : _out(out),
4272     _total_used_bytes(0), _total_capacity_bytes(0),
4273     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4274     _hum_used_bytes(0), _hum_capacity_bytes(0),
4275     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
4276     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
4277   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4278   MemRegion g1_reserved = g1h->g1_reserved();
4279   double now = os::elapsedTime();
4280 
4281   // Print the header of the output.
4282   _out->cr();
4283   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4284   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4285                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4286                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4287                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4288                  HeapRegion::GrainBytes);
4289   _out->print_cr(G1PPRL_LINE_PREFIX);
4290   _out->print_cr(G1PPRL_LINE_PREFIX
4291                 G1PPRL_TYPE_H_FORMAT
4292                 G1PPRL_ADDR_BASE_H_FORMAT
4293                 G1PPRL_BYTE_H_FORMAT
4294                 G1PPRL_BYTE_H_FORMAT
4295                 G1PPRL_BYTE_H_FORMAT
4296                 G1PPRL_DOUBLE_H_FORMAT
4297                 G1PPRL_BYTE_H_FORMAT
4298                 G1PPRL_BYTE_H_FORMAT,
4299                 "type", "address-range",
4300                 "used", "prev-live", "next-live", "gc-eff",
4301                 "remset", "code-roots");
4302   _out->print_cr(G1PPRL_LINE_PREFIX
4303                 G1PPRL_TYPE_H_FORMAT
4304                 G1PPRL_ADDR_BASE_H_FORMAT
4305                 G1PPRL_BYTE_H_FORMAT
4306                 G1PPRL_BYTE_H_FORMAT
4307                 G1PPRL_BYTE_H_FORMAT
4308                 G1PPRL_DOUBLE_H_FORMAT
4309                 G1PPRL_BYTE_H_FORMAT
4310                 G1PPRL_BYTE_H_FORMAT,
4311                 "", "",
4312                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4313                 "(bytes)", "(bytes)");
4314 }
4315 
4316 // It takes as a parameter a pointer to one of the _hum_* fields, deduces
4317 // the corresponding value for a region in a humongous region
4318 // series (either the region size, or what's left if the _hum_* field
4319 // is < the region size), and updates the _hum_* field accordingly.
4320 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4321   size_t bytes = 0;
4322   // The > 0 check is to deal with the prev and next live bytes which
4323   // could be 0.
4324   if (*hum_bytes > 0) {
4325     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4326     *hum_bytes -= bytes;
4327   }
4328   return bytes;
4329 }
4330 
4331 // It deduces the values for a region in a humongous region series
4332 // from the _hum_* fields and updates those accordingly. It assumes
4333 // that the _hum_* fields have already been set up from the "starts
4334 // humongous" region and that we visit the regions in address order.
4335 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4336                                                      size_t* capacity_bytes,
4337                                                      size_t* prev_live_bytes,
4338                                                      size_t* next_live_bytes) {
4339   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4340   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4341   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4342   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4343   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4344 }
4345 
4346 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4347   const char* type       = r->get_type_str();
4348   HeapWord* bottom       = r->bottom();
4349   HeapWord* end          = r->end();
4350   size_t capacity_bytes  = r->capacity();
4351   size_t used_bytes      = r->used();
4352   size_t prev_live_bytes = r->live_bytes();
4353   size_t next_live_bytes = r->next_live_bytes();
4354   double gc_eff          = r->gc_efficiency();
4355   size_t remset_bytes    = r->rem_set()->mem_size();
4356   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4357 
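  // For humongous objects the "starts humongous" region carries the
  // statistics for the whole series, so stash them in the _hum_* fields
  // and hand them back out one region's worth at a time (see get_hum_bytes).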
4358   if (r->is_starts_humongous()) {
4359     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4360            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4361            "they should have been zeroed after the last time we used them");
4362     // Set up the _hum_* fields.
4363     _hum_capacity_bytes  = capacity_bytes;
4364     _hum_used_bytes      = used_bytes;
4365     _hum_prev_live_bytes = prev_live_bytes;
4366     _hum_next_live_bytes = next_live_bytes;
4367     get_hum_bytes(&used_bytes, &capacity_bytes,
4368                   &prev_live_bytes, &next_live_bytes);
4369     end = bottom + HeapRegion::GrainWords;
4370   } else if (r->is_continues_humongous()) {
4371     get_hum_bytes(&used_bytes, &capacity_bytes,
4372                   &prev_live_bytes, &next_live_bytes);
4373     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4374   }
4375 
4376   _total_used_bytes      += used_bytes;
4377   _total_capacity_bytes  += capacity_bytes;
4378   _total_prev_live_bytes += prev_live_bytes;
4379   _total_next_live_bytes += next_live_bytes;
4380   _total_remset_bytes    += remset_bytes;
4381   _total_strong_code_roots_bytes += strong_code_roots_bytes;
4382 
4383   // Print a line for this particular region.
4384   _out->print_cr(G1PPRL_LINE_PREFIX
4385                  G1PPRL_TYPE_FORMAT
4386                  G1PPRL_ADDR_BASE_FORMAT
4387                  G1PPRL_BYTE_FORMAT
4388                  G1PPRL_BYTE_FORMAT
4389                  G1PPRL_BYTE_FORMAT
4390                  G1PPRL_DOUBLE_FORMAT
4391                  G1PPRL_BYTE_FORMAT
4392                  G1PPRL_BYTE_FORMAT,
4393                  type, p2i(bottom), p2i(end),
4394                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4395                  remset_bytes, strong_code_roots_bytes);
4396 
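  // Returning false tells the region iteration to keep going over the
  // remaining regions.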
4397   return false;
4398 }
4399 
4400 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4401   // Add the static memory usage to the remembered set sizes.
4402   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
4403   // Print the footer of the output.
4404   _out->print_cr(G1PPRL_LINE_PREFIX);
4405   _out->print_cr(G1PPRL_LINE_PREFIX
4406                  " SUMMARY"
4407                  G1PPRL_SUM_MB_FORMAT("capacity")
4408                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4409                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4410                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
4411                  G1PPRL_SUM_MB_FORMAT("remset")
4412                  G1PPRL_SUM_MB_FORMAT("code-roots"),
4413                  bytes_to_mb(_total_capacity_bytes),
4414                  bytes_to_mb(_total_used_bytes),
4415                  perc(_total_used_bytes, _total_capacity_bytes),
4416                  bytes_to_mb(_total_prev_live_bytes),
4417                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4418                  bytes_to_mb(_total_next_live_bytes),
4419                  perc(_total_next_live_bytes, _total_capacity_bytes),
4420                  bytes_to_mb(_total_remset_bytes),
4421                  bytes_to_mb(_total_strong_code_roots_bytes));
4422   _out->cr();
4423 }