1 /*
   2  * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMark.inline.hpp"
  30 #include "gc/g1/concurrentMarkThread.inline.hpp"
  31 #include "gc/g1/g1CollectedHeap.inline.hpp"
  32 #include "gc/g1/g1CollectorPolicy.hpp"
  33 #include "gc/g1/g1CollectorState.hpp"
  34 #include "gc/g1/g1ErgoVerbose.hpp"
  35 #include "gc/g1/g1Log.hpp"
  36 #include "gc/g1/g1OopClosures.inline.hpp"
  37 #include "gc/g1/g1RemSet.hpp"
  38 #include "gc/g1/g1StringDedup.hpp"
  39 #include "gc/g1/heapRegion.inline.hpp"
  40 #include "gc/g1/heapRegionManager.inline.hpp"
  41 #include "gc/g1/heapRegionRemSet.hpp"
  42 #include "gc/g1/heapRegionSet.inline.hpp"
  43 #include "gc/g1/suspendibleThreadSet.hpp"
  44 #include "gc/shared/gcId.hpp"
  45 #include "gc/shared/gcTimer.hpp"
  46 #include "gc/shared/gcTrace.hpp"
  47 #include "gc/shared/gcTraceTime.hpp"
  48 #include "gc/shared/genOopClosures.inline.hpp"
  49 #include "gc/shared/referencePolicy.hpp"
  50 #include "gc/shared/strongRootsScope.hpp"
  51 #include "gc/shared/taskqueue.inline.hpp"
  52 #include "gc/shared/vmGCOperations.hpp"
  53 #include "memory/allocation.hpp"
  54 #include "memory/resourceArea.hpp"
  55 #include "oops/oop.inline.hpp"
  56 #include "runtime/atomic.inline.hpp"
  57 #include "runtime/handles.inline.hpp"
  58 #include "runtime/java.hpp"
  59 #include "runtime/prefetch.inline.hpp"
  60 #include "services/memTracker.hpp"
  61 
  62 // Concurrent marking bit map wrapper
  63 
  64 CMBitMapRO::CMBitMapRO(int shifter) :
  65   _bm(),
  66   _shifter(shifter) {
  67   _bmStartWord = 0;
  68   _bmWordSize = 0;
  69 }
  70 
  71 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  72                                                const HeapWord* limit) const {
  73   // First we must round addr *up* to a possible object boundary.
  74   addr = (HeapWord*)align_size_up((intptr_t)addr,
  75                                   HeapWordSize << _shifter);
  76   size_t addrOffset = heapWordToOffset(addr);
  77   if (limit == NULL) {
  78     limit = _bmStartWord + _bmWordSize;
  79   }
  80   size_t limitOffset = heapWordToOffset(limit);
  81   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  82   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  83   assert(nextAddr >= addr, "get_next_one postcondition");
  84   assert(nextAddr == limit || isMarked(nextAddr),
  85          "get_next_one postcondition");
  86   return nextAddr;
  87 }
  88 
  89 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
  90                                                  const HeapWord* limit) const {
  91   size_t addrOffset = heapWordToOffset(addr);
  92   if (limit == NULL) {
  93     limit = _bmStartWord + _bmWordSize;
  94   }
  95   size_t limitOffset = heapWordToOffset(limit);
  96   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  97   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  98   assert(nextAddr >= addr, "get_next_zero postcondition");
  99   assert(nextAddr == limit || !isMarked(nextAddr),
 100          "get_next_zero postcondition");
 101   return nextAddr;
 102 }
 103 
 104 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 105   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 106   return (int) (diff >> _shifter);
 107 }
 108 
 109 #ifndef PRODUCT
 110 bool CMBitMapRO::covers(MemRegion heap_rs) const {
 111   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 112   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
 113          "size inconsistency");
 114   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
 115          _bmWordSize  == heap_rs.word_size();
 116 }
 117 #endif
 118 
 119 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
 120   _bm.print_on_error(st, prefix);
 121 }
 122 
 123 size_t CMBitMap::compute_size(size_t heap_size) {
 124   return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
 125 }
 126 
 127 size_t CMBitMap::mark_distance() {
 128   return MinObjAlignmentInBytes * BitsPerByte;
 129 }
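
     // Illustration (assuming the default 8-byte minimum object alignment):
     // each bitmap bit then covers 8 heap bytes, so compute_size() returns
     // heap_size / 64 rounded up to the allocation alignment - e.g. a 16 MB
     // mark bitmap for a 1 GB heap.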
 130 
 131 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 132   _bmStartWord = heap.start();
 133   _bmWordSize = heap.word_size();
 134 
 135   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
 136   _bm.set_size(_bmWordSize >> _shifter);
 137 
 138   storage->set_mapping_changed_listener(&_listener);
 139 }
 140 
 141 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 142   if (zero_filled) {
 143     return;
 144   }
 145   // We need to clear the bitmap on commit, removing any existing information.
 146   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 147   _bm->clearRange(mr);
 148 }
 149 
 150 // Closure used for clearing the given mark bitmap.
 151 class ClearBitmapHRClosure : public HeapRegionClosure {
 152  private:
 153   ConcurrentMark* _cm;
 154   CMBitMap* _bitmap;
 155   bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 156  public:
 157   ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
 158     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
 159   }
 160 
 161   virtual bool doHeapRegion(HeapRegion* r) {
 162     size_t const chunk_size_in_words = M / HeapWordSize;
 163 
 164     HeapWord* cur = r->bottom();
 165     HeapWord* const end = r->end();
 166 
 167     while (cur < end) {
 168       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 169       _bitmap->clearRange(mr);
 170 
 171       cur += chunk_size_in_words;
 172 
 173       // Abort iteration if after yielding the marking has been aborted.
 174       if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
 175         return true;
 176       }
 177       // Repeat the asserts from before the start of the closure. We do them
 178       // as asserts here to minimize their overhead in product builds. However, we
 179       // keep them as guarantees at the beginning / end of the bitmap
 180       // clearing so that product builds still get some checking.
 181       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
 182       assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
 183     }
 184 
 185     return false;
 186   }
 187 };
 188 
 189 class ParClearNextMarkBitmapTask : public AbstractGangTask {
 190   ClearBitmapHRClosure* _cl;
 191   HeapRegionClaimer     _hrclaimer;
 192   bool                  _suspendible; // If the task is suspendible, workers must join the STS.
 193 
 194 public:
 195   ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
 196       AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}
 197 
 198   void work(uint worker_id) {
 199     SuspendibleThreadSetJoiner sts_join(_suspendible);
 200     G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
 201   }
 202 };
 203 
 204 void CMBitMap::clearAll() {
 205   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 206   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
 207   uint n_workers = g1h->workers()->active_workers();
 208   ParClearNextMarkBitmapTask task(&cl, n_workers, false);
 209   g1h->workers()->run_task(&task);
 210   guarantee(cl.complete(), "Must have completed iteration.");
 211   return;
 212 }
 213 
 214 void CMBitMap::markRange(MemRegion mr) {
 215   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 216   assert(!mr.is_empty(), "unexpected empty region");
 217   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 218           ((HeapWord *) mr.end())),
 219          "markRange memory region end is not card aligned");
 220   // convert address range into offset range
 221   _bm.at_put_range(heapWordToOffset(mr.start()),
 222                    heapWordToOffset(mr.end()), true);
 223 }
 224 
 225 void CMBitMap::clearRange(MemRegion mr) {
 226   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 227   assert(!mr.is_empty(), "unexpected empty region");
 228   // convert address range into offset range
 229   _bm.at_put_range(heapWordToOffset(mr.start()),
 230                    heapWordToOffset(mr.end()), false);
 231 }
 232 
 233 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 234                                             HeapWord* end_addr) {
 235   HeapWord* start = getNextMarkedWordAddress(addr);
 236   start = MIN2(start, end_addr);
 237   HeapWord* end   = getNextUnmarkedWordAddress(start);
 238   end = MIN2(end, end_addr);
 239   assert(start <= end, "Consistency check");
 240   MemRegion mr(start, end);
 241   if (!mr.is_empty()) {
 242     clearRange(mr);
 243   }
 244   return mr;
 245 }
 246 
 247 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 248   _base(NULL), _cm(cm)
 249 {}
 250 
 251 bool CMMarkStack::allocate(size_t capacity) {
 252   // allocate a stack of the requisite depth
 253   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 254   if (!rs.is_reserved()) {
 255     warning("ConcurrentMark MarkStack allocation failure");
 256     return false;
 257   }
 258   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 259   if (!_virtual_space.initialize(rs, rs.size())) {
 260     warning("ConcurrentMark MarkStack backing store failure");
 261     // Release the virtual memory reserved for the marking stack
 262     rs.release();
 263     return false;
 264   }
 265   assert(_virtual_space.committed_size() == rs.size(),
 266          "Didn't reserve backing store for all of ConcurrentMark stack?");
 267   _base = (oop*) _virtual_space.low();
 268   setEmpty();
 269   _capacity = (jint) capacity;
 270   _saved_index = -1;
 271   _should_expand = false;
 272   return true;
 273 }
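
     // For example, allocate(4*M) on a 64-bit VM reserves
     // 4*M * sizeof(oop) = 32 MB of backing store for the global mark stack.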
 274 
 275 void CMMarkStack::expand() {
 276   // Called during remark if we've overflowed the marking stack during marking.
 277   assert(isEmpty(), "stack should have been emptied while handling overflow");
 278   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 279   // Clear expansion flag
 280   _should_expand = false;
 281   if (_capacity == (jint) MarkStackSizeMax) {
 282     if (PrintGCDetails && Verbose) {
 283       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
 284     }
 285     return;
 286   }
 287   // Double capacity if possible
 288   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
 289   // Do not give up the existing stack until we have managed to
 290   // reserve the doubled capacity that we want.
 291   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 292                                                            sizeof(oop)));
 293   if (rs.is_reserved()) {
 294     // Release the backing store associated with old stack
 295     _virtual_space.release();
 296     // Reinitialize virtual space for new stack
 297     if (!_virtual_space.initialize(rs, rs.size())) {
 298       fatal("Not enough swap for expanded marking stack capacity");
 299     }
 300     _base = (oop*)(_virtual_space.low());
 301     _index = 0;
 302     _capacity = new_capacity;
 303   } else {
 304     if (PrintGCDetails && Verbose) {
 305       // Failed to double capacity, continue;
 306       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
 307                           SIZE_FORMAT "K to " SIZE_FORMAT "K",
 308                           (size_t) (_capacity / K), (size_t) (new_capacity / K));
 309     }
 310   }
 311 }
 312 
 313 void CMMarkStack::set_should_expand() {
 314   // If we're resetting the marking state because of a
 315   // marking stack overflow, record that we should, if
 316   // possible, expand the stack.
 317   _should_expand = _cm->has_overflown();
 318 }
 319 
 320 CMMarkStack::~CMMarkStack() {
 321   if (_base != NULL) {
 322     _base = NULL;
 323     _virtual_space.release();
 324   }
 325 }
 326 
 327 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 328   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 329   jint start = _index;
 330   jint next_index = start + n;
 331   if (next_index > _capacity) {
 332     _overflow = true;
 333     return;
 334   }
 335   // Otherwise.
 336   _index = next_index;
 337   for (int i = 0; i < n; i++) {
 338     int ind = start + i;
 339     assert(ind < _capacity, "By overflow test above.");
 340     _base[ind] = ptr_arr[i];
 341   }
 342 }
 343 
 344 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 345   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 346   jint index = _index;
 347   if (index == 0) {
 348     *n = 0;
 349     return false;
 350   } else {
 351     int k = MIN2(max, index);
 352     jint  new_ind = index - k;
 353     for (int j = 0; j < k; j++) {
 354       ptr_arr[j] = _base[new_ind + j];
 355     }
 356     _index = new_ind;
 357     *n = k;
 358     return true;
 359   }
 360 }
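
     // Note: both par_push_arr() and par_pop_arr() serialize on
     // ParGCRareEvent_lock; bulk transfers between a task's local buffer and
     // this global stack are expected to be much rarer than ordinary per-task
     // queue operations.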
 361 
 362 void CMMarkStack::note_start_of_gc() {
 363   assert(_saved_index == -1,
 364          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 365   _saved_index = _index;
 366 }
 367 
 368 void CMMarkStack::note_end_of_gc() {
 369   // This is intentionally a guarantee, instead of an assert. If we
 370   // accidentally add something to the mark stack during GC, it
 371   // will be a correctness issue, so it's better if we crash. We'll
 372   // only check this once per GC anyway, so it won't be a performance
 373   // issue in any way.
 374   guarantee(_saved_index == _index,
 375             "saved index: %d index: %d", _saved_index, _index);
 376   _saved_index = -1;
 377 }
 378 
 379 CMRootRegions::CMRootRegions() :
 380   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 381   _should_abort(false),  _next_survivor(NULL) { }
 382 
 383 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 384   _young_list = g1h->young_list();
 385   _cm = cm;
 386 }
 387 
 388 void CMRootRegions::prepare_for_scan() {
 389   assert(!scan_in_progress(), "pre-condition");
 390 
 391   // Currently, only survivors can be root regions.
 392   assert(_next_survivor == NULL, "pre-condition");
 393   _next_survivor = _young_list->first_survivor_region();
 394   _scan_in_progress = (_next_survivor != NULL);
 395   _should_abort = false;
 396 }
 397 
 398 HeapRegion* CMRootRegions::claim_next() {
 399   if (_should_abort) {
 400     // If someone has set the should_abort flag, we return NULL to
 401     // force the caller to bail out of their loop.
 402     return NULL;
 403   }
 404 
 405   // Currently, only survivors can be root regions.
 406   HeapRegion* res = _next_survivor;
 407   if (res != NULL) {
 408     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 409     // Read it again in case it changed while we were waiting for the lock.
 410     res = _next_survivor;
 411     if (res != NULL) {
 412       if (res == _young_list->last_survivor_region()) {
 413         // We just claimed the last survivor so store NULL to indicate
 414         // that we're done.
 415         _next_survivor = NULL;
 416       } else {
 417         _next_survivor = res->get_next_young_region();
 418       }
 419     } else {
 420       // Someone else claimed the last survivor while we were trying
 421       // to take the lock, so there is nothing else to do.
 422     }
 423   }
 424   assert(res == NULL || res->is_survivor(), "post-condition");
 425 
 426   return res;
 427 }
 428 
 429 void CMRootRegions::scan_finished() {
 430   assert(scan_in_progress(), "pre-condition");
 431 
 432   // Currently, only survivors can be root regions.
 433   if (!_should_abort) {
 434     assert(_next_survivor == NULL, "we should have claimed all survivors");
 435   }
 436   _next_survivor = NULL;
 437 
 438   {
 439     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 440     _scan_in_progress = false;
 441     RootRegionScan_lock->notify_all();
 442   }
 443 }
 444 
 445 bool CMRootRegions::wait_until_scan_finished() {
 446   if (!scan_in_progress()) return false;
 447 
 448   {
 449     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 450     while (scan_in_progress()) {
 451       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 452     }
 453   }
 454   return true;
 455 }
 456 
 457 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 458 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 459 #endif // _MSC_VER
 460 
 461 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 462   return MAX2((n_par_threads + 2) / 4, 1U);
 463 }
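
     // For example, the scaling above turns ParallelGCThreads = 8 into
     // (8 + 2) / 4 = 2 marking threads, while the MAX2 keeps at least one
     // marking thread for very small ParallelGCThreads values.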
 464 
 465 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 466   _g1h(g1h),
 467   _markBitMap1(),
 468   _markBitMap2(),
 469   _parallel_marking_threads(0),
 470   _max_parallel_marking_threads(0),
 471   _sleep_factor(0.0),
 472   _marking_task_overhead(1.0),
 473   _cleanup_sleep_factor(0.0),
 474   _cleanup_task_overhead(1.0),
 475   _cleanup_list("Cleanup List"),
 476   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 477   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
 478             CardTableModRefBS::card_shift,
 479             false /* in_resource_area*/),
 480 
 481   _prevMarkBitMap(&_markBitMap1),
 482   _nextMarkBitMap(&_markBitMap2),
 483 
 484   _markStack(this),
 485   // _finger set in set_non_marking_state
 486 
 487   _max_worker_id(ParallelGCThreads),
 488   // _active_tasks set in set_non_marking_state
 489   // _tasks set inside the constructor
 490   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 491   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 492 
 493   _has_overflown(false),
 494   _concurrent(false),
 495   _has_aborted(false),
 496   _restart_for_overflow(false),
 497   _concurrent_marking_in_progress(false),
 498 
 499   // _verbose_level set below
 500 
 501   _init_times(),
 502   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 503   _cleanup_times(),
 504   _total_counting_time(0.0),
 505   _total_rs_scrub_time(0.0),
 506 
 507   _parallel_workers(NULL),
 508 
 509   _count_card_bitmaps(NULL),
 510   _count_marked_bytes(NULL),
 511   _completed_initialization(false) {
 512   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 513   if (verbose_level < no_verbose) {
 514     verbose_level = no_verbose;
 515   }
 516   if (verbose_level > high_verbose) {
 517     verbose_level = high_verbose;
 518   }
 519   _verbose_level = verbose_level;
 520 
 521   if (verbose_low()) {
 522     gclog_or_tty->print_cr("[global] init, heap start = " PTR_FORMAT ", "
 523                            "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
 524   }
 525 
 526   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 527   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 528 
 529   // Create & start a ConcurrentMark thread.
 530   _cmThread = new ConcurrentMarkThread(this);
 531   assert(cmThread() != NULL, "CM Thread should have been created");
 532   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 533   if (_cmThread->osthread() == NULL) {
 534       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 535   }
 536 
 537   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 538   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 539   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 540 
 541   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 542   satb_qs.set_buffer_size(G1SATBBufferSize);
 543 
 544   _root_regions.init(_g1h, this);
 545 
 546   if (ConcGCThreads > ParallelGCThreads) {
 547     warning("Can't have more ConcGCThreads (%u) "
 548             "than ParallelGCThreads (%u).",
 549             ConcGCThreads, ParallelGCThreads);
 550     return;
 551   }
 552   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 553     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 554     // if both are set
 555     _sleep_factor             = 0.0;
 556     _marking_task_overhead    = 1.0;
 557   } else if (G1MarkingOverheadPercent > 0) {
 558     // We will calculate the number of parallel marking threads based
 559     // on a target overhead with respect to the soft real-time goal
 560     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 561     double overall_cm_overhead =
 562       (double) MaxGCPauseMillis * marking_overhead /
 563       (double) GCPauseIntervalMillis;
 564     double cpu_ratio = 1.0 / (double) os::processor_count();
 565     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 566     double marking_task_overhead =
 567       overall_cm_overhead / marking_thread_num *
 568                                               (double) os::processor_count();
 569     double sleep_factor =
 570                        (1.0 - marking_task_overhead) / marking_task_overhead;
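         // Worked example with assumed (non-default) settings: with
         // G1MarkingOverheadPercent = 10, MaxGCPauseMillis = 200,
         // GCPauseIntervalMillis = 1000 and 8 processors, we get
         // overall_cm_overhead = 200 * 0.10 / 1000 = 0.02, cpu_ratio = 0.125,
         // marking_thread_num = ceil(0.02 / 0.125) = 1, marking_task_overhead
         // = 0.02 / 1 * 8 = 0.16 and sleep_factor = (1 - 0.16) / 0.16 = 5.25.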
 571 
 572     FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
 573     _sleep_factor             = sleep_factor;
 574     _marking_task_overhead    = marking_task_overhead;
 575   } else {
 576     // Calculate the number of parallel marking threads by scaling
 577     // the number of parallel GC threads.
 578     uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
 579     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 580     _sleep_factor             = 0.0;
 581     _marking_task_overhead    = 1.0;
 582   }
 583 
 584   assert(ConcGCThreads > 0, "Should have been set");
 585   _parallel_marking_threads = ConcGCThreads;
 586   _max_parallel_marking_threads = _parallel_marking_threads;
 587 
 588   if (parallel_marking_threads() > 1) {
 589     _cleanup_task_overhead = 1.0;
 590   } else {
 591     _cleanup_task_overhead = marking_task_overhead();
 592   }
 593   _cleanup_sleep_factor =
 594                    (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
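       // The marking sleep factor is applied in CMConcurrentMarkingTask below:
       // a worker that just spent t seconds of vtime in a marking step sleeps
       // for roughly t * sleep_factor seconds, which keeps its CPU share close
       // to the requested task overhead.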
 595 
 596 #if 0
 597   gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 598   gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 599   gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 600   gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 601   gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 602 #endif
 603 
 604   _parallel_workers = new WorkGang("G1 Marker",
 605        _max_parallel_marking_threads, false, true);
 606   if (_parallel_workers == NULL) {
 607     vm_exit_during_initialization("Failed necessary allocation.");
 608   } else {
 609     _parallel_workers->initialize_workers();
 610   }
 611 
 612   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 613     size_t mark_stack_size =
 614       MIN2(MarkStackSizeMax,
 615           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 616     // Verify that the calculated value for MarkStackSize is in range.
 617     // It would be nice to use the private utility routine from Arguments.
 618     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 619       warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 620               "must be between 1 and " SIZE_FORMAT,
 621               mark_stack_size, MarkStackSizeMax);
 622       return;
 623     }
 624     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 625   } else {
 626     // Verify MarkStackSize is in range.
 627     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 628       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 629         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 630           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 631                   "must be between 1 and " SIZE_FORMAT,
 632                   MarkStackSize, MarkStackSizeMax);
 633           return;
 634         }
 635       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 636         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 637           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 638                   " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 639                   MarkStackSize, MarkStackSizeMax);
 640           return;
 641         }
 642       }
 643     }
 644   }
 645 
 646   if (!_markStack.allocate(MarkStackSize)) {
 647     warning("Failed to allocate CM marking stack");
 648     return;
 649   }
 650 
 651   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 652   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 653 
 654   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 655   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 656 
 657   BitMap::idx_t card_bm_size = _card_bm.size();
 658 
 659   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 660   _active_tasks = _max_worker_id;
 661 
 662   uint max_regions = _g1h->max_regions();
 663   for (uint i = 0; i < _max_worker_id; ++i) {
 664     CMTaskQueue* task_queue = new CMTaskQueue();
 665     task_queue->initialize();
 666     _task_queues->register_queue(i, task_queue);
 667 
 668     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 669     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 670 
 671     _tasks[i] = new CMTask(i, this,
 672                            _count_marked_bytes[i],
 673                            &_count_card_bitmaps[i],
 674                            task_queue, _task_queues);
 675 
 676     _accum_task_vtime[i] = 0.0;
 677   }
 678 
 679   // Calculate the card number for the bottom of the heap. Used
 680   // in biasing indexes into the accounting card bitmaps.
 681   _heap_bottom_card_num =
 682     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 683                                 CardTableModRefBS::card_shift);
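       // For example, with the usual 512-byte cards (card_shift == 9), a heap
       // reserved at 0x00000006c0000000 gets a bottom card number of
       // 0x00000006c0000000 >> 9 == 0x3600000.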
 684 
 685   // Clear all the liveness counting data
 686   clear_all_count_data();
 687 
 688   // so that the call below can read a sensible value
 689   _heap_start = g1h->reserved_region().start();
 690   set_non_marking_state();
 691   _completed_initialization = true;
 692 }
 693 
 694 void ConcurrentMark::reset() {
 695   // Starting values for these two. This should be called in a STW
 696   // phase.
 697   MemRegion reserved = _g1h->g1_reserved();
 698   _heap_start = reserved.start();
 699   _heap_end   = reserved.end();
 700 
 701   // Separated the asserts so that we know which one fires.
 702   assert(_heap_start != NULL, "heap bounds should look ok");
 703   assert(_heap_end != NULL, "heap bounds should look ok");
 704   assert(_heap_start < _heap_end, "heap bounds should look ok");
 705 
 706   // Reset all the marking data structures and any necessary flags
 707   reset_marking_state();
 708 
 709   if (verbose_low()) {
 710     gclog_or_tty->print_cr("[global] resetting");
 711   }
 712 
 713   // We do reset all of them, since different phases will use
 714   // different numbers of active threads. So, it's easiest to have all
 715   // of them ready.
 716   for (uint i = 0; i < _max_worker_id; ++i) {
 717     _tasks[i]->reset(_nextMarkBitMap);
 718   }
 719 
 720   // We need this to make sure that the flag is on during the evacuation
 721   // pause that has the initial mark piggy-backed on it.
 722   set_concurrent_marking_in_progress();
 723 }
 724 
 725 
 726 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 727   _markStack.set_should_expand();
 728   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 729   if (clear_overflow) {
 730     clear_has_overflown();
 731   } else {
 732     assert(has_overflown(), "pre-condition");
 733   }
 734   _finger = _heap_start;
 735 
 736   for (uint i = 0; i < _max_worker_id; ++i) {
 737     CMTaskQueue* queue = _task_queues->queue(i);
 738     queue->set_empty();
 739   }
 740 }
 741 
 742 void ConcurrentMark::set_concurrency(uint active_tasks) {
 743   assert(active_tasks <= _max_worker_id, "we should not have more");
 744 
 745   _active_tasks = active_tasks;
 746   // Need to update the three data structures below according to the
 747   // number of active threads for this phase.
 748   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 749   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 750   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 751 }
 752 
 753 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 754   set_concurrency(active_tasks);
 755 
 756   _concurrent = concurrent;
 757   // We propagate this to all tasks, not just the active ones.
 758   for (uint i = 0; i < _max_worker_id; ++i)
 759     _tasks[i]->set_concurrent(concurrent);
 760 
 761   if (concurrent) {
 762     set_concurrent_marking_in_progress();
 763   } else {
 764     // We currently assume that the concurrent flag has been set to
 765     // false before we start remark. At this point we should also be
 766     // in a STW phase.
 767     assert(!concurrent_marking_in_progress(), "invariant");
 768     assert(out_of_regions(),
 769            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 770            p2i(_finger), p2i(_heap_end));
 771   }
 772 }
 773 
 774 void ConcurrentMark::set_non_marking_state() {
 775   // We set the global marking state to some default values when we're
 776   // not doing marking.
 777   reset_marking_state();
 778   _active_tasks = 0;
 779   clear_concurrent_marking_in_progress();
 780 }
 781 
 782 ConcurrentMark::~ConcurrentMark() {
 783   // The ConcurrentMark instance is never freed.
 784   ShouldNotReachHere();
 785 }
 786 
 787 void ConcurrentMark::clearNextBitmap() {
 788   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 789 
 790   // Make sure that the concurrent mark thread still appears to be in
 791   // the current cycle.
 792   guarantee(cmThread()->during_cycle(), "invariant");
 793 
 794   // We are finishing up the current cycle by clearing the next
 795   // marking bitmap and getting it ready for the next cycle. During
 796   // this time no other cycle can start. So, let's make sure that this
 797   // is the case.
 798   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 799 
 800   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
 801   ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
 802   _parallel_workers->run_task(&task);
 803 
 804   // Clear the liveness counting data. If the marking has been aborted, the abort()
 805   // call already did that.
 806   if (cl.complete()) {
 807     clear_all_count_data();
 808   }
 809 
 810   // Repeat the asserts from above.
 811   guarantee(cmThread()->during_cycle(), "invariant");
 812   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 813 }
 814 
 815 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 816   CMBitMap* _bitmap;
 817   bool _error;
 818  public:
 819   CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
 820   }
 821 
 822   virtual bool doHeapRegion(HeapRegion* r) {
 823     // This closure can be called concurrently with the mutator, so we must make sure
 824     // that the result of the getNextMarkedWordAddress() call is compared to the
 825     // value passed to it as the limit, in order to detect any marked bits.
 826     // We can use the region's orig_end() both as the limit and as the comparison value,
 827     // since it always contains the "real" end of the region, never changes and
 828     // has no side effects.
 829     // Because of the latter, there is also no problem with the compiler generating
 830     // reloads of the orig_end() call.
 831     HeapWord* end = r->orig_end();
 832     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 833   }
 834 };
 835 
 836 bool ConcurrentMark::nextMarkBitmapIsClear() {
 837   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 838   _g1h->heap_region_iterate(&cl);
 839   return cl.complete();
 840 }
 841 
 842 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 843 public:
 844   bool doHeapRegion(HeapRegion* r) {
 845     if (!r->is_continues_humongous()) {
 846       r->note_start_of_marking();
 847     }
 848     return false;
 849   }
 850 };
 851 
 852 void ConcurrentMark::checkpointRootsInitialPre() {
 853   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 854   G1CollectorPolicy* g1p = g1h->g1_policy();
 855 
 856   _has_aborted = false;
 857 
 858   // Initialize marking structures. This has to be done in a STW phase.
 859   reset();
 860 
 861   // For each region note start of marking.
 862   NoteStartOfMarkHRClosure startcl;
 863   g1h->heap_region_iterate(&startcl);
 864 }
 865 
 866 
 867 void ConcurrentMark::checkpointRootsInitialPost() {
 868   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 869 
 870   // If we force an overflow during remark, the remark operation will
 871   // actually abort and we'll restart concurrent marking. If we always
 872   // force an overflow during remark we'll never actually complete the
 873   // marking phase. So, we initialize this here, at the start of the
 874   // cycle, so that the remaining overflow number will decrease at
 875   // every remark and we'll eventually not need to cause one.
 876   force_overflow_stw()->init();
 877 
 878   // Start Concurrent Marking weak-reference discovery.
 879   ReferenceProcessor* rp = g1h->ref_processor_cm();
 880   // enable ("weak") refs discovery
 881   rp->enable_discovery();
 882   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 883 
 884   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 885   // This is the start of the marking cycle; we expect all
 886   // threads to have SATB queues with active set to false.
 887   satb_mq_set.set_active_all_threads(true, /* new active value */
 888                                      false /* expected_active */);
 889 
 890   _root_regions.prepare_for_scan();
 891 
 892   // update_g1_committed() will be called at the end of an evac pause
 893   // when marking is on. So, it's also called at the end of the
 894   // initial-mark pause to update the heap end, if the heap expands
 895   // during it. No need to call it here.
 896 }
 897 
 898 /*
 899  * Notice that in the next two methods, we actually leave the STS
 900  * during the barrier sync and join it immediately afterwards. If we
 901  * do not do this, the following deadlock can occur: one thread could
 902  * be in the barrier sync code, waiting for the other thread to also
 903  * sync up, whereas another one could be trying to yield, while also
 904  * waiting for the other threads to sync up too.
 905  *
 906  * Note, however, that this code is also used during remark and in
 907  * this case we should not attempt to leave / enter the STS, otherwise
 908  * we'll either hit an assert (debug / fastdebug) or deadlock
 909  * (product). So we should only leave / enter the STS if we are
 910  * operating concurrently.
 911  *
 912  * Because the thread that does the sync barrier has left the STS, it
 913  * is possible for it to be suspended while a Full GC or an evacuation
 914  * pause occurs. This is actually safe, since entering the sync
 915  * barrier is one of the last things do_marking_step() does, and it
 916  * doesn't manipulate any data structures afterwards.
 917  */
 918 
 919 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 920   bool barrier_aborted;
 921 
 922   if (verbose_low()) {
 923     gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
 924   }
 925 
 926   {
 927     SuspendibleThreadSetLeaver sts_leave(concurrent());
 928     barrier_aborted = !_first_overflow_barrier_sync.enter();
 929   }
 930 
 931   // at this point everyone should have synced up and not be doing any
 932   // more work
 933 
 934   if (verbose_low()) {
 935     if (barrier_aborted) {
 936       gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
 937     } else {
 938       gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
 939     }
 940   }
 941 
 942   if (barrier_aborted) {
 943     // If the barrier aborted we ignore the overflow condition and
 944     // just abort the whole marking phase as quickly as possible.
 945     return;
 946   }
 947 
 948   // If we're executing the concurrent phase of marking, reset the marking
 949   // state; otherwise the marking state is reset after reference processing,
 950   // during the remark pause.
 951   // If we reset here as a result of an overflow during the remark we will
 952   // see assertion failures from any subsequent set_concurrency_and_phase()
 953   // calls.
 954   if (concurrent()) {
 955     // let the task associated with worker 0 do this
 956     if (worker_id == 0) {
 957       // task 0 is responsible for clearing the global data structures
 958       // We should be here because of an overflow. During STW we should
 959       // not clear the overflow flag since we rely on it being true when
 960       // we exit this method to abort the pause and restart concurrent
 961       // marking.
 962       reset_marking_state(true /* clear_overflow */);
 963       force_overflow()->update();
 964 
 965       if (G1Log::fine()) {
 966         gclog_or_tty->gclog_stamp();
 967         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 968       }
 969     }
 970   }
 971 
 972   // after this, each task should reset its own data structures and
 973   // then go into the second barrier
 974 }
 975 
 976 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 977   bool barrier_aborted;
 978 
 979   if (verbose_low()) {
 980     gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
 981   }
 982 
 983   {
 984     SuspendibleThreadSetLeaver sts_leave(concurrent());
 985     barrier_aborted = !_second_overflow_barrier_sync.enter();
 986   }
 987 
 988   // at this point everything should be re-initialized and ready to go
 989 
 990   if (verbose_low()) {
 991     if (barrier_aborted) {
 992       gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
 993     } else {
 994       gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
 995     }
 996   }
 997 }
 998 
 999 #ifndef PRODUCT
1000 void ForceOverflowSettings::init() {
1001   _num_remaining = G1ConcMarkForceOverflow;
1002   _force = false;
1003   update();
1004 }
1005 
1006 void ForceOverflowSettings::update() {
1007   if (_num_remaining > 0) {
1008     _num_remaining -= 1;
1009     _force = true;
1010   } else {
1011     _force = false;
1012   }
1013 }
1014 
1015 bool ForceOverflowSettings::should_force() {
1016   if (_force) {
1017     _force = false;
1018     return true;
1019   } else {
1020     return false;
1021   }
1022 }
1023 #endif // !PRODUCT
1024 
1025 class CMConcurrentMarkingTask: public AbstractGangTask {
1026 private:
1027   ConcurrentMark*       _cm;
1028   ConcurrentMarkThread* _cmt;
1029 
1030 public:
1031   void work(uint worker_id) {
1032     assert(Thread::current()->is_ConcurrentGC_thread(),
1033            "this should only be done by a conc GC thread");
1034     ResourceMark rm;
1035 
1036     double start_vtime = os::elapsedVTime();
1037 
1038     {
1039       SuspendibleThreadSetJoiner sts_join;
1040 
1041       assert(worker_id < _cm->active_tasks(), "invariant");
1042       CMTask* the_task = _cm->task(worker_id);
1043       the_task->record_start_time();
1044       if (!_cm->has_aborted()) {
1045         do {
1046           double start_vtime_sec = os::elapsedVTime();
1047           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1048 
1049           the_task->do_marking_step(mark_step_duration_ms,
1050                                     true  /* do_termination */,
1051                                     false /* is_serial*/);
1052 
1053           double end_vtime_sec = os::elapsedVTime();
1054           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1055           _cm->clear_has_overflown();
1056 
1057           _cm->do_yield_check(worker_id);
1058 
1059           jlong sleep_time_ms;
1060           if (!_cm->has_aborted() && the_task->has_aborted()) {
1061             sleep_time_ms =
1062               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1063             {
1064               SuspendibleThreadSetLeaver sts_leave;
1065               os::sleep(Thread::current(), sleep_time_ms, false);
1066             }
1067           }
1068         } while (!_cm->has_aborted() && the_task->has_aborted());
1069       }
1070       the_task->record_end_time();
1071       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1072     }
1073 
1074     double end_vtime = os::elapsedVTime();
1075     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1076   }
1077 
1078   CMConcurrentMarkingTask(ConcurrentMark* cm,
1079                           ConcurrentMarkThread* cmt) :
1080       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1081 
1082   ~CMConcurrentMarkingTask() { }
1083 };
1084 
1085 // Calculates the number of active workers for a concurrent
1086 // phase.
1087 uint ConcurrentMark::calc_parallel_marking_threads() {
1088   uint n_conc_workers = 0;
1089   if (!UseDynamicNumberOfGCThreads ||
1090       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1091        !ForceDynamicNumberOfGCThreads)) {
1092     n_conc_workers = max_parallel_marking_threads();
1093   } else {
1094     n_conc_workers =
1095       AdaptiveSizePolicy::calc_default_active_workers(
1096                                    max_parallel_marking_threads(),
1097                                    1, /* Minimum workers */
1098                                    parallel_marking_threads(),
1099                                    Threads::number_of_non_daemon_threads());
1100     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1101     // that scaling has already gone into "_max_parallel_marking_threads".
1102   }
1103   assert(n_conc_workers > 0, "Always need at least 1");
1104   return n_conc_workers;
1105 }
1106 
1107 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1108   // Currently, only survivors can be root regions.
1109   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1110   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1111 
1112   const uintx interval = PrefetchScanIntervalInBytes;
1113   HeapWord* curr = hr->bottom();
1114   const HeapWord* end = hr->top();
1115   while (curr < end) {
1116     Prefetch::read(curr, interval);
1117     oop obj = oop(curr);
1118     int size = obj->oop_iterate_size(&cl);
1119     assert(size == obj->size(), "sanity");
1120     curr += size;
1121   }
1122 }
1123 
1124 class CMRootRegionScanTask : public AbstractGangTask {
1125 private:
1126   ConcurrentMark* _cm;
1127 
1128 public:
1129   CMRootRegionScanTask(ConcurrentMark* cm) :
1130     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1131 
1132   void work(uint worker_id) {
1133     assert(Thread::current()->is_ConcurrentGC_thread(),
1134            "this should only be done by a conc GC thread");
1135 
1136     CMRootRegions* root_regions = _cm->root_regions();
1137     HeapRegion* hr = root_regions->claim_next();
1138     while (hr != NULL) {
1139       _cm->scanRootRegion(hr, worker_id);
1140       hr = root_regions->claim_next();
1141     }
1142   }
1143 };
1144 
1145 void ConcurrentMark::scanRootRegions() {
1146   double scan_start = os::elapsedTime();
1147 
1148   // Start of concurrent marking.
1149   ClassLoaderDataGraph::clear_claimed_marks();
1150 
1151   // scan_in_progress() will have been set to true only if there was
1152   // at least one root region to scan. So, if it's false, we
1153   // should not attempt to do any further work.
1154   if (root_regions()->scan_in_progress()) {
1155     if (G1Log::fine()) {
1156       gclog_or_tty->gclog_stamp();
1157       gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
1158     }
1159 
1160     _parallel_marking_threads = calc_parallel_marking_threads();
1161     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1162            "Maximum number of marking threads exceeded");
1163     uint active_workers = MAX2(1U, parallel_marking_threads());
1164 
1165     CMRootRegionScanTask task(this);
1166     _parallel_workers->set_active_workers(active_workers);
1167     _parallel_workers->run_task(&task);
1168 
1169     if (G1Log::fine()) {
1170       gclog_or_tty->gclog_stamp();
1171       gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf secs]", os::elapsedTime() - scan_start);
1172     }
1173 
1174     // It's possible that has_aborted() is true here without actually
1175     // aborting the survivor scan earlier. This is OK as it's
1176     // mainly used for sanity checking.
1177     root_regions()->scan_finished();
1178   }
1179 }
1180 
1181 void ConcurrentMark::markFromRoots() {
1182   // we might be tempted to assert that:
1183   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1184   //        "inconsistent argument?");
1185   // However that wouldn't be right, because it's possible that
1186   // a safepoint is indeed in progress as a younger generation
1187   // stop-the-world GC happens even as we mark in this generation.
1188 
1189   _restart_for_overflow = false;
1190   force_overflow_conc()->init();
1191 
1192   // _g1h has _n_par_threads
1193   _parallel_marking_threads = calc_parallel_marking_threads();
1194   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1195     "Maximum number of marking threads exceeded");
1196 
1197   uint active_workers = MAX2(1U, parallel_marking_threads());
1198   assert(active_workers > 0, "Should have been set");
1199 
1200   // Parallel task terminator is set in "set_concurrency_and_phase()"
1201   set_concurrency_and_phase(active_workers, true /* concurrent */);
1202 
1203   CMConcurrentMarkingTask markingTask(this, cmThread());
1204   _parallel_workers->set_active_workers(active_workers);
1205   _parallel_workers->run_task(&markingTask);
1206   print_stats();
1207 }
1208 
1209 // Helper class to get rid of some boilerplate code.
1210 class G1CMTraceTime : public StackObj {
1211   GCTraceTimeImpl _gc_trace_time;
1212   static bool doit_and_prepend(bool doit) {
1213     if (doit) {
1214       gclog_or_tty->put(' ');
1215     }
1216     return doit;
1217   }
1218 
1219  public:
1220   G1CMTraceTime(const char* title, bool doit)
1221     : _gc_trace_time(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm()) {
1222   }
1223 };
1224 
1225 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1226   // world is stopped at this checkpoint
1227   assert(SafepointSynchronize::is_at_safepoint(),
1228          "world should be stopped");
1229 
1230   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1231 
1232   // If a full collection has happened, we shouldn't do this.
1233   if (has_aborted()) {
1234     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1235     return;
1236   }
1237 
1238   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1239 
1240   if (VerifyDuringGC) {
1241     HandleMark hm;  // handle scope
1242     g1h->prepare_for_verify();
1243     Universe::verify(VerifyOption_G1UsePrevMarking,
1244                      " VerifyDuringGC:(before)");
1245   }
1246   g1h->check_bitmaps("Remark Start");
1247 
1248   G1CollectorPolicy* g1p = g1h->g1_policy();
1249   g1p->record_concurrent_mark_remark_start();
1250 
1251   double start = os::elapsedTime();
1252 
1253   checkpointRootsFinalWork();
1254 
1255   double mark_work_end = os::elapsedTime();
1256 
1257   weakRefsWork(clear_all_soft_refs);
1258 
1259   if (has_overflown()) {
1260     // Oops.  We overflowed.  Restart concurrent marking.
1261     _restart_for_overflow = true;
1262     if (G1TraceMarkStackOverflow) {
1263       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1264     }
1265 
1266     // Verify the heap w.r.t. the previous marking bitmap.
1267     if (VerifyDuringGC) {
1268       HandleMark hm;  // handle scope
1269       g1h->prepare_for_verify();
1270       Universe::verify(VerifyOption_G1UsePrevMarking,
1271                        " VerifyDuringGC:(overflow)");
1272     }
1273 
1274     // Clear the marking state because we will be restarting
1275     // marking due to overflowing the global mark stack.
1276     reset_marking_state();
1277   } else {
1278     {
1279       G1CMTraceTime trace("GC aggregate-data", G1Log::finer());
1280 
1281       // Aggregate the per-task counting data that we have accumulated
1282       // while marking.
1283       aggregate_count_data();
1284     }
1285 
1286     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1287     // We're done with marking.
1288     // This is the end of the marking cycle; we expect all
1289     // threads to have SATB queues with active set to true.
1290     satb_mq_set.set_active_all_threads(false, /* new active value */
1291                                        true /* expected_active */);
1292 
1293     if (VerifyDuringGC) {
1294       HandleMark hm;  // handle scope
1295       g1h->prepare_for_verify();
1296       Universe::verify(VerifyOption_G1UseNextMarking,
1297                        " VerifyDuringGC:(after)");
1298     }
1299     g1h->check_bitmaps("Remark End");
1300     assert(!restart_for_overflow(), "sanity");
1301     // Completely reset the marking state since marking completed
1302     set_non_marking_state();
1303   }
1304 
1305   // Expand the marking stack, if we have to and if we can.
1306   if (_markStack.should_expand()) {
1307     _markStack.expand();
1308   }
1309 
1310   // Statistics
1311   double now = os::elapsedTime();
1312   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1313   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1314   _remark_times.add((now - start) * 1000.0);
1315 
1316   g1p->record_concurrent_mark_remark_end();
1317 
1318   G1CMIsAliveClosure is_alive(g1h);
1319   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1320 }
1321 
1322 // Base class of the closures that finalize and verify the
1323 // liveness counting data.
1324 class CMCountDataClosureBase: public HeapRegionClosure {
1325 protected:
1326   G1CollectedHeap* _g1h;
1327   ConcurrentMark* _cm;
1328   CardTableModRefBS* _ct_bs;
1329 
1330   BitMap* _region_bm;
1331   BitMap* _card_bm;
1332 
1333   // Takes a region that's not empty (i.e., it has at least one
1334   // live object in it) and sets its corresponding bit on the region
1335   // bitmap to 1. If the region is "starts humongous" it will also set
1336   // to 1 the bits on the region bitmap that correspond to its
1337   // associated "continues humongous" regions.
1338   void set_bit_for_region(HeapRegion* hr) {
1339     assert(!hr->is_continues_humongous(), "should have filtered those out");
1340 
1341     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1342     if (!hr->is_starts_humongous()) {
1343       // Normal (non-humongous) case: just set the bit.
1344       _region_bm->par_at_put(index, true);
1345     } else {
1346       // Starts humongous case: calculate how many regions are part of
1347       // this humongous region and then set the bit range.
1348       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1349       _region_bm->par_at_put_range(index, end_index, true);
1350     }
1351   }
1352 
1353 public:
1354   CMCountDataClosureBase(G1CollectedHeap* g1h,
1355                          BitMap* region_bm, BitMap* card_bm):
1356     _g1h(g1h), _cm(g1h->concurrent_mark()),
1357     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
1358     _region_bm(region_bm), _card_bm(card_bm) { }
1359 };
1360 
1361 // Closure that calculates the number of live (marked) bytes per region. Used
1362 // for verification purposes during the cleanup pause.
1363 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1364   CMBitMapRO* _bm;
1365   size_t _region_marked_bytes;
1366 
1367 public:
1368   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1369                          BitMap* region_bm, BitMap* card_bm) :
1370     CMCountDataClosureBase(g1h, region_bm, card_bm),
1371     _bm(bm), _region_marked_bytes(0) { }
1372 
1373   bool doHeapRegion(HeapRegion* hr) {
1374 
1375     if (hr->is_continues_humongous()) {
1376       // We will ignore these here and process them when their
1377       // associated "starts humongous" region is processed (see
1378       // set_bit_for_heap_region()). Note that we cannot rely on their
1379       // associated "starts humongous" region to have their bit set to
1380       // 1 since, due to the region chunking in the parallel region
1381       // iteration, a "continues humongous" region might be visited
1382       // before its associated "starts humongous".
1383       return false;
1384     }
1385 
1386     HeapWord* ntams = hr->next_top_at_mark_start();
1387     HeapWord* start = hr->bottom();
1388 
1389     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1390            "Preconditions not met - "
1391            "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
1392            p2i(start), p2i(ntams), p2i(hr->end()));
1393 
1394     // Find the first marked object at or after "start".
1395     start = _bm->getNextMarkedWordAddress(start, ntams);
1396 
1397     size_t marked_bytes = 0;
1398 
1399     while (start < ntams) {
1400       oop obj = oop(start);
1401       int obj_sz = obj->size();
1402       HeapWord* obj_end = start + obj_sz;
1403 
1404       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1405       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1406 
1407       // Note: if we're looking at the last region in the heap, obj_end
1408       // could actually be just beyond the end of the heap; end_idx
1409       // will then correspond to a (non-existent) card that is also
1410       // just beyond the heap.
1411       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1412         // end of object is not card aligned - increment to cover
1413         // all the cards spanned by the object
1414         end_idx += 1;
1415       }
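           // Worked example with 512-byte cards: an object spanning heap offsets
           // [0x600, 0x900) from the bottom of the heap gives start_idx = 0x600 >> 9 = 3
           // and end_idx = 0x900 >> 9 = 4; since 0x900 is not card aligned, end_idx is
           // bumped to 5 and the range [3, 5) below covers cards 3 and 4.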
1416 
1417       // Set the bits in the card BM for the cards spanned by this object.
1418       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1419 
1420       // Add the size of this object to the number of marked bytes.
1421       marked_bytes += (size_t)obj_sz * HeapWordSize;
1422 
1423       // Find the next marked object after this one.
1424       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1425     }
1426 
1427     // Mark the allocated-since-marking portion...
1428     HeapWord* top = hr->top();
1429     if (ntams < top) {
1430       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1431       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1432 
1433       // Note: if we're looking at the last region in the heap, top
1434       // could actually be just beyond the end of the heap; end_idx
1435       // will then correspond to a (non-existent) card that is also
1436       // just beyond the heap.
1437       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1438         // top is not card aligned - increment end_idx to cover
1439         // all the cards spanned by the [ntams, top) range
1440         end_idx += 1;
1441       }
1442       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1443 
1444       // This definitely means the region has live objects.
1445       set_bit_for_region(hr);
1446     }
1447 
1448     // Update the live region bitmap.
1449     if (marked_bytes > 0) {
1450       set_bit_for_region(hr);
1451     }
1452 
1453     // Set the marked bytes for the current region so that
1454     // it can be queried by a calling verification routine
1455     _region_marked_bytes = marked_bytes;
1456 
1457     return false;
1458   }
1459 
1460   size_t region_marked_bytes() const { return _region_marked_bytes; }
1461 };
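
// Editor's sketch of the card index arithmetic used by the closure above.
// The addresses and sizes are hypothetical; card_bitmap_index_for() is
// assumed to map an address to its card's offset from the bottom of the
// heap, with the usual 512-byte cards.
//
//   heap bottom = 0x80000000
//   marked object at 0x80000400, 768 bytes long, so obj_end = 0x80000700
//   start_idx = 0x400 / 512 = 2
//   end_idx   = 0x700 / 512 = 3; obj_end is not card aligned, so end_idx
//               is bumped to 4 and the bits [2, 4) are set - i.e. both
//               cards the object touches.
//   marked_bytes grows by 768 for this object.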
1462 
1463 // Heap region closure used for verifying the counting data
1464 // that was accumulated concurrently and aggregated during
1465 // the remark pause. This closure is applied to the heap
1466 // regions during the STW cleanup pause.
1467 
1468 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1469   G1CollectedHeap* _g1h;
1470   ConcurrentMark* _cm;
1471   CalcLiveObjectsClosure _calc_cl;
1472   BitMap* _region_bm;   // Region BM to be verified
1473   BitMap* _card_bm;     // Card BM to be verified
1474   bool _verbose;        // verbose output?
1475 
1476   BitMap* _exp_region_bm; // Expected Region BM values
1477   BitMap* _exp_card_bm;   // Expected card BM values
1478 
1479   int _failures;
1480 
1481 public:
1482   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1483                                 BitMap* region_bm,
1484                                 BitMap* card_bm,
1485                                 BitMap* exp_region_bm,
1486                                 BitMap* exp_card_bm,
1487                                 bool verbose) :
1488     _g1h(g1h), _cm(g1h->concurrent_mark()),
1489     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1490     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1491     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1492     _failures(0) { }
1493 
1494   int failures() const { return _failures; }
1495 
1496   bool doHeapRegion(HeapRegion* hr) {
1497     if (hr->is_continues_humongous()) {
1498       // We will ignore these here and process them when their
1499       // associated "starts humongous" region is processed (see
1500       // set_bit_for_region()). Note that we cannot rely on their
1501       // associated "starts humongous" region to have its bit set to
1502       // 1 since, due to the region chunking in the parallel region
1503       // iteration, a "continues humongous" region might be visited
1504       // before its associated "starts humongous".
1505       return false;
1506     }
1507 
1508     int failures = 0;
1509 
1510     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1511     // this region and set the corresponding bits in the expected region
1512     // and card bitmaps.
1513     bool res = _calc_cl.doHeapRegion(hr);
1514     assert(res == false, "should be continuing");
1515 
1516     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1517                     Mutex::_no_safepoint_check_flag);
1518 
1519     // Verify the marked bytes for this region.
1520     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1521     size_t act_marked_bytes = hr->next_marked_bytes();
1522 
1523     // We're not OK if expected marked bytes > actual marked bytes. It means
1524     // we have missed accounting for some objects during the actual marking.
1525     if (exp_marked_bytes > act_marked_bytes) {
1526       if (_verbose) {
1527         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1528                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1529                                hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
1530       }
1531       failures += 1;
1532     }
1533 
1534     // Verify the bit for this region in the actual and expected
1535     // (just calculated) region bitmaps.
1536     // We're not OK if the bit in the calculated expected region
1537     // bitmap is set and the bit in the actual region bitmap is not.
1538     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1539 
1540     bool expected = _exp_region_bm->at(index);
1541     bool actual = _region_bm->at(index);
1542     if (expected && !actual) {
1543       if (_verbose) {
1544         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1545                                "expected: %s, actual: %s",
1546                                hr->hrm_index(),
1547                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1548       }
1549       failures += 1;
1550     }
1551 
1552     // Verify that the card bit maps for the cards spanned by the current
1553     // region match. We have an error if we have a set bit in the expected
1554     // bit map and the corresponding bit in the actual bitmap is not set.
1555 
1556     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1557     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1558 
1559     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1560       expected = _exp_card_bm->at(i);
1561       actual = _card_bm->at(i);
1562 
1563       if (expected && !actual) {
1564         if (_verbose) {
1565           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1566                                  "expected: %s, actual: %s",
1567                                  hr->hrm_index(), i,
1568                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1569         }
1570         failures += 1;
1571       }
1572     }
1573 
1574     if (failures > 0 && _verbose)  {
1575       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1576                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1577                              HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
1578                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1579     }
1580 
1581     _failures += failures;
1582 
1583     // We could stop iteration over the heap when we
1584     // find the first violating region by returning true.
1585     return false;
1586   }
1587 };
1588 
1589 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1590 protected:
1591   G1CollectedHeap* _g1h;
1592   ConcurrentMark* _cm;
1593   BitMap* _actual_region_bm;
1594   BitMap* _actual_card_bm;
1595 
1596   uint    _n_workers;
1597 
1598   BitMap* _expected_region_bm;
1599   BitMap* _expected_card_bm;
1600 
1601   int  _failures;
1602   bool _verbose;
1603 
1604   HeapRegionClaimer _hrclaimer;
1605 
1606 public:
1607   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1608                             BitMap* region_bm, BitMap* card_bm,
1609                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1610     : AbstractGangTask("G1 verify final counting"),
1611       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1612       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1613       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1614       _failures(0), _verbose(false),
1615       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1616     assert(VerifyDuringGC, "don't call this otherwise");
1617     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1618     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1619 
1620     _verbose = _cm->verbose_medium();
1621   }
1622 
1623   void work(uint worker_id) {
1624     assert(worker_id < _n_workers, "invariant");
1625 
1626     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1627                                             _actual_region_bm, _actual_card_bm,
1628                                             _expected_region_bm,
1629                                             _expected_card_bm,
1630                                             _verbose);
1631 
1632     _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
1633 
1634     Atomic::add(verify_cl.failures(), &_failures);
1635   }
1636 
1637   int failures() const { return _failures; }
1638 };
1639 
1640 // Closure that finalizes the liveness counting data.
1641 // Used during the cleanup pause.
1642 // Sets the bits corresponding to the interval [NTAMS, top]
1643 // (which contains the implicitly live objects) in the
1644 // card liveness bitmap. Also sets the bit for each region,
1645 // containing live data, in the region liveness bitmap.
1646 
1647 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1648  public:
1649   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1650                               BitMap* region_bm,
1651                               BitMap* card_bm) :
1652     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1653 
1654   bool doHeapRegion(HeapRegion* hr) {
1655 
1656     if (hr->is_continues_humongous()) {
1657       // We will ignore these here and process them when their
1658       // associated "starts humongous" region is processed (see
1659       // set_bit_for_region()). Note that we cannot rely on their
1660       // associated "starts humongous" region to have its bit set to
1661       // 1 since, due to the region chunking in the parallel region
1662       // iteration, a "continues humongous" region might be visited
1663       // before its associated "starts humongous".
1664       return false;
1665     }
1666 
1667     HeapWord* ntams = hr->next_top_at_mark_start();
1668     HeapWord* top   = hr->top();
1669 
1670     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1671 
1672     // Mark the allocated-since-marking portion...
1673     if (ntams < top) {
1674       // This definitely means the region has live objects.
1675       set_bit_for_region(hr);
1676 
1677       // Now set the bits in the card bitmap for [ntams, top)
1678       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1679       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1680 
1681       // Note: if we're looking at the last region in the heap, top
1682       // could actually be just beyond the end of the heap; end_idx
1683       // will then correspond to a (non-existent) card that is also
1684       // just beyond the heap.
1685       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1686         // top is not card aligned - increment end_idx to cover
1687         // all the cards spanned by the [ntams, top) range
1688         end_idx += 1;
1689       }
1690 
1691       assert(end_idx <= _card_bm->size(),
1692              "oob: end_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1693              end_idx, _card_bm->size());
1694       assert(start_idx < _card_bm->size(),
1695              "oob: start_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1696              start_idx, _card_bm->size());
1697 
1698       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1699     }
1700 
1701     // Set the bit for the region if it contains live data
1702     if (hr->next_marked_bytes() > 0) {
1703       set_bit_for_region(hr);
1704     }
1705 
1706     return false;
1707   }
1708 };
1709 
1710 class G1ParFinalCountTask: public AbstractGangTask {
1711 protected:
1712   G1CollectedHeap* _g1h;
1713   ConcurrentMark* _cm;
1714   BitMap* _actual_region_bm;
1715   BitMap* _actual_card_bm;
1716 
1717   uint    _n_workers;
1718   HeapRegionClaimer _hrclaimer;
1719 
1720 public:
1721   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1722     : AbstractGangTask("G1 final counting"),
1723       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1724       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1725       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1726   }
1727 
1728   void work(uint worker_id) {
1729     assert(worker_id < _n_workers, "invariant");
1730 
1731     FinalCountDataUpdateClosure final_update_cl(_g1h,
1732                                                 _actual_region_bm,
1733                                                 _actual_card_bm);
1734 
1735     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1736   }
1737 };
1738 
1739 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1740   G1CollectedHeap* _g1;
1741   size_t _freed_bytes;
1742   FreeRegionList* _local_cleanup_list;
1743   HeapRegionSetCount _old_regions_removed;
1744   HeapRegionSetCount _humongous_regions_removed;
1745   HRRSCleanupTask* _hrrs_cleanup_task;
1746 
1747 public:
1748   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1749                              FreeRegionList* local_cleanup_list,
1750                              HRRSCleanupTask* hrrs_cleanup_task) :
1751     _g1(g1),
1752     _freed_bytes(0),
1753     _local_cleanup_list(local_cleanup_list),
1754     _old_regions_removed(),
1755     _humongous_regions_removed(),
1756     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1757 
1758   size_t freed_bytes() { return _freed_bytes; }
1759   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1760   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1761 
1762   bool doHeapRegion(HeapRegion *hr) {
1763     if (hr->is_continues_humongous() || hr->is_archive()) {
1764       return false;
1765     }
1766     // We use a claim value of zero here because all regions
1767     // were claimed with value 1 in the FinalCount task.
1768     _g1->reset_gc_time_stamps(hr);
1769     hr->note_end_of_marking();
1770 
1771     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1772       _freed_bytes += hr->used();
1773       hr->set_containing_set(NULL);
1774       if (hr->is_humongous()) {
1775         assert(hr->is_starts_humongous(), "we should only see starts humongous");
1776         _humongous_regions_removed.increment(1u, hr->capacity());
1777         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1778       } else {
1779         _old_regions_removed.increment(1u, hr->capacity());
1780         _g1->free_region(hr, _local_cleanup_list, true);
1781       }
1782     } else {
1783       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1784     }
1785 
1786     return false;
1787   }
1788 };
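
// Editor's note on the freeing condition in doHeapRegion() above (informal
// summary): a region is reclaimed during cleanup only if it contains
// allocated data (used() > 0), marking found none of that data to be live
// (max_live_bytes() == 0), and it is not a young region (young regions are
// reclaimed by evacuation pauses instead). Regions that keep any live data
// are left in place and merely have remembered set cleanup work queued for
// them via do_cleanup_work().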
1789 
1790 class G1ParNoteEndTask: public AbstractGangTask {
1791   friend class G1NoteEndOfConcMarkClosure;
1792 
1793 protected:
1794   G1CollectedHeap* _g1h;
1795   FreeRegionList* _cleanup_list;
1796   HeapRegionClaimer _hrclaimer;
1797 
1798 public:
1799   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1800       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1801   }
1802 
1803   void work(uint worker_id) {
1804     FreeRegionList local_cleanup_list("Local Cleanup List");
1805     HRRSCleanupTask hrrs_cleanup_task;
1806     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1807                                            &hrrs_cleanup_task);
1808     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1809     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1810 
1811     // Now update the lists
1812     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1813     {
1814       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1815       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1816 
1817       // If we iterate over the global cleanup list at the end of
1818       // cleanup to do this printing, we cannot guarantee that we only
1819       // generate output for the newly-reclaimed regions (the list
1820       // might not be empty at the beginning of cleanup; we might
1821       // still be working on its previous contents). So we do the
1822       // printing here, before we append the new regions to the global
1823       // cleanup list.
1824 
1825       G1HRPrinter* hr_printer = _g1h->hr_printer();
1826       if (hr_printer->is_active()) {
1827         FreeRegionListIterator iter(&local_cleanup_list);
1828         while (iter.more_available()) {
1829           HeapRegion* hr = iter.get_next();
1830           hr_printer->cleanup(hr);
1831         }
1832       }
1833 
1834       _cleanup_list->add_ordered(&local_cleanup_list);
1835       assert(local_cleanup_list.is_empty(), "post-condition");
1836 
1837       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1838     }
1839   }
1840 };
1841 
1842 class G1ParScrubRemSetTask: public AbstractGangTask {
1843 protected:
1844   G1RemSet* _g1rs;
1845   BitMap* _region_bm;
1846   BitMap* _card_bm;
1847   HeapRegionClaimer _hrclaimer;
1848 
1849 public:
1850   G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
1851       AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) {
1852   }
1853 
1854   void work(uint worker_id) {
1855     _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
1856   }
1857 
1858 };
1859 
1860 void ConcurrentMark::cleanup() {
1861   // world is stopped at this checkpoint
1862   assert(SafepointSynchronize::is_at_safepoint(),
1863          "world should be stopped");
1864   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1865 
1866   // If a full collection has happened, we shouldn't do this.
1867   if (has_aborted()) {
1868     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1869     return;
1870   }
1871 
1872   g1h->verify_region_sets_optional();
1873 
1874   if (VerifyDuringGC) {
1875     HandleMark hm;  // handle scope
1876     g1h->prepare_for_verify();
1877     Universe::verify(VerifyOption_G1UsePrevMarking,
1878                      " VerifyDuringGC:(before)");
1879   }
1880   g1h->check_bitmaps("Cleanup Start");
1881 
1882   G1CollectorPolicy* g1p = g1h->g1_policy();
1883   g1p->record_concurrent_mark_cleanup_start();
1884 
1885   double start = os::elapsedTime();
1886 
1887   HeapRegionRemSet::reset_for_cleanup_tasks();
1888 
1889   // Do counting once more with the world stopped for good measure.
1890   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1891 
1892   g1h->workers()->run_task(&g1_par_count_task);
1893 
1894   if (VerifyDuringGC) {
1895     // Verify that the counting data accumulated during marking matches
1896     // that calculated by walking the marking bitmap.
1897 
1898     // Bitmaps to hold expected values
1899     BitMap expected_region_bm(_region_bm.size(), true);
1900     BitMap expected_card_bm(_card_bm.size(), true);
1901 
1902     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1903                                                  &_region_bm,
1904                                                  &_card_bm,
1905                                                  &expected_region_bm,
1906                                                  &expected_card_bm);
1907 
1908     g1h->workers()->run_task(&g1_par_verify_task);
1909 
1910     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1911   }
1912 
1913   size_t start_used_bytes = g1h->used();
1914   g1h->collector_state()->set_mark_in_progress(false);
1915 
1916   double count_end = os::elapsedTime();
1917   double this_final_counting_time = (count_end - start);
1918   _total_counting_time += this_final_counting_time;
1919 
1920   if (G1PrintRegionLivenessInfo) {
1921     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1922     _g1h->heap_region_iterate(&cl);
1923   }
1924 
1925   // Install the newly created mark bitmap as "prev".
1926   swapMarkBitMaps();
1927 
1928   g1h->reset_gc_time_stamp();
1929 
1930   uint n_workers = _g1h->workers()->active_workers();
1931 
1932   // Note end of marking in all heap regions.
1933   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1934   g1h->workers()->run_task(&g1_par_note_end_task);
1935   g1h->check_gc_time_stamps();
1936 
1937   if (!cleanup_list_is_empty()) {
1938     // The cleanup list is not empty, so we'll have to process it
1939     // concurrently. Notify anyone else that might be wanting free
1940     // regions that there will be more free regions coming soon.
1941     g1h->set_free_regions_coming();
1942   }
1943 
1944   // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
1945   // call below, since it affects the metric by which we sort the heap regions.
1946   if (G1ScrubRemSets) {
1947     double rs_scrub_start = os::elapsedTime();
1948     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
1949     g1h->workers()->run_task(&g1_par_scrub_rs_task);
1950 
1951     double rs_scrub_end = os::elapsedTime();
1952     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1953     _total_rs_scrub_time += this_rs_scrub_time;
1954   }
1955 
1956   // this will also free any regions totally full of garbage objects,
1957   // and sort the regions.
1958   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1959 
1960   // Statistics.
1961   double end = os::elapsedTime();
1962   _cleanup_times.add((end - start) * 1000.0);
1963 
1964   if (G1Log::fine()) {
1965     g1h->g1_policy()->print_heap_transition(start_used_bytes);
1966   }
1967 
1968   // Clean up will have freed any regions completely full of garbage.
1969   // Update the soft reference policy with the new heap occupancy.
1970   Universe::update_heap_info_at_gc();
1971 
1972   if (VerifyDuringGC) {
1973     HandleMark hm;  // handle scope
1974     g1h->prepare_for_verify();
1975     Universe::verify(VerifyOption_G1UsePrevMarking,
1976                      " VerifyDuringGC:(after)");
1977   }
1978 
1979   g1h->check_bitmaps("Cleanup End");
1980 
1981   g1h->verify_region_sets_optional();
1982 
1983   // We need to make this be a "collection" so any collection pause that
1984   // races with it goes around and waits for completeCleanup to finish.
1985   g1h->increment_total_collections();
1986 
1987   // Clean out dead classes and update Metaspace sizes.
1988   if (ClassUnloadingWithConcurrentMark) {
1989     ClassLoaderDataGraph::purge();
1990   }
1991   MetaspaceGC::compute_new_size();
1992 
1993   // We reclaimed old regions so we should calculate the sizes to make
1994   // sure we update the old gen/space data.
1995   g1h->g1mm()->update_sizes();
1996   g1h->allocation_context_stats().update_after_mark();
1997 
1998   g1h->trace_heap_after_concurrent_cycle();
1999 }
2000 
2001 void ConcurrentMark::completeCleanup() {
2002   if (has_aborted()) return;
2003 
2004   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2005 
2006   _cleanup_list.verify_optional();
2007   FreeRegionList tmp_free_list("Tmp Free List");
2008 
2009   if (G1ConcRegionFreeingVerbose) {
2010     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2011                            "cleanup list has %u entries",
2012                            _cleanup_list.length());
2013   }
2014 
2015   // No one else should be accessing the _cleanup_list at this point,
2016   // so it is not necessary to take any locks
2017   while (!_cleanup_list.is_empty()) {
2018     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
2019     assert(hr != NULL, "Got NULL from a non-empty list");
2020     hr->par_clear();
2021     tmp_free_list.add_ordered(hr);
2022 
2023     // Instead of adding one region at a time to the secondary_free_list,
2024     // we accumulate them in the local list and move them a few at a
2025     // time. This also cuts down on the number of notify_all() calls
2026     // we do during this process. We'll also append the local list when
2027     // _cleanup_list is empty (which means we just removed the last
2028     // region from the _cleanup_list).
2029     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2030         _cleanup_list.is_empty()) {
2031       if (G1ConcRegionFreeingVerbose) {
2032         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2033                                "appending %u entries to the secondary_free_list, "
2034                                "cleanup list still has %u entries",
2035                                tmp_free_list.length(),
2036                                _cleanup_list.length());
2037       }
2038 
2039       {
2040         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2041         g1h->secondary_free_list_add(&tmp_free_list);
2042         SecondaryFreeList_lock->notify_all();
2043       }
2044 #ifndef PRODUCT
2045       if (G1StressConcRegionFreeing) {
2046         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2047           os::sleep(Thread::current(), (jlong) 1, false);
2048         }
2049       }
2050 #endif
2051     }
2052   }
2053   assert(tmp_free_list.is_empty(), "post-condition");
2054 }
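
// Editor's sketch of the batching done in completeCleanup() above, assuming
// G1SecondaryFreeListAppendLength is at its usual default of 5 and the
// cleanup list holds 12 regions:
//
//   batch 1: regions  1-5  appended to the secondary free list, notify_all()
//   batch 2: regions  6-10 appended, notify_all()
//   batch 3: regions 11-12 appended (cleanup list now empty), notify_all()
//
// i.e. the SecondaryFreeList_lock is taken three times rather than twelve,
// while allocators waiting for free regions are still woken up promptly.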
2055 
2056 // Supporting Object and Oop closures for reference discovery
2057 // and processing during marking
2058 
2059 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2060   HeapWord* addr = (HeapWord*)obj;
2061   return addr != NULL &&
2062          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2063 }
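
// Editor's note: the predicate above can be read as "anything outside the
// G1 reserved space is treated as live; inside the heap an object is live
// unless is_obj_ill() says the current marking information considers it
// dead". A minimal, hypothetical use by a reference processor might look
// like this (illustrative only):
//
//   G1CMIsAliveClosure is_alive(g1h);
//   if (!is_alive.do_object_b(referent)) {
//     // The referent is dead with respect to the concurrent marking, so
//     // the java.lang.ref.Reference may be cleared and enqueued.
//   }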
2064 
2065 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2066 // Uses the CMTask associated with a worker thread (for serial reference
2067 // processing the CMTask for worker 0 is used) to preserve (mark) and
2068 // trace referent objects.
2069 //
2070 // Using the CMTask and embedded local queues avoids having the worker
2071 // threads operating on the global mark stack. This reduces the risk
2072 // of overflowing the stack - which we would rather avoid at this late
2073 // stage. Also, using the tasks' local queues removes the potential
2074 // for the workers to interfere with each other, which could occur if
2075 // they operated on the global stack.
2076 
2077 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2078   ConcurrentMark* _cm;
2079   CMTask*         _task;
2080   int             _ref_counter_limit;
2081   int             _ref_counter;
2082   bool            _is_serial;
2083  public:
2084   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2085     _cm(cm), _task(task), _is_serial(is_serial),
2086     _ref_counter_limit(G1RefProcDrainInterval) {
2087     assert(_ref_counter_limit > 0, "sanity");
2088     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2089     _ref_counter = _ref_counter_limit;
2090   }
2091 
2092   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2093   virtual void do_oop(      oop* p) { do_oop_work(p); }
2094 
2095   template <class T> void do_oop_work(T* p) {
2096     if (!_cm->has_overflown()) {
2097       oop obj = oopDesc::load_decode_heap_oop(p);
2098       if (_cm->verbose_high()) {
2099         gclog_or_tty->print_cr("\t[%u] we're looking at location "
2100                                "*" PTR_FORMAT " = " PTR_FORMAT,
2101                                _task->worker_id(), p2i(p), p2i((void*) obj));
2102       }
2103 
2104       _task->deal_with_reference(obj);
2105       _ref_counter--;
2106 
2107       if (_ref_counter == 0) {
2108         // We have dealt with _ref_counter_limit references, pushing them
2109         // and objects reachable from them on to the local stack (and
2110         // possibly the global stack). Call CMTask::do_marking_step() to
2111         // process these entries.
2112         //
2113         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2114         // there's nothing more to do (i.e. we're done with the entries that
2115         // were pushed as a result of the CMTask::deal_with_reference() calls
2116         // above) or we overflow.
2117         //
2118         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2119         // flag while there may still be some work to do. (See the comment at
2120         // the beginning of CMTask::do_marking_step() for those conditions -
2121         // one of which is reaching the specified time target.) It is only
2122         // when CMTask::do_marking_step() returns without setting the
2123         // has_aborted() flag that the marking step has completed.
2124         do {
2125           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2126           _task->do_marking_step(mark_step_duration_ms,
2127                                  false      /* do_termination */,
2128                                  _is_serial);
2129         } while (_task->has_aborted() && !_cm->has_overflown());
2130         _ref_counter = _ref_counter_limit;
2131       }
2132     } else {
2133       if (_cm->verbose_high()) {
2134          gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2135       }
2136     }
2137   }
2138 };
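
// Editor's sketch of the drain cadence implemented by the closure above,
// assuming G1RefProcDrainInterval has its usual default of 10:
//
//   referents  1..9 : deal_with_reference() only - entries pile up on the
//                     task's local queue
//   referent     10 : deal_with_reference(), then do_marking_step() in a
//                     loop until the queued entries are drained (or the
//                     global stack overflows); _ref_counter resets to 10
//   referents 11..  : the pattern repeats
//
// Draining every N references keeps the local queue small without paying
// the do_marking_step() setup cost for every single referent.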
2139 
2140 // 'Drain' oop closure used by both serial and parallel reference processing.
2141 // Uses the CMTask associated with a given worker thread (for serial
2142 // reference processing the CMTask for worker 0 is used). Calls the
2143 // do_marking_step routine, with an unbelievably large timeout value,
2144 // to drain the marking data structures of the remaining entries
2145 // added by the 'keep alive' oop closure above.
2146 
2147 class G1CMDrainMarkingStackClosure: public VoidClosure {
2148   ConcurrentMark* _cm;
2149   CMTask*         _task;
2150   bool            _is_serial;
2151  public:
2152   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2153     _cm(cm), _task(task), _is_serial(is_serial) {
2154     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2155   }
2156 
2157   void do_void() {
2158     do {
2159       if (_cm->verbose_high()) {
2160         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2161                                _task->worker_id(), BOOL_TO_STR(_is_serial));
2162       }
2163 
2164       // We call CMTask::do_marking_step() to completely drain the local
2165       // and global marking stacks of entries pushed by the 'keep alive'
2166       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2167       //
2168       // CMTask::do_marking_step() is called in a loop, which we'll exit
2169       // if there's nothing more to do (i.e. we've completely drained the
2170       // entries that were pushed as a result of applying the 'keep alive'
2171       // closure to the entries on the discovered ref lists) or we overflow
2172       // the global marking stack.
2173       //
2174       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2175       // flag while there may still be some work to do. (See the comment at
2176       // the beginning of CMTask::do_marking_step() for those conditions -
2177       // one of which is reaching the specified time target.) It is only
2178       // when CMTask::do_marking_step() returns without setting the
2179       // has_aborted() flag that the marking step has completed.
2180 
2181       _task->do_marking_step(1000000000.0 /* something very large */,
2182                              true         /* do_termination */,
2183                              _is_serial);
2184     } while (_task->has_aborted() && !_cm->has_overflown());
2185   }
2186 };
2187 
2188 // Implementation of AbstractRefProcTaskExecutor for parallel
2189 // reference processing at the end of G1 concurrent marking
2190 
2191 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2192 private:
2193   G1CollectedHeap* _g1h;
2194   ConcurrentMark*  _cm;
2195   WorkGang*        _workers;
2196   uint             _active_workers;
2197 
2198 public:
2199   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2200                           ConcurrentMark* cm,
2201                           WorkGang* workers,
2202                           uint n_workers) :
2203     _g1h(g1h), _cm(cm),
2204     _workers(workers), _active_workers(n_workers) { }
2205 
2206   // Executes the given task using concurrent marking worker threads.
2207   virtual void execute(ProcessTask& task);
2208   virtual void execute(EnqueueTask& task);
2209 };
2210 
2211 class G1CMRefProcTaskProxy: public AbstractGangTask {
2212   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2213   ProcessTask&     _proc_task;
2214   G1CollectedHeap* _g1h;
2215   ConcurrentMark*  _cm;
2216 
2217 public:
2218   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2219                      G1CollectedHeap* g1h,
2220                      ConcurrentMark* cm) :
2221     AbstractGangTask("Process reference objects in parallel"),
2222     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2223     ReferenceProcessor* rp = _g1h->ref_processor_cm();
2224     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2225   }
2226 
2227   virtual void work(uint worker_id) {
2228     ResourceMark rm;
2229     HandleMark hm;
2230     CMTask* task = _cm->task(worker_id);
2231     G1CMIsAliveClosure g1_is_alive(_g1h);
2232     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2233     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2234 
2235     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2236   }
2237 };
2238 
2239 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2240   assert(_workers != NULL, "Need parallel worker threads.");
2241   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2242 
2243   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2244 
2245   // We need to reset the concurrency level before each
2246   // proxy task execution, so that the termination protocol
2247   // and overflow handling in CMTask::do_marking_step() knows
2248   // how many workers to wait for.
2249   _cm->set_concurrency(_active_workers);
2250   _workers->run_task(&proc_task_proxy);
2251 }
2252 
2253 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2254   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2255   EnqueueTask& _enq_task;
2256 
2257 public:
2258   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2259     AbstractGangTask("Enqueue reference objects in parallel"),
2260     _enq_task(enq_task) { }
2261 
2262   virtual void work(uint worker_id) {
2263     _enq_task.work(worker_id);
2264   }
2265 };
2266 
2267 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2268   assert(_workers != NULL, "Need parallel worker threads.");
2269   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2270 
2271   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2272 
2273   // Not strictly necessary but...
2274   //
2275   // We need to reset the concurrency level before each
2276   // proxy task execution, so that the termination protocol
2277   // and overflow handling in CMTask::do_marking_step() knows
2278   // how many workers to wait for.
2279   _cm->set_concurrency(_active_workers);
2280   _workers->run_task(&enq_task_proxy);
2281 }
2282 
2283 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2284   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2285 }
2286 
2287 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2288   if (has_overflown()) {
2289     // Skip processing the discovered references if we have
2290     // overflown the global marking stack. Reference objects
2291     // only get discovered once so it is OK to not
2292     // de-populate the discovered reference lists. We could have,
2293     // but the only benefit would be that, when marking restarts,
2294     // fewer reference objects are discovered.
2295     return;
2296   }
2297 
2298   ResourceMark rm;
2299   HandleMark   hm;
2300 
2301   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2302 
2303   // Is alive closure.
2304   G1CMIsAliveClosure g1_is_alive(g1h);
2305 
2306   // Inner scope to exclude the cleaning of the string and symbol
2307   // tables from the displayed time.
2308   {
2309     G1CMTraceTime t("GC ref-proc", G1Log::finer());
2310 
2311     ReferenceProcessor* rp = g1h->ref_processor_cm();
2312 
2313     // See the comment in G1CollectedHeap::ref_processing_init()
2314     // about how reference processing currently works in G1.
2315 
2316     // Set the soft reference policy
2317     rp->setup_policy(clear_all_soft_refs);
2318     assert(_markStack.isEmpty(), "mark stack should be empty");
2319 
2320     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2321     // in serial reference processing. Note these closures are also
2322     // used for serially processing (by the current thread) the
2323     // JNI references during parallel reference processing.
2324     //
2325     // These closures do not need to synchronize with the worker
2326     // threads involved in parallel reference processing as these
2327     // instances are executed serially by the current thread (e.g.
2328     // reference processing is not multi-threaded and is thus
2329     // performed by the current thread instead of a gang worker).
2330     //
2331     // The gang tasks involved in parallel reference processing create
2332     // their own instances of these closures, which do their own
2333     // synchronization among themselves.
2334     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2335     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2336 
2337     // We need at least one active thread. If reference processing
2338     // is not multi-threaded we use the current (VMThread) thread,
2339     // otherwise we use the work gang from the G1CollectedHeap and
2340     // we utilize all the worker threads we can.
2341     bool processing_is_mt = rp->processing_is_mt();
2342     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2343     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2344 
2345     // Parallel processing task executor.
2346     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2347                                               g1h->workers(), active_workers);
2348     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2349 
2350     // Set the concurrency level. The phase was already set prior to
2351     // executing the remark task.
2352     set_concurrency(active_workers);
2353 
2354     // Set the degree of MT processing here.  If the discovery was done MT,
2355     // the number of threads involved during discovery could differ from
2356     // the number of active workers.  This is OK as long as the discovered
2357     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2358     rp->set_active_mt_degree(active_workers);
2359 
2360     // Process the weak references.
2361     const ReferenceProcessorStats& stats =
2362         rp->process_discovered_references(&g1_is_alive,
2363                                           &g1_keep_alive,
2364                                           &g1_drain_mark_stack,
2365                                           executor,
2366                                           g1h->gc_timer_cm());
2367     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2368 
2369     // The do_oop work routines of the keep_alive and drain_marking_stack
2370     // oop closures will set the has_overflown flag if we overflow the
2371     // global marking stack.
2372 
2373     assert(_markStack.overflow() || _markStack.isEmpty(),
2374             "mark stack should be empty (unless it overflowed)");
2375 
2376     if (_markStack.overflow()) {
2377       // This should have been done already when we tried to push an
2378       // entry on to the global mark stack. But let's do it again.
2379       set_has_overflown();
2380     }
2381 
2382     assert(rp->num_q() == active_workers, "why not");
2383 
2384     rp->enqueue_discovered_references(executor);
2385 
2386     rp->verify_no_references_recorded();
2387     assert(!rp->discovery_enabled(), "Post condition");
2388   }
2389 
2390   if (has_overflown()) {
2391     // We can not trust g1_is_alive if the marking stack overflowed
2392     return;
2393   }
2394 
2395   assert(_markStack.isEmpty(), "Marking should have completed");
2396 
2397   // Unload Klasses, String, Symbols, Code Cache, etc.
2398   {
2399     G1CMTraceTime trace("Unloading", G1Log::finer());
2400 
2401     if (ClassUnloadingWithConcurrentMark) {
2402       bool purged_classes;
2403 
2404       {
2405         G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest());
2406         purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2407       }
2408 
2409       {
2410         G1CMTraceTime trace("Parallel Unloading", G1Log::finest());
2411         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2412       }
2413     }
2414 
2415     if (G1StringDedup::is_enabled()) {
2416       G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest());
2417       G1StringDedup::unlink(&g1_is_alive);
2418     }
2419   }
2420 }
2421 
2422 void ConcurrentMark::swapMarkBitMaps() {
2423   CMBitMapRO* temp = _prevMarkBitMap;
2424   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2425   _nextMarkBitMap  = (CMBitMap*)  temp;
2426 }
2427 
2428 // Closure for marking entries in SATB buffers.
2429 class CMSATBBufferClosure : public SATBBufferClosure {
2430 private:
2431   CMTask* _task;
2432   G1CollectedHeap* _g1h;
2433 
2434   // This is very similar to CMTask::deal_with_reference, but with
2435   // more relaxed requirements for the argument, so this must be more
2436   // circumspect about treating the argument as an object.
2437   void do_entry(void* entry) const {
2438     _task->increment_refs_reached();
2439     HeapRegion* hr = _g1h->heap_region_containing_raw(entry);
2440     if (entry < hr->next_top_at_mark_start()) {
2441       // Until we get here, we don't know whether entry refers to a valid
2442       // object; it could instead have been a stale reference.
2443       oop obj = static_cast<oop>(entry);
2444       assert(obj->is_oop(true /* ignore mark word */),
2445              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2446       _task->make_reference_grey(obj, hr);
2447     }
2448   }
2449 
2450 public:
2451   CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2452     : _task(task), _g1h(g1h) { }
2453 
2454   virtual void do_buffer(void** buffer, size_t size) {
2455     for (size_t i = 0; i < size; ++i) {
2456       do_entry(buffer[i]);
2457     }
2458   }
2459 };
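
// Editor's note on the NTAMS filter in do_entry() above: SATB buffer
// entries are raw pointers captured by the pre-write barrier, so an entry
// is only treated as an object when it lies below the region's
// next_top_at_mark_start(). Anything at or above NTAMS was allocated after
// marking started and is implicitly live, so it needs no explicit marking
// here; the same check also filters out stale entries into regions that
// were freed and reused during marking (whose NTAMS sits at bottom), which
// is why the is_oop() assert can safely follow it.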
2460 
2461 class G1RemarkThreadsClosure : public ThreadClosure {
2462   CMSATBBufferClosure _cm_satb_cl;
2463   G1CMOopClosure _cm_cl;
2464   MarkingCodeBlobClosure _code_cl;
2465   int _thread_parity;
2466 
2467  public:
2468   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2469     _cm_satb_cl(task, g1h),
2470     _cm_cl(g1h, g1h->concurrent_mark(), task),
2471     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2472     _thread_parity(Threads::thread_claim_parity()) {}
2473 
2474   void do_thread(Thread* thread) {
2475     if (thread->is_Java_thread()) {
2476       if (thread->claim_oops_do(true, _thread_parity)) {
2477         JavaThread* jt = (JavaThread*)thread;
2478 
2479         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2480         // however, oops reachable from nmethods have very complex lifecycles:
2481         // * Alive if on the stack of an executing method
2482         // * Weakly reachable otherwise
2483         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2484         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2485         jt->nmethods_do(&_code_cl);
2486 
2487         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2488       }
2489     } else if (thread->is_VM_thread()) {
2490       if (thread->claim_oops_do(true, _thread_parity)) {
2491         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2492       }
2493     }
2494   }
2495 };
2496 
2497 class CMRemarkTask: public AbstractGangTask {
2498 private:
2499   ConcurrentMark* _cm;
2500 public:
2501   void work(uint worker_id) {
2502     // Since all available tasks are actually started, we should
2503     // only proceed if we're supposed to be active.
2504     if (worker_id < _cm->active_tasks()) {
2505       CMTask* task = _cm->task(worker_id);
2506       task->record_start_time();
2507       {
2508         ResourceMark rm;
2509         HandleMark hm;
2510 
2511         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2512         Threads::threads_do(&threads_f);
2513       }
2514 
2515       do {
2516         task->do_marking_step(1000000000.0 /* something very large */,
2517                               true         /* do_termination       */,
2518                               false        /* is_serial            */);
2519       } while (task->has_aborted() && !_cm->has_overflown());
2520       // If we overflow, then we do not want to restart. We instead
2521       // want to abort remark and do concurrent marking again.
2522       task->record_end_time();
2523     }
2524   }
2525 
2526   CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2527     AbstractGangTask("Par Remark"), _cm(cm) {
2528     _cm->terminator()->reset_for_reuse(active_workers);
2529   }
2530 };
2531 
2532 void ConcurrentMark::checkpointRootsFinalWork() {
2533   ResourceMark rm;
2534   HandleMark   hm;
2535   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2536 
2537   G1CMTraceTime trace("Finalize Marking", G1Log::finer());
2538 
2539   g1h->ensure_parsability(false);
2540 
2541   // this is remark, so we'll use up all active threads
2542   uint active_workers = g1h->workers()->active_workers();
2543   set_concurrency_and_phase(active_workers, false /* concurrent */);
2544   // Leave _parallel_marking_threads at its
2545   // value originally calculated in the ConcurrentMark
2546   // constructor and pass the number of active workers
2547   // through the gang in the task.
2548 
2549   {
2550     StrongRootsScope srs(active_workers);
2551 
2552     CMRemarkTask remarkTask(this, active_workers);
2553     // We will start all available threads, even if we decide that the
2554     // active_workers will be fewer. The extra ones will just bail out
2555     // immediately.
2556     g1h->workers()->run_task(&remarkTask);
2557   }
2558 
2559   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2560   guarantee(has_overflown() ||
2561             satb_mq_set.completed_buffers_num() == 0,
2562             "Invariant: has_overflown = %s, num buffers = %d",
2563             BOOL_TO_STR(has_overflown()),
2564             satb_mq_set.completed_buffers_num());
2565 
2566   print_stats();
2567 }
2568 
2569 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2570   // Note we are overriding the read-only view of the prev map here, via
2571   // the cast.
2572   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2573 }
2574 
2575 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2576   _nextMarkBitMap->clearRange(mr);
2577 }
2578 
2579 HeapRegion*
2580 ConcurrentMark::claim_region(uint worker_id) {
2581   // "checkpoint" the finger
2582   HeapWord* finger = _finger;
2583 
2584   // _heap_end will not change underneath our feet; it only changes at
2585   // yield points.
2586   while (finger < _heap_end) {
2587     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2588 
2589     // Note on how this code handles humongous regions. In the
2590     // normal case the finger will reach the start of a "starts
2591     // humongous" (SH) region. Its end will either be the end of the
2592     // last "continues humongous" (CH) region in the sequence, or the
2593     // standard end of the SH region (if the SH is the only region in
2594     // the sequence). That way claim_region() will skip over the CH
2595     // regions. However, there is a subtle race between a CM thread
2596     // executing this method and a mutator thread doing a humongous
2597     // object allocation. The two are not mutually exclusive as the CM
2598     // thread does not need to hold the Heap_lock when it gets
2599     // here. So there is a chance that claim_region() will come across
2600     // a free region that's in the process of becoming a SH or a CH
2601     // region. In the former case, it will either
2602     //   a) Miss the update to the region's end, in which case it will
2603     //      visit every subsequent CH region, will find their bitmaps
2604     //      empty, and do nothing, or
2605     //   b) Will observe the update of the region's end (in which case
2606     //      it will skip the subsequent CH regions).
2607     // If it comes across a region that suddenly becomes CH, the
2608     // scenario will be similar to b). So, the race between
2609     // claim_region() and a humongous object allocation might force us
2610     // to do a bit of unnecessary work (due to some unnecessary bitmap
2611     // iterations) but it should not introduce any correctness issues.
2612     HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2613 
2614     // heap_region_containing_raw() above may return NULL, since we always
2615     // claim regions up to the end of the heap. In this case, just jump to the next region.
2616     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2617 
2618     // Is the gap between reading the finger and doing the CAS too long?
2619     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2620     if (res == finger && curr_region != NULL) {
2621       // we succeeded
2622       HeapWord*   bottom        = curr_region->bottom();
2623       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2624 
2625       if (verbose_low()) {
2626         gclog_or_tty->print_cr("[%u] curr_region = " PTR_FORMAT " "
2627                                "[" PTR_FORMAT ", " PTR_FORMAT "), "
2628                                "limit = " PTR_FORMAT,
2629                                worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2630       }
2631 
2632       // Notice that _finger == end cannot be guaranteed here, since
2633       // someone else might have moved the finger even further.
2634       assert(_finger >= end, "the finger should have moved forward");
2635 
2636       if (verbose_low()) {
2637         gclog_or_tty->print_cr("[%u] we were successful with region = "
2638                                PTR_FORMAT, worker_id, p2i(curr_region));
2639       }
2640 
2641       if (limit > bottom) {
2642         if (verbose_low()) {
2643           gclog_or_tty->print_cr("[%u] region " PTR_FORMAT " is not empty, "
2644                                  "returning it ", worker_id, p2i(curr_region));
2645         }
2646         return curr_region;
2647       } else {
2648         assert(limit == bottom,
2649                "the region limit should be at bottom");
2650         if (verbose_low()) {
2651           gclog_or_tty->print_cr("[%u] region " PTR_FORMAT " is empty, "
2652                                  "returning NULL", worker_id, p2i(curr_region));
2653         }
2654         // we return NULL and the caller should try calling
2655         // claim_region() again.
2656         return NULL;
2657       }
2658     } else {
2659       assert(_finger > finger, "the finger should have moved forward");
2660       if (verbose_low()) {
2661         if (curr_region == NULL) {
2662           gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, "
2663                                  "global finger = " PTR_FORMAT ", "
2664                                  "our finger = " PTR_FORMAT,
2665                                  worker_id, p2i(_finger), p2i(finger));
2666         } else {
2667           gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
2668                                  "global finger = " PTR_FORMAT ", "
2669                                  "our finger = " PTR_FORMAT,
2670                                  worker_id, p2i(_finger), p2i(finger));
2671         }
2672       }
2673 
2674       // read it again
2675       finger = _finger;
2676     }
2677   }
2678 
2679   return NULL;
2680 }
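
// Editor's sketch of the claim protocol in claim_region() above, with
// made-up addresses:
//
//   finger = 0x14000000   (bottom of region R), end = 0x14100000
//   res = Atomic::cmpxchg_ptr(end, &_finger, finger);
//   res == finger  -> this thread advanced the finger and owns R; it will
//                     scan [bottom, next_top_at_mark_start()).
//   res != finger  -> some other thread moved the finger first; re-read
//                     _finger and retry with the new value.
//
// Because the CAS both claims the region and publishes the new finger in
// one step, two marking threads can never claim the same region.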
2681 
2682 #ifndef PRODUCT
2683 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
2684 private:
2685   G1CollectedHeap* _g1h;
2686   const char* _phase;
2687   int _info;
2688 
2689 public:
2690   VerifyNoCSetOops(const char* phase, int info = -1) :
2691     _g1h(G1CollectedHeap::heap()),
2692     _phase(phase),
2693     _info(info)
2694   { }
2695 
2696   void operator()(oop obj) const {
2697     guarantee(obj->is_oop(),
2698               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2699               p2i(obj), _phase, _info);
2700     guarantee(!_g1h->obj_in_cs(obj),
2701               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2702               p2i(obj), _phase, _info);
2703   }
2704 };
2705 
2706 void ConcurrentMark::verify_no_cset_oops() {
2707   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2708   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2709     return;
2710   }
2711 
2712   // Verify entries on the global mark stack
2713   _markStack.iterate(VerifyNoCSetOops("Stack"));
2714 
2715   // Verify entries on the task queues
2716   for (uint i = 0; i < _max_worker_id; ++i) {
2717     CMTaskQueue* queue = _task_queues->queue(i);
2718     queue->iterate(VerifyNoCSetOops("Queue", i));
2719   }
2720 
2721   // Verify the global finger
2722   HeapWord* global_finger = finger();
2723   if (global_finger != NULL && global_finger < _heap_end) {
2724     // The global finger always points to a heap region boundary. We
2725     // use heap_region_containing_raw() to get the containing region
2726     // given that the global finger could be pointing to a free region
2727     // which subsequently becomes a "continues humongous" region. If that
2728     // happens, heap_region_containing() will return the bottom of the
2729     // corresponding starts humongous region and the check below will
2730     // not hold any more.
2731     // Since we always iterate over all regions, we might get a NULL HeapRegion
2732     // here.
2733     HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2734     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2735               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
2736               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
2737   }
2738 
2739   // Verify the task fingers
2740   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2741   for (uint i = 0; i < parallel_marking_threads(); ++i) {
2742     CMTask* task = _tasks[i];
2743     HeapWord* task_finger = task->finger();
2744     if (task_finger != NULL && task_finger < _heap_end) {
2745       // See above note on the global finger verification.
2746       HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2747       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2748                 !task_hr->in_collection_set(),
2749                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2750                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
2751     }
2752   }
2753 }
2754 #endif // PRODUCT
2755 
2756 // Aggregate the counting data that was constructed concurrently
2757 // with marking.
2758 class AggregateCountDataHRClosure: public HeapRegionClosure {
2759   G1CollectedHeap* _g1h;
2760   ConcurrentMark* _cm;
2761   CardTableModRefBS* _ct_bs;
2762   BitMap* _cm_card_bm;
2763   uint _max_worker_id;
2764 
2765  public:
2766   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2767                               BitMap* cm_card_bm,
2768                               uint max_worker_id) :
2769     _g1h(g1h), _cm(g1h->concurrent_mark()),
2770     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2771     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2772 
2773   bool doHeapRegion(HeapRegion* hr) {
2774     if (hr->is_continues_humongous()) {
2775       // We will ignore these here and process them when their
2776       // associated "starts humongous" region is processed.
2777       // Note that we cannot rely on their associated
2778       // "starts humongous" region to have its bit set to 1
2779       // since, due to the region chunking in the parallel region
2780       // iteration, a "continues humongous" region might be visited
2781       // before its associated "starts humongous".
2782       return false;
2783     }
2784 
2785     HeapWord* start = hr->bottom();
2786     HeapWord* limit = hr->next_top_at_mark_start();
2787     HeapWord* end = hr->end();
2788 
2789     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2790            "Preconditions not met - "
2791            "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2792            "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2793            p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2794 
2795     assert(hr->next_marked_bytes() == 0, "Precondition");
2796 
2797     if (start == limit) {
2798       // NTAMS of this region has not been set so nothing to do.
2799       return false;
2800     }
2801 
2802     // 'start' should be in the heap.
2803     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2804     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2805     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2806 
2807     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2808     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2809     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2810 
2811     // If ntams is not card aligned then we bump card bitmap index
2812     // for limit so that we get all the cards spanned by
2813     // the object ending at ntams.
2814     // Note: if this is the last region in the heap then ntams
2815     // could actually be just beyond the end of the heap;
2816     // limit_idx will then correspond to a (non-existent) card
2817     // that is also outside the heap.
2818     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2819       limit_idx += 1;
2820     }
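    // Worked example (editorial illustration; the card size and indices are
    // assumed, not taken from the original sources): with 512-byte cards,
    // suppose 'start' is card aligned at card index 100 and ntams ('limit')
    // falls 40 bytes into card index 107. limit_idx is then initially 107
    // and, because 'limit' is not card aligned, is bumped to 108 so that
    // card 107 (which holds the tail of the object ending at ntams) is still
    // covered by the [start_idx, limit_idx) range unioned into the global
    // card bitmap below.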
2821 
2822     assert(limit_idx <= end_idx, "or else use atomics");
2823 
2824     // Aggregate the "stripe" in the count data associated with hr.
2825     uint hrm_index = hr->hrm_index();
2826     size_t marked_bytes = 0;
2827 
2828     for (uint i = 0; i < _max_worker_id; i += 1) {
2829       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2830       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2831 
2832       // Fetch the marked_bytes in this region for task i and
2833       // add it to the running total for this region.
2834       marked_bytes += marked_bytes_array[hrm_index];
2835 
2836       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2837       // into the global card bitmap.
2838       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2839 
2840       while (scan_idx < limit_idx) {
2841         assert(task_card_bm->at(scan_idx) == true, "should be");
2842         _cm_card_bm->set_bit(scan_idx);
2843         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2844 
2845         // BitMap::get_next_one_offset() can handle the case when
2846         // its left_offset parameter is greater than its right_offset
2847         // parameter. It does, however, have an early exit if
2848         // left_offset == right_offset. So let's limit the value
2849         // passed in for left offset here.
2850         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2851         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2852       }
2853     }
2854 
2855     // Update the marked bytes for this region.
2856     hr->add_to_marked_bytes(marked_bytes);
2857 
2858     // Next heap region
2859     return false;
2860   }
2861 };
2862 
2863 class G1AggregateCountDataTask: public AbstractGangTask {
2864 protected:
2865   G1CollectedHeap* _g1h;
2866   ConcurrentMark* _cm;
2867   BitMap* _cm_card_bm;
2868   uint _max_worker_id;
2869   uint _active_workers;
2870   HeapRegionClaimer _hrclaimer;
2871 
2872 public:
2873   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2874                            ConcurrentMark* cm,
2875                            BitMap* cm_card_bm,
2876                            uint max_worker_id,
2877                            uint n_workers) :
2878       AbstractGangTask("Count Aggregation"),
2879       _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2880       _max_worker_id(max_worker_id),
2881       _active_workers(n_workers),
2882       _hrclaimer(_active_workers) {
2883   }
2884 
2885   void work(uint worker_id) {
2886     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
2887 
2888     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
2889   }
2890 };
2891 
2892 
2893 void ConcurrentMark::aggregate_count_data() {
2894   uint n_workers = _g1h->workers()->active_workers();
2895 
2896   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
2897                                            _max_worker_id, n_workers);
2898 
2899   _g1h->workers()->run_task(&g1_par_agg_task);
2900 }
2901 
2902 // Clear the per-worker arrays used to store the per-region counting data
2903 void ConcurrentMark::clear_all_count_data() {
2904   // Clear the global card bitmap - it will be filled during
2905   // liveness count aggregation (during remark) and the
2906   // final counting task.
2907   _card_bm.clear();
2908 
2909   // Clear the global region bitmap - it will be filled as part
2910   // of the final counting task.
2911   _region_bm.clear();
2912 
2913   uint max_regions = _g1h->max_regions();
2914   assert(_max_worker_id > 0, "uninitialized");
2915 
2916   for (uint i = 0; i < _max_worker_id; i += 1) {
2917     BitMap* task_card_bm = count_card_bitmap_for(i);
2918     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
2919 
2920     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
2921     assert(marked_bytes_array != NULL, "uninitialized");
2922 
2923     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
2924     task_card_bm->clear();
2925   }
2926 }
2927 
2928 void ConcurrentMark::print_stats() {
2929   if (verbose_stats()) {
2930     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2931     for (size_t i = 0; i < _active_tasks; ++i) {
2932       _tasks[i]->print_stats();
2933       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2934     }
2935   }
2936 }
2937 
2938 // abandon current marking iteration due to a Full GC
2939 void ConcurrentMark::abort() {
2940   if (!cmThread()->during_cycle() || _has_aborted) {
2941     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2942     return;
2943   }
2944 
2945   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2946   // concurrent bitmap clearing.
2947   _nextMarkBitMap->clearAll();
2948 
2949   // Note we cannot clear the previous marking bitmap here
2950   // since VerifyDuringGC verifies the objects marked during
2951   // a full GC against the previous bitmap.
2952 
2953   // Clear the liveness counting data
2954   clear_all_count_data();
2955   // Empty mark stack
2956   reset_marking_state();
2957   for (uint i = 0; i < _max_worker_id; ++i) {
2958     _tasks[i]->clear_region_fields();
2959   }
2960   _first_overflow_barrier_sync.abort();
2961   _second_overflow_barrier_sync.abort();
2962   _has_aborted = true;
2963 
2964   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2965   satb_mq_set.abandon_partial_marking();
2966   // This can be called either during or outside marking, we'll read
2967   // the expected_active value from the SATB queue set.
2968   satb_mq_set.set_active_all_threads(
2969                                  false, /* new active value */
2970                                  satb_mq_set.is_active() /* expected_active */);
2971 
2972   _g1h->trace_heap_after_concurrent_cycle();
2973   _g1h->register_concurrent_cycle_end();
2974 }
2975 
2976 static void print_ms_time_info(const char* prefix, const char* name,
2977                                NumberSeq& ns) {
2978   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2979                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2980   if (ns.num() > 0) {
2981     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2982                            prefix, ns.sd(), ns.maximum());
2983   }
2984 }
2985 
2986 void ConcurrentMark::print_summary_info() {
2987   gclog_or_tty->print_cr(" Concurrent marking:");
2988   print_ms_time_info("  ", "init marks", _init_times);
2989   print_ms_time_info("  ", "remarks", _remark_times);
2990   {
2991     print_ms_time_info("     ", "final marks", _remark_mark_times);
2992     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2994   }
2995   print_ms_time_info("  ", "cleanups", _cleanup_times);
2996   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
2997                          _total_counting_time,
2998                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2999                           (double)_cleanup_times.num()
3000                          : 0.0));
3001   if (G1ScrubRemSets) {
3002     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3003                            _total_rs_scrub_time,
3004                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3005                             (double)_cleanup_times.num()
3006                            : 0.0));
3007   }
3008   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3009                          (_init_times.sum() + _remark_times.sum() +
3010                           _cleanup_times.sum())/1000.0);
3011   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3012                 "(%8.2f s marking).",
3013                 cmThread()->vtime_accum(),
3014                 cmThread()->vtime_mark_accum());
3015 }
3016 
3017 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3018   _parallel_workers->print_worker_threads_on(st);
3019 }
3020 
3021 void ConcurrentMark::print_on_error(outputStream* st) const {
3022   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
3023       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
3024   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
3025   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
3026 }
3027 
3028 // We take a break if someone is trying to stop the world.
3029 bool ConcurrentMark::do_yield_check(uint worker_id) {
3030   if (SuspendibleThreadSet::should_yield()) {
3031     if (worker_id == 0) {
3032       _g1h->g1_policy()->record_concurrent_pause();
3033     }
3034     SuspendibleThreadSet::yield();
3035     return true;
3036   } else {
3037     return false;
3038   }
3039 }
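// Editorial sketch (not part of the original sources): a typical caller-side
// pattern for do_yield_check(). The loop structure and helper names below are
// assumptions for illustration only, not the actual call sites in this file.
//
//   while (more_marking_work_to_do()) {
//     do_some_marking_work();
//     if (cm->do_yield_check(worker_id)) {
//       // We yielded for a safepoint; the heap may have changed underneath
//       // us, so re-check the abort and overflow conditions before going on.
//     }
//   }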
3040 
3041 #ifndef PRODUCT
3042 // for debugging purposes
3043 void ConcurrentMark::print_finger() {
3044   gclog_or_tty->print_cr("heap [" PTR_FORMAT ", " PTR_FORMAT "), global finger = " PTR_FORMAT,
3045                          p2i(_heap_start), p2i(_heap_end), p2i(_finger));
3046   for (uint i = 0; i < _max_worker_id; ++i) {
3047     gclog_or_tty->print("   %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
3048   }
3049   gclog_or_tty->cr();
3050 }
3051 #endif
3052 
3053 // Closure for iteration over bitmaps
3054 class CMBitMapClosure : public BitMapClosure {
3055 private:
3056   // the bitmap that is being iterated over
3057   CMBitMap*                   _nextMarkBitMap;
3058   ConcurrentMark*             _cm;
3059   CMTask*                     _task;
3060 
3061 public:
3062   CMBitMapClosure(CMTask* task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3063     _nextMarkBitMap(nextMarkBitMap), _cm(cm), _task(task) { }
3064 
3065   bool do_bit(size_t offset) {
3066     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3067     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3068     assert(addr < _cm->finger(), "invariant");
3069     assert(addr >= _task->finger(), "invariant");
3070 
3071     // We move this task's local finger along.
3072     _task->move_finger_to(addr);
3073 
3074     _task->scan_object(oop(addr));
3075     // we only partially drain the local queue and global stack
3076     _task->drain_local_queue(true);
3077     _task->drain_global_stack(true);
3078 
3079     // if the has_aborted flag has been raised, we need to bail out of
3080     // the iteration
3081     return !_task->has_aborted();
3082   }
3083 };
3084 
3085 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
3086   ReferenceProcessor* result = NULL;
3087   if (G1UseConcMarkReferenceProcessing) {
3088     result = g1h->ref_processor_cm();
3089     assert(result != NULL, "should not be NULL");
3090   }
3091   return result;
3092 }
3093 
3094 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3095                                ConcurrentMark* cm,
3096                                CMTask* task)
3097   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
3098     _g1h(g1h), _cm(cm), _task(task)
3099 { }
3100 
3101 void CMTask::setup_for_region(HeapRegion* hr) {
3102   assert(hr != NULL,
3103         "claim_region() should have filtered out NULL regions");
3104   assert(!hr->is_continues_humongous(),
3105         "claim_region() should have filtered out continues humongous regions");
3106 
3107   if (_cm->verbose_low()) {
3108     gclog_or_tty->print_cr("[%u] setting up for region " PTR_FORMAT,
3109                            _worker_id, p2i(hr));
3110   }
3111 
3112   _curr_region  = hr;
3113   _finger       = hr->bottom();
3114   update_region_limit();
3115 }
3116 
3117 void CMTask::update_region_limit() {
3118   HeapRegion* hr            = _curr_region;
3119   HeapWord* bottom          = hr->bottom();
3120   HeapWord* limit           = hr->next_top_at_mark_start();
3121 
3122   if (limit == bottom) {
3123     if (_cm->verbose_low()) {
3124       gclog_or_tty->print_cr("[%u] found an empty region "
3125                              "[" PTR_FORMAT ", " PTR_FORMAT ")",
3126                              _worker_id, p2i(bottom), p2i(limit));
3127     }
3128     // The region was collected underneath our feet.
3129     // We set the finger to bottom to ensure that the bitmap
3130     // iteration that will follow this will not do anything.
3131     // (this is not a condition that holds when we set the region up,
3132     // as the region is not supposed to be empty in the first place)
3133     _finger = bottom;
3134   } else if (limit >= _region_limit) {
3135     assert(limit >= _finger, "peace of mind");
3136   } else {
3137     assert(limit < _region_limit, "only way to get here");
3138     // This can happen under some pretty unusual circumstances.  An
3139     // evacuation pause empties the region underneath our feet (NTAMS
3140     // at bottom). We then do some allocation in the region (NTAMS
3141     // stays at bottom), followed by the region being used as a GC
3142     // alloc region (NTAMS will move to top() and the objects
3143     // originally below it will be grayed). All objects now marked in
3144     // the region are explicitly grayed, if below the global finger,
3145     // and in fact we do not need to scan anything else. So, we simply
3146     // set _finger to be limit to ensure that the bitmap iteration
3147     // doesn't do anything.
3148     _finger = limit;
3149   }
3150 
3151   _region_limit = limit;
3152 }
3153 
3154 void CMTask::giveup_current_region() {
3155   assert(_curr_region != NULL, "invariant");
3156   if (_cm->verbose_low()) {
3157     gclog_or_tty->print_cr("[%u] giving up region " PTR_FORMAT,
3158                            _worker_id, p2i(_curr_region));
3159   }
3160   clear_region_fields();
3161 }
3162 
3163 void CMTask::clear_region_fields() {
3164   // Values for these three fields that indicate that we're not
3165   // holding on to a region.
3166   _curr_region   = NULL;
3167   _finger        = NULL;
3168   _region_limit  = NULL;
3169 }
3170 
3171 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3172   if (cm_oop_closure == NULL) {
3173     assert(_cm_oop_closure != NULL, "invariant");
3174   } else {
3175     assert(_cm_oop_closure == NULL, "invariant");
3176   }
3177   _cm_oop_closure = cm_oop_closure;
3178 }
3179 
3180 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3181   guarantee(nextMarkBitMap != NULL, "invariant");
3182 
3183   if (_cm->verbose_low()) {
3184     gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3185   }
3186 
3187   _nextMarkBitMap                = nextMarkBitMap;
3188   clear_region_fields();
3189 
3190   _calls                         = 0;
3191   _elapsed_time_ms               = 0.0;
3192   _termination_time_ms           = 0.0;
3193   _termination_start_time_ms     = 0.0;
3194 }
3195 
3196 bool CMTask::should_exit_termination() {
3197   regular_clock_call();
3198   // This is called when we are in the termination protocol. We should
3199   // quit if, for some reason, this task wants to abort or the global
3200   // stack is not empty (this means that we can get work from it).
3201   return !_cm->mark_stack_empty() || has_aborted();
3202 }
3203 
3204 void CMTask::reached_limit() {
3205   assert(_words_scanned >= _words_scanned_limit ||
3206          _refs_reached >= _refs_reached_limit,
3207          "shouldn't have been called otherwise");
3208   regular_clock_call();
3209 }
3210 
3211 void CMTask::regular_clock_call() {
3212   if (has_aborted()) return;
3213 
3214   // First, we need to recalculate the words scanned and refs reached
3215   // limits for the next clock call.
3216   recalculate_limits();
3217 
3218   // During the regular clock call we do the following
3219 
3220   // (1) If an overflow has been flagged, then we abort.
3221   if (_cm->has_overflown()) {
3222     set_has_aborted();
3223     return;
3224   }
3225 
3226   // If we are not concurrent (i.e. we're doing remark) we don't need
3227   // to check anything else. The other steps are only needed during
3228   // the concurrent marking phase.
3229   if (!concurrent()) return;
3230 
3231   // (2) If marking has been aborted for Full GC, then we also abort.
3232   if (_cm->has_aborted()) {
3233     set_has_aborted();
3234     return;
3235   }
3236 
3237   double curr_time_ms = os::elapsedVTime() * 1000.0;
3238 
3239   // (3) We check whether we should yield. If we have to, then we abort.
3240   if (SuspendibleThreadSet::should_yield()) {
3241     // We should yield. To do this we abort the task. The caller is
3242     // responsible for yielding.
3243     set_has_aborted();
3244     return;
3245   }
3246 
3247   // (4) We check whether we've reached our time quota. If we have,
3248   // then we abort.
3249   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3250   if (elapsed_time_ms > _time_target_ms) {
3251     set_has_aborted();
3252     _has_timed_out = true;
3253     return;
3254   }
3255 
3256   // (5) Finally, we check whether there are enough completed SATB
3257   // buffers available for processing. If there are, we abort.
3258   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3259   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3260     if (_cm->verbose_low()) {
3261       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3262                              _worker_id);
3263     }
3264     // we do need to process SATB buffers, we'll abort and restart
3265     // the marking task to do so
3266     set_has_aborted();
3267     return;
3268   }
3269 }
3270 
3271 void CMTask::recalculate_limits() {
3272   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3273   _words_scanned_limit      = _real_words_scanned_limit;
3274 
3275   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3276   _refs_reached_limit       = _real_refs_reached_limit;
3277 }
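// Editorial sketch (not part of the original sources): the effect of the
// work-based clock scheme. The helper name note_words_scanned() is an
// assumption for illustration; the real scanning code simply bumps
// _words_scanned and calls reached_limit() when a limit is crossed, which
// funnels into regular_clock_call() above.
//
//   void note_words_scanned(size_t words) {
//     _words_scanned += words;
//     if (_words_scanned >= _words_scanned_limit) {
//       reached_limit();   // checks the abort conditions, recalculates limits
//     }
//   }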
3278 
3279 void CMTask::decrease_limits() {
3280   // This is called when we believe that we're going to do an infrequent
3281   // operation which will increase the per-byte scanned cost (i.e. move
3282   // entries to/from the global stack). It basically tries to decrease the
3283   // scanning limit so that the clock is called earlier.
3284 
3285   if (_cm->verbose_medium()) {
3286     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3287   }
3288 
3289   _words_scanned_limit = _real_words_scanned_limit -
3290     3 * words_scanned_period / 4;
3291   _refs_reached_limit  = _real_refs_reached_limit -
3292     3 * refs_reached_period / 4;
3293 }
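// Worked example (editorial illustration; the period value is assumed): if
// words_scanned_period were 12K words and the limits had just been
// recalculated, then after decrease_limits() the effective limit becomes
//   _real_words_scanned_limit - 3 * 12K / 4 = _real_words_scanned_limit - 9K
// so the clock fires after only about 3K more scanned words instead of a
// full 12K-word period. The refs_reached limit shrinks by 3/4 of its period
// in the same way.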
3294 
3295 void CMTask::move_entries_to_global_stack() {
3296   // local array where we'll store the entries that will be popped
3297   // from the local queue
3298   oop buffer[global_stack_transfer_size];
3299 
3300   int n = 0;
3301   oop obj;
3302   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3303     buffer[n] = obj;
3304     ++n;
3305   }
3306 
3307   if (n > 0) {
3308     // we popped at least one entry from the local queue
3309 
3310     if (!_cm->mark_stack_push(buffer, n)) {
3311       if (_cm->verbose_low()) {
3312         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3313                                _worker_id);
3314       }
3315       set_has_aborted();
3316     } else {
3317       // the transfer was successful
3318 
3319       if (_cm->verbose_medium()) {
3320         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3321                                _worker_id, n);
3322       }
3323     }
3324   }
3325 
3326   // this operation was quite expensive, so decrease the limits
3327   decrease_limits();
3328 }
3329 
3330 void CMTask::get_entries_from_global_stack() {
3331   // local array where we'll store the entries that will be popped
3332   // from the global stack.
3333   oop buffer[global_stack_transfer_size];
3334   int n;
3335   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3336   assert(n <= global_stack_transfer_size,
3337          "we should not pop more than the given limit");
3338   if (n > 0) {
3339     // yes, we did actually pop at least one entry
3340     if (_cm->verbose_medium()) {
3341       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3342                              _worker_id, n);
3343     }
3344     for (int i = 0; i < n; ++i) {
3345       bool success = _task_queue->push(buffer[i]);
3346       // We only call this when the local queue is empty or under a
3347       // given target limit. So, we do not expect this push to fail.
3348       assert(success, "invariant");
3349     }
3350   }
3351 
3352   // this operation was quite expensive, so decrease the limits
3353   decrease_limits();
3354 }
3355 
3356 void CMTask::drain_local_queue(bool partially) {
3357   if (has_aborted()) return;
3358 
3359   // Decide what the target size is, depending on whether we're going to
3360   // drain it partially (so that other tasks can steal if they run out
3361   // of things to do) or totally (at the very end).
3362   size_t target_size;
3363   if (partially) {
3364     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3365   } else {
3366     target_size = 0;
3367   }
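  // Editorial illustration (the queue capacity and flag value are assumed,
  // not taken from the original sources): if the task queue could hold, say,
  // 16K entries and GCDrainStackTargetSize were 64, the partial target would
  // be MIN2(16K / 3, 64) = 64, so a partial drain leaves up to 64 entries
  // behind for other tasks to steal.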
3368 
3369   if (_task_queue->size() > target_size) {
3370     if (_cm->verbose_high()) {
3371       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3372                              _worker_id, target_size);
3373     }
3374 
3375     oop obj;
3376     bool ret = _task_queue->pop_local(obj);
3377     while (ret) {
3378       if (_cm->verbose_high()) {
3379         gclog_or_tty->print_cr("[%u] popped " PTR_FORMAT, _worker_id,
3380                                p2i((void*) obj));
3381       }
3382 
3383       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3384       assert(!_g1h->is_on_master_free_list(
3385                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3386 
3387       scan_object(obj);
3388 
3389       if (_task_queue->size() <= target_size || has_aborted()) {
3390         ret = false;
3391       } else {
3392         ret = _task_queue->pop_local(obj);
3393       }
3394     }
3395 
3396     if (_cm->verbose_high()) {
3397       gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3398                              _worker_id, _task_queue->size());
3399     }
3400   }
3401 }
3402 
3403 void CMTask::drain_global_stack(bool partially) {
3404   if (has_aborted()) return;
3405 
3406   // We have a policy to drain the local queue before we attempt to
3407   // drain the global stack.
3408   assert(partially || _task_queue->size() == 0, "invariant");
3409 
3410   // Decide what the target size is, depending on whether we're going to
3411   // drain it partially (so that other tasks can steal if they run out
3412   // of things to do) or totally (at the very end).  Notice that,
3413   // because we move entries from the global stack in chunks or
3414   // because another task might be doing the same, we might in fact
3415   // drop below the target. But this is not a problem.
3416   size_t target_size;
3417   if (partially) {
3418     target_size = _cm->partial_mark_stack_size_target();
3419   } else {
3420     target_size = 0;
3421   }
3422 
3423   if (_cm->mark_stack_size() > target_size) {
3424     if (_cm->verbose_low()) {
3425       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3426                              _worker_id, target_size);
3427     }
3428 
3429     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3430       get_entries_from_global_stack();
3431       drain_local_queue(partially);
3432     }
3433 
3434     if (_cm->verbose_low()) {
3435       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3436                              _worker_id, _cm->mark_stack_size());
3437     }
3438   }
3439 }
3440 
3441 // The SATB queue code makes several assumptions about whether to call the
3442 // par or non-par versions of its methods. This is why some of the code is
3443 // replicated. We should really get rid of the single-threaded version
3444 // of the code to simplify things.
3445 void CMTask::drain_satb_buffers() {
3446   if (has_aborted()) return;
3447 
3448   // We set this so that the regular clock knows that we're in the
3449   // middle of draining buffers and doesn't set the abort flag when it
3450   // notices that SATB buffers are available for draining. It'd be
3451   // very counterproductive if it did that. :-)
3452   _draining_satb_buffers = true;
3453 
3454   CMSATBBufferClosure satb_cl(this, _g1h);
3455   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3456 
3457   // This keeps claiming and applying the closure to completed buffers
3458   // until we run out of buffers or we need to abort.
3459   while (!has_aborted() &&
3460          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3461     if (_cm->verbose_medium()) {
3462       gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3463     }
3464     regular_clock_call();
3465   }
3466 
3467   _draining_satb_buffers = false;
3468 
3469   assert(has_aborted() ||
3470          concurrent() ||
3471          satb_mq_set.completed_buffers_num() == 0, "invariant");
3472 
3473   // again, this was a potentially expensive operation, decrease the
3474   // limits to get the regular clock call early
3475   decrease_limits();
3476 }
3477 
3478 void CMTask::print_stats() {
3479   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3480                          _worker_id, _calls);
3481   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3482                          _elapsed_time_ms, _termination_time_ms);
3483   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3484                          _step_times_ms.num(), _step_times_ms.avg(),
3485                          _step_times_ms.sd());
3486   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3487                          _step_times_ms.maximum(), _step_times_ms.sum());
3488 }
3489 
3490 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3491   return _task_queues->steal(worker_id, hash_seed, obj);
3492 }
3493 
3494 /*****************************************************************************
3495 
3496     The do_marking_step(time_target_ms, ...) method is the building
3497     block of the parallel marking framework. It can be called in parallel
3498     with other invocations of do_marking_step() on different tasks
3499     (but only one per task, obviously) and concurrently with the
3500     mutator threads, or during remark, hence it eliminates the need
3501     for two versions of the code. When called during remark, it will
3502     pick up from where the task left off during the concurrent marking
3503     phase. Interestingly, tasks are also claimable during evacuation
3504     pauses, since do_marking_step() ensures that it aborts before
3505     it needs to yield.
3506 
3507     The data structures that it uses to do marking work are the
3508     following:
3509 
3510       (1) Marking Bitmap. If there are gray objects that appear only
3511       on the bitmap (this happens either when dealing with an overflow
3512       or when the initial marking phase has simply marked the roots
3513       and didn't push them on the stack), then tasks claim heap
3514       regions whose bitmap they then scan to find gray objects. A
3515       global finger indicates where the end of the last claimed region
3516       is. A local finger indicates how far into the region a task has
3517       scanned. The two fingers are used to determine how to gray an
3518       object (i.e. whether simply marking it is OK, as it will be
3519       visited by a task in the future, or whether it needs to be also
3520       pushed on a stack).
3521 
3522       (2) Local Queue. The local queue of the task which is accessed
3523       reasonably efficiently by the task. Other tasks can steal from
3524       it when they run out of work. Throughout the marking phase, a
3525       task attempts to keep its local queue short but not totally
3526       empty, so that entries are available for stealing by other
3527       tasks. Only when there is no more work will a task totally
3528       drain its local queue.
3529 
3530       (3) Global Mark Stack. This handles local queue overflow. During
3531       marking only sets of entries are moved between it and the local
3532       queues, as access to it requires a mutex and more fine-grained
3533       interaction with it might cause contention. If it
3534       overflows, then the marking phase should restart and iterate
3535       over the bitmap to identify gray objects. Throughout the marking
3536       phase, tasks attempt to keep the global mark stack at a small
3537       length but not totally empty, so that entries are available for
3538       popping by other tasks. Only when there is no more work will tasks
3539       totally drain the global mark stack.
3540 
3541       (4) SATB Buffer Queue. This is where completed SATB buffers are
3542       made available. Buffers are regularly removed from this queue
3543       and scanned for roots, so that the queue doesn't get too
3544       long. During remark, all completed buffers are processed, as
3545       well as the filled in parts of any uncompleted buffers.
3546 
3547     The do_marking_step() method tries to abort when the time target
3548     has been reached. There are a few other cases when the
3549     do_marking_step() method also aborts:
3550 
3551       (1) When the marking phase has been aborted (after a Full GC).
3552 
3553       (2) When a global overflow (on the global stack) has been
3554       triggered. Before the task aborts, it will actually sync up with
3555       the other tasks to ensure that all the marking data structures
3556       (local queues, stacks, fingers etc.)  are re-initialized so that
3557       when do_marking_step() completes, the marking phase can
3558       immediately restart.
3559 
3560       (3) When enough completed SATB buffers are available. The
3561       do_marking_step() method only tries to drain SATB buffers right
3562       at the beginning. So, if enough buffers are available, the
3563       marking step aborts and the SATB buffers are processed at
3564       the beginning of the next invocation.
3565 
3566       (4) To yield. When we have to yield, we abort and yield
3567       right at the end of do_marking_step(). This saves us from a lot
3568       of hassle as, by yielding, we might allow a Full GC. If this
3569       happens then objects will be compacted underneath our feet, the
3570       heap might shrink, etc. We save checking for this by just
3571       aborting and doing the yield right at the end.
3572 
3573     From the above it follows that the do_marking_step() method should
3574     be called in a loop (or, otherwise, regularly) until it completes.
3575 
3576     If a marking step completes without its has_aborted() flag being
3577     true, it means it has completed the current marking phase (and
3578     also all other marking tasks have done so and have all synced up).
3579 
3580     A method called regular_clock_call() is invoked "regularly" (in
3581     sub ms intervals) throughout marking. It is this clock method that
3582     checks all the abort conditions which were mentioned above and
3583     decides when the task should abort. A work-based scheme is used to
3584     trigger this clock method: when the number of object words the
3585     marking phase has scanned or the number of references the marking
3586     phase has visited reach a given limit. Additional invocations of
3587     the clock method have been planted in a few other strategic places
3588     too. The initial reason for the clock method was to avoid calling
3589     vtime too regularly, as it is quite expensive. So, once it was in
3590     place, it was natural to piggy-back all the other conditions on it
3591     too and not constantly check them throughout the code.
3592 
3593     If do_termination is true then do_marking_step will enter its
3594     termination protocol.
3595 
3596     The value of is_serial must be true when do_marking_step is being
3597     called serially (i.e. by the VMThread) and do_marking_step should
3598     skip any synchronization in the termination and overflow code.
3599     Examples include the serial remark code and the serial reference
3600     processing closures.
3601 
3602     The value of is_serial must be false when do_marking_step is
3603     being called by any of the worker threads in a work gang.
3604     Examples include the concurrent marking code (CMMarkingTask),
3605     the MT remark code, and the MT reference processing closures.
3606 
3607  *****************************************************************************/
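// Editorial sketch (not part of the original sources) of the calling pattern
// described above: a worker keeps invoking do_marking_step() until the step
// finishes without aborting. The surrounding loop, the yield handling and the
// use of G1ConcMarkStepDurationMillis as the time target are illustrative
// assumptions only.
//
//   CMTask* task = ...;
//   do {
//     task->do_marking_step(G1ConcMarkStepDurationMillis,
//                           true  /* do_termination */,
//                           false /* is_serial */);
//     // If the step aborted in order to yield, the caller yields to the
//     // safepoint here before retrying the step.
//   } while (task->has_aborted() && !cm->has_aborted());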
3608 
3609 void CMTask::do_marking_step(double time_target_ms,
3610                              bool do_termination,
3611                              bool is_serial) {
3612   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3613   assert(concurrent() == _cm->concurrent(), "they should be the same");
3614 
3615   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3616   assert(_task_queues != NULL, "invariant");
3617   assert(_task_queue != NULL, "invariant");
3618   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3619 
3620   assert(!_claimed,
3621          "only one thread should claim this task at any one time");
3622 
3623   // OK, this doesn't safeguard against all possible scenarios, as it is
3624   // possible for two threads to set the _claimed flag at the same
3625   // time. But it is only for debugging purposes anyway and it will
3626   // catch most problems.
3627   _claimed = true;
3628 
3629   _start_time_ms = os::elapsedVTime() * 1000.0;
3630 
3631   // If do_stealing is true then do_marking_step will attempt to
3632   // steal work from the other CMTasks. It only makes sense to
3633   // enable stealing when the termination protocol is enabled
3634   // and do_marking_step() is not being called serially.
3635   bool do_stealing = do_termination && !is_serial;
3636 
3637   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
3638   _time_target_ms = time_target_ms - diff_prediction_ms;
3639 
3640   // set up the variables that are used in the work-based scheme to
3641   // call the regular clock method
3642   _words_scanned = 0;
3643   _refs_reached  = 0;
3644   recalculate_limits();
3645 
3646   // clear all flags
3647   clear_has_aborted();
3648   _has_timed_out = false;
3649   _draining_satb_buffers = false;
3650 
3651   ++_calls;
3652 
3653   if (_cm->verbose_low()) {
3654     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
3655                            "target = %1.2lfms >>>>>>>>>>",
3656                            _worker_id, _calls, _time_target_ms);
3657   }
3658 
3659   // Set up the bitmap and oop closures. Anything that uses them is
3660   // eventually called from this method, so it is OK to allocate them
3661   // locally, on this frame's stack.
3662   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3663   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3664   set_cm_oop_closure(&cm_oop_closure);
3665 
3666   if (_cm->has_overflown()) {
3667     // This can happen if the mark stack overflows during a GC pause
3668     // and this task, after a yield point, restarts. We have to abort
3669     // as we need to get into the overflow protocol which happens
3670     // right at the end of this task.
3671     set_has_aborted();
3672   }
3673 
3674   // First drain any available SATB buffers. After this, we will not
3675   // look at SATB buffers before the next invocation of this method.
3676   // If enough completed SATB buffers are queued up, the regular clock
3677   // will abort this task so that it restarts.
3678   drain_satb_buffers();
3679   // ...then partially drain the local queue and the global stack
3680   drain_local_queue(true);
3681   drain_global_stack(true);
3682 
3683   do {
3684     if (!has_aborted() && _curr_region != NULL) {
3685       // This means that we're already holding on to a region.
3686       assert(_finger != NULL, "if region is not NULL, then the finger "
3687              "should not be NULL either");
3688 
3689       // We might have restarted this task after an evacuation pause
3690       // which might have evacuated the region we're holding on to
3691       // underneath our feet. Let's read its limit again to make sure
3692       // that we do not iterate over a region of the heap that
3693       // contains garbage (update_region_limit() will also move
3694       // _finger to the start of the region if it is found empty).
3695       update_region_limit();
3696       // We will start from _finger not from the start of the region,
3697       // as we might be restarting this task after aborting half-way
3698       // through scanning this region. In this case, _finger points to
3699       // the address where we last found a marked object. If this is a
3700       // fresh region, _finger points to start().
3701       MemRegion mr = MemRegion(_finger, _region_limit);
3702 
3703       if (_cm->verbose_low()) {
3704         gclog_or_tty->print_cr("[%u] we're scanning part "
3705                                "[" PTR_FORMAT ", " PTR_FORMAT ") "
3706                                "of region " HR_FORMAT,
3707                                _worker_id, p2i(_finger), p2i(_region_limit),
3708                                HR_FORMAT_PARAMS(_curr_region));
3709       }
3710 
3711       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
3712              "humongous regions should go around loop once only");
3713 
3714       // Some special cases:
3715       // If the memory region is empty, we can just give up the region.
3716       // If the current region is humongous then we only need to check
3717       // the bitmap for the bit associated with the start of the object,
3718       // scan the object if it's live, and give up the region.
3719       // Otherwise, let's iterate over the bitmap of the part of the region
3720       // that is left.
3721       // If the iteration is successful, give up the region.
3722       if (mr.is_empty()) {
3723         giveup_current_region();
3724         regular_clock_call();
3725       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
3726         if (_nextMarkBitMap->isMarked(mr.start())) {
3727           // The object is marked - apply the closure
3728           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
3729           bitmap_closure.do_bit(offset);
3730         }
3731         // Even if this task aborted while scanning the humongous object
3732         // we can (and should) give up the current region.
3733         giveup_current_region();
3734         regular_clock_call();
3735       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3736         giveup_current_region();
3737         regular_clock_call();
3738       } else {
3739         assert(has_aborted(), "currently the only way to do so");
3740         // The only way to abort the bitmap iteration is to return
3741         // false from the do_bit() method. However, inside the
3742         // do_bit() method we move the _finger to point to the
3743         // object currently being looked at. So, if we bail out, we
3744         // have definitely set _finger to something non-null.
3745         assert(_finger != NULL, "invariant");
3746 
3747         // Region iteration was actually aborted. So now _finger
3748         // points to the address of the object we last scanned. If we
3749         // leave it there, when we restart this task, we will rescan
3750         // the object. It is easy to avoid this. We move the finger by
3751         // enough to point to the next possible object header (the
3752         // bitmap knows by how much we need to move it as it knows its
3753         // granularity).
3754         assert(_finger < _region_limit, "invariant");
3755         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
3756         // Check if bitmap iteration was aborted while scanning the last object
3757         if (new_finger >= _region_limit) {
3758           giveup_current_region();
3759         } else {
3760           move_finger_to(new_finger);
3761         }
3762       }
3763     }
3764     // At this point we have either completed iterating over the
3765     // region we were holding on to, or we have aborted.
3766 
3767     // We then partially drain the local queue and the global stack.
3768     // (Do we really need this?)
3769     drain_local_queue(true);
3770     drain_global_stack(true);
3771 
3772     // Read the note on the claim_region() method on why it might
3773     // return NULL with potentially more regions available for
3774     // claiming and why we have to check out_of_regions() to determine
3775     // whether we're done or not.
3776     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3777       // We are going to try to claim a new region. We should have
3778       // given up on the previous one.
3779       // Separated the asserts so that we know which one fires.
3780       assert(_curr_region  == NULL, "invariant");
3781       assert(_finger       == NULL, "invariant");
3782       assert(_region_limit == NULL, "invariant");
3783       if (_cm->verbose_low()) {
3784         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
3785       }
3786       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3787       if (claimed_region != NULL) {
3788         // Yes, we managed to claim one
3789         if (_cm->verbose_low()) {
3790           gclog_or_tty->print_cr("[%u] we successfully claimed "
3791                                  "region " PTR_FORMAT,
3792                                  _worker_id, p2i(claimed_region));
3793         }
3794 
3795         setup_for_region(claimed_region);
3796         assert(_curr_region == claimed_region, "invariant");
3797       }
3798       // It is important to call the regular clock here. It might take
3799       // a while to claim a region if, for example, we hit a large
3800       // block of empty regions. So we need to call the regular clock
3801       // method once round the loop to make sure it's called
3802       // frequently enough.
3803       regular_clock_call();
3804     }
3805 
3806     if (!has_aborted() && _curr_region == NULL) {
3807       assert(_cm->out_of_regions(),
3808              "at this point we should be out of regions");
3809     }
3810   } while ( _curr_region != NULL && !has_aborted());
3811 
3812   if (!has_aborted()) {
3813     // We cannot check whether the global stack is empty, since other
3814     // tasks might be pushing objects to it concurrently.
3815     assert(_cm->out_of_regions(),
3816            "at this point we should be out of regions");
3817 
3818     if (_cm->verbose_low()) {
3819       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
3820     }
3821 
3822     // Try to reduce the number of available SATB buffers so that
3823     // remark has less work to do.
3824     drain_satb_buffers();
3825   }
3826 
3827   // Since we've done everything else, we can now totally drain the
3828   // local queue and global stack.
3829   drain_local_queue(false);
3830   drain_global_stack(false);
3831 
3832   // Attempt at work stealing from other tasks' queues.
3833   if (do_stealing && !has_aborted()) {
3834     // We have not aborted. This means that we have finished all that
3835     // we could. Let's try to do some stealing...
3836 
3837     // We cannot check whether the global stack is empty, since other
3838     // tasks might be pushing objects to it concurrently.
3839     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3840            "only way to reach here");
3841 
3842     if (_cm->verbose_low()) {
3843       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
3844     }
3845 
3846     while (!has_aborted()) {
3847       oop obj;
3848       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
3849         if (_cm->verbose_medium()) {
3850           gclog_or_tty->print_cr("[%u] stolen " PTR_FORMAT " successfully",
3851                                  _worker_id, p2i((void*) obj));
3852         }
3853 
3854         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3855                "any stolen object should be marked");
3856         scan_object(obj);
3857 
3858         // And since we're towards the end, let's totally drain the
3859         // local queue and global stack.
3860         drain_local_queue(false);
3861         drain_global_stack(false);
3862       } else {
3863         break;
3864       }
3865     }
3866   }
3867 
3868   // If we are about to wrap up and go into termination, check if we
3869   // should raise the overflow flag.
3870   if (do_termination && !has_aborted()) {
3871     if (_cm->force_overflow()->should_force()) {
3872       _cm->set_has_overflown();
3873       regular_clock_call();
3874     }
3875   }
3876 
3877   // We still haven't aborted. Now, let's try to get into the
3878   // termination protocol.
3879   if (do_termination && !has_aborted()) {
3880     // We cannot check whether the global stack is empty, since other
3881     // tasks might be concurrently pushing objects on it.
3882     // Separated the asserts so that we know which one fires.
3883     assert(_cm->out_of_regions(), "only way to reach here");
3884     assert(_task_queue->size() == 0, "only way to reach here");
3885 
3886     if (_cm->verbose_low()) {
3887       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
3888     }
3889 
3890     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3891 
3892     // The CMTask class also extends the TerminatorTerminator class,
3893     // hence its should_exit_termination() method will also decide
3894     // whether to exit the termination protocol or not.
3895     bool finished = (is_serial ||
3896                      _cm->terminator()->offer_termination(this));
3897     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3898     _termination_time_ms +=
3899       termination_end_time_ms - _termination_start_time_ms;
3900 
3901     if (finished) {
3902       // We're all done.
3903 
3904       if (_worker_id == 0) {
3905         // let's allow task 0 to do this
3906         if (concurrent()) {
3907           assert(_cm->concurrent_marking_in_progress(), "invariant");
3908           // we need to set this to false before the next
3909           // safepoint. This way we ensure that the marking phase
3910           // doesn't observe any more heap expansions.
3911           _cm->clear_concurrent_marking_in_progress();
3912         }
3913       }
3914 
3915       // We can now guarantee that the global stack is empty, since
3916       // all other tasks have finished. We separated the guarantees so
3917       // that, if a condition is false, we can immediately find out
3918       // which one.
3919       guarantee(_cm->out_of_regions(), "only way to reach here");
3920       guarantee(_cm->mark_stack_empty(), "only way to reach here");
3921       guarantee(_task_queue->size() == 0, "only way to reach here");
3922       guarantee(!_cm->has_overflown(), "only way to reach here");
3923       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3924 
3925       if (_cm->verbose_low()) {
3926         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
3927       }
3928     } else {
3929       // Apparently there's more work to do. Let's abort this task. Its
3930       // caller will restart it and we can hopefully find more things to do.
3931 
3932       if (_cm->verbose_low()) {
3933         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
3934                                _worker_id);
3935       }
3936 
3937       set_has_aborted();
3938     }
3939   }
3940 
3941   // Mainly for debugging purposes to make sure that a pointer to the
3942   // closure which was stack-allocated in this frame doesn't
3943   // escape it by accident.
3944   set_cm_oop_closure(NULL);
3945   double end_time_ms = os::elapsedVTime() * 1000.0;
3946   double elapsed_time_ms = end_time_ms - _start_time_ms;
3947   // Update the step history.
3948   _step_times_ms.add(elapsed_time_ms);
3949 
3950   if (has_aborted()) {
3951     // The task was aborted for some reason.
3952     if (_has_timed_out) {
3953       double diff_ms = elapsed_time_ms - _time_target_ms;
3954       // Keep statistics of how well we did with respect to hitting
3955       // our target only if we actually timed out (if we aborted for
3956       // other reasons, then the results might get skewed).
3957       _marking_step_diffs_ms.add(diff_ms);
3958     }
3959 
3960     if (_cm->has_overflown()) {
3961       // This is the interesting one. We aborted because a global
3962       // overflow was raised. This means we have to restart the
3963       // marking phase and start iterating over regions. However, in
3964       // order to do this we have to make sure that all tasks stop
3965       // what they are doing and re-initialize in a safe manner. We
3966       // will achieve this with the use of two barrier sync points.
3967 
3968       if (_cm->verbose_low()) {
3969         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
3970       }
3971 
3972       if (!is_serial) {
3973         // We only need to enter the sync barrier if being called
3974         // from a parallel context
3975         _cm->enter_first_sync_barrier(_worker_id);
3976 
3977         // When we exit this sync barrier we know that all tasks have
3978         // stopped doing marking work. So, it's now safe to
3979         // re-initialize our data structures. At the end of this method,
3980         // task 0 will clear the global data structures.
3981       }
3982 
3983       // We clear the local state of this task...
3984       clear_region_fields();
3985 
3986       if (!is_serial) {
3987         // ...and enter the second barrier.
3988         _cm->enter_second_sync_barrier(_worker_id);
3989       }
3990       // At this point, if we're during the concurrent phase of
3991       // marking, everything has been re-initialized and we're
3992       // ready to restart.
3993     }
3994 
3995     if (_cm->verbose_low()) {
3996       gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
3997                              "elapsed = %1.2lfms <<<<<<<<<<",
3998                              _worker_id, _time_target_ms, elapsed_time_ms);
3999       if (_cm->has_aborted()) {
4000         gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4001                                _worker_id);
4002       }
4003     }
4004   } else {
4005     if (_cm->verbose_low()) {
4006       gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4007                              "elapsed = %1.2lfms <<<<<<<<<<",
4008                              _worker_id, _time_target_ms, elapsed_time_ms);
4009     }
4010   }
4011 
4012   _claimed = false;
4013 }
4014 
4015 CMTask::CMTask(uint worker_id,
4016                ConcurrentMark* cm,
4017                size_t* marked_bytes,
4018                BitMap* card_bm,
4019                CMTaskQueue* task_queue,
4020                CMTaskQueueSet* task_queues)
4021   : _g1h(G1CollectedHeap::heap()),
4022     _worker_id(worker_id), _cm(cm),
4023     _claimed(false),
4024     _nextMarkBitMap(NULL), _hash_seed(17),
4025     _task_queue(task_queue),
4026     _task_queues(task_queues),
4027     _cm_oop_closure(NULL),
4028     _marked_bytes_array(marked_bytes),
4029     _card_bm(card_bm) {
4030   guarantee(task_queue != NULL, "invariant");
4031   guarantee(task_queues != NULL, "invariant");
4032 
4033   _marking_step_diffs_ms.add(0.5);
4034 }
4035 
4036 // These are formatting macros that are used below to ensure
4037 // consistent formatting. The *_H_* versions are used to format the
4038 // header for a particular value and they should be kept consistent
4039 // with the corresponding macro. Also note that most of the macros add
4040 // the necessary white space (as a prefix) which makes them a bit
4041 // easier to compose.
4042 
4043 // All the output lines are prefixed with this string to be able to
4044 // identify them easily in a large log file.
4045 #define G1PPRL_LINE_PREFIX            "###"
4046 
4047 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
4048 #ifdef _LP64
4049 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4050 #else // _LP64
4051 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4052 #endif // _LP64
4053 
4054 // For per-region info
4055 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4056 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4057 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
4058 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4059 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4060 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4061 
4062 // For summary info
4063 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
4064 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
4065 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
4066 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
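
// For illustration: these macros expand to adjacent string literals, so
// the compiler concatenates them into a single format string. For example,
//   G1PPRL_LINE_PREFIX G1PPRL_TYPE_H_FORMAT
// expands to "###" "   %4s", i.e. the single format string "###   %4s".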
4067 
4068 G1PrintRegionLivenessInfoClosure::
4069 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4070   : _out(out),
4071     _total_used_bytes(0), _total_capacity_bytes(0),
4072     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4073     _hum_used_bytes(0), _hum_capacity_bytes(0),
4074     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
4075     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
4076   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4077   MemRegion g1_reserved = g1h->g1_reserved();
4078   double now = os::elapsedTime();
4079 
4080   // Print the header of the output.
4081   _out->cr();
4082   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4083   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4084                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4085                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4086                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4087                  HeapRegion::GrainBytes);
4088   _out->print_cr(G1PPRL_LINE_PREFIX);
4089   _out->print_cr(G1PPRL_LINE_PREFIX
4090                 G1PPRL_TYPE_H_FORMAT
4091                 G1PPRL_ADDR_BASE_H_FORMAT
4092                 G1PPRL_BYTE_H_FORMAT
4093                 G1PPRL_BYTE_H_FORMAT
4094                 G1PPRL_BYTE_H_FORMAT
4095                 G1PPRL_DOUBLE_H_FORMAT
4096                 G1PPRL_BYTE_H_FORMAT
4097                 G1PPRL_BYTE_H_FORMAT,
4098                 "type", "address-range",
4099                 "used", "prev-live", "next-live", "gc-eff",
4100                 "remset", "code-roots");
4101   _out->print_cr(G1PPRL_LINE_PREFIX
4102                 G1PPRL_TYPE_H_FORMAT
4103                 G1PPRL_ADDR_BASE_H_FORMAT
4104                 G1PPRL_BYTE_H_FORMAT
4105                 G1PPRL_BYTE_H_FORMAT
4106                 G1PPRL_BYTE_H_FORMAT
4107                 G1PPRL_DOUBLE_H_FORMAT
4108                 G1PPRL_BYTE_H_FORMAT
4109                 G1PPRL_BYTE_H_FORMAT,
4110                 "", "",
4111                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4112                 "(bytes)", "(bytes)");
4113 }
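
// Sketch of the header printed by the constructor above (column widths
// elided; values in angle brackets are placeholders):
//   ### PHASE <phase_name> @ <elapsed-seconds>
//   ### HEAP  reserved: <start>-<end>  region-size: <bytes>
//   ###
//   ###  type  address-range  used  prev-live  next-live  gc-eff  remset  code-roots
//   ###                     (bytes) (bytes)   (bytes)  (bytes/ms) (bytes)  (bytes)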
4114 
4115 // Takes a pointer to one of the _hum_* fields, deduces the
4116 // corresponding value for a region in a humongous region series
4117 // (either the region size, or what's left if the _hum_* field is
4118 // < the region size), and updates the _hum_* field accordingly.
4119 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4120   size_t bytes = 0;
4121   // The > 0 check is to deal with the prev and next live bytes, which
4122   // could be 0.
4123   if (*hum_bytes > 0) {
4124     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4125     *hum_bytes -= bytes;
4126   }
4127   return bytes;
4128 }
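
// For example (illustrative numbers): if a humongous series' *hum_bytes
// starts at 2.5 * GrainBytes, three successive calls return GrainBytes,
// GrainBytes and 0.5 * GrainBytes (leaving 0 behind), i.e. the totals
// recorded for the series are spread over its regions in address order.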
4129 
4130 // It deduces the values for a region in a humongous region series
4131 // from the _hum_* fields and updates those accordingly. It assumes
4132 // that the _hum_* fields have already been set up from the "starts
4133 // humongous" region and that we visit the regions in address order.
4134 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4135                                                      size_t* capacity_bytes,
4136                                                      size_t* prev_live_bytes,
4137                                                      size_t* next_live_bytes) {
4138   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4139   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4140   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4141   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4142   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4143 }
4144 
4145 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4146   const char* type       = r->get_type_str();
4147   HeapWord* bottom       = r->bottom();
4148   HeapWord* end          = r->end();
4149   size_t capacity_bytes  = r->capacity();
4150   size_t used_bytes      = r->used();
4151   size_t prev_live_bytes = r->live_bytes();
4152   size_t next_live_bytes = r->next_live_bytes();
4153   double gc_eff          = r->gc_efficiency();
4154   size_t remset_bytes    = r->rem_set()->mem_size();
4155   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4156 
4157   if (r->is_starts_humongous()) {
4158     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4159            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4160            "they should have been zeroed after the last time we used them");
4161     // Set up the _hum_* fields.
4162     _hum_capacity_bytes  = capacity_bytes;
4163     _hum_used_bytes      = used_bytes;
4164     _hum_prev_live_bytes = prev_live_bytes;
4165     _hum_next_live_bytes = next_live_bytes;
4166     get_hum_bytes(&used_bytes, &capacity_bytes,
4167                   &prev_live_bytes, &next_live_bytes);
4168     end = bottom + HeapRegion::GrainWords;
4169   } else if (r->is_continues_humongous()) {
4170     get_hum_bytes(&used_bytes, &capacity_bytes,
4171                   &prev_live_bytes, &next_live_bytes);
4172     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4173   }
4174 
4175   _total_used_bytes      += used_bytes;
4176   _total_capacity_bytes  += capacity_bytes;
4177   _total_prev_live_bytes += prev_live_bytes;
4178   _total_next_live_bytes += next_live_bytes;
4179   _total_remset_bytes    += remset_bytes;
4180   _total_strong_code_roots_bytes += strong_code_roots_bytes;
4181 
4182   // Print a line for this particular region.
4183   _out->print_cr(G1PPRL_LINE_PREFIX
4184                  G1PPRL_TYPE_FORMAT
4185                  G1PPRL_ADDR_BASE_FORMAT
4186                  G1PPRL_BYTE_FORMAT
4187                  G1PPRL_BYTE_FORMAT
4188                  G1PPRL_BYTE_FORMAT
4189                  G1PPRL_DOUBLE_FORMAT
4190                  G1PPRL_BYTE_FORMAT
4191                  G1PPRL_BYTE_FORMAT,
4192                  type, p2i(bottom), p2i(end),
4193                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4194                  remset_bytes, strong_code_roots_bytes);
4195 
4196   return false;
4197 }
4198 
4199 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4200   // Add the free-list and static memory overheads to the total remembered set size.
4201   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
4202   // Print the footer of the output.
4203   _out->print_cr(G1PPRL_LINE_PREFIX);
4204   _out->print_cr(G1PPRL_LINE_PREFIX
4205                  " SUMMARY"
4206                  G1PPRL_SUM_MB_FORMAT("capacity")
4207                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4208                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4209                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
4210                  G1PPRL_SUM_MB_FORMAT("remset")
4211                  G1PPRL_SUM_MB_FORMAT("code-roots"),
4212                  bytes_to_mb(_total_capacity_bytes),
4213                  bytes_to_mb(_total_used_bytes),
4214                  perc(_total_used_bytes, _total_capacity_bytes),
4215                  bytes_to_mb(_total_prev_live_bytes),
4216                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4217                  bytes_to_mb(_total_next_live_bytes),
4218                  perc(_total_next_live_bytes, _total_capacity_bytes),
4219                  bytes_to_mb(_total_remset_bytes),
4220                  bytes_to_mb(_total_strong_code_roots_bytes));
4221   _out->cr();
4222 }