1 /*
   2  * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMark.inline.hpp"
  30 #include "gc/g1/concurrentMarkThread.inline.hpp"
  31 #include "gc/g1/g1CollectedHeap.inline.hpp"
  32 #include "gc/g1/g1CollectorPolicy.hpp"
  33 #include "gc/g1/g1CollectorState.hpp"
  34 #include "gc/g1/g1ErgoVerbose.hpp"
  35 #include "gc/g1/g1Log.hpp"
  36 #include "gc/g1/g1OopClosures.inline.hpp"
  37 #include "gc/g1/g1RemSet.hpp"
  38 #include "gc/g1/g1StringDedup.hpp"
  39 #include "gc/g1/heapRegion.inline.hpp"
  40 #include "gc/g1/heapRegionManager.inline.hpp"
  41 #include "gc/g1/heapRegionRemSet.hpp"
  42 #include "gc/g1/heapRegionSet.inline.hpp"
  43 #include "gc/g1/suspendibleThreadSet.hpp"
  44 #include "gc/shared/gcId.hpp"
  45 #include "gc/shared/gcTimer.hpp"
  46 #include "gc/shared/gcTrace.hpp"
  47 #include "gc/shared/gcTraceTime.hpp"
  48 #include "gc/shared/genOopClosures.inline.hpp"
  49 #include "gc/shared/referencePolicy.hpp"
  50 #include "gc/shared/strongRootsScope.hpp"
  51 #include "gc/shared/taskqueue.inline.hpp"
  52 #include "gc/shared/vmGCOperations.hpp"
  53 #include "memory/allocation.hpp"
  54 #include "memory/resourceArea.hpp"
  55 #include "oops/oop.inline.hpp"
  56 #include "runtime/atomic.inline.hpp"
  57 #include "runtime/handles.inline.hpp"
  58 #include "runtime/java.hpp"
  59 #include "runtime/prefetch.inline.hpp"
  60 #include "services/memTracker.hpp"
  61 
  62 // Concurrent marking bit map wrapper
  63 
  64 CMBitMapRO::CMBitMapRO(int shifter) :
  65   _bm(),
  66   _shifter(shifter) {
  67   _bmStartWord = 0;
  68   _bmWordSize = 0;
  69 }
  70 
  71 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  72                                                const HeapWord* limit) const {
  73   // First we must round addr *up* to a possible object boundary.
  74   addr = (HeapWord*)align_size_up((intptr_t)addr,
  75                                   HeapWordSize << _shifter);
  76   size_t addrOffset = heapWordToOffset(addr);
  77   if (limit == NULL) {
  78     limit = _bmStartWord + _bmWordSize;
  79   }
  80   size_t limitOffset = heapWordToOffset(limit);
  81   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  82   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  83   assert(nextAddr >= addr, "get_next_one postcondition");
  84   assert(nextAddr == limit || isMarked(nextAddr),
  85          "get_next_one postcondition");
  86   return nextAddr;
  87 }
  88 
  89 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
  90                                                  const HeapWord* limit) const {
  91   size_t addrOffset = heapWordToOffset(addr);
  92   if (limit == NULL) {
  93     limit = _bmStartWord + _bmWordSize;
  94   }
  95   size_t limitOffset = heapWordToOffset(limit);
  96   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  97   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  98   assert(nextAddr >= addr, "get_next_zero postcondition");
  99   assert(nextAddr == limit || !isMarked(nextAddr),
 100          "get_next_zero postcondition");
 101   return nextAddr;
 102 }
 103 
 104 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 105   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 106   return (int) (diff >> _shifter);
 107 }
 108 
 109 #ifndef PRODUCT
 110 bool CMBitMapRO::covers(MemRegion heap_rs) const {
 111   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 112   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
 113          "size inconsistency");
 114   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
 115          _bmWordSize  == heap_rs.word_size();
 116 }
 117 #endif
 118 
 119 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
 120   _bm.print_on_error(st, prefix);
 121 }
 122 
 123 size_t CMBitMap::compute_size(size_t heap_size) {
 124   return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
 125 }
 126 
 127 size_t CMBitMap::mark_distance() {
 128   return MinObjAlignmentInBytes * BitsPerByte;
 129 }
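     // For example, with the typical 8-byte minimum object alignment each bitmap
     // bit covers 8 bytes of heap, so one bitmap byte covers 64 heap bytes and a
     // 1 GB heap needs roughly 1 GB / 64 = 16 MB for each marking bitmap.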
 130 
 131 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 132   _bmStartWord = heap.start();
 133   _bmWordSize = heap.word_size();
 134 
 135   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
 136   _bm.set_size(_bmWordSize >> _shifter);
 137 
 138   storage->set_mapping_changed_listener(&_listener);
 139 }
 140 
 141 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 142   if (zero_filled) {
 143     return;
 144   }
 145   // We need to clear the bitmap on commit, removing any existing information.
 146   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 147   _bm->clearRange(mr);
 148 }
 149 
 150 // Closure used for clearing the given mark bitmap.
 151 class ClearBitmapHRClosure : public HeapRegionClosure {
 152  private:
 153   ConcurrentMark* _cm;
 154   CMBitMap* _bitmap;
 155   bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 156  public:
 157   ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
 158     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
 159   }
 160 
 161   virtual bool doHeapRegion(HeapRegion* r) {
 162     size_t const chunk_size_in_words = M / HeapWordSize;
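         // Clear the bitmap in chunks that each correspond to 1 MB of heap so
         // that, when yielding is allowed, do_yield_check() can run between chunks.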
 163 
 164     HeapWord* cur = r->bottom();
 165     HeapWord* const end = r->end();
 166 
 167     while (cur < end) {
 168       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 169       _bitmap->clearRange(mr);
 170 
 171       cur += chunk_size_in_words;
 172 
 173       // Abort iteration if after yielding the marking has been aborted.
 174       if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
 175         return true;
 176       }
 177       // Repeat the asserts from before the start of the closure. We will do them
 178       // as asserts here to minimize their overhead on the product. However, we
 179       // will have them as guarantees at the beginning / end of the bitmap
 180       // clearing to get some checking in the product.
 181       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
 182       assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
 183     }
 184 
 185     return false;
 186   }
 187 };
 188 
 189 class ParClearNextMarkBitmapTask : public AbstractGangTask {
 190   ClearBitmapHRClosure* _cl;
 191   HeapRegionClaimer     _hrclaimer;
 192   bool                  _suspendible; // If the task is suspendible, workers must join the STS.
 193 
 194 public:
 195   ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
 196       AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}
 197 
 198   void work(uint worker_id) {
 199     SuspendibleThreadSetJoiner sts_join(_suspendible);
 200     G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
 201   }
 202 };
 203 
 204 void CMBitMap::clearAll() {
 205   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 206   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
 207   uint n_workers = g1h->workers()->active_workers();
 208   ParClearNextMarkBitmapTask task(&cl, n_workers, false);
 209   g1h->workers()->run_task(&task);
 210   guarantee(cl.complete(), "Must have completed iteration.");
 211   return;
 212 }
 213 
 214 void CMBitMap::markRange(MemRegion mr) {
 215   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 216   assert(!mr.is_empty(), "unexpected empty region");
 217   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 218           ((HeapWord *) mr.end())),
 219          "markRange memory region end is not card aligned");
 220   // convert address range into offset range
 221   _bm.at_put_range(heapWordToOffset(mr.start()),
 222                    heapWordToOffset(mr.end()), true);
 223 }
 224 
 225 void CMBitMap::clearRange(MemRegion mr) {
 226   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 227   assert(!mr.is_empty(), "unexpected empty region");
 228   // convert address range into offset range
 229   _bm.at_put_range(heapWordToOffset(mr.start()),
 230                    heapWordToOffset(mr.end()), false);
 231 }
 232 
 233 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 234                                             HeapWord* end_addr) {
 235   HeapWord* start = getNextMarkedWordAddress(addr);
 236   start = MIN2(start, end_addr);
 237   HeapWord* end   = getNextUnmarkedWordAddress(start);
 238   end = MIN2(end, end_addr);
 239   assert(start <= end, "Consistency check");
 240   MemRegion mr(start, end);
 241   if (!mr.is_empty()) {
 242     clearRange(mr);
 243   }
 244   return mr;
 245 }
 246 
 247 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 248   _base(NULL), _cm(cm)
 249 {}
 250 
 251 bool CMMarkStack::allocate(size_t capacity) {
 252   // allocate a stack of the requisite depth
 253   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 254   if (!rs.is_reserved()) {
 255     warning("ConcurrentMark MarkStack allocation failure");
 256     return false;
 257   }
 258   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 259   if (!_virtual_space.initialize(rs, rs.size())) {
 260     warning("ConcurrentMark MarkStack backing store failure");
 261     // Release the virtual memory reserved for the marking stack
 262     rs.release();
 263     return false;
 264   }
 265   assert(_virtual_space.committed_size() == rs.size(),
 266          "Didn't reserve backing store for all of ConcurrentMark stack?");
 267   _base = (oop*) _virtual_space.low();
 268   setEmpty();
 269   _capacity = (jint) capacity;
 270   _saved_index = -1;
 271   _should_expand = false;
 272   return true;
 273 }
 274 
 275 void CMMarkStack::expand() {
 276   // Called during remark if we've overflown the marking stack while marking.
 277   assert(isEmpty(), "stack should have been emptied while handling overflow");
 278   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 279   // Clear expansion flag
 280   _should_expand = false;
 281   if (_capacity == (jint) MarkStackSizeMax) {
 282     if (PrintGCDetails && Verbose) {
 283       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
 284     }
 285     return;
 286   }
 287   // Double capacity if possible
 288   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
 289   // Do not give up the existing stack until we have managed to
 290   // reserve space for the doubled capacity that we want.
 291   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 292                                                            sizeof(oop)));
 293   if (rs.is_reserved()) {
 294     // Release the backing store associated with old stack
 295     _virtual_space.release();
 296     // Reinitialize virtual space for new stack
 297     if (!_virtual_space.initialize(rs, rs.size())) {
 298       fatal("Not enough swap for expanded marking stack capacity");
 299     }
 300     _base = (oop*)(_virtual_space.low());
 301     _index = 0;
 302     _capacity = new_capacity;
 303   } else {
 304     if (PrintGCDetails && Verbose) {
 305       // Failed to double capacity, continue;
 306       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
 307                           SIZE_FORMAT "K to " SIZE_FORMAT "K",
 308                           _capacity / K, new_capacity / K);
 309     }
 310   }
 311 }
 312 
 313 void CMMarkStack::set_should_expand() {
 314   // If we're resetting the marking state because of a
 315   // marking stack overflow, record that we should, if
 316   // possible, expand the stack.
 317   _should_expand = _cm->has_overflown();
 318 }
 319 
 320 CMMarkStack::~CMMarkStack() {
 321   if (_base != NULL) {
 322     _base = NULL;
 323     _virtual_space.release();
 324   }
 325 }
 326 
 327 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 328   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 329   jint start = _index;
 330   jint next_index = start + n;
 331   if (next_index > _capacity) {
 332     _overflow = true;
 333     return;
 334   }
 335   // Otherwise.
 336   _index = next_index;
 337   for (int i = 0; i < n; i++) {
 338     int ind = start + i;
 339     assert(ind < _capacity, "By overflow test above.");
 340     _base[ind] = ptr_arr[i];
 341   }
 342 }
 343 
 344 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 345   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 346   jint index = _index;
 347   if (index == 0) {
 348     *n = 0;
 349     return false;
 350   } else {
 351     int k = MIN2(max, index);
 352     jint  new_ind = index - k;
 353     for (int j = 0; j < k; j++) {
 354       ptr_arr[j] = _base[new_ind + j];
 355     }
 356     _index = new_ind;
 357     *n = k;
 358     return true;
 359   }
 360 }
 361 
 362 void CMMarkStack::note_start_of_gc() {
 363   assert(_saved_index == -1,
 364          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 365   _saved_index = _index;
 366 }
 367 
 368 void CMMarkStack::note_end_of_gc() {
 369   // This is intentionally a guarantee, instead of an assert. If we
 370   // accidentally add something to the mark stack during GC, it
 371   // will be a correctness issue, so it's better if we crash. We'll
 372   // only check this once per GC anyway, so it won't be a performance
 373   // issue in any way.
 374   guarantee(_saved_index == _index,
 375             "saved index: %d index: %d", _saved_index, _index);
 376   _saved_index = -1;
 377 }
 378 
 379 CMRootRegions::CMRootRegions() :
 380   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 381   _should_abort(false),  _next_survivor(NULL) { }
 382 
 383 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 384   _young_list = g1h->young_list();
 385   _cm = cm;
 386 }
 387 
 388 void CMRootRegions::prepare_for_scan() {
 389   assert(!scan_in_progress(), "pre-condition");
 390 
 391   // Currently, only survivors can be root regions.
 392   assert(_next_survivor == NULL, "pre-condition");
 393   _next_survivor = _young_list->first_survivor_region();
 394   _scan_in_progress = (_next_survivor != NULL);
 395   _should_abort = false;
 396 }
 397 
 398 HeapRegion* CMRootRegions::claim_next() {
 399   if (_should_abort) {
 400     // If someone has set the should_abort flag, we return NULL to
 401     // force the caller to bail out of their loop.
 402     return NULL;
 403   }
 404 
 405   // Currently, only survivors can be root regions.
 406   HeapRegion* res = _next_survivor;
 407   if (res != NULL) {
 408     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 409     // Read it again in case it changed while we were waiting for the lock.
 410     res = _next_survivor;
 411     if (res != NULL) {
 412       if (res == _young_list->last_survivor_region()) {
 413         // We just claimed the last survivor so store NULL to indicate
 414         // that we're done.
 415         _next_survivor = NULL;
 416       } else {
 417         _next_survivor = res->get_next_young_region();
 418       }
 419     } else {
 420       // Someone else claimed the last survivor while we were trying
 421       // to take the lock, so there is nothing else to do.
 422     }
 423   }
 424   assert(res == NULL || res->is_survivor(), "post-condition");
 425 
 426   return res;
 427 }
 428 
 429 void CMRootRegions::scan_finished() {
 430   assert(scan_in_progress(), "pre-condition");
 431 
 432   // Currently, only survivors can be root regions.
 433   if (!_should_abort) {
 434     assert(_next_survivor == NULL, "we should have claimed all survivors");
 435   }
 436   _next_survivor = NULL;
 437 
 438   {
 439     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 440     _scan_in_progress = false;
 441     RootRegionScan_lock->notify_all();
 442   }
 443 }
 444 
 445 bool CMRootRegions::wait_until_scan_finished() {
 446   if (!scan_in_progress()) return false;
 447 
 448   {
 449     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 450     while (scan_in_progress()) {
 451       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 452     }
 453   }
 454   return true;
 455 }
 456 
 457 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 458   return MAX2((n_par_threads + 2) / 4, 1U);
 459 }
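     // For example, scale_parallel_threads(8) == MAX2((8 + 2) / 4, 1U) == 2,
     // i.e. roughly a quarter of the parallel GC threads, but at least one.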
 460 
 461 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 462   _g1h(g1h),
 463   _markBitMap1(),
 464   _markBitMap2(),
 465   _parallel_marking_threads(0),
 466   _max_parallel_marking_threads(0),
 467   _sleep_factor(0.0),
 468   _marking_task_overhead(1.0),
 469   _cleanup_sleep_factor(0.0),
 470   _cleanup_task_overhead(1.0),
 471   _cleanup_list("Cleanup List"),
 472   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 473   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
 474             CardTableModRefBS::card_shift,
 475             false /* in_resource_area*/),
 476 
 477   _prevMarkBitMap(&_markBitMap1),
 478   _nextMarkBitMap(&_markBitMap2),
 479 
 480   _markStack(this),
 481   // _finger set in set_non_marking_state
 482 
 483   _max_worker_id(ParallelGCThreads),
 484   // _active_tasks set in set_non_marking_state
 485   // _tasks set inside the constructor
 486   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 487   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 488 
 489   _has_overflown(false),
 490   _concurrent(false),
 491   _has_aborted(false),
 492   _restart_for_overflow(false),
 493   _concurrent_marking_in_progress(false),
 494 
 495   // _verbose_level set below
 496 
 497   _init_times(),
 498   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 499   _cleanup_times(),
 500   _total_counting_time(0.0),
 501   _total_rs_scrub_time(0.0),
 502 
 503   _parallel_workers(NULL),
 504 
 505   _count_card_bitmaps(NULL),
 506   _count_marked_bytes(NULL),
 507   _completed_initialization(false) {
 508   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 509   if (verbose_level < no_verbose) {
 510     verbose_level = no_verbose;
 511   }
 512   if (verbose_level > high_verbose) {
 513     verbose_level = high_verbose;
 514   }
 515   _verbose_level = verbose_level;
 516 
 517   if (verbose_low()) {
 518     gclog_or_tty->print_cr("[global] init, heap start = " PTR_FORMAT ", "
 519                            "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
 520   }
 521 
 522   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 523   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 524 
 525   // Create & start a ConcurrentMark thread.
 526   _cmThread = new ConcurrentMarkThread(this);
 527   assert(cmThread() != NULL, "CM Thread should have been created");
 528   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 529   if (_cmThread->osthread() == NULL) {
 530       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 531   }
 532 
 533   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 534   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 535   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 536 
 537   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 538   satb_qs.set_buffer_size(G1SATBBufferSize);
 539 
 540   _root_regions.init(_g1h, this);
 541 
 542   if (ConcGCThreads > ParallelGCThreads) {
 543     warning("Can't have more ConcGCThreads (%u) "
 544             "than ParallelGCThreads (%u).",
 545             ConcGCThreads, ParallelGCThreads);
 546     return;
 547   }
 548   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 549     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 550     // if both are set
 551     _sleep_factor             = 0.0;
 552     _marking_task_overhead    = 1.0;
 553   } else if (G1MarkingOverheadPercent > 0) {
 554     // We will calculate the number of parallel marking threads based
 555     // on a target overhead with respect to the soft real-time goal
 556     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 557     double overall_cm_overhead =
 558       (double) MaxGCPauseMillis * marking_overhead /
 559       (double) GCPauseIntervalMillis;
 560     double cpu_ratio = 1.0 / (double) os::processor_count();
 561     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 562     double marking_task_overhead =
 563       overall_cm_overhead / marking_thread_num *
 564                                               (double) os::processor_count();
 565     double sleep_factor =
 566                        (1.0 - marking_task_overhead) / marking_task_overhead;
 567 
 568     FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
 569     _sleep_factor             = sleep_factor;
 570     _marking_task_overhead    = marking_task_overhead;
 571   } else {
 572     // Calculate the number of parallel marking threads by scaling
 573     // the number of parallel GC threads.
 574     uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
 575     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 576     _sleep_factor             = 0.0;
 577     _marking_task_overhead    = 1.0;
 578   }
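       // A rough worked example for the G1MarkingOverheadPercent branch above,
       // using assumed, illustrative values: with G1MarkingOverheadPercent=10,
       // MaxGCPauseMillis=200, GCPauseIntervalMillis=1000 and 8 processors,
       // overall_cm_overhead = 200 * 0.10 / 1000 = 0.02 and cpu_ratio = 0.125,
       // so marking_thread_num = ceil(0.02 / 0.125) = 1, marking_task_overhead =
       // 0.02 / 1 * 8 = 0.16, and sleep_factor = (1 - 0.16) / 0.16 = 5.25.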
 579 
 580   assert(ConcGCThreads > 0, "Should have been set");
 581   _parallel_marking_threads = ConcGCThreads;
 582   _max_parallel_marking_threads = _parallel_marking_threads;
 583 
 584   if (parallel_marking_threads() > 1) {
 585     _cleanup_task_overhead = 1.0;
 586   } else {
 587     _cleanup_task_overhead = marking_task_overhead();
 588   }
 589   _cleanup_sleep_factor =
 590                    (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 591 
 592 #if 0
 593   gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 594   gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 595   gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 596   gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 597   gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 598 #endif
 599 
 600   _parallel_workers = new WorkGang("G1 Marker",
 601        _max_parallel_marking_threads, false, true);
 602   if (_parallel_workers == NULL) {
 603     vm_exit_during_initialization("Failed necessary allocation.");
 604   } else {
 605     _parallel_workers->initialize_workers();
 606   }
 607 
 608   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 609     size_t mark_stack_size =
 610       MIN2(MarkStackSizeMax,
 611           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 612     // Verify that the calculated value for MarkStackSize is in range.
 613     // It would be nice to use the private utility routine from Arguments.
 614     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 615       warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 616               "must be between 1 and " SIZE_FORMAT,
 617               mark_stack_size, MarkStackSizeMax);
 618       return;
 619     }
 620     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 621   } else {
 622     // Verify MarkStackSize is in range.
 623     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 624       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 625         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 626           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 627                   "must be between 1 and " SIZE_FORMAT,
 628                   MarkStackSize, MarkStackSizeMax);
 629           return;
 630         }
 631       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 632         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 633           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 634                   " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 635                   MarkStackSize, MarkStackSizeMax);
 636           return;
 637         }
 638       }
 639     }
 640   }
 641 
 642   if (!_markStack.allocate(MarkStackSize)) {
 643     warning("Failed to allocate CM marking stack");
 644     return;
 645   }
 646 
 647   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 648   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 649 
 650   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 651   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 652 
 653   BitMap::idx_t card_bm_size = _card_bm.size();
 654 
 655   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 656   _active_tasks = _max_worker_id;
 657 
 658   uint max_regions = _g1h->max_regions();
 659   for (uint i = 0; i < _max_worker_id; ++i) {
 660     CMTaskQueue* task_queue = new CMTaskQueue();
 661     task_queue->initialize();
 662     _task_queues->register_queue(i, task_queue);
 663 
 664     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 665     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 666 
 667     _tasks[i] = new CMTask(i, this,
 668                            _count_marked_bytes[i],
 669                            &_count_card_bitmaps[i],
 670                            task_queue, _task_queues);
 671 
 672     _accum_task_vtime[i] = 0.0;
 673   }
 674 
 675   // Calculate the card number for the bottom of the heap. Used
 676   // in biasing indexes into the accounting card bitmaps.
 677   _heap_bottom_card_num =
 678     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 679                                 CardTableModRefBS::card_shift);
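       // For example (illustrative values only): with the usual 512-byte card
       // size (card_shift == 9) and a heap reserved at 0x0000000700000000, the
       // bottom card number is 0x700000000 >> 9 = 0x3800000; a card bitmap index
       // is then, roughly, an address's card number minus this bias.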
 680 
 681   // Clear all the liveness counting data
 682   clear_all_count_data();
 683 
 684   // so that the call below can read a sensible value
 685   _heap_start = g1h->reserved_region().start();
 686   set_non_marking_state();
 687   _completed_initialization = true;
 688 }
 689 
 690 void ConcurrentMark::reset() {
 691   // Starting values for these two. This should be called in a STW
 692   // phase.
 693   MemRegion reserved = _g1h->g1_reserved();
 694   _heap_start = reserved.start();
 695   _heap_end   = reserved.end();
 696 
 697   // Separated the asserts so that we know which one fires.
 698   assert(_heap_start != NULL, "heap bounds should look ok");
 699   assert(_heap_end != NULL, "heap bounds should look ok");
 700   assert(_heap_start < _heap_end, "heap bounds should look ok");
 701 
 702   // Reset all the marking data structures and any necessary flags
 703   reset_marking_state();
 704 
 705   if (verbose_low()) {
 706     gclog_or_tty->print_cr("[global] resetting");
 707   }
 708 
 709   // We reset all of them, since different phases will use
 710   // different numbers of active threads. So, it's easiest to have all
 711   // of them ready.
 712   for (uint i = 0; i < _max_worker_id; ++i) {
 713     _tasks[i]->reset(_nextMarkBitMap);
 714   }
 715 
 716   // We need this to make sure that the flag is on during the evacuation
 717   // pause with the initial mark piggy-backed on it.
 718   set_concurrent_marking_in_progress();
 719 }
 720 
 721 
 722 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 723   _markStack.set_should_expand();
 724   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 725   if (clear_overflow) {
 726     clear_has_overflown();
 727   } else {
 728     assert(has_overflown(), "pre-condition");
 729   }
 730   _finger = _heap_start;
 731 
 732   for (uint i = 0; i < _max_worker_id; ++i) {
 733     CMTaskQueue* queue = _task_queues->queue(i);
 734     queue->set_empty();
 735   }
 736 }
 737 
 738 void ConcurrentMark::set_concurrency(uint active_tasks) {
 739   assert(active_tasks <= _max_worker_id, "we should not have more");
 740 
 741   _active_tasks = active_tasks;
 742   // Need to update the three data structures below according to the
 743   // number of active threads for this phase.
 744   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 745   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 746   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 747 }
 748 
 749 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 750   set_concurrency(active_tasks);
 751 
 752   _concurrent = concurrent;
 753   // We propagate this to all tasks, not just the active ones.
 754   for (uint i = 0; i < _max_worker_id; ++i)
 755     _tasks[i]->set_concurrent(concurrent);
 756 
 757   if (concurrent) {
 758     set_concurrent_marking_in_progress();
 759   } else {
 760     // We currently assume that the concurrent flag has been set to
 761     // false before we start remark. At this point we should also be
 762     // in a STW phase.
 763     assert(!concurrent_marking_in_progress(), "invariant");
 764     assert(out_of_regions(),
 765            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 766            p2i(_finger), p2i(_heap_end));
 767   }
 768 }
 769 
 770 void ConcurrentMark::set_non_marking_state() {
 771   // We set the global marking state to some default values when we're
 772   // not doing marking.
 773   reset_marking_state();
 774   _active_tasks = 0;
 775   clear_concurrent_marking_in_progress();
 776 }
 777 
 778 ConcurrentMark::~ConcurrentMark() {
 779   // The ConcurrentMark instance is never freed.
 780   ShouldNotReachHere();
 781 }
 782 
 783 void ConcurrentMark::clearNextBitmap() {
 784   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 785 
 786   // Make sure that the concurrent mark thread looks to still be in
 787   // the current cycle.
 788   guarantee(cmThread()->during_cycle(), "invariant");
 789 
 790   // We are finishing up the current cycle by clearing the next
 791   // marking bitmap and getting it ready for the next cycle. During
 792   // this time no other cycle can start. So, let's make sure that this
 793   // is the case.
 794   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 795 
 796   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
 797   ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
 798   _parallel_workers->run_task(&task);
 799 
 800   // Clear the liveness counting data. If the marking has been aborted, the abort()
 801   // call already did that.
 802   if (cl.complete()) {
 803     clear_all_count_data();
 804   }
 805 
 806   // Repeat the asserts from above.
 807   guarantee(cmThread()->during_cycle(), "invariant");
 808   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 809 }
 810 
 811 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 812   CMBitMap* _bitmap;
 813   bool _error;
 814  public:
 815   CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
 816   }
 817 
 818   virtual bool doHeapRegion(HeapRegion* r) {
 819     // This closure can be called concurrently with the mutator, so we must make sure
 820     // that the result of the getNextMarkedWordAddress() call is compared to the
 821     // value passed to it as limit to detect any found bits.
 822     // We can use the region's orig_end() for the limit and the comparison value,
 823     // as it always contains the "real" end of the region, never changes, and
 824     // has no side effects.
 825     // Due to the latter, there can also be no problem with the compiler generating
 826     // reloads of the orig_end() call.
 827     HeapWord* end = r->orig_end();
 828     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 829   }
 830 };
 831 
 832 bool ConcurrentMark::nextMarkBitmapIsClear() {
 833   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 834   _g1h->heap_region_iterate(&cl);
 835   return cl.complete();
 836 }
 837 
 838 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 839 public:
 840   bool doHeapRegion(HeapRegion* r) {
 841     if (!r->is_continues_humongous()) {
 842       r->note_start_of_marking();
 843     }
 844     return false;
 845   }
 846 };
 847 
 848 void ConcurrentMark::checkpointRootsInitialPre() {
 849   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 850   G1CollectorPolicy* g1p = g1h->g1_policy();
 851 
 852   _has_aborted = false;
 853 
 854   // Initialize marking structures. This has to be done in a STW phase.
 855   reset();
 856 
 857   // For each region note start of marking.
 858   NoteStartOfMarkHRClosure startcl;
 859   g1h->heap_region_iterate(&startcl);
 860 }
 861 
 862 
 863 void ConcurrentMark::checkpointRootsInitialPost() {
 864   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 865 
 866   // If we force an overflow during remark, the remark operation will
 867   // actually abort and we'll restart concurrent marking. If we always
 868   // force an overflow during remark we'll never actually complete the
 869   // marking phase. So, we initialize this here, at the start of the
 870   // cycle, so that the remaining overflow number will decrease at
 871   // every remark and we'll eventually not need to cause one.
 872   force_overflow_stw()->init();
 873 
 874   // Start Concurrent Marking weak-reference discovery.
 875   ReferenceProcessor* rp = g1h->ref_processor_cm();
 876   // enable ("weak") refs discovery
 877   rp->enable_discovery();
 878   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 879 
 880   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 881   // This is the start of the marking cycle; we expect all
 882   // threads to have SATB queues with active set to false.
 883   satb_mq_set.set_active_all_threads(true, /* new active value */
 884                                      false /* expected_active */);
 885 
 886   _root_regions.prepare_for_scan();
 887 
 888   // update_g1_committed() will be called at the end of an evac pause
 889   // when marking is on. So, it's also called at the end of the
 890   // initial-mark pause to update the heap end, if the heap expands
 891   // during it. No need to call it here.
 892 }
 893 
 894 /*
 895  * Notice that in the next two methods, we actually leave the STS
 896  * during the barrier sync and join it immediately afterwards. If we
 897  * do not do this, the following deadlock can occur: one thread could
 898  * be in the barrier sync code, waiting for the other thread to also
 899  * sync up, whereas another one could be trying to yield, while also
 900  * waiting for the other threads to sync up too.
 901  *
 902  * Note, however, that this code is also used during remark and in
 903  * this case we should not attempt to leave / enter the STS, otherwise
 904  * we'll either hit an assert (debug / fastdebug) or deadlock
 905  * (product). So we should only leave / enter the STS if we are
 906  * operating concurrently.
 907  *
 908  * Because the thread that does the sync barrier has left the STS, it
 909  * is possible for it to be suspended for a Full GC or for an evacuation
 910  * pause to occur. This is actually safe, since entering the sync
 911  * barrier is one of the last things do_marking_step() does, and it
 912  * doesn't manipulate any data structures afterwards.
 913  */
 914 
 915 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 916   bool barrier_aborted;
 917 
 918   if (verbose_low()) {
 919     gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
 920   }
 921 
 922   {
 923     SuspendibleThreadSetLeaver sts_leave(concurrent());
 924     barrier_aborted = !_first_overflow_barrier_sync.enter();
 925   }
 926 
 927   // at this point everyone should have synced up and not be doing any
 928   // more work
 929 
 930   if (verbose_low()) {
 931     if (barrier_aborted) {
 932       gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
 933     } else {
 934       gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
 935     }
 936   }
 937 
 938   if (barrier_aborted) {
 939     // If the barrier aborted we ignore the overflow condition and
 940     // just abort the whole marking phase as quickly as possible.
 941     return;
 942   }
 943 
 944   // If we're executing the concurrent phase of marking, reset the marking
 945   // state; otherwise the marking state is reset after reference processing,
 946   // during the remark pause.
 947   // If we reset here as a result of an overflow during the remark we will
 948   // see assertion failures from any subsequent set_concurrency_and_phase()
 949   // calls.
 950   if (concurrent()) {
 951     // let the task associated with worker 0 do this
 952     if (worker_id == 0) {
 953       // task 0 is responsible for clearing the global data structures
 954       // We should be here because of an overflow. During STW we should
 955       // not clear the overflow flag since we rely on it being true when
 956       // we exit this method to abort the pause and restart concurrent
 957       // marking.
 958       reset_marking_state(true /* clear_overflow */);
 959       force_overflow()->update();
 960 
 961       if (G1Log::fine()) {
 962         gclog_or_tty->gclog_stamp();
 963         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 964       }
 965     }
 966   }
 967 
 968   // after this, each task should reset its own data structures and
 969   // then go into the second barrier
 970 }
 971 
 972 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 973   bool barrier_aborted;
 974 
 975   if (verbose_low()) {
 976     gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
 977   }
 978 
 979   {
 980     SuspendibleThreadSetLeaver sts_leave(concurrent());
 981     barrier_aborted = !_second_overflow_barrier_sync.enter();
 982   }
 983 
 984   // at this point everything should be re-initialized and ready to go
 985 
 986   if (verbose_low()) {
 987     if (barrier_aborted) {
 988       gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
 989     } else {
 990       gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
 991     }
 992   }
 993 }
 994 
 995 #ifndef PRODUCT
 996 void ForceOverflowSettings::init() {
 997   _num_remaining = G1ConcMarkForceOverflow;
 998   _force = false;
 999   update();
1000 }
1001 
1002 void ForceOverflowSettings::update() {
1003   if (_num_remaining > 0) {
1004     _num_remaining -= 1;
1005     _force = true;
1006   } else {
1007     _force = false;
1008   }
1009 }
1010 
1011 bool ForceOverflowSettings::should_force() {
1012   if (_force) {
1013     _force = false;
1014     return true;
1015   } else {
1016     return false;
1017   }
1018 }
1019 #endif // !PRODUCT
1020 
1021 class CMConcurrentMarkingTask: public AbstractGangTask {
1022 private:
1023   ConcurrentMark*       _cm;
1024   ConcurrentMarkThread* _cmt;
1025 
1026 public:
1027   void work(uint worker_id) {
1028     assert(Thread::current()->is_ConcurrentGC_thread(),
1029            "this should only be done by a conc GC thread");
1030     ResourceMark rm;
1031 
1032     double start_vtime = os::elapsedVTime();
1033 
1034     {
1035       SuspendibleThreadSetJoiner sts_join;
1036 
1037       assert(worker_id < _cm->active_tasks(), "invariant");
1038       CMTask* the_task = _cm->task(worker_id);
1039       the_task->record_start_time();
1040       if (!_cm->has_aborted()) {
1041         do {
1042           double start_vtime_sec = os::elapsedVTime();
1043           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1044 
1045           the_task->do_marking_step(mark_step_duration_ms,
1046                                     true  /* do_termination */,
1047                                     false /* is_serial*/);
1048 
1049           double end_vtime_sec = os::elapsedVTime();
1050           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1051           _cm->clear_has_overflown();
1052 
1053           _cm->do_yield_check(worker_id);
1054 
1055           jlong sleep_time_ms;
1056           if (!_cm->has_aborted() && the_task->has_aborted()) {
1057             sleep_time_ms =
1058               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1059             {
1060               SuspendibleThreadSetLeaver sts_leave;
1061               os::sleep(Thread::current(), sleep_time_ms, false);
1062             }
1063           }
1064         } while (!_cm->has_aborted() && the_task->has_aborted());
1065       }
1066       the_task->record_end_time();
1067       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1068     }
1069 
1070     double end_vtime = os::elapsedVTime();
1071     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1072   }
1073 
1074   CMConcurrentMarkingTask(ConcurrentMark* cm,
1075                           ConcurrentMarkThread* cmt) :
1076       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1077 
1078   ~CMConcurrentMarkingTask() { }
1079 };
1080 
1081 // Calculates the number of active workers for a concurrent
1082 // phase.
1083 uint ConcurrentMark::calc_parallel_marking_threads() {
1084   uint n_conc_workers = 0;
1085   if (!UseDynamicNumberOfGCThreads ||
1086       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1087        !ForceDynamicNumberOfGCThreads)) {
1088     n_conc_workers = max_parallel_marking_threads();
1089   } else {
1090     n_conc_workers =
1091       AdaptiveSizePolicy::calc_default_active_workers(
1092                                    max_parallel_marking_threads(),
1093                                    1, /* Minimum workers */
1094                                    parallel_marking_threads(),
1095                                    Threads::number_of_non_daemon_threads());
1096     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1097     // that scaling has already gone into "_max_parallel_marking_threads".
1098   }
1099   assert(n_conc_workers > 0, "Always need at least 1");
1100   return n_conc_workers;
1101 }
1102 
1103 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1104   // Currently, only survivors can be root regions.
1105   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1106   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1107 
1108   const uintx interval = PrefetchScanIntervalInBytes;
1109   HeapWord* curr = hr->bottom();
1110   const HeapWord* end = hr->top();
1111   while (curr < end) {
1112     Prefetch::read(curr, interval);
1113     oop obj = oop(curr);
1114     int size = obj->oop_iterate_size(&cl);
1115     assert(size == obj->size(), "sanity");
1116     curr += size;
1117   }
1118 }
1119 
1120 class CMRootRegionScanTask : public AbstractGangTask {
1121 private:
1122   ConcurrentMark* _cm;
1123 
1124 public:
1125   CMRootRegionScanTask(ConcurrentMark* cm) :
1126     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1127 
1128   void work(uint worker_id) {
1129     assert(Thread::current()->is_ConcurrentGC_thread(),
1130            "this should only be done by a conc GC thread");
1131 
1132     CMRootRegions* root_regions = _cm->root_regions();
1133     HeapRegion* hr = root_regions->claim_next();
1134     while (hr != NULL) {
1135       _cm->scanRootRegion(hr, worker_id);
1136       hr = root_regions->claim_next();
1137     }
1138   }
1139 };
1140 
1141 void ConcurrentMark::scanRootRegions() {
1142   double scan_start = os::elapsedTime();
1143 
1144   // Start of concurrent marking.
1145   ClassLoaderDataGraph::clear_claimed_marks();
1146 
1147   // scan_in_progress() will have been set to true only if there was
1148   // at least one root region to scan. So, if it's false, we
1149   // should not attempt to do any further work.
1150   if (root_regions()->scan_in_progress()) {
1151     if (G1Log::fine()) {
1152       gclog_or_tty->gclog_stamp();
1153       gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
1154     }
1155 
1156     _parallel_marking_threads = calc_parallel_marking_threads();
1157     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1158            "Maximum number of marking threads exceeded");
1159     uint active_workers = MAX2(1U, parallel_marking_threads());
1160 
1161     CMRootRegionScanTask task(this);
1162     _parallel_workers->set_active_workers(active_workers);
1163     _parallel_workers->run_task(&task);
1164 
1165     if (G1Log::fine()) {
1166       gclog_or_tty->gclog_stamp();
1167       gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf secs]", os::elapsedTime() - scan_start);
1168     }
1169 
1170     // It's possible that has_aborted() is true here without actually
1171     // aborting the survivor scan earlier. This is OK as it's
1172     // mainly used for sanity checking.
1173     root_regions()->scan_finished();
1174   }
1175 }
1176 
1177 void ConcurrentMark::markFromRoots() {
1178   // we might be tempted to assert that:
1179   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1180   //        "inconsistent argument?");
1181   // However that wouldn't be right, because it's possible that
1182   // a safepoint is indeed in progress as a younger generation
1183   // stop-the-world GC happens even as we mark in this generation.
1184 
1185   _restart_for_overflow = false;
1186   force_overflow_conc()->init();
1187 
1188   // _g1h has _n_par_threads
1189   _parallel_marking_threads = calc_parallel_marking_threads();
1190   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1191     "Maximum number of marking threads exceeded");
1192 
1193   uint active_workers = MAX2(1U, parallel_marking_threads());
1194   assert(active_workers > 0, "Should have been set");
1195 
1196   // Parallel task terminator is set in "set_concurrency_and_phase()"
1197   set_concurrency_and_phase(active_workers, true /* concurrent */);
1198 
1199   CMConcurrentMarkingTask markingTask(this, cmThread());
1200   _parallel_workers->set_active_workers(active_workers);
1201   _parallel_workers->run_task(&markingTask);
1202   print_stats();
1203 }
1204 
1205 // Helper class to get rid of some boilerplate code.
1206 class G1CMTraceTime : public StackObj {
1207   GCTraceTimeImpl _gc_trace_time;
1208   static bool doit_and_prepend(bool doit) {
1209     if (doit) {
1210       gclog_or_tty->put(' ');
1211     }
1212     return doit;
1213   }
1214 
1215  public:
1216   G1CMTraceTime(const char* title, bool doit)
1217     : _gc_trace_time(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm()) {
1218   }
1219 };
1220 
1221 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1222   // world is stopped at this checkpoint
1223   assert(SafepointSynchronize::is_at_safepoint(),
1224          "world should be stopped");
1225 
1226   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1227 
1228   // If a full collection has happened, we shouldn't do this.
1229   if (has_aborted()) {
1230     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1231     return;
1232   }
1233 
1234   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1235 
1236   if (VerifyDuringGC) {
1237     HandleMark hm;  // handle scope
1238     g1h->prepare_for_verify();
1239     Universe::verify(VerifyOption_G1UsePrevMarking,
1240                      " VerifyDuringGC:(before)");
1241   }
1242   g1h->check_bitmaps("Remark Start");
1243 
1244   G1CollectorPolicy* g1p = g1h->g1_policy();
1245   g1p->record_concurrent_mark_remark_start();
1246 
1247   double start = os::elapsedTime();
1248 
1249   checkpointRootsFinalWork();
1250 
1251   double mark_work_end = os::elapsedTime();
1252 
1253   weakRefsWork(clear_all_soft_refs);
1254 
1255   if (has_overflown()) {
1256     // Oops.  We overflowed.  Restart concurrent marking.
1257     _restart_for_overflow = true;
1258     if (G1TraceMarkStackOverflow) {
1259       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1260     }
1261 
1262     // Verify the heap w.r.t. the previous marking bitmap.
1263     if (VerifyDuringGC) {
1264       HandleMark hm;  // handle scope
1265       g1h->prepare_for_verify();
1266       Universe::verify(VerifyOption_G1UsePrevMarking,
1267                        " VerifyDuringGC:(overflow)");
1268     }
1269 
1270     // Clear the marking state because we will be restarting
1271     // marking due to overflowing the global mark stack.
1272     reset_marking_state();
1273   } else {
1274     {
1275       G1CMTraceTime trace("GC aggregate-data", G1Log::finer());
1276 
1277       // Aggregate the per-task counting data that we have accumulated
1278       // while marking.
1279       aggregate_count_data();
1280     }
1281 
1282     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1283     // We're done with marking.
1284     // This is the end of the marking cycle; we expect all
1285     // threads to have SATB queues with active set to true.
1286     satb_mq_set.set_active_all_threads(false, /* new active value */
1287                                        true /* expected_active */);
1288 
1289     if (VerifyDuringGC) {
1290       HandleMark hm;  // handle scope
1291       g1h->prepare_for_verify();
1292       Universe::verify(VerifyOption_G1UseNextMarking,
1293                        " VerifyDuringGC:(after)");
1294     }
1295     g1h->check_bitmaps("Remark End");
1296     assert(!restart_for_overflow(), "sanity");
1297     // Completely reset the marking state since marking completed
1298     set_non_marking_state();
1299   }
1300 
1301   // Expand the marking stack, if we have to and if we can.
1302   if (_markStack.should_expand()) {
1303     _markStack.expand();
1304   }
1305 
1306   // Statistics
1307   double now = os::elapsedTime();
1308   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1309   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1310   _remark_times.add((now - start) * 1000.0);
1311 
1312   g1p->record_concurrent_mark_remark_end();
1313 
1314   G1CMIsAliveClosure is_alive(g1h);
1315   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1316 }
1317 
1318 // Base class of the closures that finalize and verify the
1319 // liveness counting data.
1320 class CMCountDataClosureBase: public HeapRegionClosure {
1321 protected:
1322   G1CollectedHeap* _g1h;
1323   ConcurrentMark* _cm;
1324   CardTableModRefBS* _ct_bs;
1325 
1326   BitMap* _region_bm;
1327   BitMap* _card_bm;
1328 
1329   // Takes a region that's not empty (i.e., it has at least one
1330   // live object in it) and sets its corresponding bit on the region
1331   // bitmap to 1. If the region is "starts humongous" it will also set
1332   // to 1 the bits on the region bitmap that correspond to its
1333   // associated "continues humongous" regions.
1334   void set_bit_for_region(HeapRegion* hr) {
1335     assert(!hr->is_continues_humongous(), "should have filtered those out");
1336 
1337     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1338     if (!hr->is_starts_humongous()) {
1339       // Normal (non-humongous) case: just set the bit.
1340       _region_bm->par_at_put(index, true);
1341     } else {
1342       // Starts humongous case: calculate how many regions are part of
1343       // this humongous region and then set the bit range.
1344       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1345       _region_bm->par_at_put_range(index, end_index, true);
1346     }
1347   }
1348 
1349 public:
1350   CMCountDataClosureBase(G1CollectedHeap* g1h,
1351                          BitMap* region_bm, BitMap* card_bm):
1352     _g1h(g1h), _cm(g1h->concurrent_mark()),
1353     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
1354     _region_bm(region_bm), _card_bm(card_bm) { }
1355 };
1356 
1357 // Closure that calculates the # live objects per region. Used
1358 // for verification purposes during the cleanup pause.
1359 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1360   CMBitMapRO* _bm;
1361   size_t _region_marked_bytes;
1362 
1363 public:
1364   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1365                          BitMap* region_bm, BitMap* card_bm) :
1366     CMCountDataClosureBase(g1h, region_bm, card_bm),
1367     _bm(bm), _region_marked_bytes(0) { }
1368 
1369   bool doHeapRegion(HeapRegion* hr) {
1370 
1371     if (hr->is_continues_humongous()) {
1372       // We will ignore these here and process them when their
1373       // associated "starts humongous" region is processed (see
1374       // set_bit_for_heap_region()). Note that we cannot rely on their
1375       // associated "starts humongous" region to have their bit set to
1376       // 1 since, due to the region chunking in the parallel region
1377       // iteration, a "continues humongous" region might be visited
1378       // before its associated "starts humongous".
1379       return false;
1380     }
1381 
1382     HeapWord* ntams = hr->next_top_at_mark_start();
1383     HeapWord* start = hr->bottom();
1384 
1385     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1386            "Preconditions not met - "
1387            "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
1388            p2i(start), p2i(ntams), p2i(hr->end()));
1389 
1390     // Find the first marked object at or after "start".
1391     start = _bm->getNextMarkedWordAddress(start, ntams);
1392 
1393     size_t marked_bytes = 0;
1394 
1395     while (start < ntams) {
1396       oop obj = oop(start);
1397       int obj_sz = obj->size();
1398       HeapWord* obj_end = start + obj_sz;
1399 
1400       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1401       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1402 
1403       // Note: if we're looking at the last region in the heap, obj_end
1404       // could actually be just beyond the end of the heap; end_idx
1405       // will then correspond to a (non-existent) card that is also
1406       // just beyond the heap.
1407       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1408         // end of object is not card aligned - increment to cover
1409         // all the cards spanned by the object
1410         end_idx += 1;
1411       }
1412 
1413       // Set the bits in the card BM for the cards spanned by this object.
1414       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1415 
1416       // Add the size of this object to the number of marked bytes.
1417       marked_bytes += (size_t)obj_sz * HeapWordSize;
1418 
1419       // Find the next marked object after this one.
1420       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1421     }
1422 
1423     // Mark the allocated-since-marking portion...
1424     HeapWord* top = hr->top();
1425     if (ntams < top) {
1426       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1427       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1428 
1429       // Note: if we're looking at the last region in the heap, top
1430       // could actually be just beyond the end of the heap; end_idx
1431       // will then correspond to a (non-existent) card that is also
1432       // just beyond the heap.
1433       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1434         // top is not card aligned - increment to cover all the
1435         // cards spanned by the range [ntams, top)
1436         end_idx += 1;
1437       }
1438       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1439 
1440       // This definitely means the region has live objects.
1441       set_bit_for_region(hr);
1442     }
1443 
1444     // Update the live region bitmap.
1445     if (marked_bytes > 0) {
1446       set_bit_for_region(hr);
1447     }
1448 
1449     // Set the marked bytes for the current region so that
1450     // it can be queried by a calling verification routine
1451     _region_marked_bytes = marked_bytes;
1452 
1453     return false;
1454   }
1455 
1456   size_t region_marked_bytes() const { return _region_marked_bytes; }
1457 };
1458 
1459 // Heap region closure used for verifying the counting data
1460 // that was accumulated concurrently and aggregated during
1461 // the remark pause. This closure is applied to the heap
1462 // regions during the STW cleanup pause.
1463 
1464 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1465   G1CollectedHeap* _g1h;
1466   ConcurrentMark* _cm;
1467   CalcLiveObjectsClosure _calc_cl;
1468   BitMap* _region_bm;   // Region BM to be verified
1469   BitMap* _card_bm;     // Card BM to be verified
1470   bool _verbose;        // verbose output?
1471 
1472   BitMap* _exp_region_bm; // Expected Region BM values
1473   BitMap* _exp_card_bm;   // Expected card BM values
1474 
1475   int _failures;
1476 
1477 public:
1478   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1479                                 BitMap* region_bm,
1480                                 BitMap* card_bm,
1481                                 BitMap* exp_region_bm,
1482                                 BitMap* exp_card_bm,
1483                                 bool verbose) :
1484     _g1h(g1h), _cm(g1h->concurrent_mark()),
1485     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1486     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1487     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1488     _failures(0) { }
1489 
1490   int failures() const { return _failures; }
1491 
1492   bool doHeapRegion(HeapRegion* hr) {
1493     if (hr->is_continues_humongous()) {
1494       // We will ignore these here and process them when their
1495       // associated "starts humongous" region is processed (see
1496       // set_bit_for_region()). Note that we cannot rely on the
1497       // associated "starts humongous" region to have its bit set to
1498       // 1 since, due to the region chunking in the parallel region
1499       // iteration, a "continues humongous" region might be visited
1500       // before its associated "starts humongous".
1501       return false;
1502     }
1503 
1504     int failures = 0;
1505 
1506     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1507     // this region and set the corresponding bits in the expected region
1508     // and card bitmaps.
1509     bool res = _calc_cl.doHeapRegion(hr);
1510     assert(res == false, "should be continuing");
1511 
1512     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1513                     Mutex::_no_safepoint_check_flag);
1514 
1515     // Verify the marked bytes for this region.
1516     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1517     size_t act_marked_bytes = hr->next_marked_bytes();
1518 
1519     // We're not OK if expected marked bytes > actual marked bytes. It means
1520     // we failed to account for some objects during the actual marking.
1521     if (exp_marked_bytes > act_marked_bytes) {
1522       if (_verbose) {
1523         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1524                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1525                                hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
1526       }
1527       failures += 1;
1528     }
1529 
1530     // Verify the bit, for this region, in the actual and expected
1531     // (which was just calculated) region bit maps.
1532     // We're not OK if the bit in the calculated expected region
1533     // bitmap is set and the bit in the actual region bitmap is not.
1534     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1535 
1536     bool expected = _exp_region_bm->at(index);
1537     bool actual = _region_bm->at(index);
1538     if (expected && !actual) {
1539       if (_verbose) {
1540         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1541                                "expected: %s, actual: %s",
1542                                hr->hrm_index(),
1543                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1544       }
1545       failures += 1;
1546     }
1547 
1548     // Verify that the card bit maps for the cards spanned by the current
1549     // region match. We have an error if we have a set bit in the expected
1550     // bit map and the corresponding bit in the actual bitmap is not set.
1551 
1552     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1553     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1554 
1555     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1556       expected = _exp_card_bm->at(i);
1557       actual = _card_bm->at(i);
1558 
1559       if (expected && !actual) {
1560         if (_verbose) {
1561           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1562                                  "expected: %s, actual: %s",
1563                                  hr->hrm_index(), i,
1564                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1565         }
1566         failures += 1;
1567       }
1568     }
1569 
1570     if (failures > 0 && _verbose)  {
1571       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1572                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1573                              HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
1574                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1575     }
1576 
1577     _failures += failures;
1578 
1579     // We could stop iteration over the heap when we
1580     // find the first violating region by returning true.
1581     return false;
1582   }
1583 };
1584 
1585 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1586 protected:
1587   G1CollectedHeap* _g1h;
1588   ConcurrentMark* _cm;
1589   BitMap* _actual_region_bm;
1590   BitMap* _actual_card_bm;
1591 
1592   uint    _n_workers;
1593 
1594   BitMap* _expected_region_bm;
1595   BitMap* _expected_card_bm;
1596 
1597   int  _failures;
1598   bool _verbose;
1599 
1600   HeapRegionClaimer _hrclaimer;
1601 
1602 public:
1603   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1604                             BitMap* region_bm, BitMap* card_bm,
1605                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1606     : AbstractGangTask("G1 verify final counting"),
1607       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1608       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1609       _n_workers(_g1h->workers()->active_workers()),
1610       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1611       _failures(0), _verbose(false), _hrclaimer(_n_workers) {
1612     assert(VerifyDuringGC, "don't call this otherwise");
1613     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1614     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1615 
1616     _verbose = _cm->verbose_medium();
1617   }
1618 
1619   void work(uint worker_id) {
1620     assert(worker_id < _n_workers, "invariant");
1621 
1622     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1623                                             _actual_region_bm, _actual_card_bm,
1624                                             _expected_region_bm,
1625                                             _expected_card_bm,
1626                                             _verbose);
1627 
1628     _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
1629 
1630     Atomic::add(verify_cl.failures(), &_failures);
1631   }
1632 
1633   int failures() const { return _failures; }
1634 };
1635 
1636 // Closure that finalizes the liveness counting data.
1637 // Used during the cleanup pause.
1638 // Sets the bits corresponding to the interval [NTAMS, top)
1639 // (which contains the implicitly live objects) in the
1640 // card liveness bitmap. Also sets the bit for each region
1641 // containing live data in the region liveness bitmap.
1642 
1643 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1644  public:
1645   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1646                               BitMap* region_bm,
1647                               BitMap* card_bm) :
1648     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1649 
1650   bool doHeapRegion(HeapRegion* hr) {
1651 
1652     if (hr->is_continues_humongous()) {
1653       // We will ignore these here and process them when their
1654       // associated "starts humongous" region is processed (see
1655       // set_bit_for_region()). Note that we cannot rely on the
1656       // associated "starts humongous" region to have its bit set to
1657       // 1 since, due to the region chunking in the parallel region
1658       // iteration, a "continues humongous" region might be visited
1659       // before its associated "starts humongous".
1660       return false;
1661     }
1662 
1663     HeapWord* ntams = hr->next_top_at_mark_start();
1664     HeapWord* top   = hr->top();
1665 
1666     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1667 
1668     // Mark the allocated-since-marking portion...
1669     if (ntams < top) {
1670       // This definitely means the region has live objects.
1671       set_bit_for_region(hr);
1672 
1673       // Now set the bits in the card bitmap for [ntams, top)
1674       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1675       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1676 
1677       // Note: if we're looking at the last region in the heap, top
1678       // could actually be just beyond the end of the heap; end_idx
1679       // will then correspond to a (non-existent) card that is also
1680       // just beyond the heap.
1681       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1682         // top is not card aligned - increment to cover all the
1683         // cards spanned by the range [ntams, top)
1684         end_idx += 1;
1685       }
1686 
1687       assert(end_idx <= _card_bm->size(),
1688              "oob: end_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1689              end_idx, _card_bm->size());
1690       assert(start_idx < _card_bm->size(),
1691              "oob: start_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1692              start_idx, _card_bm->size());
1693 
1694       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1695     }
1696 
1697     // Set the bit for the region if it contains live data
1698     if (hr->next_marked_bytes() > 0) {
1699       set_bit_for_region(hr);
1700     }
1701 
1702     return false;
1703   }
1704 };
1705 
1706 class G1ParFinalCountTask: public AbstractGangTask {
1707 protected:
1708   G1CollectedHeap* _g1h;
1709   ConcurrentMark* _cm;
1710   BitMap* _actual_region_bm;
1711   BitMap* _actual_card_bm;
1712 
1713   uint    _n_workers;
1714   HeapRegionClaimer _hrclaimer;
1715 
1716 public:
1717   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1718     : AbstractGangTask("G1 final counting"),
1719       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1720       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1721       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1722   }
1723 
1724   void work(uint worker_id) {
1725     assert(worker_id < _n_workers, "invariant");
1726 
1727     FinalCountDataUpdateClosure final_update_cl(_g1h,
1728                                                 _actual_region_bm,
1729                                                 _actual_card_bm);
1730 
1731     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1732   }
1733 };
1734 
1735 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1736   G1CollectedHeap* _g1;
1737   size_t _freed_bytes;
1738   FreeRegionList* _local_cleanup_list;
1739   HeapRegionSetCount _old_regions_removed;
1740   HeapRegionSetCount _humongous_regions_removed;
1741   HRRSCleanupTask* _hrrs_cleanup_task;
1742 
1743 public:
1744   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1745                              FreeRegionList* local_cleanup_list,
1746                              HRRSCleanupTask* hrrs_cleanup_task) :
1747     _g1(g1),
1748     _freed_bytes(0),
1749     _local_cleanup_list(local_cleanup_list),
1750     _old_regions_removed(),
1751     _humongous_regions_removed(),
1752     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1753 
1754   size_t freed_bytes() { return _freed_bytes; }
1755   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1756   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1757 
1758   bool doHeapRegion(HeapRegion *hr) {
1759     if (hr->is_continues_humongous() || hr->is_archive()) {
1760       return false;
1761     }
1762     // We use a claim value of zero here because all regions
1763     // were claimed with value 1 in the FinalCount task.
1764     _g1->reset_gc_time_stamps(hr);
1765     hr->note_end_of_marking();
1766 
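         // Regions in which marking found no live data, and which are not
         // young, can be reclaimed right away; otherwise we only scrub the
         // region's remembered set below.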
1767     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1768       _freed_bytes += hr->used();
1769       hr->set_containing_set(NULL);
1770       if (hr->is_humongous()) {
1771         assert(hr->is_starts_humongous(), "we should only see starts humongous");
1772         _humongous_regions_removed.increment(1u, hr->capacity());
1773         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1774       } else {
1775         _old_regions_removed.increment(1u, hr->capacity());
1776         _g1->free_region(hr, _local_cleanup_list, true);
1777       }
1778     } else {
1779       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1780     }
1781 
1782     return false;
1783   }
1784 };
1785 
1786 class G1ParNoteEndTask: public AbstractGangTask {
1787   friend class G1NoteEndOfConcMarkClosure;
1788 
1789 protected:
1790   G1CollectedHeap* _g1h;
1791   FreeRegionList* _cleanup_list;
1792   HeapRegionClaimer _hrclaimer;
1793 
1794 public:
1795   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1796       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1797   }
1798 
1799   void work(uint worker_id) {
1800     FreeRegionList local_cleanup_list("Local Cleanup List");
1801     HRRSCleanupTask hrrs_cleanup_task;
1802     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1803                                            &hrrs_cleanup_task);
1804     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1805     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1806 
1807     // Now update the lists
1808     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1809     {
1810       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1811       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1812 
1813       // If we iterate over the global cleanup list at the end of
1814       // cleanup to do this printing we will not guarantee to only
1815       // generate output for the newly-reclaimed regions (the list
1816       // might not be empty at the beginning of cleanup; we might
1817       // still be working on its previous contents). So we do the
1818       // printing here, before we append the new regions to the global
1819       // cleanup list.
1820 
1821       G1HRPrinter* hr_printer = _g1h->hr_printer();
1822       if (hr_printer->is_active()) {
1823         FreeRegionListIterator iter(&local_cleanup_list);
1824         while (iter.more_available()) {
1825           HeapRegion* hr = iter.get_next();
1826           hr_printer->cleanup(hr);
1827         }
1828       }
1829 
1830       _cleanup_list->add_ordered(&local_cleanup_list);
1831       assert(local_cleanup_list.is_empty(), "post-condition");
1832 
1833       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1834     }
1835   }
1836 };
1837 
1838 class G1ParScrubRemSetTask: public AbstractGangTask {
1839 protected:
1840   G1RemSet* _g1rs;
1841   BitMap* _region_bm;
1842   BitMap* _card_bm;
1843   HeapRegionClaimer _hrclaimer;
1844 
1845 public:
1846   G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
1847       AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) {
1848   }
1849 
1850   void work(uint worker_id) {
1851     _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
1852   }
1853 
1854 };
1855 
1856 void ConcurrentMark::cleanup() {
1857   // world is stopped at this checkpoint
1858   assert(SafepointSynchronize::is_at_safepoint(),
1859          "world should be stopped");
1860   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1861 
1862   // If a full collection has happened, we shouldn't do this.
1863   if (has_aborted()) {
1864     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1865     return;
1866   }
1867 
1868   g1h->verify_region_sets_optional();
1869 
1870   if (VerifyDuringGC) {
1871     HandleMark hm;  // handle scope
1872     g1h->prepare_for_verify();
1873     Universe::verify(VerifyOption_G1UsePrevMarking,
1874                      " VerifyDuringGC:(before)");
1875   }
1876   g1h->check_bitmaps("Cleanup Start");
1877 
1878   G1CollectorPolicy* g1p = g1h->g1_policy();
1879   g1p->record_concurrent_mark_cleanup_start();
1880 
1881   double start = os::elapsedTime();
1882 
1883   HeapRegionRemSet::reset_for_cleanup_tasks();
1884 
1885   // Do counting once more with the world stopped for good measure.
1886   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1887 
1888   g1h->workers()->run_task(&g1_par_count_task);
1889 
1890   if (VerifyDuringGC) {
1891     // Verify that the counting data accumulated during marking matches
1892     // that calculated by walking the marking bitmap.
1893 
1894     // Bitmaps to hold expected values
1895     BitMap expected_region_bm(_region_bm.size(), true);
1896     BitMap expected_card_bm(_card_bm.size(), true);
1897 
1898     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1899                                                  &_region_bm,
1900                                                  &_card_bm,
1901                                                  &expected_region_bm,
1902                                                  &expected_card_bm);
1903 
1904     g1h->workers()->run_task(&g1_par_verify_task);
1905 
1906     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1907   }
1908 
1909   size_t start_used_bytes = g1h->used();
1910   g1h->collector_state()->set_mark_in_progress(false);
1911 
1912   double count_end = os::elapsedTime();
1913   double this_final_counting_time = (count_end - start);
1914   _total_counting_time += this_final_counting_time;
1915 
1916   if (G1PrintRegionLivenessInfo) {
1917     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1918     _g1h->heap_region_iterate(&cl);
1919   }
1920 
1921   // Install the newly built next mark bitmap as "prev".
1922   swapMarkBitMaps();
1923 
1924   g1h->reset_gc_time_stamp();
1925 
1926   uint n_workers = _g1h->workers()->active_workers();
1927 
1928   // Note end of marking in all heap regions.
1929   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1930   g1h->workers()->run_task(&g1_par_note_end_task);
1931   g1h->check_gc_time_stamps();
1932 
1933   if (!cleanup_list_is_empty()) {
1934     // The cleanup list is not empty, so we'll have to process it
1935     // concurrently. Notify anyone else that might be wanting free
1936     // regions that there will be more free regions coming soon.
1937     g1h->set_free_regions_coming();
1938   }
1939 
1940   // Scrub the rem sets before the record_concurrent_mark_cleanup_end()
1941   // call below, since it affects the metric by which we sort the heap regions.
1942   if (G1ScrubRemSets) {
1943     double rs_scrub_start = os::elapsedTime();
1944     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
1945     g1h->workers()->run_task(&g1_par_scrub_rs_task);
1946 
1947     double rs_scrub_end = os::elapsedTime();
1948     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1949     _total_rs_scrub_time += this_rs_scrub_time;
1950   }
1951 
1952   // this will also free any regions totally full of garbage objects,
1953   // and sort the regions.
1954   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1955 
1956   // Statistics.
1957   double end = os::elapsedTime();
1958   _cleanup_times.add((end - start) * 1000.0);
1959 
1960   if (G1Log::fine()) {
1961     g1h->g1_policy()->print_heap_transition(start_used_bytes);
1962   }
1963 
1964   // Clean up will have freed any regions completely full of garbage.
1965   // Update the soft reference policy with the new heap occupancy.
1966   Universe::update_heap_info_at_gc();
1967 
1968   if (VerifyDuringGC) {
1969     HandleMark hm;  // handle scope
1970     g1h->prepare_for_verify();
1971     Universe::verify(VerifyOption_G1UsePrevMarking,
1972                      " VerifyDuringGC:(after)");
1973   }
1974 
1975   g1h->check_bitmaps("Cleanup End");
1976 
1977   g1h->verify_region_sets_optional();
1978 
1979   // We need to make this be a "collection" so any collection pause that
1980   // races with it goes around and waits for completeCleanup to finish.
1981   g1h->increment_total_collections();
1982 
1983   // Clean out dead classes and update Metaspace sizes.
1984   if (ClassUnloadingWithConcurrentMark) {
1985     ClassLoaderDataGraph::purge();
1986   }
1987   MetaspaceGC::compute_new_size();
1988 
1989   // We reclaimed old regions so we should calculate the sizes to make
1990   // sure we update the old gen/space data.
1991   g1h->g1mm()->update_sizes();
1992   g1h->allocation_context_stats().update_after_mark();
1993 
1994   g1h->trace_heap_after_concurrent_cycle();
1995 }
1996 
1997 void ConcurrentMark::completeCleanup() {
1998   if (has_aborted()) return;
1999 
2000   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2001 
2002   _cleanup_list.verify_optional();
2003   FreeRegionList tmp_free_list("Tmp Free List");
2004 
2005   if (G1ConcRegionFreeingVerbose) {
2006     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2007                            "cleanup list has %u entries",
2008                            _cleanup_list.length());
2009   }
2010 
2011   // No one else should be accessing the _cleanup_list at this point,
2012   // so it is not necessary to take any locks
2013   while (!_cleanup_list.is_empty()) {
2014     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
2015     assert(hr != NULL, "Got NULL from a non-empty list");
2016     hr->par_clear();
2017     tmp_free_list.add_ordered(hr);
2018 
2019     // Instead of adding one region at a time to the secondary_free_list,
2020     // we accumulate them in the local list and move them a few at a
2021     // time. This also cuts down on the number of notify_all() calls
2022     // we do during this process. We'll also append the local list when
2023     // _cleanup_list is empty (which means we just removed the last
2024     // region from the _cleanup_list).
2025     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2026         _cleanup_list.is_empty()) {
2027       if (G1ConcRegionFreeingVerbose) {
2028         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2029                                "appending %u entries to the secondary_free_list, "
2030                                "cleanup list still has %u entries",
2031                                tmp_free_list.length(),
2032                                _cleanup_list.length());
2033       }
2034 
2035       {
2036         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2037         g1h->secondary_free_list_add(&tmp_free_list);
2038         SecondaryFreeList_lock->notify_all();
2039       }
2040 #ifndef PRODUCT
2041       if (G1StressConcRegionFreeing) {
2042         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2043           os::sleep(Thread::current(), (jlong) 1, false);
2044         }
2045       }
2046 #endif
2047     }
2048   }
2049   assert(tmp_free_list.is_empty(), "post-condition");
2050 }
2051 
2052 // Supporting Object and Oop closures for reference discovery
2053 // and processing during marking
2054 
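     // An object is considered alive for reference discovery/processing if it
     // is outside the G1 reserved heap (e.g. it is only reachable via
     // metadata) or if the current marking does not consider it dead, i.e. it
     // has either been marked or was allocated since marking started.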
2055 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2056   HeapWord* addr = (HeapWord*)obj;
2057   return addr != NULL &&
2058          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2059 }
2060 
2061 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2062 // Uses the CMTask associated with a worker thread (for serial reference
2063 // processing the CMTask for worker 0 is used) to preserve (mark) and
2064 // trace referent objects.
2065 //
2066 // Using the CMTask and embedded local queues avoids having the worker
2067 // threads operate on the global mark stack. This reduces the risk
2068 // of overflowing the stack - which we would rather avoid at this late
2069 // stage. Using the tasks' local queues also removes the potential
2070 // for the workers to interfere with each other, which could occur if
2071 // they operated on the global stack.
2072 
2073 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2074   ConcurrentMark* _cm;
2075   CMTask*         _task;
2076   int             _ref_counter_limit;
2077   int             _ref_counter;
2078   bool            _is_serial;
2079  public:
2080   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2081     _cm(cm), _task(task),
2082     _ref_counter_limit(G1RefProcDrainInterval), _is_serial(is_serial) {
2083     assert(_ref_counter_limit > 0, "sanity");
2084     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2085     _ref_counter = _ref_counter_limit;
2086   }
2087 
2088   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2089   virtual void do_oop(      oop* p) { do_oop_work(p); }
2090 
2091   template <class T> void do_oop_work(T* p) {
2092     if (!_cm->has_overflown()) {
2093       oop obj = oopDesc::load_decode_heap_oop(p);
2094       if (_cm->verbose_high()) {
2095         gclog_or_tty->print_cr("\t[%u] we're looking at location "
2096                                "*" PTR_FORMAT " = " PTR_FORMAT,
2097                                _task->worker_id(), p2i(p), p2i((void*) obj));
2098       }
2099 
2100       _task->deal_with_reference(obj);
2101       _ref_counter--;
2102 
2103       if (_ref_counter == 0) {
2104         // We have dealt with _ref_counter_limit references, pushing them
2105         // and objects reachable from them on to the local stack (and
2106         // possibly the global stack). Call CMTask::do_marking_step() to
2107         // process these entries.
2108         //
2109         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2110         // there's nothing more to do (i.e. we're done with the entries that
2111         // were pushed as a result of the CMTask::deal_with_reference() calls
2112         // above) or we overflow.
2113         //
2114         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2115         // flag while there may still be some work to do. (See the comment at
2116         // the beginning of CMTask::do_marking_step() for those conditions -
2117         // one of which is reaching the specified time target.) It is only
2118         // when CMTask::do_marking_step() returns without setting the
2119         // has_aborted() flag that the marking step has completed.
2120         do {
2121           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2122           _task->do_marking_step(mark_step_duration_ms,
2123                                  false      /* do_termination */,
2124                                  _is_serial);
2125         } while (_task->has_aborted() && !_cm->has_overflown());
2126         _ref_counter = _ref_counter_limit;
2127       }
2128     } else {
2129       if (_cm->verbose_high()) {
2130          gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2131       }
2132     }
2133   }
2134 };
2135 
2136 // 'Drain' oop closure used by both serial and parallel reference processing.
2137 // Uses the CMTask associated with a given worker thread (for serial
2138 // reference processing the CMTask for worker 0 is used). Calls the
2139 // do_marking_step routine, with an unbelievably large timeout value,
2140 // to drain the marking data structures of the remaining entries
2141 // added by the 'keep alive' oop closure above.
2142 
2143 class G1CMDrainMarkingStackClosure: public VoidClosure {
2144   ConcurrentMark* _cm;
2145   CMTask*         _task;
2146   bool            _is_serial;
2147  public:
2148   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2149     _cm(cm), _task(task), _is_serial(is_serial) {
2150     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2151   }
2152 
2153   void do_void() {
2154     do {
2155       if (_cm->verbose_high()) {
2156         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2157                                _task->worker_id(), BOOL_TO_STR(_is_serial));
2158       }
2159 
2160       // We call CMTask::do_marking_step() to completely drain the local
2161       // and global marking stacks of entries pushed by the 'keep alive'
2162       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2163       //
2164       // CMTask::do_marking_step() is called in a loop, which we'll exit
2165       // if there's nothing more to do (i.e. we've completely drained the
2166       // entries that were pushed as a result of applying the 'keep alive'
2167       // closure to the entries on the discovered ref lists) or we overflow
2168       // the global marking stack.
2169       //
2170       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2171       // flag while there may still be some work to do. (See the comment at
2172       // the beginning of CMTask::do_marking_step() for those conditions -
2173       // one of which is reaching the specified time target.) It is only
2174       // when CMTask::do_marking_step() returns without setting the
2175       // has_aborted() flag that the marking step has completed.
2176 
2177       _task->do_marking_step(1000000000.0 /* something very large */,
2178                              true         /* do_termination */,
2179                              _is_serial);
2180     } while (_task->has_aborted() && !_cm->has_overflown());
2181   }
2182 };
2183 
2184 // Implementation of AbstractRefProcTaskExecutor for parallel
2185 // reference processing at the end of G1 concurrent marking
2186 
2187 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2188 private:
2189   G1CollectedHeap* _g1h;
2190   ConcurrentMark*  _cm;
2191   WorkGang*        _workers;
2192   uint             _active_workers;
2193 
2194 public:
2195   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2196                           ConcurrentMark* cm,
2197                           WorkGang* workers,
2198                           uint n_workers) :
2199     _g1h(g1h), _cm(cm),
2200     _workers(workers), _active_workers(n_workers) { }
2201 
2202   // Executes the given task using concurrent marking worker threads.
2203   virtual void execute(ProcessTask& task);
2204   virtual void execute(EnqueueTask& task);
2205 };
2206 
2207 class G1CMRefProcTaskProxy: public AbstractGangTask {
2208   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2209   ProcessTask&     _proc_task;
2210   G1CollectedHeap* _g1h;
2211   ConcurrentMark*  _cm;
2212 
2213 public:
2214   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2215                      G1CollectedHeap* g1h,
2216                      ConcurrentMark* cm) :
2217     AbstractGangTask("Process reference objects in parallel"),
2218     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2219     ReferenceProcessor* rp = _g1h->ref_processor_cm();
2220     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2221   }
2222 
2223   virtual void work(uint worker_id) {
2224     ResourceMark rm;
2225     HandleMark hm;
2226     CMTask* task = _cm->task(worker_id);
2227     G1CMIsAliveClosure g1_is_alive(_g1h);
2228     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2229     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2230 
2231     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2232   }
2233 };
2234 
2235 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2236   assert(_workers != NULL, "Need parallel worker threads.");
2237   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2238 
2239   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2240 
2241   // We need to reset the concurrency level before each
2242   // proxy task execution, so that the termination protocol
2243   // and overflow handling in CMTask::do_marking_step() know
2244   // how many workers to wait for.
2245   _cm->set_concurrency(_active_workers);
2246   _workers->run_task(&proc_task_proxy);
2247 }
2248 
2249 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2250   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2251   EnqueueTask& _enq_task;
2252 
2253 public:
2254   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2255     AbstractGangTask("Enqueue reference objects in parallel"),
2256     _enq_task(enq_task) { }
2257 
2258   virtual void work(uint worker_id) {
2259     _enq_task.work(worker_id);
2260   }
2261 };
2262 
2263 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2264   assert(_workers != NULL, "Need parallel worker threads.");
2265   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2266 
2267   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2268 
2269   // Not strictly necessary but...
2270   //
2271   // We need to reset the concurrency level before each
2272   // proxy task execution, so that the termination protocol
2273   // and overflow handling in CMTask::do_marking_step() know
2274   // how many workers to wait for.
2275   _cm->set_concurrency(_active_workers);
2276   _workers->run_task(&enq_task_proxy);
2277 }
2278 
2279 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2280   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2281 }
2282 
2283 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2284   if (has_overflown()) {
2285     // Skip processing the discovered references if we have
2286     // overflown the global marking stack. Reference objects
2287     // only get discovered once so it is OK to not
2288     // de-populate the discovered reference lists. We could have,
2289     // but the only benefit would be that, when marking restarts,
2290     // less reference objects are discovered.
2291     return;
2292   }
2293 
2294   ResourceMark rm;
2295   HandleMark   hm;
2296 
2297   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2298 
2299   // Is alive closure.
2300   G1CMIsAliveClosure g1_is_alive(g1h);
2301 
2302   // Inner scope to exclude the cleaning of the string and symbol
2303   // tables from the displayed time.
2304   {
2305     G1CMTraceTime t("GC ref-proc", G1Log::finer());
2306 
2307     ReferenceProcessor* rp = g1h->ref_processor_cm();
2308 
2309     // See the comment in G1CollectedHeap::ref_processing_init()
2310     // about how reference processing currently works in G1.
2311 
2312     // Set the soft reference policy
2313     rp->setup_policy(clear_all_soft_refs);
2314     assert(_markStack.isEmpty(), "mark stack should be empty");
2315 
2316     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2317     // in serial reference processing. Note these closures are also
2318     // used for serially processing (by the current thread) the
2319     // JNI references during parallel reference processing.
2320     //
2321     // These closures do not need to synchronize with the worker
2322     // threads involved in parallel reference processing as these
2323     // instances are executed serially by the current thread (e.g.
2324     // reference processing is not multi-threaded and is thus
2325     // performed by the current thread instead of a gang worker).
2326     //
2327     // The gang tasks involved in parallel reference processing create
2328     // their own instances of these closures, which do their own
2329     // synchronization among themselves.
2330     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2331     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2332 
2333     // We need at least one active thread. If reference processing
2334     // is not multi-threaded we use the current (VMThread) thread,
2335     // otherwise we use the work gang from the G1CollectedHeap and
2336     // we utilize all the worker threads we can.
2337     bool processing_is_mt = rp->processing_is_mt();
2338     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
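         // Clamp the number of active workers to the range [1, _max_worker_id].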
2339     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2340 
2341     // Parallel processing task executor.
2342     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2343                                               g1h->workers(), active_workers);
2344     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2345 
2346     // Set the concurrency level. The phase was already set prior to
2347     // executing the remark task.
2348     set_concurrency(active_workers);
2349 
2350     // Set the degree of MT processing here.  If the discovery was done MT,
2351     // the number of threads involved during discovery could differ from
2352     // the number of active workers.  This is OK as long as the discovered
2353     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2354     rp->set_active_mt_degree(active_workers);
2355 
2356     // Process the weak references.
2357     const ReferenceProcessorStats& stats =
2358         rp->process_discovered_references(&g1_is_alive,
2359                                           &g1_keep_alive,
2360                                           &g1_drain_mark_stack,
2361                                           executor,
2362                                           g1h->gc_timer_cm());
2363     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2364 
2365     // The do_oop work routines of the keep_alive and drain_marking_stack
2366     // oop closures will set the has_overflown flag if we overflow the
2367     // global marking stack.
2368 
2369     assert(_markStack.overflow() || _markStack.isEmpty(),
2370             "mark stack should be empty (unless it overflowed)");
2371 
2372     if (_markStack.overflow()) {
2373       // This should have been done already when we tried to push an
2374       // entry on to the global mark stack. But let's do it again.
2375       set_has_overflown();
2376     }
2377 
2378     assert(rp->num_q() == active_workers, "why not");
2379 
2380     rp->enqueue_discovered_references(executor);
2381 
2382     rp->verify_no_references_recorded();
2383     assert(!rp->discovery_enabled(), "Post condition");
2384   }
2385 
2386   if (has_overflown()) {
2387     // We cannot trust g1_is_alive if the marking stack overflowed.
2388     return;
2389   }
2390 
2391   assert(_markStack.isEmpty(), "Marking should have completed");
2392 
2393   // Unload Klasses, String, Symbols, Code Cache, etc.
2394   {
2395     G1CMTraceTime trace("Unloading", G1Log::finer());
2396 
2397     if (ClassUnloadingWithConcurrentMark) {
2398       bool purged_classes;
2399 
2400       {
2401         G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest());
2402         purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2403       }
2404 
2405       {
2406         G1CMTraceTime trace("Parallel Unloading", G1Log::finest());
2407         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2408       }
2409     }
2410 
2411     if (G1StringDedup::is_enabled()) {
2412       G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest());
2413       G1StringDedup::unlink(&g1_is_alive);
2414     }
2415   }
2416 }
2417 
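     // Swap the roles of the two marking bitmaps: the "next" bitmap, built by
     // the marking cycle that has just completed, becomes the new "prev"
     // bitmap, and the old "prev" bitmap will be cleared and reused as the
     // "next" bitmap for the following cycle.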
2418 void ConcurrentMark::swapMarkBitMaps() {
2419   CMBitMapRO* temp = _prevMarkBitMap;
2420   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2421   _nextMarkBitMap  = (CMBitMap*)  temp;
2422 }
2423 
2424 // Closure for marking entries in SATB buffers.
2425 class CMSATBBufferClosure : public SATBBufferClosure {
2426 private:
2427   CMTask* _task;
2428   G1CollectedHeap* _g1h;
2429 
2430   // This is very similar to CMTask::deal_with_reference, but with
2431   // more relaxed requirements for the argument, so this must be more
2432   // circumspect about treating the argument as an object.
2433   void do_entry(void* entry) const {
2434     _task->increment_refs_reached();
2435     HeapRegion* hr = _g1h->heap_region_containing_raw(entry);
2436     if (entry < hr->next_top_at_mark_start()) {
2437       // Until we get here, we don't know whether entry refers to a valid
2438       // object; it could instead have been a stale reference.
2439       oop obj = static_cast<oop>(entry);
2440       assert(obj->is_oop(true /* ignore mark word */),
2441              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2442       _task->make_reference_grey(obj, hr);
2443     }
2444   }
2445 
2446 public:
2447   CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2448     : _task(task), _g1h(g1h) { }
2449 
2450   virtual void do_buffer(void** buffer, size_t size) {
2451     for (size_t i = 0; i < size; ++i) {
2452       do_entry(buffer[i]);
2453     }
2454   }
2455 };
2456 
2457 class G1RemarkThreadsClosure : public ThreadClosure {
2458   CMSATBBufferClosure _cm_satb_cl;
2459   G1CMOopClosure _cm_cl;
2460   MarkingCodeBlobClosure _code_cl;
2461   int _thread_parity;
2462 
2463  public:
2464   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2465     _cm_satb_cl(task, g1h),
2466     _cm_cl(g1h, g1h->concurrent_mark(), task),
2467     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2468     _thread_parity(Threads::thread_claim_parity()) {}
2469 
2470   void do_thread(Thread* thread) {
2471     if (thread->is_Java_thread()) {
2472       if (thread->claim_oops_do(true, _thread_parity)) {
2473         JavaThread* jt = (JavaThread*)thread;
2474 
2475         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2476         // however, oops reachable from nmethods have very complex liveness lifecycles:
2477         // * Alive if on the stack of an executing method
2478         // * Weakly reachable otherwise
2479         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2480         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2481         jt->nmethods_do(&_code_cl);
2482 
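             // Apply the SATB buffer closure to, and then empty, this thread's
             // local SATB queue.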
2483         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2484       }
2485     } else if (thread->is_VM_thread()) {
2486       if (thread->claim_oops_do(true, _thread_parity)) {
2487         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2488       }
2489     }
2490   }
2491 };
2492 
2493 class CMRemarkTask: public AbstractGangTask {
2494 private:
2495   ConcurrentMark* _cm;
2496 public:
2497   void work(uint worker_id) {
2498     // Since all available tasks are actually started, we should
2499     // only proceed if we're supposed to be active.
2500     if (worker_id < _cm->active_tasks()) {
2501       CMTask* task = _cm->task(worker_id);
2502       task->record_start_time();
2503       {
2504         ResourceMark rm;
2505         HandleMark hm;
2506 
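             // Apply G1RemarkThreadsClosure to every thread: this scans each
             // Java thread's nmethods and drains the remaining per-thread
             // (and shared) SATB buffers.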
2507         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2508         Threads::threads_do(&threads_f);
2509       }
2510 
2511       do {
2512         task->do_marking_step(1000000000.0 /* something very large */,
2513                               true         /* do_termination       */,
2514                               false        /* is_serial            */);
2515       } while (task->has_aborted() && !_cm->has_overflown());
2516       // If we overflow, then we do not want to restart. We instead
2517       // want to abort remark and do concurrent marking again.
2518       task->record_end_time();
2519     }
2520   }
2521 
2522   CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2523     AbstractGangTask("Par Remark"), _cm(cm) {
2524     _cm->terminator()->reset_for_reuse(active_workers);
2525   }
2526 };
2527 
2528 void ConcurrentMark::checkpointRootsFinalWork() {
2529   ResourceMark rm;
2530   HandleMark   hm;
2531   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2532 
2533   G1CMTraceTime trace("Finalize Marking", G1Log::finer());
2534 
2535   g1h->ensure_parsability(false);
2536 
2537   // this is remark, so we'll use up all active threads
2538   uint active_workers = g1h->workers()->active_workers();
2539   set_concurrency_and_phase(active_workers, false /* concurrent */);
2540   // Leave _parallel_marking_threads at its
2541   // value originally calculated in the ConcurrentMark
2542   // constructor and pass the number of active workers
2543   // through the gang in the task.
2544 
2545   {
2546     StrongRootsScope srs(active_workers);
2547 
2548     CMRemarkTask remarkTask(this, active_workers);
2549     // We will start all available threads, even if we decide that the
2550     // active_workers will be fewer. The extra ones will just bail out
2551     // immediately.
2552     g1h->workers()->run_task(&remarkTask);
2553   }
2554 
2555   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2556   guarantee(has_overflown() ||
2557             satb_mq_set.completed_buffers_num() == 0,
2558             "Invariant: has_overflown = %s, num buffers = %d",
2559             BOOL_TO_STR(has_overflown()),
2560             satb_mq_set.completed_buffers_num());
2561 
2562   print_stats();
2563 }
2564 
2565 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2566   // Note we are overriding the read-only view of the prev map here, via
2567   // the cast.
2568   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2569 }
2570 
2571 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2572   _nextMarkBitMap->clearRange(mr);
2573 }
2574 
2575 HeapRegion*
2576 ConcurrentMark::claim_region(uint worker_id) {
2577   // "checkpoint" the finger
2578   HeapWord* finger = _finger;
2579 
2580   // _heap_end will not change underneath our feet; it only changes at
2581   // yield points.
2582   while (finger < _heap_end) {
2583     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2584 
2585     // Note on how this code handles humongous regions. In the
2586     // normal case the finger will reach the start of a "starts
2587     // humongous" (SH) region. Its end will either be the end of the
2588     // last "continues humongous" (CH) region in the sequence, or the
2589     // standard end of the SH region (if the SH is the only region in
2590     // the sequence). That way claim_region() will skip over the CH
2591     // regions. However, there is a subtle race between a CM thread
2592     // executing this method and a mutator thread doing a humongous
2593     // object allocation. The two are not mutually exclusive as the CM
2594     // thread does not need to hold the Heap_lock when it gets
2595     // here. So there is a chance that claim_region() will come across
2596     // a free region that's in the process of becoming a SH or a CH
2597     // region. In the former case, it will either
2598     //   a) Miss the update to the region's end, in which case it will
2599     //      visit every subsequent CH region, will find their bitmaps
2600     //      empty, and do nothing, or
2601     //   b) Will observe the update of the region's end (in which case
2602     //      it will skip the subsequent CH regions).
2603     // If it comes across a region that suddenly becomes CH, the
2604     // scenario will be similar to b). So, the race between
2605     // claim_region() and a humongous object allocation might force us
2606     // to do a bit of unnecessary work (due to some unnecessary bitmap
2607     // iterations) but it should not introduce any correctness issues.
2608     HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2609 
2610     // heap_region_containing_raw() above may return NULL, as we always claim
2611     // up to the end of the heap. In this case, just jump to the next region.
2612     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2613 
2614     // Is the gap between reading the finger and doing the CAS too long?
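         // Attempt to atomically advance the global finger from our snapshot
         // to the end of this region; if the CAS succeeds we have claimed the
         // range [finger, end).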
2615     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2616     if (res == finger && curr_region != NULL) {
2617       // we succeeded
2618       HeapWord*   bottom        = curr_region->bottom();
2619       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2620 
2621       if (verbose_low()) {
2622         gclog_or_tty->print_cr("[%u] curr_region = " PTR_FORMAT " "
2623                                "[" PTR_FORMAT ", " PTR_FORMAT "), "
2624                                "limit = " PTR_FORMAT,
2625                                worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2626       }
2627 
2628       // Notice that _finger == end cannot be guaranteed here since
2629       // someone else might have moved the finger even further.
2630       assert(_finger >= end, "the finger should have moved forward");
2631 
2632       if (verbose_low()) {
2633         gclog_or_tty->print_cr("[%u] we were successful with region = "
2634                                PTR_FORMAT, worker_id, p2i(curr_region));
2635       }
2636 
2637       if (limit > bottom) {
2638         if (verbose_low()) {
2639           gclog_or_tty->print_cr("[%u] region " PTR_FORMAT " is not empty, "
2640                                  "returning it ", worker_id, p2i(curr_region));
2641         }
2642         return curr_region;
2643       } else {
2644         assert(limit == bottom,
2645                "the region limit should be at bottom");
2646         if (verbose_low()) {
2647           gclog_or_tty->print_cr("[%u] region " PTR_FORMAT " is empty, "
2648                                  "returning NULL", worker_id, p2i(curr_region));
2649         }
2650         // we return NULL and the caller should try calling
2651         // claim_region() again.
2652         return NULL;
2653       }
2654     } else {
2655       assert(_finger > finger, "the finger should have moved forward");
2656       if (verbose_low()) {
2657         if (curr_region == NULL) {
2658           gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, "
2659                                  "global finger = " PTR_FORMAT ", "
2660                                  "our finger = " PTR_FORMAT,
2661                                  worker_id, p2i(_finger), p2i(finger));
2662         } else {
2663           gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
2664                                  "global finger = " PTR_FORMAT ", "
2665                                  "our finger = " PTR_FORMAT,
2666                                  worker_id, p2i(_finger), p2i(finger));
2667         }
2668       }
2669 
2670       // read it again
2671       finger = _finger;
2672     }
2673   }
2674 
2675   return NULL;
2676 }
2677 
2678 #ifndef PRODUCT
2679 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
2680 private:
2681   G1CollectedHeap* _g1h;
2682   const char* _phase;
2683   int _info;
2684 
2685 public:
2686   VerifyNoCSetOops(const char* phase, int info = -1) :
2687     _g1h(G1CollectedHeap::heap()),
2688     _phase(phase),
2689     _info(info)
2690   { }
2691 
2692   void operator()(oop obj) const {
2693     guarantee(obj->is_oop(),
2694               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2695               p2i(obj), _phase, _info);
2696     guarantee(!_g1h->obj_in_cs(obj),
2697               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2698               p2i(obj), _phase, _info);
2699   }
2700 };
2701 
2702 void ConcurrentMark::verify_no_cset_oops() {
2703   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2704   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2705     return;
2706   }
2707 
2708   // Verify entries on the global mark stack
2709   _markStack.iterate(VerifyNoCSetOops("Stack"));
2710 
2711   // Verify entries on the task queues
2712   for (uint i = 0; i < _max_worker_id; ++i) {
2713     CMTaskQueue* queue = _task_queues->queue(i);
2714     queue->iterate(VerifyNoCSetOops("Queue", i));
2715   }
2716 
2717   // Verify the global finger
2718   HeapWord* global_finger = finger();
2719   if (global_finger != NULL && global_finger < _heap_end) {
2720     // The global finger always points to a heap region boundary. We
2721     // use heap_region_containing_raw() to get the containing region
2722     // given that the global finger could be pointing to a free region
2723     // which subsequently becomes continues humongous. If that
2724     // happens, heap_region_containing() will return the bottom of the
2725     // corresponding starts humongous region and the check below will
2726     // not hold any more.
2727     // Since we always iterate over all regions, we might get a NULL HeapRegion
2728     // here.
2729     HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2730     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2731               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
2732               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
2733   }
2734 
2735   // Verify the task fingers
2736   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2737   for (uint i = 0; i < parallel_marking_threads(); ++i) {
2738     CMTask* task = _tasks[i];
2739     HeapWord* task_finger = task->finger();
2740     if (task_finger != NULL && task_finger < _heap_end) {
2741       // See above note on the global finger verification.
2742       HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2743       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2744                 !task_hr->in_collection_set(),
2745                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2746                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
2747     }
2748   }
2749 }
2750 #endif // PRODUCT
2751 
2752 // Aggregate the counting data that was constructed concurrently
2753 // with marking.
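// Each marking worker owns a per-region marked-bytes array and a card
// bitmap. For each region, this closure sums the per-worker marked-bytes
// entries into the region's marked-bytes total and ORs the per-worker card
// bits for the region's card range into the global card bitmap.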
2754 class AggregateCountDataHRClosure: public HeapRegionClosure {
2755   G1CollectedHeap* _g1h;
2756   ConcurrentMark* _cm;
2757   CardTableModRefBS* _ct_bs;
2758   BitMap* _cm_card_bm;
2759   uint _max_worker_id;
2760 
2761  public:
2762   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2763                               BitMap* cm_card_bm,
2764                               uint max_worker_id) :
2765     _g1h(g1h), _cm(g1h->concurrent_mark()),
2766     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2767     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2768 
2769   bool doHeapRegion(HeapRegion* hr) {
2770     if (hr->is_continues_humongous()) {
2771       // We will ignore these here and process them when their
2772       // associated "starts humongous" region is processed.
2773       // Note that we cannot rely on their associated
2774       // "starts humongous" region to have their bit set to 1
2775       // since, due to the region chunking in the parallel region
2776       // iteration, a "continues humongous" region might be visited
2777       // before its associated "starts humongous".
2778       return false;
2779     }
2780 
2781     HeapWord* start = hr->bottom();
2782     HeapWord* limit = hr->next_top_at_mark_start();
2783     HeapWord* end = hr->end();
2784 
2785     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2786            "Preconditions not met - "
2787            "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2788            "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2789            p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2790 
2791     assert(hr->next_marked_bytes() == 0, "Precondition");
2792 
2793     if (start == limit) {
      // The NTAMS of this region has not been set, so there is nothing to do.
2795       return false;
2796     }
2797 
2798     // 'start' should be in the heap.
2799     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2800     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2801     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2802 
2803     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2804     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2805     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2806 
    // If ntams is not card aligned then we bump the card bitmap index
    // for limit so that we get all the cards spanned by
    // the object ending at ntams.
    // Note: if this is the last region in the heap then ntams
    // could actually be just beyond the end of the heap;
    // limit_idx will then correspond to a (non-existent) card
    // that is also outside the heap.
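    // For example, with 512-byte cards, an ntams lying a few bytes into a
    // card still requires that card's bit to be merged, hence the bump of
    // limit_idx below.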
2814     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2815       limit_idx += 1;
2816     }
2817 
2818     assert(limit_idx <= end_idx, "or else use atomics");
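    // The assert above guarantees that every bit set below lies within the
    // cards spanned by this region. Writing past end_idx could race with
    // workers processing neighbouring regions, which would require atomic
    // updates of the global card bitmap.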
2819 
2820     // Aggregate the "stripe" in the count data associated with hr.
2821     uint hrm_index = hr->hrm_index();
2822     size_t marked_bytes = 0;
2823 
2824     for (uint i = 0; i < _max_worker_id; i += 1) {
2825       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2826       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2827 
2828       // Fetch the marked_bytes in this region for task i and
2829       // add it to the running total for this region.
2830       marked_bytes += marked_bytes_array[hrm_index];
2831 
2832       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2833       // into the global card bitmap.
2834       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2835 
2836       while (scan_idx < limit_idx) {
2837         assert(task_card_bm->at(scan_idx) == true, "should be");
2838         _cm_card_bm->set_bit(scan_idx);
2839         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2840 
2841         // BitMap::get_next_one_offset() can handle the case when
2842         // its left_offset parameter is greater than its right_offset
2843         // parameter. It does, however, have an early exit if
2844         // left_offset == right_offset. So let's limit the value
2845         // passed in for left offset here.
2846         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2847         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2848       }
2849     }
2850 
2851     // Update the marked bytes for this region.
2852     hr->add_to_marked_bytes(marked_bytes);
2853 
2854     // Next heap region
2855     return false;
2856   }
2857 };
2858 
2859 class G1AggregateCountDataTask: public AbstractGangTask {
2860 protected:
2861   G1CollectedHeap* _g1h;
2862   ConcurrentMark* _cm;
2863   BitMap* _cm_card_bm;
2864   uint _max_worker_id;
2865   uint _active_workers;
2866   HeapRegionClaimer _hrclaimer;
2867 
2868 public:
2869   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2870                            ConcurrentMark* cm,
2871                            BitMap* cm_card_bm,
2872                            uint max_worker_id,
2873                            uint n_workers) :
2874       AbstractGangTask("Count Aggregation"),
2875       _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2876       _max_worker_id(max_worker_id),
2877       _active_workers(n_workers),
2878       _hrclaimer(_active_workers) {
2879   }
2880 
2881   void work(uint worker_id) {
2882     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
2883 
2884     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
2885   }
2886 };
2887 
2888 
2889 void ConcurrentMark::aggregate_count_data() {
2890   uint n_workers = _g1h->workers()->active_workers();
2891 
2892   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
2893                                            _max_worker_id, n_workers);
2894 
2895   _g1h->workers()->run_task(&g1_par_agg_task);
2896 }
2897 
2898 // Clear the per-worker arrays used to store the per-region counting data
2899 void ConcurrentMark::clear_all_count_data() {
2900   // Clear the global card bitmap - it will be filled during
2901   // liveness count aggregation (during remark) and the
2902   // final counting task.
2903   _card_bm.clear();
2904 
2905   // Clear the global region bitmap - it will be filled as part
2906   // of the final counting task.
2907   _region_bm.clear();
2908 
2909   uint max_regions = _g1h->max_regions();
2910   assert(_max_worker_id > 0, "uninitialized");
2911 
2912   for (uint i = 0; i < _max_worker_id; i += 1) {
2913     BitMap* task_card_bm = count_card_bitmap_for(i);
2914     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
2915 
2916     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
2917     assert(marked_bytes_array != NULL, "uninitialized");
2918 
2919     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
2920     task_card_bm->clear();
2921   }
2922 }
2923 
2924 void ConcurrentMark::print_stats() {
2925   if (verbose_stats()) {
2926     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2927     for (size_t i = 0; i < _active_tasks; ++i) {
2928       _tasks[i]->print_stats();
2929       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2930     }
2931   }
2932 }
2933 
2934 // abandon current marking iteration due to a Full GC
2935 void ConcurrentMark::abort() {
2936   if (!cmThread()->during_cycle() || _has_aborted) {
2937     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2938     return;
2939   }
2940 
2941   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2942   // concurrent bitmap clearing.
2943   _nextMarkBitMap->clearAll();
2944 
2945   // Note we cannot clear the previous marking bitmap here
2946   // since VerifyDuringGC verifies the objects marked during
2947   // a full GC against the previous bitmap.
2948 
2949   // Clear the liveness counting data
2950   clear_all_count_data();
2951   // Empty mark stack
2952   reset_marking_state();
2953   for (uint i = 0; i < _max_worker_id; ++i) {
2954     _tasks[i]->clear_region_fields();
2955   }
2956   _first_overflow_barrier_sync.abort();
2957   _second_overflow_barrier_sync.abort();
2958   _has_aborted = true;
2959 
2960   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2961   satb_mq_set.abandon_partial_marking();
  // This can be called either during or outside marking, so we'll read
  // the expected_active value from the SATB queue set.
2964   satb_mq_set.set_active_all_threads(
2965                                  false, /* new active value */
2966                                  satb_mq_set.is_active() /* expected_active */);
2967 
2968   _g1h->trace_heap_after_concurrent_cycle();
2969   _g1h->register_concurrent_cycle_end();
2970 }
2971 
2972 static void print_ms_time_info(const char* prefix, const char* name,
2973                                NumberSeq& ns) {
2974   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2975                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2976   if (ns.num() > 0) {
2977     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2978                            prefix, ns.sd(), ns.maximum());
2979   }
2980 }
2981 
2982 void ConcurrentMark::print_summary_info() {
2983   gclog_or_tty->print_cr(" Concurrent marking:");
2984   print_ms_time_info("  ", "init marks", _init_times);
2985   print_ms_time_info("  ", "remarks", _remark_times);
2986   {
2987     print_ms_time_info("     ", "final marks", _remark_mark_times);
2988     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2989 
2990   }
2991   print_ms_time_info("  ", "cleanups", _cleanup_times);
2992   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
2993                          _total_counting_time,
2994                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2995                           (double)_cleanup_times.num()
2996                          : 0.0));
2997   if (G1ScrubRemSets) {
2998     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2999                            _total_rs_scrub_time,
3000                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3001                             (double)_cleanup_times.num()
3002                            : 0.0));
3003   }
3004   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3005                          (_init_times.sum() + _remark_times.sum() +
3006                           _cleanup_times.sum())/1000.0);
3007   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3008                 "(%8.2f s marking).",
3009                 cmThread()->vtime_accum(),
3010                 cmThread()->vtime_mark_accum());
3011 }
3012 
3013 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3014   _parallel_workers->print_worker_threads_on(st);
3015 }
3016 
3017 void ConcurrentMark::print_on_error(outputStream* st) const {
3018   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
3019       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
3020   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
3021   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
3022 }
3023 
3024 // We take a break if someone is trying to stop the world.
3025 bool ConcurrentMark::do_yield_check(uint worker_id) {
3026   if (SuspendibleThreadSet::should_yield()) {
3027     if (worker_id == 0) {
3028       _g1h->g1_policy()->record_concurrent_pause();
3029     }
3030     SuspendibleThreadSet::yield();
3031     return true;
3032   } else {
3033     return false;
3034   }
3035 }
3036 
3037 #ifndef PRODUCT
3038 // for debugging purposes
3039 void ConcurrentMark::print_finger() {
3040   gclog_or_tty->print_cr("heap [" PTR_FORMAT ", " PTR_FORMAT "), global finger = " PTR_FORMAT,
3041                          p2i(_heap_start), p2i(_heap_end), p2i(_finger));
3042   for (uint i = 0; i < _max_worker_id; ++i) {
3043     gclog_or_tty->print("   %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
3044   }
3045   gclog_or_tty->cr();
3046 }
3047 #endif
3048 
3049 // Closure for iteration over bitmaps
3050 class CMBitMapClosure : public BitMapClosure {
3051 private:
3052   // the bitmap that is being iterated over
3053   CMBitMap*                   _nextMarkBitMap;
3054   ConcurrentMark*             _cm;
3055   CMTask*                     _task;
3056 
3057 public:
3058   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3059     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3060 
3061   bool do_bit(size_t offset) {
3062     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3063     assert(_nextMarkBitMap->isMarked(addr), "invariant");
    assert(addr < _cm->finger(), "invariant");
3065     assert(addr >= _task->finger(), "invariant");
3066 
3067     // We move that task's local finger along.
3068     _task->move_finger_to(addr);
3069 
3070     _task->scan_object(oop(addr));
3071     // we only partially drain the local queue and global stack
3072     _task->drain_local_queue(true);
3073     _task->drain_global_stack(true);
3074 
3075     // if the has_aborted flag has been raised, we need to bail out of
3076     // the iteration
3077     return !_task->has_aborted();
3078   }
3079 };
3080 
3081 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
3082   ReferenceProcessor* result = NULL;
3083   if (G1UseConcMarkReferenceProcessing) {
3084     result = g1h->ref_processor_cm();
3085     assert(result != NULL, "should not be NULL");
3086   }
3087   return result;
3088 }
3089 
3090 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3091                                ConcurrentMark* cm,
3092                                CMTask* task)
3093   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
3094     _g1h(g1h), _cm(cm), _task(task)
3095 { }
3096 
3097 void CMTask::setup_for_region(HeapRegion* hr) {
3098   assert(hr != NULL,
3099         "claim_region() should have filtered out NULL regions");
3100   assert(!hr->is_continues_humongous(),
3101         "claim_region() should have filtered out continues humongous regions");
3102 
3103   if (_cm->verbose_low()) {
3104     gclog_or_tty->print_cr("[%u] setting up for region " PTR_FORMAT,
3105                            _worker_id, p2i(hr));
3106   }
3107 
3108   _curr_region  = hr;
3109   _finger       = hr->bottom();
3110   update_region_limit();
3111 }
3112 
3113 void CMTask::update_region_limit() {
3114   HeapRegion* hr            = _curr_region;
3115   HeapWord* bottom          = hr->bottom();
3116   HeapWord* limit           = hr->next_top_at_mark_start();
3117 
3118   if (limit == bottom) {
3119     if (_cm->verbose_low()) {
3120       gclog_or_tty->print_cr("[%u] found an empty region "
3121                              "[" PTR_FORMAT ", " PTR_FORMAT ")",
3122                              _worker_id, p2i(bottom), p2i(limit));
3123     }
3124     // The region was collected underneath our feet.
3125     // We set the finger to bottom to ensure that the bitmap
3126     // iteration that will follow this will not do anything.
3127     // (this is not a condition that holds when we set the region up,
3128     // as the region is not supposed to be empty in the first place)
3129     _finger = bottom;
3130   } else if (limit >= _region_limit) {
3131     assert(limit >= _finger, "peace of mind");
3132   } else {
3133     assert(limit < _region_limit, "only way to get here");
3134     // This can happen under some pretty unusual circumstances.  An
3135     // evacuation pause empties the region underneath our feet (NTAMS
3136     // at bottom). We then do some allocation in the region (NTAMS
3137     // stays at bottom), followed by the region being used as a GC
3138     // alloc region (NTAMS will move to top() and the objects
3139     // originally below it will be grayed). All objects now marked in
3140     // the region are explicitly grayed, if below the global finger,
    // and in fact we do not need to scan anything else. So, we simply
    // set _finger to limit to ensure that the bitmap iteration
3143     // doesn't do anything.
3144     _finger = limit;
3145   }
3146 
3147   _region_limit = limit;
3148 }
3149 
3150 void CMTask::giveup_current_region() {
3151   assert(_curr_region != NULL, "invariant");
3152   if (_cm->verbose_low()) {
3153     gclog_or_tty->print_cr("[%u] giving up region " PTR_FORMAT,
3154                            _worker_id, p2i(_curr_region));
3155   }
3156   clear_region_fields();
3157 }
3158 
3159 void CMTask::clear_region_fields() {
  // Set these three fields to values that indicate we're not
  // holding on to a region.
3162   _curr_region   = NULL;
3163   _finger        = NULL;
3164   _region_limit  = NULL;
3165 }
3166 
3167 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3168   if (cm_oop_closure == NULL) {
3169     assert(_cm_oop_closure != NULL, "invariant");
3170   } else {
3171     assert(_cm_oop_closure == NULL, "invariant");
3172   }
3173   _cm_oop_closure = cm_oop_closure;
3174 }
3175 
3176 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3177   guarantee(nextMarkBitMap != NULL, "invariant");
3178 
3179   if (_cm->verbose_low()) {
3180     gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3181   }
3182 
3183   _nextMarkBitMap                = nextMarkBitMap;
3184   clear_region_fields();
3185 
3186   _calls                         = 0;
3187   _elapsed_time_ms               = 0.0;
3188   _termination_time_ms           = 0.0;
3189   _termination_start_time_ms     = 0.0;
3190 }
3191 
3192 bool CMTask::should_exit_termination() {
3193   regular_clock_call();
3194   // This is called when we are in the termination protocol. We should
3195   // quit if, for some reason, this task wants to abort or the global
3196   // stack is not empty (this means that we can get work from it).
3197   return !_cm->mark_stack_empty() || has_aborted();
3198 }
3199 
3200 void CMTask::reached_limit() {
3201   assert(_words_scanned >= _words_scanned_limit ||
         _refs_reached >= _refs_reached_limit,
3203          "shouldn't have been called otherwise");
3204   regular_clock_call();
3205 }
3206 
3207 void CMTask::regular_clock_call() {
3208   if (has_aborted()) return;
3209 
3210   // First, we need to recalculate the words scanned and refs reached
3211   // limits for the next clock call.
3212   recalculate_limits();
3213 
3214   // During the regular clock call we do the following
3215 
3216   // (1) If an overflow has been flagged, then we abort.
3217   if (_cm->has_overflown()) {
3218     set_has_aborted();
3219     return;
3220   }
3221 
3222   // If we are not concurrent (i.e. we're doing remark) we don't need
3223   // to check anything else. The other steps are only needed during
3224   // the concurrent marking phase.
3225   if (!concurrent()) return;
3226 
3227   // (2) If marking has been aborted for Full GC, then we also abort.
3228   if (_cm->has_aborted()) {
3229     set_has_aborted();
3230     return;
3231   }
3232 
3233   double curr_time_ms = os::elapsedVTime() * 1000.0;
3234 
  // (3) We check whether we should yield. If we have to, then we abort.
3236   if (SuspendibleThreadSet::should_yield()) {
3237     // We should yield. To do this we abort the task. The caller is
3238     // responsible for yielding.
3239     set_has_aborted();
3240     return;
3241   }
3242 
  // (4) We check whether we've reached our time quota. If we have,
3244   // then we abort.
3245   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3246   if (elapsed_time_ms > _time_target_ms) {
3247     set_has_aborted();
3248     _has_timed_out = true;
3249     return;
3250   }
3251 
  // (5) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
3254   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3255   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3256     if (_cm->verbose_low()) {
3257       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3258                              _worker_id);
3259     }
    // we do need to process SATB buffers, so we'll abort and restart
    // the marking task to do so
3262     set_has_aborted();
3263     return;
3264   }
3265 }
3266 
3267 void CMTask::recalculate_limits() {
3268   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3269   _words_scanned_limit      = _real_words_scanned_limit;
3270 
3271   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3272   _refs_reached_limit       = _real_refs_reached_limit;
3273 }
3274 
3275 void CMTask::decrease_limits() {
3276   // This is called when we believe that we're going to do an infrequent
3277   // operation which will increase the per byte scanned cost (i.e. move
3278   // entries to/from the global stack). It basically tries to decrease the
3279   // scanning limit so that the clock is called earlier.
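  // Concretely, the limits end up a quarter of a period beyond the point of
  // the last recalculation, so the next regular_clock_call() is triggered
  // after at most a quarter of the usual amount of work.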
3280 
3281   if (_cm->verbose_medium()) {
3282     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3283   }
3284 
3285   _words_scanned_limit = _real_words_scanned_limit -
3286     3 * words_scanned_period / 4;
3287   _refs_reached_limit  = _real_refs_reached_limit -
3288     3 * refs_reached_period / 4;
3289 }
3290 
3291 void CMTask::move_entries_to_global_stack() {
3292   // local array where we'll store the entries that will be popped
3293   // from the local queue
3294   oop buffer[global_stack_transfer_size];
3295 
3296   int n = 0;
3297   oop obj;
3298   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3299     buffer[n] = obj;
3300     ++n;
3301   }
3302 
3303   if (n > 0) {
3304     // we popped at least one entry from the local queue
3305 
3306     if (!_cm->mark_stack_push(buffer, n)) {
3307       if (_cm->verbose_low()) {
3308         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3309                                _worker_id);
3310       }
3311       set_has_aborted();
3312     } else {
3313       // the transfer was successful
3314 
3315       if (_cm->verbose_medium()) {
3316         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3317                                _worker_id, n);
3318       }
3319     }
3320   }
3321 
3322   // this operation was quite expensive, so decrease the limits
3323   decrease_limits();
3324 }
3325 
3326 void CMTask::get_entries_from_global_stack() {
3327   // local array where we'll store the entries that will be popped
3328   // from the global stack.
3329   oop buffer[global_stack_transfer_size];
3330   int n;
3331   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3332   assert(n <= global_stack_transfer_size,
3333          "we should not pop more than the given limit");
3334   if (n > 0) {
3335     // yes, we did actually pop at least one entry
3336     if (_cm->verbose_medium()) {
3337       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3338                              _worker_id, n);
3339     }
3340     for (int i = 0; i < n; ++i) {
3341       bool success = _task_queue->push(buffer[i]);
3342       // We only call this when the local queue is empty or under a
3343       // given target limit. So, we do not expect this push to fail.
3344       assert(success, "invariant");
3345     }
3346   }
3347 
3348   // this operation was quite expensive, so decrease the limits
3349   decrease_limits();
3350 }
3351 
3352 void CMTask::drain_local_queue(bool partially) {
3353   if (has_aborted()) return;
3354 
  // Decide what the target size is, depending on whether we're going to
3356   // drain it partially (so that other tasks can steal if they run out
3357   // of things to do) or totally (at the very end).
3358   size_t target_size;
3359   if (partially) {
3360     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3361   } else {
3362     target_size = 0;
3363   }
3364 
3365   if (_task_queue->size() > target_size) {
3366     if (_cm->verbose_high()) {
3367       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3368                              _worker_id, target_size);
3369     }
3370 
3371     oop obj;
3372     bool ret = _task_queue->pop_local(obj);
3373     while (ret) {
3374       if (_cm->verbose_high()) {
3375         gclog_or_tty->print_cr("[%u] popped " PTR_FORMAT, _worker_id,
3376                                p2i((void*) obj));
3377       }
3378 
3379       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3380       assert(!_g1h->is_on_master_free_list(
3381                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3382 
3383       scan_object(obj);
3384 
3385       if (_task_queue->size() <= target_size || has_aborted()) {
3386         ret = false;
3387       } else {
3388         ret = _task_queue->pop_local(obj);
3389       }
3390     }
3391 
3392     if (_cm->verbose_high()) {
3393       gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3394                              _worker_id, _task_queue->size());
3395     }
3396   }
3397 }
3398 
3399 void CMTask::drain_global_stack(bool partially) {
3400   if (has_aborted()) return;
3401 
3402   // We have a policy to drain the local queue before we attempt to
3403   // drain the global stack.
3404   assert(partially || _task_queue->size() == 0, "invariant");
3405 
  // Decide what the target size is, depending on whether we're going to
3407   // drain it partially (so that other tasks can steal if they run out
3408   // of things to do) or totally (at the very end).  Notice that,
3409   // because we move entries from the global stack in chunks or
3410   // because another task might be doing the same, we might in fact
3411   // drop below the target. But, this is not a problem.
3412   size_t target_size;
3413   if (partially) {
3414     target_size = _cm->partial_mark_stack_size_target();
3415   } else {
3416     target_size = 0;
3417   }
3418 
3419   if (_cm->mark_stack_size() > target_size) {
3420     if (_cm->verbose_low()) {
3421       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3422                              _worker_id, target_size);
3423     }
3424 
3425     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3426       get_entries_from_global_stack();
3427       drain_local_queue(partially);
3428     }
3429 
3430     if (_cm->verbose_low()) {
3431       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3432                              _worker_id, _cm->mark_stack_size());
3433     }
3434   }
3435 }
3436 
// The SATB queue code makes several assumptions about whether to call the
// par or non-par versions of the methods. This is why some of the code is
// replicated. We should really get rid of the single-threaded version
// of the code to simplify things.
3441 void CMTask::drain_satb_buffers() {
3442   if (has_aborted()) return;
3443 
3444   // We set this so that the regular clock knows that we're in the
3445   // middle of draining buffers and doesn't set the abort flag when it
3446   // notices that SATB buffers are available for draining. It'd be
  // very counterproductive if it did that. :-)
3448   _draining_satb_buffers = true;
3449 
3450   CMSATBBufferClosure satb_cl(this, _g1h);
3451   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3452 
3453   // This keeps claiming and applying the closure to completed buffers
3454   // until we run out of buffers or we need to abort.
3455   while (!has_aborted() &&
3456          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3457     if (_cm->verbose_medium()) {
3458       gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3459     }
3460     regular_clock_call();
3461   }
3462 
3463   _draining_satb_buffers = false;
3464 
3465   assert(has_aborted() ||
3466          concurrent() ||
3467          satb_mq_set.completed_buffers_num() == 0, "invariant");
3468 
3469   // again, this was a potentially expensive operation, decrease the
3470   // limits to get the regular clock call early
3471   decrease_limits();
3472 }
3473 
3474 void CMTask::print_stats() {
3475   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3476                          _worker_id, _calls);
3477   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3478                          _elapsed_time_ms, _termination_time_ms);
3479   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3480                          _step_times_ms.num(), _step_times_ms.avg(),
3481                          _step_times_ms.sd());
3482   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3483                          _step_times_ms.maximum(), _step_times_ms.sum());
3484 }
3485 
3486 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3487   return _task_queues->steal(worker_id, hash_seed, obj);
3488 }
3489 
3490 /*****************************************************************************
3491 
3492     The do_marking_step(time_target_ms, ...) method is the building
3493     block of the parallel marking framework. It can be called in parallel
3494     with other invocations of do_marking_step() on different tasks
3495     (but only one per task, obviously) and concurrently with the
3496     mutator threads, or during remark, hence it eliminates the need
3497     for two versions of the code. When called during remark, it will
3498     pick up from where the task left off during the concurrent marking
3499     phase. Interestingly, tasks are also claimable during evacuation
    pauses, since do_marking_step() ensures that it aborts before
3501     it needs to yield.
3502 
3503     The data structures that it uses to do marking work are the
3504     following:
3505 
3506       (1) Marking Bitmap. If there are gray objects that appear only
3507       on the bitmap (this happens either when dealing with an overflow
3508       or when the initial marking phase has simply marked the roots
3509       and didn't push them on the stack), then tasks claim heap
3510       regions whose bitmap they then scan to find gray objects. A
3511       global finger indicates where the end of the last claimed region
3512       is. A local finger indicates how far into the region a task has
3513       scanned. The two fingers are used to determine how to gray an
3514       object (i.e. whether simply marking it is OK, as it will be
3515       visited by a task in the future, or whether it needs to be also
3516       pushed on a stack).
3517 
3518       (2) Local Queue. The local queue of the task which is accessed
3519       reasonably efficiently by the task. Other tasks can steal from
3520       it when they run out of work. Throughout the marking phase, a
3521       task attempts to keep its local queue short but not totally
3522       empty, so that entries are available for stealing by other
      tasks. Only when there is no more work does a task totally
      drain its local queue.
3525 
3526       (3) Global Mark Stack. This handles local queue overflow. During
      marking, only sets of entries are moved between it and the local
      queues, as access to it requires a mutex and more fine-grained
      interaction with it might cause contention. If it
3530       overflows, then the marking phase should restart and iterate
3531       over the bitmap to identify gray objects. Throughout the marking
3532       phase, tasks attempt to keep the global mark stack at a small
3533       length but not totally empty, so that entries are available for
      popping by other tasks. Only when there is no more work do tasks
      totally drain the global mark stack.
3536 
3537       (4) SATB Buffer Queue. This is where completed SATB buffers are
3538       made available. Buffers are regularly removed from this queue
3539       and scanned for roots, so that the queue doesn't get too
3540       long. During remark, all completed buffers are processed, as
3541       well as the filled in parts of any uncompleted buffers.
3542 
3543     The do_marking_step() method tries to abort when the time target
3544     has been reached. There are a few other cases when the
3545     do_marking_step() method also aborts:
3546 
3547       (1) When the marking phase has been aborted (after a Full GC).
3548 
3549       (2) When a global overflow (on the global stack) has been
3550       triggered. Before the task aborts, it will actually sync up with
3551       the other tasks to ensure that all the marking data structures
3552       (local queues, stacks, fingers etc.)  are re-initialized so that
3553       when do_marking_step() completes, the marking phase can
3554       immediately restart.
3555 
3556       (3) When enough completed SATB buffers are available. The
3557       do_marking_step() method only tries to drain SATB buffers right
3558       at the beginning. So, if enough buffers are available, the
3559       marking step aborts and the SATB buffers are processed at
3560       the beginning of the next invocation.
3561 
      (4) To yield. When we have to yield, we abort and yield
      right at the end of do_marking_step(). This saves us from a lot
      of hassle as, by yielding, we might allow a Full GC. If this
3565       happens then objects will be compacted underneath our feet, the
3566       heap might shrink, etc. We save checking for this by just
3567       aborting and doing the yield right at the end.
3568 
3569     From the above it follows that the do_marking_step() method should
3570     be called in a loop (or, otherwise, regularly) until it completes.
3571 
3572     If a marking step completes without its has_aborted() flag being
3573     true, it means it has completed the current marking phase (and
3574     also all other marking tasks have done so and have all synced up).
3575 
3576     A method called regular_clock_call() is invoked "regularly" (in
3577     sub ms intervals) throughout marking. It is this clock method that
3578     checks all the abort conditions which were mentioned above and
3579     decides when the task should abort. A work-based scheme is used to
3580     trigger this clock method: when the number of object words the
3581     marking phase has scanned or the number of references the marking
    phase has visited reaches a given limit. Additional invocations of
    the clock method have been planted in a few other strategic places
    too. The initial reason for the clock method was to avoid calling
3585     vtime too regularly, as it is quite expensive. So, once it was in
3586     place, it was natural to piggy-back all the other conditions on it
3587     too and not constantly check them throughout the code.
3588 
3589     If do_termination is true then do_marking_step will enter its
3590     termination protocol.
3591 
3592     The value of is_serial must be true when do_marking_step is being
3593     called serially (i.e. by the VMThread) and do_marking_step should
3594     skip any synchronization in the termination and overflow code.
3595     Examples include the serial remark code and the serial reference
3596     processing closures.
3597 
3598     The value of is_serial must be false when do_marking_step is
3599     being called by any of the worker threads in a work gang.
3600     Examples include the concurrent marking code (CMMarkingTask),
3601     the MT remark code, and the MT reference processing closures.
3602 
3603  *****************************************************************************/
3604 
3605 void CMTask::do_marking_step(double time_target_ms,
3606                              bool do_termination,
3607                              bool is_serial) {
3608   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3609   assert(concurrent() == _cm->concurrent(), "they should be the same");
3610 
3611   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3612   assert(_task_queues != NULL, "invariant");
3613   assert(_task_queue != NULL, "invariant");
3614   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3615 
3616   assert(!_claimed,
3617          "only one thread should claim this task at any one time");
3618 
  // OK, this doesn't safeguard against all possible scenarios, as it is
3620   // possible for two threads to set the _claimed flag at the same
3621   // time. But it is only for debugging purposes anyway and it will
3622   // catch most problems.
3623   _claimed = true;
3624 
3625   _start_time_ms = os::elapsedVTime() * 1000.0;
3626 
3627   // If do_stealing is true then do_marking_step will attempt to
3628   // steal work from the other CMTasks. It only makes sense to
3629   // enable stealing when the termination protocol is enabled
3630   // and do_marking_step() is not being called serially.
3631   bool do_stealing = do_termination && !is_serial;
3632 
3633   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
3634   _time_target_ms = time_target_ms - diff_prediction_ms;
3635 
3636   // set up the variables that are used in the work-based scheme to
3637   // call the regular clock method
3638   _words_scanned = 0;
3639   _refs_reached  = 0;
3640   recalculate_limits();
3641 
3642   // clear all flags
3643   clear_has_aborted();
3644   _has_timed_out = false;
3645   _draining_satb_buffers = false;
3646 
3647   ++_calls;
3648 
3649   if (_cm->verbose_low()) {
3650     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
3651                            "target = %1.2lfms >>>>>>>>>>",
3652                            _worker_id, _calls, _time_target_ms);
3653   }
3654 
3655   // Set up the bitmap and oop closures. Anything that uses them is
3656   // eventually called from this method, so it is OK to allocate these
3657   // statically.
3658   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3659   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3660   set_cm_oop_closure(&cm_oop_closure);
3661 
3662   if (_cm->has_overflown()) {
3663     // This can happen if the mark stack overflows during a GC pause
3664     // and this task, after a yield point, restarts. We have to abort
3665     // as we need to get into the overflow protocol which happens
3666     // right at the end of this task.
3667     set_has_aborted();
3668   }
3669 
3670   // First drain any available SATB buffers. After this, we will not
3671   // look at SATB buffers before the next invocation of this method.
3672   // If enough completed SATB buffers are queued up, the regular clock
3673   // will abort this task so that it restarts.
3674   drain_satb_buffers();
3675   // ...then partially drain the local queue and the global stack
3676   drain_local_queue(true);
3677   drain_global_stack(true);
3678 
3679   do {
3680     if (!has_aborted() && _curr_region != NULL) {
3681       // This means that we're already holding on to a region.
3682       assert(_finger != NULL, "if region is not NULL, then the finger "
3683              "should not be NULL either");
3684 
3685       // We might have restarted this task after an evacuation pause
3686       // which might have evacuated the region we're holding on to
3687       // underneath our feet. Let's read its limit again to make sure
3688       // that we do not iterate over a region of the heap that
3689       // contains garbage (update_region_limit() will also move
3690       // _finger to the start of the region if it is found empty).
3691       update_region_limit();
3692       // We will start from _finger not from the start of the region,
3693       // as we might be restarting this task after aborting half-way
3694       // through scanning this region. In this case, _finger points to
3695       // the address where we last found a marked object. If this is a
3696       // fresh region, _finger points to start().
3697       MemRegion mr = MemRegion(_finger, _region_limit);
3698 
3699       if (_cm->verbose_low()) {
3700         gclog_or_tty->print_cr("[%u] we're scanning part "
3701                                "[" PTR_FORMAT ", " PTR_FORMAT ") "
3702                                "of region " HR_FORMAT,
3703                                _worker_id, p2i(_finger), p2i(_region_limit),
3704                                HR_FORMAT_PARAMS(_curr_region));
3705       }
3706 
3707       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
3708              "humongous regions should go around loop once only");
3709 
3710       // Some special cases:
3711       // If the memory region is empty, we can just give up the region.
3712       // If the current region is humongous then we only need to check
3713       // the bitmap for the bit associated with the start of the object,
3714       // scan the object if it's live, and give up the region.
3715       // Otherwise, let's iterate over the bitmap of the part of the region
3716       // that is left.
3717       // If the iteration is successful, give up the region.
3718       if (mr.is_empty()) {
3719         giveup_current_region();
3720         regular_clock_call();
3721       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
3722         if (_nextMarkBitMap->isMarked(mr.start())) {
3723           // The object is marked - apply the closure
3724           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
3725           bitmap_closure.do_bit(offset);
3726         }
3727         // Even if this task aborted while scanning the humongous object
3728         // we can (and should) give up the current region.
3729         giveup_current_region();
3730         regular_clock_call();
3731       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3732         giveup_current_region();
3733         regular_clock_call();
3734       } else {
3735         assert(has_aborted(), "currently the only way to do so");
3736         // The only way to abort the bitmap iteration is to return
3737         // false from the do_bit() method. However, inside the
3738         // do_bit() method we move the _finger to point to the
3739         // object currently being looked at. So, if we bail out, we
3740         // have definitely set _finger to something non-null.
3741         assert(_finger != NULL, "invariant");
3742 
3743         // Region iteration was actually aborted. So now _finger
3744         // points to the address of the object we last scanned. If we
3745         // leave it there, when we restart this task, we will rescan
3746         // the object. It is easy to avoid this. We move the finger by
3747         // enough to point to the next possible object header (the
3748         // bitmap knows by how much we need to move it as it knows its
3749         // granularity).
3750         assert(_finger < _region_limit, "invariant");
3751         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
3752         // Check if bitmap iteration was aborted while scanning the last object
3753         if (new_finger >= _region_limit) {
3754           giveup_current_region();
3755         } else {
3756           move_finger_to(new_finger);
3757         }
3758       }
3759     }
3760     // At this point we have either completed iterating over the
3761     // region we were holding on to, or we have aborted.
3762 
3763     // We then partially drain the local queue and the global stack.
3764     // (Do we really need this?)
3765     drain_local_queue(true);
3766     drain_global_stack(true);
3767 
3768     // Read the note on the claim_region() method on why it might
3769     // return NULL with potentially more regions available for
3770     // claiming and why we have to check out_of_regions() to determine
3771     // whether we're done or not.
3772     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3773       // We are going to try to claim a new region. We should have
3774       // given up on the previous one.
3775       // Separated the asserts so that we know which one fires.
3776       assert(_curr_region  == NULL, "invariant");
3777       assert(_finger       == NULL, "invariant");
3778       assert(_region_limit == NULL, "invariant");
3779       if (_cm->verbose_low()) {
3780         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
3781       }
3782       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3783       if (claimed_region != NULL) {
3784         // Yes, we managed to claim one
3785         if (_cm->verbose_low()) {
3786           gclog_or_tty->print_cr("[%u] we successfully claimed "
3787                                  "region " PTR_FORMAT,
3788                                  _worker_id, p2i(claimed_region));
3789         }
3790 
3791         setup_for_region(claimed_region);
3792         assert(_curr_region == claimed_region, "invariant");
3793       }
3794       // It is important to call the regular clock here. It might take
3795       // a while to claim a region if, for example, we hit a large
3796       // block of empty regions. So we need to call the regular clock
3797       // method once round the loop to make sure it's called
3798       // frequently enough.
3799       regular_clock_call();
3800     }
3801 
3802     if (!has_aborted() && _curr_region == NULL) {
3803       assert(_cm->out_of_regions(),
3804              "at this point we should be out of regions");
3805     }
3806   } while ( _curr_region != NULL && !has_aborted());
3807 
3808   if (!has_aborted()) {
3809     // We cannot check whether the global stack is empty, since other
3810     // tasks might be pushing objects to it concurrently.
3811     assert(_cm->out_of_regions(),
3812            "at this point we should be out of regions");
3813 
3814     if (_cm->verbose_low()) {
3815       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
3816     }
3817 
3818     // Try to reduce the number of available SATB buffers so that
3819     // remark has less work to do.
3820     drain_satb_buffers();
3821   }
3822 
3823   // Since we've done everything else, we can now totally drain the
3824   // local queue and global stack.
3825   drain_local_queue(false);
3826   drain_global_stack(false);
3827 
  // Attempt at work stealing from other tasks' queues.
3829   if (do_stealing && !has_aborted()) {
3830     // We have not aborted. This means that we have finished all that
3831     // we could. Let's try to do some stealing...
3832 
3833     // We cannot check whether the global stack is empty, since other
3834     // tasks might be pushing objects to it concurrently.
3835     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3836            "only way to reach here");
3837 
3838     if (_cm->verbose_low()) {
3839       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
3840     }
3841 
3842     while (!has_aborted()) {
3843       oop obj;
3844       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
3845         if (_cm->verbose_medium()) {
3846           gclog_or_tty->print_cr("[%u] stolen " PTR_FORMAT " successfully",
3847                                  _worker_id, p2i((void*) obj));
3848         }
3849 
3850         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3851                "any stolen object should be marked");
3852         scan_object(obj);
3853 
3854         // And since we're towards the end, let's totally drain the
3855         // local queue and global stack.
3856         drain_local_queue(false);
3857         drain_global_stack(false);
3858       } else {
3859         break;
3860       }
3861     }
3862   }
3863 
3864   // If we are about to wrap up and go into termination, check if we
3865   // should raise the overflow flag.
3866   if (do_termination && !has_aborted()) {
3867     if (_cm->force_overflow()->should_force()) {
3868       _cm->set_has_overflown();
3869       regular_clock_call();
3870     }
3871   }
3872 
3873   // We still haven't aborted. Now, let's try to get into the
3874   // termination protocol.
3875   if (do_termination && !has_aborted()) {
3876     // We cannot check whether the global stack is empty, since other
3877     // tasks might be concurrently pushing objects on it.
3878     // Separated the asserts so that we know which one fires.
3879     assert(_cm->out_of_regions(), "only way to reach here");
3880     assert(_task_queue->size() == 0, "only way to reach here");
3881 
3882     if (_cm->verbose_low()) {
3883       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
3884     }
3885 
3886     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3887 
3888     // The CMTask class also extends the TerminatorTerminator class,
3889     // hence its should_exit_termination() method will also decide
3890     // whether to exit the termination protocol or not.
3891     bool finished = (is_serial ||
3892                      _cm->terminator()->offer_termination(this));
3893     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3894     _termination_time_ms +=
3895       termination_end_time_ms - _termination_start_time_ms;
3896 
3897     if (finished) {
3898       // We're all done.
3899 
3900       if (_worker_id == 0) {
3901         // let's allow task 0 to do this
3902         if (concurrent()) {
3903           assert(_cm->concurrent_marking_in_progress(), "invariant");
3904           // we need to set this to false before the next
3905           // safepoint. This way we ensure that the marking phase
3906           // doesn't observe any more heap expansions.
3907           _cm->clear_concurrent_marking_in_progress();
3908         }
3909       }
3910 
3911       // We can now guarantee that the global stack is empty, since
3912       // all other tasks have finished. We separated the guarantees so
3913       // that, if a condition is false, we can immediately find out
3914       // which one.
3915       guarantee(_cm->out_of_regions(), "only way to reach here");
3916       guarantee(_cm->mark_stack_empty(), "only way to reach here");
3917       guarantee(_task_queue->size() == 0, "only way to reach here");
3918       guarantee(!_cm->has_overflown(), "only way to reach here");
3919       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3920 
3921       if (_cm->verbose_low()) {
3922         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
3923       }
3924     } else {
      // Apparently there's more work to do. Let's abort this task. It
      // will be restarted and we can hopefully find more things to do.
3927 
3928       if (_cm->verbose_low()) {
3929         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
3930                                _worker_id);
3931       }
3932 
3933       set_has_aborted();
3934     }
3935   }
3936 
3937   // Mainly for debugging purposes to make sure that a pointer to the
3938   // closure which was statically allocated in this frame doesn't
3939   // escape it by accident.
3940   set_cm_oop_closure(NULL);
3941   double end_time_ms = os::elapsedVTime() * 1000.0;
3942   double elapsed_time_ms = end_time_ms - _start_time_ms;
3943   // Update the step history.
3944   _step_times_ms.add(elapsed_time_ms);
3945 
3946   if (has_aborted()) {
3947     // The task was aborted for some reason.
3948     if (_has_timed_out) {
3949       double diff_ms = elapsed_time_ms - _time_target_ms;
3950       // Keep statistics of how well we did with respect to hitting
3951       // our target only if we actually timed out (if we aborted for
3952       // other reasons, then the results might get skewed).
3953       _marking_step_diffs_ms.add(diff_ms);
3954     }
3955 
3956     if (_cm->has_overflown()) {
3957       // This is the interesting one. We aborted because a global
3958       // overflow was raised. This means we have to restart the
3959       // marking phase and start iterating over regions. However, in
3960       // order to do this we have to make sure that all tasks stop
3961       // what they are doing and re-initialize in a safe manner. We
3962       // will achieve this with the use of two barrier sync points.
3963 
3964       if (_cm->verbose_low()) {
3965         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
3966       }
3967 
3968       if (!is_serial) {
3969         // We only need to enter the sync barrier if being called
3970         // from a parallel context
3971         _cm->enter_first_sync_barrier(_worker_id);
3972 
3973         // When we exit this sync barrier we know that all tasks have
3974         // stopped doing marking work. So, it's now safe to
3975         // re-initialize our data structures. At the end of this method,
3976         // task 0 will clear the global data structures.
3977       }
3978 
3979       // We clear the local state of this task...
3980       clear_region_fields();
3981 
3982       if (!is_serial) {
3983         // ...and enter the second barrier.
3984         _cm->enter_second_sync_barrier(_worker_id);
3985       }
3986       // At this point, if we're during the concurrent phase of
3987       // marking, everything has been re-initialized and we're
3988       // ready to restart.
3989     }
3990 
3991     if (_cm->verbose_low()) {
3992       gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
3993                              "elapsed = %1.2lfms <<<<<<<<<<",
3994                              _worker_id, _time_target_ms, elapsed_time_ms);
3995       if (_cm->has_aborted()) {
3996         gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
3997                                _worker_id);
3998       }
3999     }
4000   } else {
4001     if (_cm->verbose_low()) {
4002       gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4003                              "elapsed = %1.2lfms <<<<<<<<<<",
4004                              _worker_id, _time_target_ms, elapsed_time_ms);
4005     }
4006   }
4007 
4008   _claimed = false;
4009 }
4010 
4011 CMTask::CMTask(uint worker_id,
4012                ConcurrentMark* cm,
4013                size_t* marked_bytes,
4014                BitMap* card_bm,
4015                CMTaskQueue* task_queue,
4016                CMTaskQueueSet* task_queues)
4017   : _g1h(G1CollectedHeap::heap()),
4018     _worker_id(worker_id), _cm(cm),
4019     _claimed(false),
4020     _nextMarkBitMap(NULL), _hash_seed(17),
4021     _task_queue(task_queue),
4022     _task_queues(task_queues),
4023     _cm_oop_closure(NULL),
4024     _marked_bytes_array(marked_bytes),
4025     _card_bm(card_bm) {
4026   guarantee(task_queue != NULL, "invariant");
4027   guarantee(task_queues != NULL, "invariant");
4028 
4029   _marking_step_diffs_ms.add(0.5);
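  // The 0.5 above seeds the step-time-difference sequence so that the
  // predictor used in do_marking_step() has a sample to work with before
  // the first real marking step completes.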
4030 }
4031 
4032 // These are formatting macros that are used below to ensure
4033 // consistent formatting. The *_H_* versions are used to format the
4034 // header for a particular value and they should be kept consistent
4035 // with the corresponding macro. Also note that most of the macros add
4036 // the necessary white space (as a prefix) which makes them a bit
4037 // easier to compose.
4038 
4039 // All the output lines are prefixed with this string to be able to
4040 // identify them easily in a large log file.
4041 #define G1PPRL_LINE_PREFIX            "###"
4042 
4043 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
4044 #ifdef _LP64
4045 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4046 #else // _LP64
4047 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4048 #endif // _LP64
4049 
4050 // For per-region info
4051 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4052 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4053 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
4054 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4055 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4056 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4057 
4058 // For summary info
4059 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
4060 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
4061 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
4062 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
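     //
     // Illustrative note: thanks to string literal concatenation, a summary
     // line such as
     //   G1PPRL_LINE_PREFIX " SUMMARY" G1PPRL_SUM_MB_FORMAT("capacity")
     // expands to the single format string
     //   "### SUMMARY  capacity: %1.2f MB"
     // which is why the macros above carry their own leading whitespace.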
4063 
4064 G1PrintRegionLivenessInfoClosure::
4065 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4066   : _out(out),
4067     _total_used_bytes(0), _total_capacity_bytes(0),
4068     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4069     _hum_used_bytes(0), _hum_capacity_bytes(0),
4070     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
4071     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
4072   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4073   MemRegion g1_reserved = g1h->g1_reserved();
4074   double now = os::elapsedTime();
4075 
4076   // Print the header of the output.
4077   _out->cr();
4078   _out->print_cr(G1PPRL_LINE_PREFIX " PHASE %s @ %1.3f", phase_name, now);
4079   _out->print_cr(G1PPRL_LINE_PREFIX " HEAP"
4080                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4081                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4082                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4083                  HeapRegion::GrainBytes);
4084   _out->print_cr(G1PPRL_LINE_PREFIX);
4085   _out->print_cr(G1PPRL_LINE_PREFIX
4086                  G1PPRL_TYPE_H_FORMAT
4087                  G1PPRL_ADDR_BASE_H_FORMAT
4088                  G1PPRL_BYTE_H_FORMAT
4089                  G1PPRL_BYTE_H_FORMAT
4090                  G1PPRL_BYTE_H_FORMAT
4091                  G1PPRL_DOUBLE_H_FORMAT
4092                  G1PPRL_BYTE_H_FORMAT
4093                  G1PPRL_BYTE_H_FORMAT,
4094                  "type", "address-range",
4095                  "used", "prev-live", "next-live", "gc-eff",
4096                  "remset", "code-roots");
4097   _out->print_cr(G1PPRL_LINE_PREFIX
4098                  G1PPRL_TYPE_H_FORMAT
4099                  G1PPRL_ADDR_BASE_H_FORMAT
4100                  G1PPRL_BYTE_H_FORMAT
4101                  G1PPRL_BYTE_H_FORMAT
4102                  G1PPRL_BYTE_H_FORMAT
4103                  G1PPRL_DOUBLE_H_FORMAT
4104                  G1PPRL_BYTE_H_FORMAT
4105                  G1PPRL_BYTE_H_FORMAT,
4106                  "", "",
4107                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4108                  "(bytes)", "(bytes)");
4109 }
4110 
4111 // Takes as a parameter a reference to one of the _hum_* fields,
4112 // deduces the corresponding value for a region in a humongous region
4113 // series (either the region size, or what's left if the _hum_* field
4114 // is < the region size), and updates the _hum_* field accordingly.
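     // Worked example (illustrative numbers only): with HeapRegion::GrainBytes
     // at 1M and *hum_bytes starting at 1.5M for a two-region humongous series,
     // the first call returns 1M and leaves 0.5M, the second call returns the
     // remaining 0.5M and leaves 0, and any further calls return 0.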
4115 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4116   size_t bytes = 0;
4117   // The > 0 check handles the prev and next live bytes, which
4118   // could be 0.
4119   if (*hum_bytes > 0) {
4120     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4121     *hum_bytes -= bytes;
4122   }
4123   return bytes;
4124 }
4125 
4126 // Deduces the values for a region in a humongous region series
4127 // from the _hum_* fields and updates those accordingly. It assumes
4128 // that the _hum_* fields have already been set up from the "starts
4129 // humongous" region and that we visit the regions in address order.
4130 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4131                                                      size_t* capacity_bytes,
4132                                                      size_t* prev_live_bytes,
4133                                                      size_t* next_live_bytes) {
4134   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4135   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4136   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4137   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4138   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4139 }
4140 
4141 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4142   const char* type       = r->get_type_str();
4143   HeapWord* bottom       = r->bottom();
4144   HeapWord* end          = r->end();
4145   size_t capacity_bytes  = r->capacity();
4146   size_t used_bytes      = r->used();
4147   size_t prev_live_bytes = r->live_bytes();
4148   size_t next_live_bytes = r->next_live_bytes();
4149   double gc_eff          = r->gc_efficiency();
4150   size_t remset_bytes    = r->rem_set()->mem_size();
4151   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4152 
4153   if (r->is_starts_humongous()) {
4154     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4155            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4156            "they should have been zeroed after the last time we used them");
4157     // Set up the _hum_* fields.
4158     _hum_capacity_bytes  = capacity_bytes;
4159     _hum_used_bytes      = used_bytes;
4160     _hum_prev_live_bytes = prev_live_bytes;
4161     _hum_next_live_bytes = next_live_bytes;
4162     get_hum_bytes(&used_bytes, &capacity_bytes,
4163                   &prev_live_bytes, &next_live_bytes);
4164     end = bottom + HeapRegion::GrainWords;
4165   } else if (r->is_continues_humongous()) {
4166     get_hum_bytes(&used_bytes, &capacity_bytes,
4167                   &prev_live_bytes, &next_live_bytes);
4168     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4169   }
4170 
4171   _total_used_bytes      += used_bytes;
4172   _total_capacity_bytes  += capacity_bytes;
4173   _total_prev_live_bytes += prev_live_bytes;
4174   _total_next_live_bytes += next_live_bytes;
4175   _total_remset_bytes    += remset_bytes;
4176   _total_strong_code_roots_bytes += strong_code_roots_bytes;
4177 
4178   // Print a line for this particular region.
4179   _out->print_cr(G1PPRL_LINE_PREFIX
4180                  G1PPRL_TYPE_FORMAT
4181                  G1PPRL_ADDR_BASE_FORMAT
4182                  G1PPRL_BYTE_FORMAT
4183                  G1PPRL_BYTE_FORMAT
4184                  G1PPRL_BYTE_FORMAT
4185                  G1PPRL_DOUBLE_FORMAT
4186                  G1PPRL_BYTE_FORMAT
4187                  G1PPRL_BYTE_FORMAT,
4188                  type, p2i(bottom), p2i(end),
4189                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4190                  remset_bytes, strong_code_roots_bytes);
4191 
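       // Returning false lets the heap region iteration continue; a closure
       // returns true only when it wants the iteration to stop early.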
4192   return false;
4193 }
4194 
4195 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4196   // Add the free list and static memory usage to the remembered set sizes.
4197   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
4198   // Print the footer of the output.
4199   _out->print_cr(G1PPRL_LINE_PREFIX);
4200   _out->print_cr(G1PPRL_LINE_PREFIX
4201                  " SUMMARY"
4202                  G1PPRL_SUM_MB_FORMAT("capacity")
4203                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4204                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4205                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
4206                  G1PPRL_SUM_MB_FORMAT("remset")
4207                  G1PPRL_SUM_MB_FORMAT("code-roots"),
4208                  bytes_to_mb(_total_capacity_bytes),
4209                  bytes_to_mb(_total_used_bytes),
4210                  perc(_total_used_bytes, _total_capacity_bytes),
4211                  bytes_to_mb(_total_prev_live_bytes),
4212                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4213                  bytes_to_mb(_total_next_live_bytes),
4214                  perc(_total_next_live_bytes, _total_capacity_bytes),
4215                  bytes_to_mb(_total_remset_bytes),
4216                  bytes_to_mb(_total_strong_code_roots_bytes));
4217   _out->cr();
4218 }
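
     // A minimal usage sketch (assumptions: a G1CollectedHeap* named g1h and
     // the G1PrintRegionLivenessInfo flag, similar to how this closure is
     // typically driven from the marking code):
     //
     //   if (G1PrintRegionLivenessInfo) {
     //     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
     //     g1h->heap_region_iterate(&cl);
     //   }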