1 /*
   2  * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMark.inline.hpp"
  30 #include "gc/g1/concurrentMarkThread.inline.hpp"
  31 #include "gc/g1/g1CollectedHeap.inline.hpp"
  32 #include "gc/g1/g1CollectorPolicy.hpp"
  33 #include "gc/g1/g1CollectorState.hpp"
  34 #include "gc/g1/g1ErgoVerbose.hpp"
  35 #include "gc/g1/g1Log.hpp"
  36 #include "gc/g1/g1OopClosures.inline.hpp"
  37 #include "gc/g1/g1RemSet.hpp"
  38 #include "gc/g1/g1StringDedup.hpp"
  39 #include "gc/g1/heapRegion.inline.hpp"
  40 #include "gc/g1/heapRegionManager.inline.hpp"
  41 #include "gc/g1/heapRegionRemSet.hpp"
  42 #include "gc/g1/heapRegionSet.inline.hpp"
  43 #include "gc/g1/suspendibleThreadSet.hpp"
  44 #include "gc/shared/gcId.hpp"
  45 #include "gc/shared/gcTimer.hpp"
  46 #include "gc/shared/gcTrace.hpp"
  47 #include "gc/shared/gcTraceTime.hpp"
  48 #include "gc/shared/genOopClosures.inline.hpp"
  49 #include "gc/shared/referencePolicy.hpp"
  50 #include "gc/shared/strongRootsScope.hpp"
  51 #include "gc/shared/taskqueue.inline.hpp"
  52 #include "gc/shared/vmGCOperations.hpp"
  53 #include "memory/allocation.hpp"
  54 #include "memory/resourceArea.hpp"
  55 #include "oops/oop.inline.hpp"
  56 #include "runtime/atomic.inline.hpp"
  57 #include "runtime/handles.inline.hpp"
  58 #include "runtime/java.hpp"
  59 #include "runtime/prefetch.inline.hpp"
  60 #include "services/memTracker.hpp"
  61 
  62 // Concurrent marking bit map wrapper
  63 
  64 CMBitMapRO::CMBitMapRO(int shifter) :
  65   _bm(),
  66   _shifter(shifter) {
  67   _bmStartWord = 0;
  68   _bmWordSize = 0;
  69 }
  70 
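     // Return the address of the first marked word at or after addr (rounded up
     // to a possible object boundary), or limit if no mark is found before it.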
  71 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  72                                                const HeapWord* limit) const {
  73   // First we must round addr *up* to a possible object boundary.
  74   addr = (HeapWord*)align_size_up((intptr_t)addr,
  75                                   HeapWordSize << _shifter);
  76   size_t addrOffset = heapWordToOffset(addr);
  77   if (limit == NULL) {
  78     limit = _bmStartWord + _bmWordSize;
  79   }
  80   size_t limitOffset = heapWordToOffset(limit);
  81   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  82   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  83   assert(nextAddr >= addr, "get_next_one postcondition");
  84   assert(nextAddr == limit || isMarked(nextAddr),
  85          "get_next_one postcondition");
  86   return nextAddr;
  87 }
  88 
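     // Return the address of the first unmarked word in [addr, limit),
     // or limit if every word before it is marked.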
  89 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
  90                                                  const HeapWord* limit) const {
  91   size_t addrOffset = heapWordToOffset(addr);
  92   if (limit == NULL) {
  93     limit = _bmStartWord + _bmWordSize;
  94   }
  95   size_t limitOffset = heapWordToOffset(limit);
  96   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  97   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  98   assert(nextAddr >= addr, "get_next_zero postcondition");
  99   assert(nextAddr == limit || !isMarked(nextAddr),
 100          "get_next_zero postcondition");
 101   return nextAddr;
 102 }
 103 
 104 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 105   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 106   return (int) (diff >> _shifter);
 107 }
 108 
 109 #ifndef PRODUCT
 110 bool CMBitMapRO::covers(MemRegion heap_rs) const {
 111   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 112   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
 113          "size inconsistency");
 114   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
 115          _bmWordSize  == heap_rs.word_size();
 116 }
 117 #endif
 118 
 119 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
 120   _bm.print_on_error(st, prefix);
 121 }
 122 
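     // Size, in bytes, of a mark bitmap covering heap_size bytes of heap:
     // one bitmap byte per mark_distance() heap bytes, rounded up to the
     // space reservation alignment.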
 123 size_t CMBitMap::compute_size(size_t heap_size) {
 124   return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
 125 }
 126 
 127 size_t CMBitMap::mark_distance() {
 128   return MinObjAlignmentInBytes * BitsPerByte;
 129 }
 130 
 131 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 132   _bmStartWord = heap.start();
 133   _bmWordSize = heap.word_size();
 134 
 135   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
 136   _bm.set_size(_bmWordSize >> _shifter);
 137 
 138   storage->set_mapping_changed_listener(&_listener);
 139 }
 140 
 141 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 142   if (zero_filled) {
 143     return;
 144   }
 145   // We need to clear the bitmap on commit, removing any existing information.
 146   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 147   _bm->clearRange(mr);
 148 }
 149 
 150 // Closure used for clearing the given mark bitmap.
 151 class ClearBitmapHRClosure : public HeapRegionClosure {
 152  private:
 153   ConcurrentMark* _cm;
 154   CMBitMap* _bitmap;
 155   bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 156  public:
 157   ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
 158     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
 159   }
 160 
 161   virtual bool doHeapRegion(HeapRegion* r) {
 162     size_t const chunk_size_in_words = M / HeapWordSize;
 163 
 164     HeapWord* cur = r->bottom();
 165     HeapWord* const end = r->end();
 166 
 167     while (cur < end) {
 168       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 169       _bitmap->clearRange(mr);
 170 
 171       cur += chunk_size_in_words;
 172 
 173       // Abort iteration if after yielding the marking has been aborted.
 174       if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
 175         return true;
 176       }
 177       // Repeat the asserts from before the start of the closure. We will do them
 178       // as asserts here to minimize their overhead on the product. However, we
 179       // will have them as guarantees at the beginning / end of the bitmap
 180       // clearing to get some checking in the product.
 181       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
 182       assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
 183     }
 184 
 185     return false;
 186   }
 187 };
 188 
 189 class ParClearNextMarkBitmapTask : public AbstractGangTask {
 190   ClearBitmapHRClosure* _cl;
 191   HeapRegionClaimer     _hrclaimer;
 192   bool                  _suspendible; // If the task is suspendible, workers must join the STS.
 193 
 194 public:
 195   ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
 196       AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}
 197 
 198   void work(uint worker_id) {
 199     SuspendibleThreadSetJoiner sts_join(_suspendible);
 200     G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
 201   }
 202 };
 203 
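     // Clear the whole bitmap in parallel using the heap's work gang. The
     // closure is created with may_yield == false, so the iteration cannot
     // be aborted and must run to completion.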
 204 void CMBitMap::clearAll() {
 205   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 206   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
 207   uint n_workers = g1h->workers()->active_workers();
 208   ParClearNextMarkBitmapTask task(&cl, n_workers, false);
 209   g1h->workers()->run_task(&task);
 210   guarantee(cl.complete(), "Must have completed iteration.");
 211   return;
 212 }
 213 
 214 void CMBitMap::markRange(MemRegion mr) {
 215   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 216   assert(!mr.is_empty(), "unexpected empty region");
 217   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 218           ((HeapWord *) mr.end())),
 219          "markRange memory region end is not card aligned");
 220   // convert address range into offset range
 221   _bm.at_put_range(heapWordToOffset(mr.start()),
 222                    heapWordToOffset(mr.end()), true);
 223 }
 224 
 225 void CMBitMap::clearRange(MemRegion mr) {
 226   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 227   assert(!mr.is_empty(), "unexpected empty region");
 228   // convert address range into offset range
 229   _bm.at_put_range(heapWordToOffset(mr.start()),
 230                    heapWordToOffset(mr.end()), false);
 231 }
 232 
 233 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 234                                             HeapWord* end_addr) {
 235   HeapWord* start = getNextMarkedWordAddress(addr);
 236   start = MIN2(start, end_addr);
 237   HeapWord* end   = getNextUnmarkedWordAddress(start);
 238   end = MIN2(end, end_addr);
 239   assert(start <= end, "Consistency check");
 240   MemRegion mr(start, end);
 241   if (!mr.is_empty()) {
 242     clearRange(mr);
 243   }
 244   return mr;
 245 }
 246 
 247 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 248   _base(NULL), _cm(cm)
 249 {}
 250 
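     // Reserve and commit the backing store for a mark stack of 'capacity'
     // oop entries. Returns false, after printing a warning, if either step fails.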
 251 bool CMMarkStack::allocate(size_t capacity) {
 252   // allocate a stack of the requisite depth
 253   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 254   if (!rs.is_reserved()) {
 255     warning("ConcurrentMark MarkStack allocation failure");
 256     return false;
 257   }
 258   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 259   if (!_virtual_space.initialize(rs, rs.size())) {
 260     warning("ConcurrentMark MarkStack backing store failure");
 261     // Release the virtual memory reserved for the marking stack
 262     rs.release();
 263     return false;
 264   }
 265   assert(_virtual_space.committed_size() == rs.size(),
 266          "Didn't reserve backing store for all of ConcurrentMark stack?");
 267   _base = (oop*) _virtual_space.low();
 268   setEmpty();
 269   _capacity = (jint) capacity;
 270   _saved_index = -1;
 271   _should_expand = false;
 272   return true;
 273 }
 274 
 275 void CMMarkStack::expand() {
 276   // Called during remark if we've overflowed the marking stack during marking.
 277   assert(isEmpty(), "stack should have been emptied while handling overflow");
 278   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 279   // Clear expansion flag
 280   _should_expand = false;
 281   if (_capacity == (jint) MarkStackSizeMax) {
 282     if (PrintGCDetails && Verbose) {
 283       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
 284     }
 285     return;
 286   }
 287   // Double capacity if possible
 288   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
 289   // Do not give up existing stack until we have managed to
 290   // get the double capacity that we desired.
 291   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 292                                                            sizeof(oop)));
 293   if (rs.is_reserved()) {
 294     // Release the backing store associated with old stack
 295     _virtual_space.release();
 296     // Reinitialize virtual space for new stack
 297     if (!_virtual_space.initialize(rs, rs.size())) {
 298       fatal("Not enough swap for expanded marking stack capacity");
 299     }
 300     _base = (oop*)(_virtual_space.low());
 301     _index = 0;
 302     _capacity = new_capacity;
 303   } else {
 304     if (PrintGCDetails && Verbose) {
 305       // Failed to double capacity, continue;
 306       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
 307                           SIZE_FORMAT "K to " SIZE_FORMAT "K",
 308                           _capacity / K, new_capacity / K);
 309     }
 310   }
 311 }
 312 
 313 void CMMarkStack::set_should_expand() {
 314   // If we're resetting the marking state because of a
 315   // marking stack overflow, record that we should, if
 316   // possible, expand the stack.
 317   _should_expand = _cm->has_overflown();
 318 }
 319 
 320 CMMarkStack::~CMMarkStack() {
 321   if (_base != NULL) {
 322     _base = NULL;
 323     _virtual_space.release();
 324   }
 325 }
 326 
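     // Push n entries from ptr_arr onto the stack under ParGCRareEvent_lock;
     // if the stack would exceed its capacity, set _overflow and push nothing.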
 327 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 328   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 329   jint start = _index;
 330   jint next_index = start + n;
 331   if (next_index > _capacity) {
 332     _overflow = true;
 333     return;
 334   }
 335   // Otherwise.
 336   _index = next_index;
 337   for (int i = 0; i < n; i++) {
 338     int ind = start + i;
 339     assert(ind < _capacity, "By overflow test above.");
 340     _base[ind] = ptr_arr[i];
 341   }
 342 }
 343 
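     // Pop up to max entries into ptr_arr under ParGCRareEvent_lock. Stores the
     // number of entries actually popped in *n and returns whether any were popped.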
 344 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 345   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 346   jint index = _index;
 347   if (index == 0) {
 348     *n = 0;
 349     return false;
 350   } else {
 351     int k = MIN2(max, index);
 352     jint  new_ind = index - k;
 353     for (int j = 0; j < k; j++) {
 354       ptr_arr[j] = _base[new_ind + j];
 355     }
 356     _index = new_ind;
 357     *n = k;
 358     return true;
 359   }
 360 }
 361 
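     // Remember the current stack index at the start of a GC pause so that
     // note_end_of_gc() can verify that nothing was pushed during the pause.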
 362 void CMMarkStack::note_start_of_gc() {
 363   assert(_saved_index == -1,
 364          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 365   _saved_index = _index;
 366 }
 367 
 368 void CMMarkStack::note_end_of_gc() {
 369   // This is intentionally a guarantee, instead of an assert. If we
 370   // accidentally add something to the mark stack during GC, it
 371   // will be a correctness issue so it's better if we crash. We'll
 372   // only check this once per GC anyway, so it won't be a performance
 373   // issue in any way.
 374   guarantee(_saved_index == _index,
 375             "saved index: %d index: %d", _saved_index, _index);
 376   _saved_index = -1;
 377 }
 378 
 379 CMRootRegions::CMRootRegions() :
 380   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 381   _should_abort(false),  _next_survivor(NULL) { }
 382 
 383 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 384   _young_list = g1h->young_list();
 385   _cm = cm;
 386 }
 387 
 388 void CMRootRegions::prepare_for_scan() {
 389   assert(!scan_in_progress(), "pre-condition");
 390 
 391   // Currently, only survivors can be root regions.
 392   assert(_next_survivor == NULL, "pre-condition");
 393   _next_survivor = _young_list->first_survivor_region();
 394   _scan_in_progress = (_next_survivor != NULL);
 395   _should_abort = false;
 396 }
 397 
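     // Claim the next root region (currently always a survivor region) for
     // scanning. Returns NULL once all survivors have been claimed or when the
     // scan should be aborted.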
 398 HeapRegion* CMRootRegions::claim_next() {
 399   if (_should_abort) {
 400     // If someone has set the should_abort flag, we return NULL to
 401     // force the caller to bail out of their loop.
 402     return NULL;
 403   }
 404 
 405   // Currently, only survivors can be root regions.
 406   HeapRegion* res = _next_survivor;
 407   if (res != NULL) {
 408     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 409     // Read it again in case it changed while we were waiting for the lock.
 410     res = _next_survivor;
 411     if (res != NULL) {
 412       if (res == _young_list->last_survivor_region()) {
 413         // We just claimed the last survivor so store NULL to indicate
 414         // that we're done.
 415         _next_survivor = NULL;
 416       } else {
 417         _next_survivor = res->get_next_young_region();
 418       }
 419     } else {
 420       // Someone else claimed the last survivor while we were trying
 421       // to take the lock so nothing else to do.
 422     }
 423   }
 424   assert(res == NULL || res->is_survivor(), "post-condition");
 425 
 426   return res;
 427 }
 428 
 429 void CMRootRegions::scan_finished() {
 430   assert(scan_in_progress(), "pre-condition");
 431 
 432   // Currently, only survivors can be root regions.
 433   if (!_should_abort) {
 434     assert(_next_survivor == NULL, "we should have claimed all survivors");
 435   }
 436   _next_survivor = NULL;
 437 
 438   {
 439     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 440     _scan_in_progress = false;
 441     RootRegionScan_lock->notify_all();
 442   }
 443 }
 444 
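     // Block until the concurrent root region scan has finished. Returns false
     // immediately if no scan is in progress.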
 445 bool CMRootRegions::wait_until_scan_finished() {
 446   if (!scan_in_progress()) return false;
 447 
 448   {
 449     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 450     while (scan_in_progress()) {
 451       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 452     }
 453   }
 454   return true;
 455 }
 456 
 457 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 458 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 459 #endif // _MSC_VER
 460 
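     // Derive a default number of concurrent marking threads from the number
     // of parallel GC threads: roughly one quarter of them, but at least one.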
 461 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 462   return MAX2((n_par_threads + 2) / 4, 1U);
 463 }
 464 
 465 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 466   _g1h(g1h),
 467   _markBitMap1(),
 468   _markBitMap2(),
 469   _parallel_marking_threads(0),
 470   _max_parallel_marking_threads(0),
 471   _sleep_factor(0.0),
 472   _marking_task_overhead(1.0),
 473   _cleanup_sleep_factor(0.0),
 474   _cleanup_task_overhead(1.0),
 475   _cleanup_list("Cleanup List"),
 476   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 477   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
 478             CardTableModRefBS::card_shift,
 479             false /* in_resource_area*/),
 480 
 481   _prevMarkBitMap(&_markBitMap1),
 482   _nextMarkBitMap(&_markBitMap2),
 483 
 484   _markStack(this),
 485   // _finger set in set_non_marking_state
 486 
 487   _max_worker_id(ParallelGCThreads),
 488   // _active_tasks set in set_non_marking_state
 489   // _tasks set inside the constructor
 490   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 491   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 492 
 493   _has_overflown(false),
 494   _concurrent(false),
 495   _has_aborted(false),
 496   _restart_for_overflow(false),
 497   _concurrent_marking_in_progress(false),
 498 
 499   // _verbose_level set below
 500 
 501   _init_times(),
 502   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 503   _cleanup_times(),
 504   _total_counting_time(0.0),
 505   _total_rs_scrub_time(0.0),
 506 
 507   _parallel_workers(NULL),
 508 
 509   _count_card_bitmaps(NULL),
 510   _count_marked_bytes(NULL),
 511   _completed_initialization(false) {
 512 
 513   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 514   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 515 
 516   // Create & start a ConcurrentMark thread.
 517   _cmThread = new ConcurrentMarkThread(this);
 518   assert(cmThread() != NULL, "CM Thread should have been created");
 519   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 520   if (_cmThread->osthread() == NULL) {
 521       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 522   }
 523 
 524   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 525   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 526   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 527 
 528   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 529   satb_qs.set_buffer_size(G1SATBBufferSize);
 530 
 531   _root_regions.init(_g1h, this);
 532 
 533   if (ConcGCThreads > ParallelGCThreads) {
 534     warning("Can't have more ConcGCThreads (%u) "
 535             "than ParallelGCThreads (%u).",
 536             ConcGCThreads, ParallelGCThreads);
 537     return;
 538   }
 539   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 540     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 541     // if both are set
 542     _sleep_factor             = 0.0;
 543     _marking_task_overhead    = 1.0;
 544   } else if (G1MarkingOverheadPercent > 0) {
 545     // We will calculate the number of parallel marking threads based
 546     // on a target overhead with respect to the soft real-time goal
 547     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 548     double overall_cm_overhead =
 549       (double) MaxGCPauseMillis * marking_overhead /
 550       (double) GCPauseIntervalMillis;
 551     double cpu_ratio = 1.0 / (double) os::processor_count();
 552     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 553     double marking_task_overhead =
 554       overall_cm_overhead / marking_thread_num *
 555                                               (double) os::processor_count();
 556     double sleep_factor =
 557                        (1.0 - marking_task_overhead) / marking_task_overhead;
 558 
 559     FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
 560     _sleep_factor             = sleep_factor;
 561     _marking_task_overhead    = marking_task_overhead;
 562   } else {
 563     // Calculate the number of parallel marking threads by scaling
 564     // the number of parallel GC threads.
 565     uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
 566     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 567     _sleep_factor             = 0.0;
 568     _marking_task_overhead    = 1.0;
 569   }
 570 
 571   assert(ConcGCThreads > 0, "Should have been set");
 572   _parallel_marking_threads = ConcGCThreads;
 573   _max_parallel_marking_threads = _parallel_marking_threads;
 574 
 575   if (parallel_marking_threads() > 1) {
 576     _cleanup_task_overhead = 1.0;
 577   } else {
 578     _cleanup_task_overhead = marking_task_overhead();
 579   }
 580   _cleanup_sleep_factor =
 581                    (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 582 
 583 #if 0
 584   gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 585   gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 586   gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 587   gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 588   gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 589 #endif
 590 
 591   _parallel_workers = new WorkGang("G1 Marker",
 592        _max_parallel_marking_threads, false, true);
 593   if (_parallel_workers == NULL) {
 594     vm_exit_during_initialization("Failed necessary allocation.");
 595   } else {
 596     _parallel_workers->initialize_workers();
 597   }
 598 
 599   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 600     size_t mark_stack_size =
 601       MIN2(MarkStackSizeMax,
 602           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 603     // Verify that the calculated value for MarkStackSize is in range.
 604     // It would be nice to use the private utility routine from Arguments.
 605     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 606       warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 607               "must be between 1 and " SIZE_FORMAT,
 608               mark_stack_size, MarkStackSizeMax);
 609       return;
 610     }
 611     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 612   } else {
 613     // Verify MarkStackSize is in range.
 614     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 615       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 616         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 617           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 618                   "must be between 1 and " SIZE_FORMAT,
 619                   MarkStackSize, MarkStackSizeMax);
 620           return;
 621         }
 622       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 623         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 624           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 625                   " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 626                   MarkStackSize, MarkStackSizeMax);
 627           return;
 628         }
 629       }
 630     }
 631   }
 632 
 633   if (!_markStack.allocate(MarkStackSize)) {
 634     warning("Failed to allocate CM marking stack");
 635     return;
 636   }
 637 
 638   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 639   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 640 
 641   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 642   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 643 
 644   BitMap::idx_t card_bm_size = _card_bm.size();
 645 
 646   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 647   _active_tasks = _max_worker_id;
 648 
 649   uint max_regions = _g1h->max_regions();
 650   for (uint i = 0; i < _max_worker_id; ++i) {
 651     CMTaskQueue* task_queue = new CMTaskQueue();
 652     task_queue->initialize();
 653     _task_queues->register_queue(i, task_queue);
 654 
 655     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 656     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 657 
 658     _tasks[i] = new CMTask(i, this,
 659                            _count_marked_bytes[i],
 660                            &_count_card_bitmaps[i],
 661                            task_queue, _task_queues);
 662 
 663     _accum_task_vtime[i] = 0.0;
 664   }
 665 
 666   // Calculate the card number for the bottom of the heap. Used
 667   // in biasing indexes into the accounting card bitmaps.
 668   _heap_bottom_card_num =
 669     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 670                                 CardTableModRefBS::card_shift);
 671 
 672   // Clear all the liveness counting data
 673   clear_all_count_data();
 674 
 675   // so that the call below can read a sensible value
 676   _heap_start = g1h->reserved_region().start();
 677   set_non_marking_state();
 678   _completed_initialization = true;
 679 }
 680 
 681 void ConcurrentMark::reset() {
 682   // Starting values for these two. This should be called in a STW
 683   // phase.
 684   MemRegion reserved = _g1h->g1_reserved();
 685   _heap_start = reserved.start();
 686   _heap_end   = reserved.end();
 687 
 688   // Separated the asserts so that we know which one fires.
 689   assert(_heap_start != NULL, "heap bounds should look ok");
 690   assert(_heap_end != NULL, "heap bounds should look ok");
 691   assert(_heap_start < _heap_end, "heap bounds should look ok");
 692 
 693   // Reset all the marking data structures and any necessary flags
 694   reset_marking_state();
 695 
 696   // We do reset all of them, since different phases will use
 697   // different number of active threads. So, it's easiest to have all
 698   // of them ready.
 699   for (uint i = 0; i < _max_worker_id; ++i) {
 700     _tasks[i]->reset(_nextMarkBitMap);
 701   }
 702 
 703   // we need this to make sure that the flag is on during the evac
 704   // pause with initial mark piggy-backed
 705   set_concurrent_marking_in_progress();
 706 }
 707 
 708 
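     // Reset the global mark stack, the per-worker task queues and the global
     // finger. The overflow flag is cleared only when clear_overflow is true;
     // otherwise it must already be set.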
 709 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 710   _markStack.set_should_expand();
 711   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 712   if (clear_overflow) {
 713     clear_has_overflown();
 714   } else {
 715     assert(has_overflown(), "pre-condition");
 716   }
 717   _finger = _heap_start;
 718 
 719   for (uint i = 0; i < _max_worker_id; ++i) {
 720     CMTaskQueue* queue = _task_queues->queue(i);
 721     queue->set_empty();
 722   }
 723 }
 724 
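     // Record the number of tasks that will be active in the upcoming phase and
     // resize the parallel terminator and the two overflow barriers to match.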
 725 void ConcurrentMark::set_concurrency(uint active_tasks) {
 726   assert(active_tasks <= _max_worker_id, "we should not have more");
 727 
 728   _active_tasks = active_tasks;
 729   // Need to update the three data structures below according to the
 730   // number of active threads for this phase.
 731   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 732   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 733   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 734 }
 735 
 736 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 737   set_concurrency(active_tasks);
 738 
 739   _concurrent = concurrent;
 740   // We propagate this to all tasks, not just the active ones.
 741   for (uint i = 0; i < _max_worker_id; ++i)
 742     _tasks[i]->set_concurrent(concurrent);
 743 
 744   if (concurrent) {
 745     set_concurrent_marking_in_progress();
 746   } else {
 747     // We currently assume that the concurrent flag has been set to
 748     // false before we start remark. At this point we should also be
 749     // in a STW phase.
 750     assert(!concurrent_marking_in_progress(), "invariant");
 751     assert(out_of_regions(),
 752            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 753            p2i(_finger), p2i(_heap_end));
 754   }
 755 }
 756 
 757 void ConcurrentMark::set_non_marking_state() {
 758   // We set the global marking state to some default values when we're
 759   // not doing marking.
 760   reset_marking_state();
 761   _active_tasks = 0;
 762   clear_concurrent_marking_in_progress();
 763 }
 764 
 765 ConcurrentMark::~ConcurrentMark() {
 766   // The ConcurrentMark instance is never freed.
 767   ShouldNotReachHere();
 768 }
 769 
 770 void ConcurrentMark::clearNextBitmap() {
 771   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 772 
 773   // Make sure that the concurrent mark thread looks to still be in
 774   // the current cycle.
 775   guarantee(cmThread()->during_cycle(), "invariant");
 776 
 777   // We are finishing up the current cycle by clearing the next
 778   // marking bitmap and getting it ready for the next cycle. During
 779   // this time no other cycle can start. So, let's make sure that this
 780   // is the case.
 781   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 782 
 783   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
 784   ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
 785   _parallel_workers->run_task(&task);
 786 
 787   // Clear the liveness counting data. If the marking has been aborted, the abort()
 788   // call already did that.
 789   if (cl.complete()) {
 790     clear_all_count_data();
 791   }
 792 
 793   // Repeat the asserts from above.
 794   guarantee(cmThread()->during_cycle(), "invariant");
 795   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 796 }
 797 
 798 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 799   CMBitMap* _bitmap;
 800   bool _error;
 801  public:
 802   CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
 803   }
 804 
 805   virtual bool doHeapRegion(HeapRegion* r) {
 806     // This closure can be called concurrently with the mutator, so we must make sure
 807     // that the result of the getNextMarkedWordAddress() call is compared to the
 808     // value passed to it as limit to detect any found bits.
 809     // end never changes in G1.
 810     HeapWord* end = r->end();
 811     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 812   }
 813 };
 814 
 815 bool ConcurrentMark::nextMarkBitmapIsClear() {
 816   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 817   _g1h->heap_region_iterate(&cl);
 818   return cl.complete();
 819 }
 820 
 821 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 822 public:
 823   bool doHeapRegion(HeapRegion* r) {
 824     r->note_start_of_marking();
 825     return false;
 826   }
 827 };
 828 
 829 void ConcurrentMark::checkpointRootsInitialPre() {
 830   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 831   G1CollectorPolicy* g1p = g1h->g1_policy();
 832 
 833   _has_aborted = false;
 834 
 835   // Initialize marking structures. This has to be done in a STW phase.
 836   reset();
 837 
 838   // For each region note start of marking.
 839   NoteStartOfMarkHRClosure startcl;
 840   g1h->heap_region_iterate(&startcl);
 841 }
 842 
 843 
 844 void ConcurrentMark::checkpointRootsInitialPost() {
 845   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 846 
 847   // If we force an overflow during remark, the remark operation will
 848   // actually abort and we'll restart concurrent marking. If we always
 849   // force an overflow during remark we'll never actually complete the
 850   // marking phase. So, we initialize this here, at the start of the
 851   // cycle, so that the remaining overflow number will decrease at
 852   // every remark and we'll eventually not need to cause one.
 853   force_overflow_stw()->init();
 854 
 855   // Start Concurrent Marking weak-reference discovery.
 856   ReferenceProcessor* rp = g1h->ref_processor_cm();
 857   // enable ("weak") refs discovery
 858   rp->enable_discovery();
 859   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 860 
 861   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 862   // This is the start of the marking cycle; we expect all
 863   // threads to have SATB queues with active set to false.
 864   satb_mq_set.set_active_all_threads(true, /* new active value */
 865                                      false /* expected_active */);
 866 
 867   _root_regions.prepare_for_scan();
 868 
 869   // update_g1_committed() will be called at the end of an evac pause
 870   // when marking is on. So, it's also called at the end of the
 871   // initial-mark pause to update the heap end, if the heap expands
 872   // during it. No need to call it here.
 873 }
 874 
 875 /*
 876  * Notice that in the next two methods, we actually leave the STS
 877  * during the barrier sync and join it immediately afterwards. If we
 878  * do not do this, the following deadlock can occur: one thread could
 879  * be in the barrier sync code, waiting for the other thread to also
 880  * sync up, whereas another one could be trying to yield, while also
 881  * waiting for the other threads to sync up too.
 882  *
 883  * Note, however, that this code is also used during remark and in
 884  * this case we should not attempt to leave / enter the STS, otherwise
 885  * we'll either hit an assert (debug / fastdebug) or deadlock
 886  * (product). So we should only leave / enter the STS if we are
 887  * operating concurrently.
 888  *
 889  * Because the thread that does the sync barrier has left the STS, it
 890  * is possible that it gets suspended for a Full GC or that an
 891  * evacuation pause occurs. This is actually safe, since entering the
 892  * sync barrier is one of the last things do_marking_step() does, and
 893  * it doesn't manipulate any data structures afterwards.
 894  */
 895 
 896 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 897   bool barrier_aborted;
 898   {
 899     SuspendibleThreadSetLeaver sts_leave(concurrent());
 900     barrier_aborted = !_first_overflow_barrier_sync.enter();
 901   }
 902 
 903   // at this point everyone should have synced up and not be doing any
 904   // more work
 905 
 906   if (barrier_aborted) {
 907     // If the barrier aborted we ignore the overflow condition and
 908     // just abort the whole marking phase as quickly as possible.
 909     return;
 910   }
 911 
 912   // If we're executing the concurrent phase of marking, reset the marking
 913   // state; otherwise the marking state is reset after reference processing,
 914   // during the remark pause.
 915   // If we reset here as a result of an overflow during the remark we will
 916   // see assertion failures from any subsequent set_concurrency_and_phase()
 917   // calls.
 918   if (concurrent()) {
 919     // let the task associated with worker 0 do this
 920     if (worker_id == 0) {
 921       // task 0 is responsible for clearing the global data structures
 922       // We should be here because of an overflow. During STW we should
 923       // not clear the overflow flag since we rely on it being true when
 924       // we exit this method to abort the pause and restart concurrent
 925       // marking.
 926       reset_marking_state(true /* clear_overflow */);
 927       force_overflow()->update();
 928 
 929       if (G1Log::fine()) {
 930         gclog_or_tty->gclog_stamp();
 931         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 932       }
 933     }
 934   }
 935 
 936   // after this, each task should reset its own data structures and
 937   // then go into the second barrier
 938 }
 939 
 940 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 941   SuspendibleThreadSetLeaver sts_leave(concurrent());
 942   _second_overflow_barrier_sync.enter();
 943 
 944   // at this point everything should be re-initialized and ready to go
 945 }
 946 
 947 #ifndef PRODUCT
 948 void ForceOverflowSettings::init() {
 949   _num_remaining = G1ConcMarkForceOverflow;
 950   _force = false;
 951   update();
 952 }
 953 
 954 void ForceOverflowSettings::update() {
 955   if (_num_remaining > 0) {
 956     _num_remaining -= 1;
 957     _force = true;
 958   } else {
 959     _force = false;
 960   }
 961 }
 962 
 963 bool ForceOverflowSettings::should_force() {
 964   if (_force) {
 965     _force = false;
 966     return true;
 967   } else {
 968     return false;
 969   }
 970 }
 971 #endif // !PRODUCT
 972 
 973 class CMConcurrentMarkingTask: public AbstractGangTask {
 974 private:
 975   ConcurrentMark*       _cm;
 976   ConcurrentMarkThread* _cmt;
 977 
 978 public:
 979   void work(uint worker_id) {
 980     assert(Thread::current()->is_ConcurrentGC_thread(),
 981            "this should only be done by a conc GC thread");
 982     ResourceMark rm;
 983 
 984     double start_vtime = os::elapsedVTime();
 985 
 986     {
 987       SuspendibleThreadSetJoiner sts_join;
 988 
 989       assert(worker_id < _cm->active_tasks(), "invariant");
 990       CMTask* the_task = _cm->task(worker_id);
 991       the_task->record_start_time();
 992       if (!_cm->has_aborted()) {
 993         do {
 994           double start_vtime_sec = os::elapsedVTime();
 995           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 996 
 997           the_task->do_marking_step(mark_step_duration_ms,
 998                                     true  /* do_termination */,
 999                                     false /* is_serial*/);
1000 
1001           double end_vtime_sec = os::elapsedVTime();
1002           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1003           _cm->clear_has_overflown();
1004 
1005           _cm->do_yield_check(worker_id);
1006 
1007           jlong sleep_time_ms;
1008           if (!_cm->has_aborted() && the_task->has_aborted()) {
1009             sleep_time_ms =
1010               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1011             {
1012               SuspendibleThreadSetLeaver sts_leave;
1013               os::sleep(Thread::current(), sleep_time_ms, false);
1014             }
1015           }
1016         } while (!_cm->has_aborted() && the_task->has_aborted());
1017       }
1018       the_task->record_end_time();
1019       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1020     }
1021 
1022     double end_vtime = os::elapsedVTime();
1023     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1024   }
1025 
1026   CMConcurrentMarkingTask(ConcurrentMark* cm,
1027                           ConcurrentMarkThread* cmt) :
1028       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1029 
1030   ~CMConcurrentMarkingTask() { }
1031 };
1032 
1033 // Calculates the number of active workers for a concurrent
1034 // phase.
1035 uint ConcurrentMark::calc_parallel_marking_threads() {
1036   uint n_conc_workers = 0;
1037   if (!UseDynamicNumberOfGCThreads ||
1038       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1039        !ForceDynamicNumberOfGCThreads)) {
1040     n_conc_workers = max_parallel_marking_threads();
1041   } else {
1042     n_conc_workers =
1043       AdaptiveSizePolicy::calc_default_active_workers(
1044                                    max_parallel_marking_threads(),
1045                                    1, /* Minimum workers */
1046                                    parallel_marking_threads(),
1047                                    Threads::number_of_non_daemon_threads());
1048     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1049     // that scaling has already gone into "_max_parallel_marking_threads".
1050   }
1051   assert(n_conc_workers > 0, "Always need at least 1");
1052   return n_conc_workers;
1053 }
1054 
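     // Apply G1RootRegionScanClosure to every object in the given root region,
     // from bottom() up to top(), prefetching ahead as we go.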
1055 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1056   // Currently, only survivors can be root regions.
1057   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1058   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1059 
1060   const uintx interval = PrefetchScanIntervalInBytes;
1061   HeapWord* curr = hr->bottom();
1062   const HeapWord* end = hr->top();
1063   while (curr < end) {
1064     Prefetch::read(curr, interval);
1065     oop obj = oop(curr);
1066     int size = obj->oop_iterate_size(&cl);
1067     assert(size == obj->size(), "sanity");
1068     curr += size;
1069   }
1070 }
1071 
1072 class CMRootRegionScanTask : public AbstractGangTask {
1073 private:
1074   ConcurrentMark* _cm;
1075 
1076 public:
1077   CMRootRegionScanTask(ConcurrentMark* cm) :
1078     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1079 
1080   void work(uint worker_id) {
1081     assert(Thread::current()->is_ConcurrentGC_thread(),
1082            "this should only be done by a conc GC thread");
1083 
1084     CMRootRegions* root_regions = _cm->root_regions();
1085     HeapRegion* hr = root_regions->claim_next();
1086     while (hr != NULL) {
1087       _cm->scanRootRegion(hr, worker_id);
1088       hr = root_regions->claim_next();
1089     }
1090   }
1091 };
1092 
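     // Concurrently scan the root regions (the survivor regions recorded at the
     // initial-mark pause), if any, using the parallel marking workers.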
1093 void ConcurrentMark::scanRootRegions() {
1094   double scan_start = os::elapsedTime();
1095 
1096   // Start of concurrent marking.
1097   ClassLoaderDataGraph::clear_claimed_marks();
1098 
1099   // scan_in_progress() will have been set to true only if there was
1100   // at least one root region to scan. So, if it's false, we
1101   // should not attempt to do any further work.
1102   if (root_regions()->scan_in_progress()) {
1103     if (G1Log::fine()) {
1104       gclog_or_tty->gclog_stamp();
1105       gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
1106     }
1107 
1108     _parallel_marking_threads = calc_parallel_marking_threads();
1109     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1110            "Maximum number of marking threads exceeded");
1111     uint active_workers = MAX2(1U, parallel_marking_threads());
1112 
1113     CMRootRegionScanTask task(this);
1114     _parallel_workers->set_active_workers(active_workers);
1115     _parallel_workers->run_task(&task);
1116 
1117     if (G1Log::fine()) {
1118       gclog_or_tty->gclog_stamp();
1119       gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf secs]", os::elapsedTime() - scan_start);
1120     }
1121 
1122     // It's possible that has_aborted() is true here without actually
1123     // aborting the survivor scan earlier. This is OK as it's
1124     // mainly used for sanity checking.
1125     root_regions()->scan_finished();
1126   }
1127 }
1128 
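     // The main concurrent marking phase: size the marking worker gang and run
     // CMConcurrentMarkingTask on it until marking finishes or is aborted.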
1129 void ConcurrentMark::markFromRoots() {
1130   // we might be tempted to assert that:
1131   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1132   //        "inconsistent argument?");
1133   // However that wouldn't be right, because it's possible that
1134   // a safepoint is indeed in progress as a younger generation
1135   // stop-the-world GC happens even as we mark in this generation.
1136 
1137   _restart_for_overflow = false;
1138   force_overflow_conc()->init();
1139 
1140   // _g1h has _n_par_threads
1141   _parallel_marking_threads = calc_parallel_marking_threads();
1142   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1143     "Maximum number of marking threads exceeded");
1144 
1145   uint active_workers = MAX2(1U, parallel_marking_threads());
1146   assert(active_workers > 0, "Should have been set");
1147 
1148   // Parallel task terminator is set in "set_concurrency_and_phase()"
1149   set_concurrency_and_phase(active_workers, true /* concurrent */);
1150 
1151   CMConcurrentMarkingTask markingTask(this, cmThread());
1152   _parallel_workers->set_active_workers(active_workers);
1153   _parallel_workers->run_task(&markingTask);
1154   print_stats();
1155 }
1156 
1157 // Helper class to get rid of some boilerplate code.
1158 class G1CMTraceTime : public StackObj {
1159   GCTraceTimeImpl _gc_trace_time;
1160   static bool doit_and_prepend(bool doit) {
1161     if (doit) {
1162       gclog_or_tty->put(' ');
1163     }
1164     return doit;
1165   }
1166 
1167  public:
1168   G1CMTraceTime(const char* title, bool doit)
1169     : _gc_trace_time(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm()) {
1170   }
1171 };
1172 
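     // The remark pause: finish the marking work, process weak references, and
     // either restart concurrent marking (on mark stack overflow) or move to
     // the non-marking state.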
1173 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1174   // world is stopped at this checkpoint
1175   assert(SafepointSynchronize::is_at_safepoint(),
1176          "world should be stopped");
1177 
1178   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1179 
1180   // If a full collection has happened, we shouldn't do this.
1181   if (has_aborted()) {
1182     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1183     return;
1184   }
1185 
1186   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1187 
1188   if (VerifyDuringGC) {
1189     HandleMark hm;  // handle scope
1190     g1h->prepare_for_verify();
1191     Universe::verify(VerifyOption_G1UsePrevMarking,
1192                      " VerifyDuringGC:(before)");
1193   }
1194   g1h->check_bitmaps("Remark Start");
1195 
1196   G1CollectorPolicy* g1p = g1h->g1_policy();
1197   g1p->record_concurrent_mark_remark_start();
1198 
1199   double start = os::elapsedTime();
1200 
1201   checkpointRootsFinalWork();
1202 
1203   double mark_work_end = os::elapsedTime();
1204 
1205   weakRefsWork(clear_all_soft_refs);
1206 
1207   if (has_overflown()) {
1208     // Oops.  We overflowed.  Restart concurrent marking.
1209     _restart_for_overflow = true;
1210     if (G1TraceMarkStackOverflow) {
1211       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1212     }
1213 
1214     // Verify the heap w.r.t. the previous marking bitmap.
1215     if (VerifyDuringGC) {
1216       HandleMark hm;  // handle scope
1217       g1h->prepare_for_verify();
1218       Universe::verify(VerifyOption_G1UsePrevMarking,
1219                        " VerifyDuringGC:(overflow)");
1220     }
1221 
1222     // Clear the marking state because we will be restarting
1223     // marking due to overflowing the global mark stack.
1224     reset_marking_state();
1225   } else {
1226     {
1227       G1CMTraceTime trace("GC aggregate-data", G1Log::finer());
1228 
1229       // Aggregate the per-task counting data that we have accumulated
1230       // while marking.
1231       aggregate_count_data();
1232     }
1233 
1234     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1235     // We're done with marking.
1236     // This is the end of the marking cycle; we expect all
1237     // threads to have SATB queues with active set to true.
1238     satb_mq_set.set_active_all_threads(false, /* new active value */
1239                                        true /* expected_active */);
1240 
1241     if (VerifyDuringGC) {
1242       HandleMark hm;  // handle scope
1243       g1h->prepare_for_verify();
1244       Universe::verify(VerifyOption_G1UseNextMarking,
1245                        " VerifyDuringGC:(after)");
1246     }
1247     g1h->check_bitmaps("Remark End");
1248     assert(!restart_for_overflow(), "sanity");
1249     // Completely reset the marking state since marking completed
1250     set_non_marking_state();
1251   }
1252 
1253   // Expand the marking stack, if we have to and if we can.
1254   if (_markStack.should_expand()) {
1255     _markStack.expand();
1256   }
1257 
1258   // Statistics
1259   double now = os::elapsedTime();
1260   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1261   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1262   _remark_times.add((now - start) * 1000.0);
1263 
1264   g1p->record_concurrent_mark_remark_end();
1265 
1266   G1CMIsAliveClosure is_alive(g1h);
1267   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1268 }
1269 
1270 // Base class of the closures that finalize and verify the
1271 // liveness counting data.
1272 class CMCountDataClosureBase: public HeapRegionClosure {
1273 protected:
1274   G1CollectedHeap* _g1h;
1275   ConcurrentMark* _cm;
1276   CardTableModRefBS* _ct_bs;
1277 
1278   BitMap* _region_bm;
1279   BitMap* _card_bm;
1280 
1281   // Takes a region that's not empty (i.e., it has at least one
1282   // live object in it) and sets its corresponding bit on the region
1283   // bitmap to 1.
1284   void set_bit_for_region(HeapRegion* hr) {
1285     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1286     _region_bm->par_at_put(index, true);
1287   }
1288 
1289 public:
1290   CMCountDataClosureBase(G1CollectedHeap* g1h,
1291                          BitMap* region_bm, BitMap* card_bm):
1292     _g1h(g1h), _cm(g1h->concurrent_mark()),
1293     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
1294     _region_bm(region_bm), _card_bm(card_bm) { }
1295 };
1296 
1297 // Closure that calculates the # live objects per region. Used
1298 // for verification purposes during the cleanup pause.
1299 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1300   CMBitMapRO* _bm;
1301   size_t _region_marked_bytes;
1302 
1303 public:
1304   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1305                          BitMap* region_bm, BitMap* card_bm) :
1306     CMCountDataClosureBase(g1h, region_bm, card_bm),
1307     _bm(bm), _region_marked_bytes(0) { }
1308 
1309   bool doHeapRegion(HeapRegion* hr) {
1310     HeapWord* ntams = hr->next_top_at_mark_start();
1311     HeapWord* start = hr->bottom();
1312 
1313     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1314            "Preconditions not met - "
1315            "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
1316            p2i(start), p2i(ntams), p2i(hr->end()));
1317 
1318     // Find the first marked object at or after "start".
1319     start = _bm->getNextMarkedWordAddress(start, ntams);
1320 
1321     size_t marked_bytes = 0;
1322 
1323     while (start < ntams) {
1324       oop obj = oop(start);
1325       int obj_sz = obj->size();
1326       HeapWord* obj_end = start + obj_sz;
1327 
1328       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1329       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1330 
1331       // Note: if we're looking at the last region in the heap, obj_end
1332       // could actually be just beyond the end of the heap; end_idx
1333       // will then correspond to a (non-existent) card that is also
1334       // just beyond the heap.
1335       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1336         // end of object is not card aligned - increment to cover
1337         // all the cards spanned by the object
1338         end_idx += 1;
1339       }
1340 
1341       // Set the bits in the card BM for the cards spanned by this object.
1342       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1343 
1344       // Add the size of this object to the number of marked bytes.
1345       marked_bytes += (size_t)obj_sz * HeapWordSize;
1346 
1347       // This will happen if we are handling a humongous object that spans
1348       // several heap regions.
1349       if (obj_end > hr->end()) {
1350         break;
1351       }
1352       // Find the next marked object after this one.
1353       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1354     }
1355 
1356     // Mark the allocated-since-marking portion...
1357     HeapWord* top = hr->top();
1358     if (ntams < top) {
1359       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1360       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1361 
1362       // Note: if we're looking at the last region in the heap, top
1363       // could actually be just beyond the end of the heap; end_idx
1364       // will then correspond to a (non-existent) card that is also
1365       // just beyond the heap.
1366       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1367         // top is not card aligned - increment to cover
1368         // all the cards spanned by the allocated-since-marking portion
1369         end_idx += 1;
1370       }
1371       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1372 
1373       // This definitely means the region has live objects.
1374       set_bit_for_region(hr);
1375     }
1376 
1377     // Update the live region bitmap.
1378     if (marked_bytes > 0) {
1379       set_bit_for_region(hr);
1380     }
1381 
1382     // Set the marked bytes for the current region so that
1383     // it can be queried by a calling verification routine
1384     _region_marked_bytes = marked_bytes;
1385 
1386     return false;
1387   }
1388 
1389   size_t region_marked_bytes() const { return _region_marked_bytes; }
1390 };
1391 
1392 // Heap region closure used for verifying the counting data
1393 // that was accumulated concurrently and aggregated during
1394 // the remark pause. This closure is applied to the heap
1395 // regions during the STW cleanup pause.
1396 
1397 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1398   G1CollectedHeap* _g1h;
1399   ConcurrentMark* _cm;
1400   CalcLiveObjectsClosure _calc_cl;
1401   BitMap* _region_bm;   // Region BM to be verified
1402   BitMap* _card_bm;     // Card BM to be verified
1403 
1404   BitMap* _exp_region_bm; // Expected Region BM values
1405   BitMap* _exp_card_bm;   // Expected card BM values
1406 
1407   int _failures;
1408 
1409 public:
1410   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1411                                 BitMap* region_bm,
1412                                 BitMap* card_bm,
1413                                 BitMap* exp_region_bm,
1414                                 BitMap* exp_card_bm) :
1415     _g1h(g1h), _cm(g1h->concurrent_mark()),
1416     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1417     _region_bm(region_bm), _card_bm(card_bm),
1418     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1419     _failures(0) { }
1420 
1421   int failures() const { return _failures; }
1422 
1423   bool doHeapRegion(HeapRegion* hr) {
1424     int failures = 0;
1425 
1426     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1427     // this region and set the corresponding bits in the expected region
1428     // and card bitmaps.
1429     bool res = _calc_cl.doHeapRegion(hr);
1430     assert(res == false, "should be continuing");
1431 
1432     // Verify the marked bytes for this region.
1433     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1434     size_t act_marked_bytes = hr->next_marked_bytes();
1435 
1436     if (exp_marked_bytes > act_marked_bytes) {
1437       if (hr->is_starts_humongous()) {
1438         // For starts_humongous regions, the size of the whole humongous
1439         // object will be in exp_marked_bytes.
1440         HeapRegion* region = hr;
1441         int num_regions;
1442         for (num_regions = 0; region != NULL; num_regions++) {
1443           region = _g1h->next_region_in_humongous(region);
1444         }
1445         if ((num_regions-1) * HeapRegion::GrainBytes >= exp_marked_bytes) {
1446           failures += 1;
1447         } else if (num_regions * HeapRegion::GrainBytes < exp_marked_bytes) {
1448           failures += 1;
1449         }
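             // In other words, the expected size of the humongous object must lie
             // in ((num_regions - 1) * GrainBytes, num_regions * GrainBytes], i.e.
             // it must need exactly num_regions regions to be stored.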
1450       } else {
1451         // We're not OK if expected marked bytes > actual marked bytes. It means
1452         // we have missed accounting for some objects during the actual marking.
1453         failures += 1;
1454       }
1455     }
1456 
1457     // Verify the bit for this region in the actual and expected
1458     // (just calculated) region bitmaps.
1459     // We're not OK if the bit in the calculated expected region
1460     // bitmap is set and the bit in the actual region bitmap is not.
1461     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1462 
1463     bool expected = _exp_region_bm->at(index);
1464     bool actual = _region_bm->at(index);
1465     if (expected && !actual) {
1466       failures += 1;
1467     }
1468 
1469     // Verify that the card bit maps for the cards spanned by the current
1470     // region match. We have an error if we have a set bit in the expected
1471     // bit map and the corresponding bit in the actual bitmap is not set.
1472 
1473     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1474     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1475 
1476     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1477       expected = _exp_card_bm->at(i);
1478       actual = _card_bm->at(i);
1479 
1480       if (expected && !actual) {
1481         failures += 1;
1482       }
1483     }
1484 
1485     _failures += failures;
1486 
1487     // We could stop iteration over the heap when we
1488     // find the first violating region by returning true.
1489     return false;
1490   }
1491 };
1492 
1493 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1494 protected:
1495   G1CollectedHeap* _g1h;
1496   ConcurrentMark* _cm;
1497   BitMap* _actual_region_bm;
1498   BitMap* _actual_card_bm;
1499 
1500   uint    _n_workers;
1501 
1502   BitMap* _expected_region_bm;
1503   BitMap* _expected_card_bm;
1504 
1505   int  _failures;
1506 
1507   HeapRegionClaimer _hrclaimer;
1508 
1509 public:
1510   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1511                             BitMap* region_bm, BitMap* card_bm,
1512                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1513     : AbstractGangTask("G1 verify final counting"),
1514       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1515       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1516       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1517       _failures(0),
1518       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1519     assert(VerifyDuringGC, "don't call this otherwise");
1520     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1521     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1522   }
1523 
1524   void work(uint worker_id) {
1525     assert(worker_id < _n_workers, "invariant");
1526 
1527     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1528                                             _actual_region_bm, _actual_card_bm,
1529                                             _expected_region_bm,
1530                                             _expected_card_bm);
1531 
1532     _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
1533 
1534     Atomic::add(verify_cl.failures(), &_failures);
1535   }
1536 
1537   int failures() const { return _failures; }
1538 };
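     // Typical use of the verification task above (see ConcurrentMark::cleanup()
     // below): allocate expected region/card bitmaps sized like the actual ones,
     // run the task on the work gang and then guarantee that failures() is zero.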
1539 
1540 // Closure that finalizes the liveness counting data.
1541 // Used during the cleanup pause.
1542 // Sets the bits corresponding to the interval [NTAMS, top)
1543 // (which contains the implicitly live objects) in the
1544 // card liveness bitmap. Also sets, in the region liveness
1545 // bitmap, the bit for each region containing live data.
1546 
1547 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1548  public:
1549   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1550                               BitMap* region_bm,
1551                               BitMap* card_bm) :
1552     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1553 
1554   bool doHeapRegion(HeapRegion* hr) {
1555     HeapWord* ntams = hr->next_top_at_mark_start();
1556     HeapWord* top   = hr->top();
1557 
1558     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1559 
1560     // Mark the allocated-since-marking portion...
1561     if (ntams < top) {
1562       // This definitely means the region has live objects.
1563       set_bit_for_region(hr);
1564 
1565       // Now set the bits in the card bitmap for [ntams, top)
1566       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1567       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1568 
1569       // Note: if we're looking at the last region in the heap, top
1570       // could actually be just beyond the end of the heap; end_idx
1571       // will then correspond to a (non-existent) card that is also
1572       // just beyond the heap.
1573       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1574         // top is not card aligned - increment end_idx to cover
1575         // all the cards spanned by the allocated-since-marking portion
1576         end_idx += 1;
1577       }
1578 
1579       assert(end_idx <= _card_bm->size(),
1580              "oob: end_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1581              end_idx, _card_bm->size());
1582       assert(start_idx < _card_bm->size(),
1583              "oob: start_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1584              start_idx, _card_bm->size());
1585 
1586       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1587     }
1588 
1589     // Set the bit for the region if it contains live data
1590     if (hr->next_marked_bytes() > 0) {
1591       set_bit_for_region(hr);
1592     }
1593 
1594     return false;
1595   }
1596 };
1597 
1598 class G1ParFinalCountTask: public AbstractGangTask {
1599 protected:
1600   G1CollectedHeap* _g1h;
1601   ConcurrentMark* _cm;
1602   BitMap* _actual_region_bm;
1603   BitMap* _actual_card_bm;
1604 
1605   uint    _n_workers;
1606   HeapRegionClaimer _hrclaimer;
1607 
1608 public:
1609   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1610     : AbstractGangTask("G1 final counting"),
1611       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1612       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1613       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1614   }
1615 
1616   void work(uint worker_id) {
1617     assert(worker_id < _n_workers, "invariant");
1618 
1619     FinalCountDataUpdateClosure final_update_cl(_g1h,
1620                                                 _actual_region_bm,
1621                                                 _actual_card_bm);
1622 
1623     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1624   }
1625 };
1626 
1627 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1628   G1CollectedHeap* _g1;
1629   size_t _freed_bytes;
1630   FreeRegionList* _local_cleanup_list;
1631   HeapRegionSetCount _old_regions_removed;
1632   HeapRegionSetCount _humongous_regions_removed;
1633   HRRSCleanupTask* _hrrs_cleanup_task;
1634 
1635 public:
1636   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1637                              FreeRegionList* local_cleanup_list,
1638                              HRRSCleanupTask* hrrs_cleanup_task) :
1639     _g1(g1),
1640     _freed_bytes(0),
1641     _local_cleanup_list(local_cleanup_list),
1642     _old_regions_removed(),
1643     _humongous_regions_removed(),
1644     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1645 
1646   size_t freed_bytes() { return _freed_bytes; }
1647   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1648   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1649 
1650   bool doHeapRegion(HeapRegion *hr) {
1651     if (hr->is_archive()) {
1652       return false;
1653     }
1654     // We use a claim value of zero here because all regions
1655     // were claimed with value 1 in the FinalCount task.
1656     _g1->reset_gc_time_stamps(hr);
1657     hr->note_end_of_marking();
1658 
1659     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1660       _freed_bytes += hr->used();
1661       hr->set_containing_set(NULL);
1662       if (hr->is_humongous()) {
1663         _humongous_regions_removed.increment(1u, hr->capacity());
1664         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1665       } else {
1666         _old_regions_removed.increment(1u, hr->capacity());
1667         _g1->free_region(hr, _local_cleanup_list, true);
1668       }
1669     } else {
1670       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1671     }
1672 
1673     return false;
1674   }
1675 };
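     // The regions freed by the closure above are collected in a per-worker
     // FreeRegionList; G1ParNoteEndTask::work() below then appends that local
     // list to the global cleanup list and updates the summary bytes under
     // the ParGCRareEvent_lock.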
1676 
1677 class G1ParNoteEndTask: public AbstractGangTask {
1678   friend class G1NoteEndOfConcMarkClosure;
1679 
1680 protected:
1681   G1CollectedHeap* _g1h;
1682   FreeRegionList* _cleanup_list;
1683   HeapRegionClaimer _hrclaimer;
1684 
1685 public:
1686   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1687       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1688   }
1689 
1690   void work(uint worker_id) {
1691     FreeRegionList local_cleanup_list("Local Cleanup List");
1692     HRRSCleanupTask hrrs_cleanup_task;
1693     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1694                                            &hrrs_cleanup_task);
1695     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1696     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1697 
1698     // Now update the lists
1699     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1700     {
1701       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1702       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1703 
1704       // If we iterate over the global cleanup list at the end of
1705       // cleanup to do this printing we will not guarantee to only
1706       // generate output for the newly-reclaimed regions (the list
1707       // might not be empty at the beginning of cleanup; we might
1708       // still be working on its previous contents). So we do the
1709       // printing here, before we append the new regions to the global
1710       // cleanup list.
1711 
1712       G1HRPrinter* hr_printer = _g1h->hr_printer();
1713       if (hr_printer->is_active()) {
1714         FreeRegionListIterator iter(&local_cleanup_list);
1715         while (iter.more_available()) {
1716           HeapRegion* hr = iter.get_next();
1717           hr_printer->cleanup(hr);
1718         }
1719       }
1720 
1721       _cleanup_list->add_ordered(&local_cleanup_list);
1722       assert(local_cleanup_list.is_empty(), "post-condition");
1723 
1724       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1725     }
1726   }
1727 };
1728 
1729 class G1ParScrubRemSetTask: public AbstractGangTask {
1730 protected:
1731   G1RemSet* _g1rs;
1732   BitMap* _region_bm;
1733   BitMap* _card_bm;
1734   HeapRegionClaimer _hrclaimer;
1735 
1736 public:
1737   G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
1738       AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) {
1739   }
1740 
1741   void work(uint worker_id) {
1742     _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
1743   }
1744 
1745 };
1746 
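     // Roughly, the cleanup pause below: re-runs the liveness counting with the
     // world stopped, optionally verifies it (VerifyDuringGC), installs the
     // just-completed marking bitmap as "prev", notes the end of marking in every
     // region (freeing wholly-garbage regions), optionally scrubs the remembered
     // sets, and finally updates policy, metaspace and heap statistics.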
1747 void ConcurrentMark::cleanup() {
1748   // world is stopped at this checkpoint
1749   assert(SafepointSynchronize::is_at_safepoint(),
1750          "world should be stopped");
1751   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1752 
1753   // If a full collection has happened, we shouldn't do this.
1754   if (has_aborted()) {
1755     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1756     return;
1757   }
1758 
1759   g1h->verify_region_sets_optional();
1760 
1761   if (VerifyDuringGC) {
1762     HandleMark hm;  // handle scope
1763     g1h->prepare_for_verify();
1764     Universe::verify(VerifyOption_G1UsePrevMarking,
1765                      " VerifyDuringGC:(before)");
1766   }
1767   g1h->check_bitmaps("Cleanup Start");
1768 
1769   G1CollectorPolicy* g1p = g1h->g1_policy();
1770   g1p->record_concurrent_mark_cleanup_start();
1771 
1772   double start = os::elapsedTime();
1773 
1774   HeapRegionRemSet::reset_for_cleanup_tasks();
1775 
1776   // Do counting once more with the world stopped for good measure.
1777   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1778 
1779   g1h->workers()->run_task(&g1_par_count_task);
1780 
1781   if (VerifyDuringGC) {
1782     // Verify that the counting data accumulated during marking matches
1783     // that calculated by walking the marking bitmap.
1784 
1785     // Bitmaps to hold expected values
1786     BitMap expected_region_bm(_region_bm.size(), true);
1787     BitMap expected_card_bm(_card_bm.size(), true);
1788 
1789     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1790                                                  &_region_bm,
1791                                                  &_card_bm,
1792                                                  &expected_region_bm,
1793                                                  &expected_card_bm);
1794 
1795     g1h->workers()->run_task(&g1_par_verify_task);
1796 
1797     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1798   }
1799 
1800   size_t start_used_bytes = g1h->used();
1801   g1h->collector_state()->set_mark_in_progress(false);
1802 
1803   double count_end = os::elapsedTime();
1804   double this_final_counting_time = (count_end - start);
1805   _total_counting_time += this_final_counting_time;
1806 
1807   if (G1PrintRegionLivenessInfo) {
1808     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1809     _g1h->heap_region_iterate(&cl);
1810   }
1811 
1812   // Install the newly created mark bitmap as "prev".
1813   swapMarkBitMaps();
1814 
1815   g1h->reset_gc_time_stamp();
1816 
1817   uint n_workers = _g1h->workers()->active_workers();
1818 
1819   // Note end of marking in all heap regions.
1820   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1821   g1h->workers()->run_task(&g1_par_note_end_task);
1822   g1h->check_gc_time_stamps();
1823 
1824   if (!cleanup_list_is_empty()) {
1825     // The cleanup list is not empty, so we'll have to process it
1826     // concurrently. Notify anyone else that might be wanting free
1827     // regions that there will be more free regions coming soon.
1828     g1h->set_free_regions_coming();
1829   }
1830 
1831   // Scrub the remembered sets before the record_concurrent_mark_cleanup_end() call
1832   // below, since scrubbing affects the metric by which we sort the heap regions.
1833   if (G1ScrubRemSets) {
1834     double rs_scrub_start = os::elapsedTime();
1835     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
1836     g1h->workers()->run_task(&g1_par_scrub_rs_task);
1837 
1838     double rs_scrub_end = os::elapsedTime();
1839     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1840     _total_rs_scrub_time += this_rs_scrub_time;
1841   }
1842 
1843   // this will also free any regions totally full of garbage objects,
1844   // and sort the regions.
1845   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1846 
1847   // Statistics.
1848   double end = os::elapsedTime();
1849   _cleanup_times.add((end - start) * 1000.0);
1850 
1851   if (G1Log::fine()) {
1852     g1h->g1_policy()->print_heap_transition(start_used_bytes);
1853   }
1854 
1855   // Clean up will have freed any regions completely full of garbage.
1856   // Update the soft reference policy with the new heap occupancy.
1857   Universe::update_heap_info_at_gc();
1858 
1859   if (VerifyDuringGC) {
1860     HandleMark hm;  // handle scope
1861     g1h->prepare_for_verify();
1862     Universe::verify(VerifyOption_G1UsePrevMarking,
1863                      " VerifyDuringGC:(after)");
1864   }
1865 
1866   g1h->check_bitmaps("Cleanup End");
1867 
1868   g1h->verify_region_sets_optional();
1869 
1870   // We need to make this be a "collection" so any collection pause that
1871   // races with it goes around and waits for completeCleanup to finish.
1872   g1h->increment_total_collections();
1873 
1874   // Clean out dead classes and update Metaspace sizes.
1875   if (ClassUnloadingWithConcurrentMark) {
1876     ClassLoaderDataGraph::purge();
1877   }
1878   MetaspaceGC::compute_new_size();
1879 
1880   // We reclaimed old regions so we should calculate the sizes to make
1881   // sure we update the old gen/space data.
1882   g1h->g1mm()->update_sizes();
1883   g1h->allocation_context_stats().update_after_mark();
1884 
1885   g1h->trace_heap_after_concurrent_cycle();
1886 }
1887 
1888 void ConcurrentMark::completeCleanup() {
1889   if (has_aborted()) return;
1890 
1891   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1892 
1893   _cleanup_list.verify_optional();
1894   FreeRegionList tmp_free_list("Tmp Free List");
1895 
1896   if (G1ConcRegionFreeingVerbose) {
1897     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1898                            "cleanup list has %u entries",
1899                            _cleanup_list.length());
1900   }
1901 
1902   // No one else should be accessing the _cleanup_list at this point,
1903   // so it is not necessary to take any locks
1904   while (!_cleanup_list.is_empty()) {
1905     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1906     assert(hr != NULL, "Got NULL from a non-empty list");
1907     hr->par_clear();
1908     tmp_free_list.add_ordered(hr);
1909 
1910     // Instead of adding one region at a time to the secondary_free_list,
1911     // we accumulate them in the local list and move them a few at a
1912     // time. This also cuts down on the number of notify_all() calls
1913     // we do during this process. We'll also append the local list when
1914     // _cleanup_list is empty (which means we just removed the last
1915     // region from the _cleanup_list).
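         // E.g. with G1SecondaryFreeListAppendLength == 8 (an illustrative value,
         // not necessarily the default), the SecondaryFreeList_lock is taken and
         // notify_all() issued once per eight freed regions rather than per region.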
1916     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1917         _cleanup_list.is_empty()) {
1918       if (G1ConcRegionFreeingVerbose) {
1919         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1920                                "appending %u entries to the secondary_free_list, "
1921                                "cleanup list still has %u entries",
1922                                tmp_free_list.length(),
1923                                _cleanup_list.length());
1924       }
1925 
1926       {
1927         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1928         g1h->secondary_free_list_add(&tmp_free_list);
1929         SecondaryFreeList_lock->notify_all();
1930       }
1931 #ifndef PRODUCT
1932       if (G1StressConcRegionFreeing) {
1933         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1934           os::sleep(Thread::current(), (jlong) 1, false);
1935         }
1936       }
1937 #endif
1938     }
1939   }
1940   assert(tmp_free_list.is_empty(), "post-condition");
1941 }
1942 
1943 // Supporting Object and Oop closures for reference discovery
1944 // and processing during marking
1945 
1946 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1947   HeapWord* addr = (HeapWord*)obj;
1948   return addr != NULL &&
1949          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1950 }
1951 
1952 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1953 // Uses the CMTask associated with a worker thread (for serial reference
1954 // processing the CMTask for worker 0 is used) to preserve (mark) and
1955 // trace referent objects.
1956 //
1957 // Using the CMTask and embedded local queues avoids having the worker
1958 // threads operating on the global mark stack. This reduces the risk
1959 // of overflowing the stack - which we would rather avoid at this late
1960 // stage. Also, using the tasks' local queues removes the potential
1961 // for the workers to interfere with each other, which could occur if
1962 // they were operating on the global stack.
1963 
1964 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1965   ConcurrentMark* _cm;
1966   CMTask*         _task;
1967   int             _ref_counter_limit;
1968   int             _ref_counter;
1969   bool            _is_serial;
1970  public:
1971   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
1972     _cm(cm), _task(task), _is_serial(is_serial),
1973     _ref_counter_limit(G1RefProcDrainInterval) {
1974     assert(_ref_counter_limit > 0, "sanity");
1975     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1976     _ref_counter = _ref_counter_limit;
1977   }
1978 
1979   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1980   virtual void do_oop(      oop* p) { do_oop_work(p); }
1981 
1982   template <class T> void do_oop_work(T* p) {
1983     if (!_cm->has_overflown()) {
1984       oop obj = oopDesc::load_decode_heap_oop(p);
1985       _task->deal_with_reference(obj);
1986       _ref_counter--;
1987 
1988       if (_ref_counter == 0) {
1989         // We have dealt with _ref_counter_limit references, pushing them
1990         // and objects reachable from them on to the local stack (and
1991         // possibly the global stack). Call CMTask::do_marking_step() to
1992         // process these entries.
1993         //
1994         // We call CMTask::do_marking_step() in a loop, which we'll exit if
1995         // there's nothing more to do (i.e. we're done with the entries that
1996         // were pushed as a result of the CMTask::deal_with_reference() calls
1997         // above) or we overflow.
1998         //
1999         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2000         // flag while there may still be some work to do. (See the comment at
2001         // the beginning of CMTask::do_marking_step() for those conditions -
2002         // one of which is reaching the specified time target.) It is only
2003         // when CMTask::do_marking_step() returns without setting the
2004         // has_aborted() flag that the marking step has completed.
2005         do {
2006           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2007           _task->do_marking_step(mark_step_duration_ms,
2008                                  false      /* do_termination */,
2009                                  _is_serial);
2010         } while (_task->has_aborted() && !_cm->has_overflown());
2011         _ref_counter = _ref_counter_limit;
2012       }
2013     }
2014   }
2015 };
2016 
2017 // 'Drain' oop closure used by both serial and parallel reference processing.
2018 // Uses the CMTask associated with a given worker thread (for serial
2019 // reference processing the CMTask for worker 0 is used). Calls the
2020 // do_marking_step routine, with an unbelievably large timeout value,
2021 // to drain the marking data structures of the remaining entries
2022 // added by the 'keep alive' oop closure above.
2023 
2024 class G1CMDrainMarkingStackClosure: public VoidClosure {
2025   ConcurrentMark* _cm;
2026   CMTask*         _task;
2027   bool            _is_serial;
2028  public:
2029   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2030     _cm(cm), _task(task), _is_serial(is_serial) {
2031     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2032   }
2033 
2034   void do_void() {
2035     do {
2036       // We call CMTask::do_marking_step() to completely drain the local
2037       // and global marking stacks of entries pushed by the 'keep alive'
2038       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2039       //
2040       // CMTask::do_marking_step() is called in a loop, which we'll exit
2041       // if there's nothing more to do (i.e. we've completely drained the
2042       // entries that were pushed as a result of applying the 'keep alive'
2043       // closure to the entries on the discovered ref lists) or we overflow
2044       // the global marking stack.
2045       //
2046       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2047       // flag while there may still be some work to do. (See the comment at
2048       // the beginning of CMTask::do_marking_step() for those conditions -
2049       // one of which is reaching the specified time target.) It is only
2050       // when CMTask::do_marking_step() returns without setting the
2051       // has_aborted() flag that the marking step has completed.
2052 
2053       _task->do_marking_step(1000000000.0 /* something very large */,
2054                              true         /* do_termination */,
2055                              _is_serial);
2056     } while (_task->has_aborted() && !_cm->has_overflown());
2057   }
2058 };
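     // The two closures above are always used as a pair: weakRefsWork() below
     // creates serial instances (bound to the CMTask of worker 0) when reference
     // processing is single-threaded, while G1CMRefProcTaskProxy::work() creates
     // per-worker parallel instances when processing is multi-threaded.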
2059 
2060 // Implementation of AbstractRefProcTaskExecutor for parallel
2061 // reference processing at the end of G1 concurrent marking
2062 
2063 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2064 private:
2065   G1CollectedHeap* _g1h;
2066   ConcurrentMark*  _cm;
2067   WorkGang*        _workers;
2068   uint             _active_workers;
2069 
2070 public:
2071   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2072                           ConcurrentMark* cm,
2073                           WorkGang* workers,
2074                           uint n_workers) :
2075     _g1h(g1h), _cm(cm),
2076     _workers(workers), _active_workers(n_workers) { }
2077 
2078   // Executes the given task using concurrent marking worker threads.
2079   virtual void execute(ProcessTask& task);
2080   virtual void execute(EnqueueTask& task);
2081 };
2082 
2083 class G1CMRefProcTaskProxy: public AbstractGangTask {
2084   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2085   ProcessTask&     _proc_task;
2086   G1CollectedHeap* _g1h;
2087   ConcurrentMark*  _cm;
2088 
2089 public:
2090   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2091                      G1CollectedHeap* g1h,
2092                      ConcurrentMark* cm) :
2093     AbstractGangTask("Process reference objects in parallel"),
2094     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2095     ReferenceProcessor* rp = _g1h->ref_processor_cm();
2096     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2097   }
2098 
2099   virtual void work(uint worker_id) {
2100     ResourceMark rm;
2101     HandleMark hm;
2102     CMTask* task = _cm->task(worker_id);
2103     G1CMIsAliveClosure g1_is_alive(_g1h);
2104     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2105     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2106 
2107     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2108   }
2109 };
2110 
2111 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2112   assert(_workers != NULL, "Need parallel worker threads.");
2113   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2114 
2115   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2116 
2117   // We need to reset the concurrency level before each
2118   // proxy task execution, so that the termination protocol
2119   // and overflow handling in CMTask::do_marking_step() knows
2120   // how many workers to wait for.
2121   _cm->set_concurrency(_active_workers);
2122   _workers->run_task(&proc_task_proxy);
2123 }
2124 
2125 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2126   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2127   EnqueueTask& _enq_task;
2128 
2129 public:
2130   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2131     AbstractGangTask("Enqueue reference objects in parallel"),
2132     _enq_task(enq_task) { }
2133 
2134   virtual void work(uint worker_id) {
2135     _enq_task.work(worker_id);
2136   }
2137 };
2138 
2139 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2140   assert(_workers != NULL, "Need parallel worker threads.");
2141   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2142 
2143   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2144 
2145   // Not strictly necessary but...
2146   //
2147   // We need to reset the concurrency level before each
2148   // proxy task execution, so that the termination protocol
2149   // and overflow handling in CMTask::do_marking_step() knows
2150   // how many workers to wait for.
2151   _cm->set_concurrency(_active_workers);
2152   _workers->run_task(&enq_task_proxy);
2153 }
2154 
2155 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2156   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2157 }
2158 
2159 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2160   if (has_overflown()) {
2161     // Skip processing the discovered references if we have
2162     // overflown the global marking stack. Reference objects
2163     // only get discovered once so it is OK to not
2164     // de-populate the discovered reference lists. We could have,
2165     // but the only benefit would be that, when marking restarts,
2166     // fewer reference objects are discovered.
2167     return;
2168   }
2169 
2170   ResourceMark rm;
2171   HandleMark   hm;
2172 
2173   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2174 
2175   // Is alive closure.
2176   G1CMIsAliveClosure g1_is_alive(g1h);
2177 
2178   // Inner scope to exclude the cleaning of the string and symbol
2179   // tables from the displayed time.
2180   {
2181     G1CMTraceTime t("GC ref-proc", G1Log::finer());
2182 
2183     ReferenceProcessor* rp = g1h->ref_processor_cm();
2184 
2185     // See the comment in G1CollectedHeap::ref_processing_init()
2186     // about how reference processing currently works in G1.
2187 
2188     // Set the soft reference policy
2189     rp->setup_policy(clear_all_soft_refs);
2190     assert(_markStack.isEmpty(), "mark stack should be empty");
2191 
2192     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2193     // in serial reference processing. Note these closures are also
2194     // used for serially processing (by the current thread) the
2195     // JNI references during parallel reference processing.
2196     //
2197     // These closures do not need to synchronize with the worker
2198     // threads involved in parallel reference processing as these
2199     // instances are executed serially by the current thread (e.g.
2200     // reference processing is not multi-threaded and is thus
2201     // performed by the current thread instead of a gang worker).
2202     //
2203     // The gang tasks involved in parallel reference processing create
2204     // their own instances of these closures, which do their own
2205     // synchronization among themselves.
2206     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2207     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2208 
2209     // We need at least one active thread. If reference processing
2210     // is not multi-threaded we use the current (VMThread) thread,
2211     // otherwise we use the work gang from the G1CollectedHeap and
2212     // we utilize all the worker threads we can.
2213     bool processing_is_mt = rp->processing_is_mt();
2214     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2215     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2216 
2217     // Parallel processing task executor.
2218     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2219                                               g1h->workers(), active_workers);
2220     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2221 
2222     // Set the concurrency level. The phase was already set prior to
2223     // executing the remark task.
2224     set_concurrency(active_workers);
2225 
2226     // Set the degree of MT processing here.  If the discovery was done MT,
2227     // the number of threads involved during discovery could differ from
2228     // the number of active workers.  This is OK as long as the discovered
2229     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2230     rp->set_active_mt_degree(active_workers);
2231 
2232     // Process the weak references.
2233     const ReferenceProcessorStats& stats =
2234         rp->process_discovered_references(&g1_is_alive,
2235                                           &g1_keep_alive,
2236                                           &g1_drain_mark_stack,
2237                                           executor,
2238                                           g1h->gc_timer_cm());
2239     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2240 
2241     // The do_oop work routines of the keep_alive and drain_marking_stack
2242     // oop closures will set the has_overflown flag if we overflow the
2243     // global marking stack.
2244 
2245     assert(_markStack.overflow() || _markStack.isEmpty(),
2246             "mark stack should be empty (unless it overflowed)");
2247 
2248     if (_markStack.overflow()) {
2249       // This should have been done already when we tried to push an
2250       // entry on to the global mark stack. But let's do it again.
2251       set_has_overflown();
2252     }
2253 
2254     assert(rp->num_q() == active_workers, "Number of discovered reference queues should match the number of active workers");
2255 
2256     rp->enqueue_discovered_references(executor);
2257 
2258     rp->verify_no_references_recorded();
2259     assert(!rp->discovery_enabled(), "Post condition");
2260   }
2261 
2262   if (has_overflown()) {
2263     // We can not trust g1_is_alive if the marking stack overflowed
2264     return;
2265   }
2266 
2267   assert(_markStack.isEmpty(), "Marking should have completed");
2268 
2269   // Unload Klasses, String, Symbols, Code Cache, etc.
2270   {
2271     G1CMTraceTime trace("Unloading", G1Log::finer());
2272 
2273     if (ClassUnloadingWithConcurrentMark) {
2274       bool purged_classes;
2275 
2276       {
2277         G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest());
2278         purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2279       }
2280 
2281       {
2282         G1CMTraceTime trace("Parallel Unloading", G1Log::finest());
2283         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2284       }
2285     }
2286 
2287     if (G1StringDedup::is_enabled()) {
2288       G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest());
2289       G1StringDedup::unlink(&g1_is_alive);
2290     }
2291   }
2292 }
2293 
2294 void ConcurrentMark::swapMarkBitMaps() {
2295   CMBitMapRO* temp = _prevMarkBitMap;
2296   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2297   _nextMarkBitMap  = (CMBitMap*)  temp;
2298 }
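     // After the swap above, "prev" reflects the marking that has just completed
     // (and is what liveness queries use from now on), while the old "prev"
     // bitmap becomes the "next" bitmap, to be cleared before the next cycle.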
2299 
2300 // Closure for marking entries in SATB buffers.
2301 class CMSATBBufferClosure : public SATBBufferClosure {
2302 private:
2303   CMTask* _task;
2304   G1CollectedHeap* _g1h;
2305 
2306   // This is very similar to CMTask::deal_with_reference, but with
2307   // more relaxed requirements for the argument, so this must be more
2308   // circumspect about treating the argument as an object.
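       // Only entries below the containing region's NTAMS are marked here;
       // anything allocated at or above NTAMS is treated as implicitly live
       // by the marking and needs no explicit mark.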
2309   void do_entry(void* entry) const {
2310     _task->increment_refs_reached();
2311     HeapRegion* hr = _g1h->heap_region_containing(entry);
2312     if (entry < hr->next_top_at_mark_start()) {
2313       // Until we get here, we don't know whether entry refers to a valid
2314       // object; it could instead have been a stale reference.
2315       oop obj = static_cast<oop>(entry);
2316       assert(obj->is_oop(true /* ignore mark word */),
2317              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2318       _task->make_reference_grey(obj, hr);
2319     }
2320   }
2321 
2322 public:
2323   CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2324     : _task(task), _g1h(g1h) { }
2325 
2326   virtual void do_buffer(void** buffer, size_t size) {
2327     for (size_t i = 0; i < size; ++i) {
2328       do_entry(buffer[i]);
2329     }
2330   }
2331 };
2332 
2333 class G1RemarkThreadsClosure : public ThreadClosure {
2334   CMSATBBufferClosure _cm_satb_cl;
2335   G1CMOopClosure _cm_cl;
2336   MarkingCodeBlobClosure _code_cl;
2337   int _thread_parity;
2338 
2339  public:
2340   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2341     _cm_satb_cl(task, g1h),
2342     _cm_cl(g1h, g1h->concurrent_mark(), task),
2343     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2344     _thread_parity(Threads::thread_claim_parity()) {}
2345 
2346   void do_thread(Thread* thread) {
2347     if (thread->is_Java_thread()) {
2348       if (thread->claim_oops_do(true, _thread_parity)) {
2349         JavaThread* jt = (JavaThread*)thread;
2350 
2351         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2352         // however, oops reachable from nmethods have very complex lifecycles:
2353         // * Alive if on the stack of an executing method
2354         // * Weakly reachable otherwise
2355         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2356         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2357         jt->nmethods_do(&_code_cl);
2358 
2359         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2360       }
2361     } else if (thread->is_VM_thread()) {
2362       if (thread->claim_oops_do(true, _thread_parity)) {
2363         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2364       }
2365     }
2366   }
2367 };
2368 
2369 class CMRemarkTask: public AbstractGangTask {
2370 private:
2371   ConcurrentMark* _cm;
2372 public:
2373   void work(uint worker_id) {
2374     // Since all available tasks are actually started, we should
2375     // only proceed if we're supposed to be active.
2376     if (worker_id < _cm->active_tasks()) {
2377       CMTask* task = _cm->task(worker_id);
2378       task->record_start_time();
2379       {
2380         ResourceMark rm;
2381         HandleMark hm;
2382 
2383         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2384         Threads::threads_do(&threads_f);
2385       }
2386 
2387       do {
2388         task->do_marking_step(1000000000.0 /* something very large */,
2389                               true         /* do_termination       */,
2390                               false        /* is_serial            */);
2391       } while (task->has_aborted() && !_cm->has_overflown());
2392       // If we overflow, then we do not want to restart. We instead
2393       // want to abort remark and do concurrent marking again.
2394       task->record_end_time();
2395     }
2396   }
2397 
2398   CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2399     AbstractGangTask("Par Remark"), _cm(cm) {
2400     _cm->terminator()->reset_for_reuse(active_workers);
2401   }
2402 };
2403 
2404 void ConcurrentMark::checkpointRootsFinalWork() {
2405   ResourceMark rm;
2406   HandleMark   hm;
2407   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2408 
2409   G1CMTraceTime trace("Finalize Marking", G1Log::finer());
2410 
2411   g1h->ensure_parsability(false);
2412 
2413   // this is remark, so we'll use up all active threads
2414   uint active_workers = g1h->workers()->active_workers();
2415   set_concurrency_and_phase(active_workers, false /* concurrent */);
2416   // Leave _parallel_marking_threads at its
2417   // value originally calculated in the ConcurrentMark
2418   // constructor and pass the number of active workers
2419   // through the gang in the task.
2420 
2421   {
2422     StrongRootsScope srs(active_workers);
2423 
2424     CMRemarkTask remarkTask(this, active_workers);
2425     // We will start all available threads, even if we decide that the
2426     // active_workers will be fewer. The extra ones will just bail out
2427     // immediately.
2428     g1h->workers()->run_task(&remarkTask);
2429   }
2430 
2431   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2432   guarantee(has_overflown() ||
2433             satb_mq_set.completed_buffers_num() == 0,
2434             "Invariant: has_overflown = %s, num buffers = %d",
2435             BOOL_TO_STR(has_overflown()),
2436             satb_mq_set.completed_buffers_num());
2437 
2438   print_stats();
2439 }
2440 
2441 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2442   // Note we are overriding the read-only view of the prev map here, via
2443   // the cast.
2444   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2445 }
2446 
2447 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2448   _nextMarkBitMap->clearRange(mr);
2449 }
2450 
2451 HeapRegion*
2452 ConcurrentMark::claim_region(uint worker_id) {
2453   // "checkpoint" the finger
2454   HeapWord* finger = _finger;
2455 
2456   // _heap_end will not change underneath our feet; it only changes at
2457   // yield points.
2458   while (finger < _heap_end) {
2459     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2460 
2461     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2462 
2463     // The above heap_region_containing() may return NULL, as we always scan and
2464     // claim up to the end of the heap. In this case, just jump to the next region.
2465     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2466 
2467     // Is the gap between reading the finger and doing the CAS too long?
2468     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2469     if (res == finger && curr_region != NULL) {
2470       // we succeeded
2471       HeapWord*   bottom        = curr_region->bottom();
2472       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2473 
2474       // notice that _finger == end cannot be guaranteed here since
2475       // someone else might have moved the finger even further
2476       assert(_finger >= end, "the finger should have moved forward");
2477 
2478       if (limit > bottom) {
2479         return curr_region;
2480       } else {
2481         assert(limit == bottom,
2482                "the region limit should be at bottom");
2483         // we return NULL and the caller should try calling
2484         // claim_region() again.
2485         return NULL;
2486       }
2487     } else {
2488       assert(_finger > finger, "the finger should have moved forward");
2489       // read it again
2490       finger = _finger;
2491     }
2492   }
2493 
2494   return NULL;
2495 }
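     // The claiming protocol above, as a rough sketch:
     //
     //   finger = _finger;                       // snapshot the global finger
     //   end    = end of the region containing finger
     //            (or finger + GrainWords if no region is there);
     //   if (Atomic::cmpxchg_ptr(end, &_finger, finger) == finger) {
     //     // we own [finger, end); return the region if it has anything to scan
     //   } else {
     //     // somebody moved the finger first - re-read _finger and retry
     //   }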
2496 
2497 #ifndef PRODUCT
2498 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
2499 private:
2500   G1CollectedHeap* _g1h;
2501   const char* _phase;
2502   int _info;
2503 
2504 public:
2505   VerifyNoCSetOops(const char* phase, int info = -1) :
2506     _g1h(G1CollectedHeap::heap()),
2507     _phase(phase),
2508     _info(info)
2509   { }
2510 
2511   void operator()(oop obj) const {
2512     guarantee(obj->is_oop(),
2513               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2514               p2i(obj), _phase, _info);
2515     guarantee(!_g1h->obj_in_cs(obj),
2516               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2517               p2i(obj), _phase, _info);
2518   }
2519 };
2520 
2521 void ConcurrentMark::verify_no_cset_oops() {
2522   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2523   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2524     return;
2525   }
2526 
2527   // Verify entries on the global mark stack
2528   _markStack.iterate(VerifyNoCSetOops("Stack"));
2529 
2530   // Verify entries on the task queues
2531   for (uint i = 0; i < _max_worker_id; ++i) {
2532     CMTaskQueue* queue = _task_queues->queue(i);
2533     queue->iterate(VerifyNoCSetOops("Queue", i));
2534   }
2535 
2536   // Verify the global finger
2537   HeapWord* global_finger = finger();
2538   if (global_finger != NULL && global_finger < _heap_end) {
2539     // Since we always iterate over all regions, we might get a NULL HeapRegion
2540     // here.
2541     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
2542     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2543               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
2544               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
2545   }
2546 
2547   // Verify the task fingers
2548   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2549   for (uint i = 0; i < parallel_marking_threads(); ++i) {
2550     CMTask* task = _tasks[i];
2551     HeapWord* task_finger = task->finger();
2552     if (task_finger != NULL && task_finger < _heap_end) {
2553       // See above note on the global finger verification.
2554       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
2555       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2556                 !task_hr->in_collection_set(),
2557                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2558                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
2559     }
2560   }
2561 }
2562 #endif // PRODUCT
2563 
2564 // Aggregate the counting data that was constructed concurrently
2565 // with marking.
2566 class AggregateCountDataHRClosure: public HeapRegionClosure {
2567   G1CollectedHeap* _g1h;
2568   ConcurrentMark* _cm;
2569   CardTableModRefBS* _ct_bs;
2570   BitMap* _cm_card_bm;
2571   uint _max_worker_id;
2572 
2573  public:
2574   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2575                               BitMap* cm_card_bm,
2576                               uint max_worker_id) :
2577     _g1h(g1h), _cm(g1h->concurrent_mark()),
2578     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2579     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2580 
2581   bool doHeapRegion(HeapRegion* hr) {
2582     HeapWord* start = hr->bottom();
2583     HeapWord* limit = hr->next_top_at_mark_start();
2584     HeapWord* end = hr->end();
2585 
2586     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2587            "Preconditions not met - "
2588            "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2589            "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2590            p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2591 
2592     assert(hr->next_marked_bytes() == 0, "Precondition");
2593 
2594     if (start == limit) {
2595       // NTAMS of this region has not been set so nothing to do.
2596       return false;
2597     }
2598 
2599     // 'start' should be in the heap.
2600     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2601     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2602     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2603 
2604     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2605     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2606     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2607 
2608     // If ntams is not card aligned then we bump the card bitmap index
2609     // for limit so that we get all the cards spanned by
2610     // the object ending at ntams.
2611     // Note: if this is the last region in the heap then ntams
2612     // could actually be just beyond the end of the heap;
2613     // limit_idx will then correspond to a (non-existent) card
2614     // that is also outside the heap.
2615     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2616       limit_idx += 1;
2617     }
2618 
2619     assert(limit_idx <= end_idx, "or else use atomics");
2620 
2621     // Aggregate the "stripe" in the count data associated with hr.
2622     uint hrm_index = hr->hrm_index();
2623     size_t marked_bytes = 0;
2624 
2625     for (uint i = 0; i < _max_worker_id; i += 1) {
2626       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2627       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2628 
2629       // Fetch the marked_bytes in this region for task i and
2630       // add it to the running total for this region.
2631       marked_bytes += marked_bytes_array[hrm_index];
2632 
2633       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2634       // into the global card bitmap.
2635       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2636 
2637       while (scan_idx < limit_idx) {
2638         assert(task_card_bm->at(scan_idx) == true, "should be");
2639         _cm_card_bm->set_bit(scan_idx);
2640         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2641 
2642         // BitMap::get_next_one_offset() can handle the case when
2643         // its left_offset parameter is greater than its right_offset
2644         // parameter. It does, however, have an early exit if
2645         // left_offset == right_offset. So let's limit the value
2646         // passed in for left offset here.
2647         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2648         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2649       }
2650     }
2651 
2652     // Update the marked bytes for this region.
2653     hr->add_to_marked_bytes(marked_bytes);
2654 
2655     // Next heap region
2656     return false;
2657   }
2658 };
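     // In effect, for each region the closure above computes
     //   marked_bytes = sum over workers w of count_marked_bytes_array_for(w)[hrm_index]
     // and adds it to the region, and ORs each worker's card bitmap, restricted
     // to [start_idx, limit_idx), into the global card bitmap.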
2659 
2660 class G1AggregateCountDataTask: public AbstractGangTask {
2661 protected:
2662   G1CollectedHeap* _g1h;
2663   ConcurrentMark* _cm;
2664   BitMap* _cm_card_bm;
2665   uint _max_worker_id;
2666   uint _active_workers;
2667   HeapRegionClaimer _hrclaimer;
2668 
2669 public:
2670   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2671                            ConcurrentMark* cm,
2672                            BitMap* cm_card_bm,
2673                            uint max_worker_id,
2674                            uint n_workers) :
2675       AbstractGangTask("Count Aggregation"),
2676       _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2677       _max_worker_id(max_worker_id),
2678       _active_workers(n_workers),
2679       _hrclaimer(_active_workers) {
2680   }
2681 
2682   void work(uint worker_id) {
2683     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
2684 
2685     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
2686   }
2687 };
2688 
2689 
2690 void ConcurrentMark::aggregate_count_data() {
2691   uint n_workers = _g1h->workers()->active_workers();
2692 
2693   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
2694                                            _max_worker_id, n_workers);
2695 
2696   _g1h->workers()->run_task(&g1_par_agg_task);
2697 }
2698 
2699 // Clear the per-worker arrays used to store the per-region counting data
2700 void ConcurrentMark::clear_all_count_data() {
2701   // Clear the global card bitmap - it will be filled during
2702   // liveness count aggregation (during remark) and the
2703   // final counting task.
2704   _card_bm.clear();
2705 
2706   // Clear the global region bitmap - it will be filled as part
2707   // of the final counting task.
2708   _region_bm.clear();
2709 
2710   uint max_regions = _g1h->max_regions();
2711   assert(_max_worker_id > 0, "uninitialized");
2712 
2713   for (uint i = 0; i < _max_worker_id; i += 1) {
2714     BitMap* task_card_bm = count_card_bitmap_for(i);
2715     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
2716 
2717     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
2718     assert(marked_bytes_array != NULL, "uninitialized");
2719 
2720     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
2721     task_card_bm->clear();
2722   }
2723 }
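     // Each worker id therefore owns a marked-bytes array indexed by region and a
     // private card bitmap; both are cleared here and merged back into the global
     // structures by the aggregation task above.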
2724 
2725 void ConcurrentMark::print_stats() {
2726   if (G1MarkingVerboseLevel > 0) {
2727     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2728     for (size_t i = 0; i < _active_tasks; ++i) {
2729       _tasks[i]->print_stats();
2730       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2731     }
2732   }
2733 }
2734 
2735 // abandon current marking iteration due to a Full GC
2736 void ConcurrentMark::abort() {
2737   if (!cmThread()->during_cycle() || _has_aborted) {
2738     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2739     return;
2740   }
2741 
2742   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2743   // concurrent bitmap clearing.
2744   _nextMarkBitMap->clearAll();
2745 
2746   // Note we cannot clear the previous marking bitmap here
2747   // since VerifyDuringGC verifies the objects marked during
2748   // a full GC against the previous bitmap.
2749 
2750   // Clear the liveness counting data
2751   clear_all_count_data();
2752   // Empty mark stack
2753   reset_marking_state();
2754   for (uint i = 0; i < _max_worker_id; ++i) {
2755     _tasks[i]->clear_region_fields();
2756   }
2757   _first_overflow_barrier_sync.abort();
2758   _second_overflow_barrier_sync.abort();
2759   _has_aborted = true;
2760 
2761   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2762   satb_mq_set.abandon_partial_marking();
2763   // This can be called either during or outside marking, we'll read
2764   // the expected_active value from the SATB queue set.
2765   satb_mq_set.set_active_all_threads(
2766                                  false, /* new active value */
2767                                  satb_mq_set.is_active() /* expected_active */);
2768 
2769   _g1h->trace_heap_after_concurrent_cycle();
2770   _g1h->register_concurrent_cycle_end();
2771 }
2772 
2773 static void print_ms_time_info(const char* prefix, const char* name,
2774                                NumberSeq& ns) {
2775   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2776                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2777   if (ns.num() > 0) {
2778     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2779                            prefix, ns.sd(), ns.maximum());
2780   }
2781 }
2782 
2783 void ConcurrentMark::print_summary_info() {
2784   gclog_or_tty->print_cr(" Concurrent marking:");
2785   print_ms_time_info("  ", "init marks", _init_times);
2786   print_ms_time_info("  ", "remarks", _remark_times);
2787   {
2788     print_ms_time_info("     ", "final marks", _remark_mark_times);
2789     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2790 
2791   }
2792   print_ms_time_info("  ", "cleanups", _cleanup_times);
2793   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
2794                          _total_counting_time,
2795                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2796                           (double)_cleanup_times.num()
2797                          : 0.0));
2798   if (G1ScrubRemSets) {
2799     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2800                            _total_rs_scrub_time,
2801                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2802                             (double)_cleanup_times.num()
2803                            : 0.0));
2804   }
2805   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
2806                          (_init_times.sum() + _remark_times.sum() +
2807                           _cleanup_times.sum())/1000.0);
2808   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
2809                 "(%8.2f s marking).",
2810                 cmThread()->vtime_accum(),
2811                 cmThread()->vtime_mark_accum());
2812 }
2813 
2814 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2815   _parallel_workers->print_worker_threads_on(st);
2816 }
2817 
2818 void ConcurrentMark::print_on_error(outputStream* st) const {
2819   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2820       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
2821   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
2822   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
2823 }
2824 
2825 // We take a break if someone is trying to stop the world.
2826 bool ConcurrentMark::do_yield_check(uint worker_id) {
2827   if (SuspendibleThreadSet::should_yield()) {
2828     if (worker_id == 0) {
2829       _g1h->g1_policy()->record_concurrent_pause();
2830     }
2831     SuspendibleThreadSet::yield();
2832     return true;
2833   } else {
2834     return false;
2835   }
2836 }
2837 
2838 // Closure for iteration over bitmaps
2839 class CMBitMapClosure : public BitMapClosure {
2840 private:
2841   // the bitmap that is being iterated over
2842   CMBitMap*                   _nextMarkBitMap;
2843   ConcurrentMark*             _cm;
2844   CMTask*                     _task;
2845 
2846 public:
2847   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
2848     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2849 
2850   bool do_bit(size_t offset) {
2851     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2852     assert(_nextMarkBitMap->isMarked(addr), "invariant");
2853     assert(addr < _cm->finger(), "invariant");
2854     assert(addr >= _task->finger(), "invariant");
2855 
2856     // We move the task's local finger along.
2857     _task->move_finger_to(addr);
2858 
2859     _task->scan_object(oop(addr));
2860     // we only partially drain the local queue and global stack
2861     _task->drain_local_queue(true);
2862     _task->drain_global_stack(true);
2863 
2864     // if the has_aborted flag has been raised, we need to bail out of
2865     // the iteration
2866     return !_task->has_aborted();
2867   }
2868 };
2869 
2870 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2871   ReferenceProcessor* result = NULL;
2872   if (G1UseConcMarkReferenceProcessing) {
2873     result = g1h->ref_processor_cm();
2874     assert(result != NULL, "should not be NULL");
2875   }
2876   return result;
2877 }
2878 
2879 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2880                                ConcurrentMark* cm,
2881                                CMTask* task)
2882   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2883     _g1h(g1h), _cm(cm), _task(task)
2884 { }
2885 
2886 void CMTask::setup_for_region(HeapRegion* hr) {
2887   assert(hr != NULL,
2888         "claim_region() should have filtered out NULL regions");
2889   _curr_region  = hr;
2890   _finger       = hr->bottom();
2891   update_region_limit();
2892 }
2893 
2894 void CMTask::update_region_limit() {
2895   HeapRegion* hr            = _curr_region;
2896   HeapWord* bottom          = hr->bottom();
2897   HeapWord* limit           = hr->next_top_at_mark_start();
2898 
2899   if (limit == bottom) {
2900     // The region was collected underneath our feet.
2901     // We set the finger to bottom to ensure that the bitmap
2902     // iteration that will follow this will not do anything.
2903     // (this is not a condition that holds when we set the region up,
2904     // as the region is not supposed to be empty in the first place)
2905     _finger = bottom;
2906   } else if (limit >= _region_limit) {
2907     assert(limit >= _finger, "peace of mind");
2908   } else {
2909     assert(limit < _region_limit, "only way to get here");
2910     // This can happen under some pretty unusual circumstances.  An
2911     // evacuation pause empties the region underneath our feet (NTAMS
2912     // at bottom). We then do some allocation in the region (NTAMS
2913     // stays at bottom), followed by the region being used as a GC
2914     // alloc region (NTAMS will move to top() and the objects
2915     // originally below it will be grayed). All objects now marked in
2916     // the region are explicitly grayed, if below the global finger,
2917     // so in fact we do not need to scan anything else. We simply
2918     // set _finger to limit to ensure that the bitmap iteration
2919     // doesn't do anything.
2920     _finger = limit;
2921   }
2922 
2923   _region_limit = limit;
2924 }
2925 
2926 void CMTask::giveup_current_region() {
2927   assert(_curr_region != NULL, "invariant");
2928   clear_region_fields();
2929 }
2930 
2931 void CMTask::clear_region_fields() {
2932   // Set these three fields to values that indicate that we're not
2933   // holding on to a region.
2934   _curr_region   = NULL;
2935   _finger        = NULL;
2936   _region_limit  = NULL;
2937 }
2938 
2939 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2940   if (cm_oop_closure == NULL) {
2941     assert(_cm_oop_closure != NULL, "invariant");
2942   } else {
2943     assert(_cm_oop_closure == NULL, "invariant");
2944   }
2945   _cm_oop_closure = cm_oop_closure;
2946 }
2947 
2948 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2949   guarantee(nextMarkBitMap != NULL, "invariant");
2950   _nextMarkBitMap                = nextMarkBitMap;
2951   clear_region_fields();
2952 
2953   _calls                         = 0;
2954   _elapsed_time_ms               = 0.0;
2955   _termination_time_ms           = 0.0;
2956   _termination_start_time_ms     = 0.0;
2957 }
2958 
2959 bool CMTask::should_exit_termination() {
2960   regular_clock_call();
2961   // This is called when we are in the termination protocol. We should
2962   // quit if, for some reason, this task wants to abort or the global
2963   // stack is not empty (this means that we can get work from it).
2964   return !_cm->mark_stack_empty() || has_aborted();
2965 }
2966 
2967 void CMTask::reached_limit() {
2968   assert(_words_scanned >= _words_scanned_limit ||
2969          _refs_reached >= _refs_reached_limit ,
2970          "shouldn't have been called otherwise");
2971   regular_clock_call();
2972 }
2973 
2974 void CMTask::regular_clock_call() {
2975   if (has_aborted()) return;
2976 
2977   // First, we need to recalculate the words scanned and refs reached
2978   // limits for the next clock call.
2979   recalculate_limits();
2980 
2981   // During the regular clock call we do the following
2982 
2983   // (1) If an overflow has been flagged, then we abort.
2984   if (_cm->has_overflown()) {
2985     set_has_aborted();
2986     return;
2987   }
2988 
2989   // If we are not concurrent (i.e. we're doing remark) we don't need
2990   // to check anything else. The other steps are only needed during
2991   // the concurrent marking phase.
2992   if (!concurrent()) return;
2993 
2994   // (2) If marking has been aborted for Full GC, then we also abort.
2995   if (_cm->has_aborted()) {
2996     set_has_aborted();
2997     return;
2998   }
2999 
3000   double curr_time_ms = os::elapsedVTime() * 1000.0;
3001 
3002   // (3) We check whether we should yield. If we have to, then we abort.
3003   if (SuspendibleThreadSet::should_yield()) {
3004     // We should yield. To do this we abort the task. The caller is
3005     // responsible for yielding.
3006     set_has_aborted();
3007     return;
3008   }
3009 
3010   // (4) We check whether we've reached our time quota. If we have,
3011   // then we abort.
3012   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3013   if (elapsed_time_ms > _time_target_ms) {
3014     set_has_aborted();
3015     _has_timed_out = true;
3016     return;
3017   }
3018 
3019   // (5) Finally, we check whether there are enough completed SATB
3020   // buffers available for processing. If there are, we abort.
3021   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3022   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3023     // We do need to process SATB buffers; we'll abort and restart
3024     // the marking task to do so
3025     set_has_aborted();
3026     return;
3027   }
3028 }
3029 
3030 void CMTask::recalculate_limits() {
3031   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3032   _words_scanned_limit      = _real_words_scanned_limit;
3033 
3034   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3035   _refs_reached_limit       = _real_refs_reached_limit;
3036 }
3037 
3038 void CMTask::decrease_limits() {
3039   // This is called when we believe that we're going to do an infrequent
3040   // operation which will increase the per-byte scanned cost (i.e. move
3041   // entries to/from the global stack). It basically tries to decrease the
3042   // scanning limit so that the clock is called earlier.
3043 
3044   _words_scanned_limit = _real_words_scanned_limit -
3045     3 * words_scanned_period / 4;
3046   _refs_reached_limit  = _real_refs_reached_limit -
3047     3 * refs_reached_period / 4;
3048 }
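
// A worked illustration (the period value below is an assumption, not
// necessarily the real words_scanned_period): if the period were 12K words,
// decrease_limits() would pull _words_scanned_limit back by
// 3 * 12K / 4 = 9K words, so the next regular_clock_call() fires after only
// about a quarter of the usual scanning period.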
3049 
3050 void CMTask::move_entries_to_global_stack() {
3051   // local array where we'll store the entries that will be popped
3052   // from the local queue
3053   oop buffer[global_stack_transfer_size];
3054 
3055   int n = 0;
3056   oop obj;
3057   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3058     buffer[n] = obj;
3059     ++n;
3060   }
3061 
3062   if (n > 0) {
3063     // we popped at least one entry from the local queue
3064 
3065     if (!_cm->mark_stack_push(buffer, n)) {
3066       set_has_aborted();
3067     }
3068   }
3069 
3070   // this operation was quite expensive, so decrease the limits
3071   decrease_limits();
3072 }
3073 
3074 void CMTask::get_entries_from_global_stack() {
3075   // local array where we'll store the entries that will be popped
3076   // from the global stack.
3077   oop buffer[global_stack_transfer_size];
3078   int n;
3079   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3080   assert(n <= global_stack_transfer_size,
3081          "we should not pop more than the given limit");
3082   if (n > 0) {
3083     // yes, we did actually pop at least one entry
3084     for (int i = 0; i < n; ++i) {
3085       bool success = _task_queue->push(buffer[i]);
3086       // We only call this when the local queue is empty or under a
3087       // given target limit. So, we do not expect this push to fail.
3088       assert(success, "invariant");
3089     }
3090   }
3091 
3092   // this operation was quite expensive, so decrease the limits
3093   decrease_limits();
3094 }
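
// In both directions the transfer is chunked: assuming, for illustration, a
// global_stack_transfer_size of 16, a single call moves at most 16 oops
// between the local queue and the global mark stack, so the cost of
// synchronizing on the global stack is paid once per chunk rather than once
// per entry.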
3095 
3096 void CMTask::drain_local_queue(bool partially) {
3097   if (has_aborted()) return;
3098 
3099   // Decide what the target size is, depending on whether we're going to
3100   // drain it partially (so that other tasks can steal if they run out
3101   // of things to do) or totally (at the very end).
3102   size_t target_size;
3103   if (partially) {
3104     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3105   } else {
3106     target_size = 0;
3107   }
3108 
3109   if (_task_queue->size() > target_size) {
3110     oop obj;
3111     bool ret = _task_queue->pop_local(obj);
3112     while (ret) {
3113       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
3114       assert(!_g1h->is_on_master_free_list(
3115                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3116 
3117       scan_object(obj);
3118 
3119       if (_task_queue->size() <= target_size || has_aborted()) {
3120         ret = false;
3121       } else {
3122         ret = _task_queue->pop_local(obj);
3123       }
3124     }
3125   }
3126 }
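
// For instance (illustrative values, not necessarily the defaults): with a
// task queue capacity (max_elems) of 16K entries and GCDrainStackTargetSize
// of 64, the partial target is MIN2(16K / 3, 64) = 64, so a partial drain
// scans objects until the local queue is down to 64 entries, while a total
// drain (partially == false) empties the queue completely.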
3127 
3128 void CMTask::drain_global_stack(bool partially) {
3129   if (has_aborted()) return;
3130 
3131   // We have a policy to drain the local queue before we attempt to
3132   // drain the global stack.
3133   assert(partially || _task_queue->size() == 0, "invariant");
3134 
3135   // Decide what the target size is, depending on whether we're going to
3136   // drain it partially (so that other tasks can steal if they run out
3137   // of things to do) or totally (at the very end).  Notice that,
3138   // because we move entries from the global stack in chunks or
3139   // because another task might be doing the same, we might in fact
3140   // drop below the target. But this is not a problem.
3141   size_t target_size;
3142   if (partially) {
3143     target_size = _cm->partial_mark_stack_size_target();
3144   } else {
3145     target_size = 0;
3146   }
3147 
3148   if (_cm->mark_stack_size() > target_size) {
3149     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3150       get_entries_from_global_stack();
3151       drain_local_queue(partially);
3152     }
3153   }
3154 }
3155 
3156 // The SATB queue code makes several assumptions about whether to call
3157 // the par or non-par versions of its methods. This is why some of the
3158 // code is replicated. We should really get rid of the single-threaded
3159 // version of the code to simplify things.
3160 void CMTask::drain_satb_buffers() {
3161   if (has_aborted()) return;
3162 
3163   // We set this so that the regular clock knows that we're in the
3164   // middle of draining buffers and doesn't set the abort flag when it
3165   // notices that SATB buffers are available for draining. It'd be
3166   // very counterproductive if it did that. :-)
3167   _draining_satb_buffers = true;
3168 
3169   CMSATBBufferClosure satb_cl(this, _g1h);
3170   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3171 
3172   // This keeps claiming and applying the closure to completed buffers
3173   // until we run out of buffers or we need to abort.
3174   while (!has_aborted() &&
3175          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3176     regular_clock_call();
3177   }
3178 
3179   _draining_satb_buffers = false;
3180 
3181   assert(has_aborted() ||
3182          concurrent() ||
3183          satb_mq_set.completed_buffers_num() == 0, "invariant");
3184 
3185   // Again, this was a potentially expensive operation, so decrease the
3186   // limits to get the regular clock call early.
3187   decrease_limits();
3188 }
3189 
3190 void CMTask::print_stats() {
3191   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3192                          _worker_id, _calls);
3193   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3194                          _elapsed_time_ms, _termination_time_ms);
3195   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3196                          _step_times_ms.num(), _step_times_ms.avg(),
3197                          _step_times_ms.sd());
3198   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3199                          _step_times_ms.maximum(), _step_times_ms.sum());
3200 }
3201 
3202 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3203   return _task_queues->steal(worker_id, hash_seed, obj);
3204 }
3205 
3206 /*****************************************************************************
3207 
3208     The do_marking_step(time_target_ms, ...) method is the building
3209     block of the parallel marking framework. It can be called in parallel
3210     with other invocations of do_marking_step() on different tasks
3211     (but only one per task, obviously) and concurrently with the
3212     mutator threads, or during remark, hence it eliminates the need
3213     for two versions of the code. When called during remark, it will
3214     pick up from where the task left off during the concurrent marking
3215     phase. Interestingly, tasks are also claimable during evacuation
3216     pauses, since do_marking_step() ensures that it aborts before
3217     it needs to yield.
3218 
3219     The data structures that it uses to do marking work are the
3220     following:
3221 
3222       (1) Marking Bitmap. If there are gray objects that appear only
3223       on the bitmap (this happens either when dealing with an overflow
3224       or when the initial marking phase has simply marked the roots
3225       and didn't push them on the stack), then tasks claim heap
3226       regions whose bitmap they then scan to find gray objects. A
3227       global finger indicates where the end of the last claimed region
3228       is. A local finger indicates how far into the region a task has
3229       scanned. The two fingers are used to determine how to gray an
3230       object (i.e. whether simply marking it is OK, as it will be
3231       visited by a task in the future, or whether it needs to be also
3232       pushed on a stack).
3233 
3234       (2) Local Queue. The task's local queue, which the task can
3235       access reasonably efficiently. Other tasks can steal from
3236       it when they run out of work. Throughout the marking phase, a
3237       task attempts to keep its local queue short but not totally
3238       empty, so that entries are available for stealing by other
3239       tasks. Only when there is no more work will a task totally
3240       drain its local queue.
3241 
3242       (3) Global Mark Stack. This handles local queue overflow. During
3243       marking only sets of entries are moved between it and the local
3244       queues, as access to it requires a mutex and more fine-grained
3245       interaction with it might cause contention. If it
3246       overflows, then the marking phase should restart and iterate
3247       over the bitmap to identify gray objects. Throughout the marking
3248       phase, tasks attempt to keep the global mark stack at a small
3249       length but not totally empty, so that entries are available for
3250       popping by other tasks. Only when there is no more work will
3251       tasks totally drain the global mark stack.
3252 
3253       (4) SATB Buffer Queue. This is where completed SATB buffers are
3254       made available. Buffers are regularly removed from this queue
3255       and scanned for roots, so that the queue doesn't get too
3256       long. During remark, all completed buffers are processed, as
3257       well as the filled-in parts of any uncompleted buffers.
3258 
3259     The do_marking_step() method tries to abort when the time target
3260     has been reached. There are a few other cases when the
3261     do_marking_step() method also aborts:
3262 
3263       (1) When the marking phase has been aborted (after a Full GC).
3264 
3265       (2) When a global overflow (on the global stack) has been
3266       triggered. Before the task aborts, it will actually sync up with
3267       the other tasks to ensure that all the marking data structures
3268       (local queues, stacks, fingers etc.)  are re-initialized so that
3269       when do_marking_step() completes, the marking phase can
3270       immediately restart.
3271 
3272       (3) When enough completed SATB buffers are available. The
3273       do_marking_step() method only tries to drain SATB buffers right
3274       at the beginning. So, if enough buffers are available, the
3275       marking step aborts and the SATB buffers are processed at
3276       the beginning of the next invocation.
3277 
3278       (4) To yield. When we have to yield, we abort and yield
3279       right at the end of do_marking_step(). This saves us a lot
3280       of hassle as, by yielding, we might allow a Full GC. If this
3281       happens then objects will be compacted underneath our feet, the
3282       heap might shrink, etc. We avoid checking for this by just
3283       aborting and doing the yield right at the end.
3284 
3285     From the above it follows that the do_marking_step() method should
3286     be called in a loop (or, otherwise, regularly) until it completes.
3287 
3288     If a marking step completes without its has_aborted() flag being
3289     true, it means it has completed the current marking phase (and
3290     also all other marking tasks have done so and have all synced up).
3291 
3292     A method called regular_clock_call() is invoked "regularly" (in
3293     sub-ms intervals) throughout marking. It is this clock method that
3294     checks all the abort conditions which were mentioned above and
3295     decides when the task should abort. A work-based scheme is used to
3296     trigger this clock method: when the number of object words the
3297     marking phase has scanned or the number of references the marking
3298     phase has visited reaches a given limit. Additional invocations of
3299     the clock method have been planted in a few other strategic places
3300     too. The initial reason for the clock method was to avoid calling
3301     vtime too regularly, as it is quite expensive. So, once it was in
3302     place, it was natural to piggy-back all the other conditions on it
3303     too and not constantly check them throughout the code.
3304 
3305     If do_termination is true then do_marking_step will enter its
3306     termination protocol.
3307 
3308     The value of is_serial must be true when do_marking_step is being
3309     called serially (i.e. by the VMThread) and do_marking_step should
3310     skip any synchronization in the termination and overflow code.
3311     Examples include the serial remark code and the serial reference
3312     processing closures.
3313 
3314     The value of is_serial must be false when do_marking_step is
3315     being called by any of the worker threads in a work gang.
3316     Examples include the concurrent marking code (CMMarkingTask),
3317     the MT remark code, and the MT reference processing closures.
3318 
3319  *****************************************************************************/
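
// A minimal sketch (illustrative only, not product code) of the calling
// pattern the comment above implies for a concurrent worker; the task
// accessor, the loop condition and the 10.0 ms time target are assumptions:
//
//   CMTask* task = ...;  // this worker's CMTask
//   do {
//     task->do_marking_step(10.0  /* time_target_ms */,
//                           true  /* do_termination */,
//                           false /* is_serial      */);
//     // has_aborted() may mean a time-out, a yield request, an overflow
//     // or pending SATB buffers; in every case we simply call again.
//   } while (task->has_aborted() && !_cm->has_aborted());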
3320 
3321 void CMTask::do_marking_step(double time_target_ms,
3322                              bool do_termination,
3323                              bool is_serial) {
3324   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3325   assert(concurrent() == _cm->concurrent(), "they should be the same");
3326 
3327   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3328   assert(_task_queues != NULL, "invariant");
3329   assert(_task_queue != NULL, "invariant");
3330   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3331 
3332   assert(!_claimed,
3333          "only one thread should claim this task at any one time");
3334 
3335   // OK, this doesn't safeguard against all possible scenarios, as it is
3336   // possible for two threads to set the _claimed flag at the same
3337   // time. But it is only for debugging purposes anyway and it will
3338   // catch most problems.
3339   _claimed = true;
3340 
3341   _start_time_ms = os::elapsedVTime() * 1000.0;
3342 
3343   // If do_stealing is true then do_marking_step will attempt to
3344   // steal work from the other CMTasks. It only makes sense to
3345   // enable stealing when the termination protocol is enabled
3346   // and do_marking_step() is not being called serially.
3347   bool do_stealing = do_termination && !is_serial;
3348 
3349   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
3350   _time_target_ms = time_target_ms - diff_prediction_ms;
3351 
3352   // set up the variables that are used in the work-based scheme to
3353   // call the regular clock method
3354   _words_scanned = 0;
3355   _refs_reached  = 0;
3356   recalculate_limits();
3357 
3358   // clear all flags
3359   clear_has_aborted();
3360   _has_timed_out = false;
3361   _draining_satb_buffers = false;
3362 
3363   ++_calls;
3364 
3365   // Set up the bitmap and oop closures. Anything that uses them is
3366   // eventually called from this method, so it is OK to allocate these
3367   // statically.
3368   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3369   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3370   set_cm_oop_closure(&cm_oop_closure);
3371 
3372   if (_cm->has_overflown()) {
3373     // This can happen if the mark stack overflows during a GC pause
3374     // and this task, after a yield point, restarts. We have to abort
3375     // as we need to get into the overflow protocol which happens
3376     // right at the end of this task.
3377     set_has_aborted();
3378   }
3379 
3380   // First drain any available SATB buffers. After this, we will not
3381   // look at SATB buffers before the next invocation of this method.
3382   // If enough completed SATB buffers are queued up, the regular clock
3383   // will abort this task so that it restarts.
3384   drain_satb_buffers();
3385   // ...then partially drain the local queue and the global stack
3386   drain_local_queue(true);
3387   drain_global_stack(true);
3388 
3389   do {
3390     if (!has_aborted() && _curr_region != NULL) {
3391       // This means that we're already holding on to a region.
3392       assert(_finger != NULL, "if region is not NULL, then the finger "
3393              "should not be NULL either");
3394 
3395       // We might have restarted this task after an evacuation pause
3396       // which might have evacuated the region we're holding on to
3397       // underneath our feet. Let's read its limit again to make sure
3398       // that we do not iterate over a region of the heap that
3399       // contains garbage (update_region_limit() will also move
3400       // _finger to the start of the region if it is found empty).
3401       update_region_limit();
3402       // We will start from _finger not from the start of the region,
3403       // as we might be restarting this task after aborting half-way
3404       // through scanning this region. In this case, _finger points to
3405       // the address where we last found a marked object. If this is a
3406       // fresh region, _finger points to start().
3407       MemRegion mr = MemRegion(_finger, _region_limit);
3408 
3409       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
3410              "humongous regions should go around loop once only");
3411 
3412       // Some special cases:
3413       // If the memory region is empty, we can just give up the region.
3414       // If the current region is humongous then we only need to check
3415       // the bitmap for the bit associated with the start of the object,
3416       // scan the object if it's live, and give up the region.
3417       // Otherwise, let's iterate over the bitmap of the part of the region
3418       // that is left.
3419       // If the iteration is successful, give up the region.
3420       if (mr.is_empty()) {
3421         giveup_current_region();
3422         regular_clock_call();
3423       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
3424         if (_nextMarkBitMap->isMarked(mr.start())) {
3425           // The object is marked - apply the closure
3426           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
3427           bitmap_closure.do_bit(offset);
3428         }
3429         // Even if this task aborted while scanning the humongous object
3430         // we can (and should) give up the current region.
3431         giveup_current_region();
3432         regular_clock_call();
3433       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3434         giveup_current_region();
3435         regular_clock_call();
3436       } else {
3437         assert(has_aborted(), "currently the only way to do so");
3438         // The only way to abort the bitmap iteration is to return
3439         // false from the do_bit() method. However, inside the
3440         // do_bit() method we move the _finger to point to the
3441         // object currently being looked at. So, if we bail out, we
3442         // have definitely set _finger to something non-null.
3443         assert(_finger != NULL, "invariant");
3444 
3445         // Region iteration was actually aborted. So now _finger
3446         // points to the address of the object we last scanned. If we
3447         // leave it there, when we restart this task, we will rescan
3448         // the object. It is easy to avoid this. We move the finger by
3449         // enough to point to the next possible object header (the
3450         // bitmap knows by how much we need to move it as it knows its
3451         // granularity).
3452         assert(_finger < _region_limit, "invariant");
3453         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
3454         // Check if bitmap iteration was aborted while scanning the last object
3455         if (new_finger >= _region_limit) {
3456           giveup_current_region();
3457         } else {
3458           move_finger_to(new_finger);
3459         }
3460       }
3461     }
3462     // At this point we have either completed iterating over the
3463     // region we were holding on to, or we have aborted.
3464 
3465     // We then partially drain the local queue and the global stack.
3466     // (Do we really need this?)
3467     drain_local_queue(true);
3468     drain_global_stack(true);
3469 
3470     // Read the note on the claim_region() method on why it might
3471     // return NULL with potentially more regions available for
3472     // claiming and why we have to check out_of_regions() to determine
3473     // whether we're done or not.
3474     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3475       // We are going to try to claim a new region. We should have
3476       // given up on the previous one.
3477       // Separated the asserts so that we know which one fires.
3478       assert(_curr_region  == NULL, "invariant");
3479       assert(_finger       == NULL, "invariant");
3480       assert(_region_limit == NULL, "invariant");
3481       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3482       if (claimed_region != NULL) {
3483         // Yes, we managed to claim one
3484         setup_for_region(claimed_region);
3485         assert(_curr_region == claimed_region, "invariant");
3486       }
3487       // It is important to call the regular clock here. It might take
3488       // a while to claim a region if, for example, we hit a large
3489       // block of empty regions. So we need to call the regular clock
3490       // method once round the loop to make sure it's called
3491       // frequently enough.
3492       regular_clock_call();
3493     }
3494 
3495     if (!has_aborted() && _curr_region == NULL) {
3496       assert(_cm->out_of_regions(),
3497              "at this point we should be out of regions");
3498     }
3499   } while ( _curr_region != NULL && !has_aborted());
3500 
3501   if (!has_aborted()) {
3502     // We cannot check whether the global stack is empty, since other
3503     // tasks might be pushing objects to it concurrently.
3504     assert(_cm->out_of_regions(),
3505            "at this point we should be out of regions");
3506     // Try to reduce the number of available SATB buffers so that
3507     // remark has less work to do.
3508     drain_satb_buffers();
3509   }
3510 
3511   // Since we've done everything else, we can now totally drain the
3512   // local queue and global stack.
3513   drain_local_queue(false);
3514   drain_global_stack(false);
3515 
3516   // Attempt at work stealing from other tasks' queues.
3517   if (do_stealing && !has_aborted()) {
3518     // We have not aborted. This means that we have finished all that
3519     // we could. Let's try to do some stealing...
3520 
3521     // We cannot check whether the global stack is empty, since other
3522     // tasks might be pushing objects to it concurrently.
3523     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3524            "only way to reach here");
3525     while (!has_aborted()) {
3526       oop obj;
3527       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
3528         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3529                "any stolen object should be marked");
3530         scan_object(obj);
3531 
3532         // And since we're towards the end, let's totally drain the
3533         // local queue and global stack.
3534         drain_local_queue(false);
3535         drain_global_stack(false);
3536       } else {
3537         break;
3538       }
3539     }
3540   }
3541 
3542   // If we are about to wrap up and go into termination, check if we
3543   // should raise the overflow flag.
3544   if (do_termination && !has_aborted()) {
3545     if (_cm->force_overflow()->should_force()) {
3546       _cm->set_has_overflown();
3547       regular_clock_call();
3548     }
3549   }
3550 
3551   // We still haven't aborted. Now, let's try to get into the
3552   // termination protocol.
3553   if (do_termination && !has_aborted()) {
3554     // We cannot check whether the global stack is empty, since other
3555     // tasks might be concurrently pushing objects on it.
3556     // Separated the asserts so that we know which one fires.
3557     assert(_cm->out_of_regions(), "only way to reach here");
3558     assert(_task_queue->size() == 0, "only way to reach here");
3559     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3560 
3561     // The CMTask class also extends the TerminatorTerminator class,
3562     // hence its should_exit_termination() method will also decide
3563     // whether to exit the termination protocol or not.
3564     bool finished = (is_serial ||
3565                      _cm->terminator()->offer_termination(this));
3566     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3567     _termination_time_ms +=
3568       termination_end_time_ms - _termination_start_time_ms;
3569 
3570     if (finished) {
3571       // We're all done.
3572 
3573       if (_worker_id == 0) {
3574         // let's allow task 0 to do this
3575         if (concurrent()) {
3576           assert(_cm->concurrent_marking_in_progress(), "invariant");
3577           // we need to set this to false before the next
3578           // safepoint. This way we ensure that the marking phase
3579           // doesn't observe any more heap expansions.
3580           _cm->clear_concurrent_marking_in_progress();
3581         }
3582       }
3583 
3584       // We can now guarantee that the global stack is empty, since
3585       // all other tasks have finished. We separated the guarantees so
3586       // that, if a condition is false, we can immediately find out
3587       // which one.
3588       guarantee(_cm->out_of_regions(), "only way to reach here");
3589       guarantee(_cm->mark_stack_empty(), "only way to reach here");
3590       guarantee(_task_queue->size() == 0, "only way to reach here");
3591       guarantee(!_cm->has_overflown(), "only way to reach here");
3592       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3593     } else {
3594       // Apparently there's more work to do. Let's abort this task;
3595       // it will be restarted and we can hopefully find more things to do.
3596       set_has_aborted();
3597     }
3598   }
3599 
3600   // Mainly for debugging purposes to make sure that a pointer to the
3601   // closure which was statically allocated in this frame doesn't
3602   // escape it by accident.
3603   set_cm_oop_closure(NULL);
3604   double end_time_ms = os::elapsedVTime() * 1000.0;
3605   double elapsed_time_ms = end_time_ms - _start_time_ms;
3606   // Update the step history.
3607   _step_times_ms.add(elapsed_time_ms);
3608 
3609   if (has_aborted()) {
3610     // The task was aborted for some reason.
3611     if (_has_timed_out) {
3612       double diff_ms = elapsed_time_ms - _time_target_ms;
3613       // Keep statistics of how well we did with respect to hitting
3614       // our target only if we actually timed out (if we aborted for
3615       // other reasons, then the results might get skewed).
3616       _marking_step_diffs_ms.add(diff_ms);
3617     }
3618 
3619     if (_cm->has_overflown()) {
3620       // This is the interesting one. We aborted because a global
3621       // overflow was raised. This means we have to restart the
3622       // marking phase and start iterating over regions. However, in
3623       // order to do this we have to make sure that all tasks stop
3624       // what they are doing and re-initialize in a safe manner. We
3625       // will achieve this with the use of two barrier sync points.
3626 
3627       if (!is_serial) {
3628         // We only need to enter the sync barrier if being called
3629         // from a parallel context
3630         _cm->enter_first_sync_barrier(_worker_id);
3631 
3632         // When we exit this sync barrier we know that all tasks have
3633         // stopped doing marking work. So, it's now safe to
3634         // re-initialize our data structures. At the end of this method,
3635         // task 0 will clear the global data structures.
3636       }
3637 
3638       // We clear the local state of this task...
3639       clear_region_fields();
3640 
3641       if (!is_serial) {
3642         // ...and enter the second barrier.
3643         _cm->enter_second_sync_barrier(_worker_id);
3644       }
3645       // At this point, if we're during the concurrent phase of
3646       // marking, everything has been re-initialized and we're
3647       // ready to restart.
3648     }
3649   }
3650 
3651   _claimed = false;
3652 }
3653 
3654 CMTask::CMTask(uint worker_id,
3655                ConcurrentMark* cm,
3656                size_t* marked_bytes,
3657                BitMap* card_bm,
3658                CMTaskQueue* task_queue,
3659                CMTaskQueueSet* task_queues)
3660   : _g1h(G1CollectedHeap::heap()),
3661     _worker_id(worker_id), _cm(cm),
3662     _claimed(false),
3663     _nextMarkBitMap(NULL), _hash_seed(17),
3664     _task_queue(task_queue),
3665     _task_queues(task_queues),
3666     _cm_oop_closure(NULL),
3667     _marked_bytes_array(marked_bytes),
3668     _card_bm(card_bm) {
3669   guarantee(task_queue != NULL, "invariant");
3670   guarantee(task_queues != NULL, "invariant");
3671 
3672   _marking_step_diffs_ms.add(0.5);
3673 }
3674 
3675 // These are formatting macros that are used below to ensure
3676 // consistent formatting. The *_H_* versions are used to format the
3677 // header for a particular value and they should be kept consistent
3678 // with the corresponding macro. Also note that most of the macros add
3679 // the necessary white space (as a prefix) which makes them a bit
3680 // easier to compose.
3681 
3682 // All the output lines are prefixed with this string to be able to
3683 // identify them easily in a large log file.
3684 #define G1PPRL_LINE_PREFIX            "###"
3685 
3686 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
3687 #ifdef _LP64
3688 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
3689 #else // _LP64
3690 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
3691 #endif // _LP64
3692 
3693 // For per-region info
3694 #define G1PPRL_TYPE_FORMAT            "   %-4s"
3695 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
3696 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
3697 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
3698 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
3699 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
3700 
3701 // For summary info
3702 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
3703 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
3704 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
3705 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
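
// As a purely illustrative example of how these macros compose (out and
// used_bytes are placeholders), a call such as
//
//   out->print_cr(G1PPRL_LINE_PREFIX
//                 G1PPRL_TYPE_FORMAT
//                 G1PPRL_BYTE_FORMAT,
//                 "OLD", used_bytes);
//
// relies on string literal concatenation to build one format string
// ("###" "   %-4s" "  " SIZE_FORMAT_W(9)), which is why each macro supplies
// its own leading white space.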
3706 
3707 G1PrintRegionLivenessInfoClosure::
3708 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
3709   : _out(out),
3710     _total_used_bytes(0), _total_capacity_bytes(0),
3711     _total_prev_live_bytes(0), _total_next_live_bytes(0),
3712     _hum_used_bytes(0), _hum_capacity_bytes(0),
3713     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
3714     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
3715   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3716   MemRegion g1_reserved = g1h->g1_reserved();
3717   double now = os::elapsedTime();
3718 
3719   // Print the header of the output.
3720   _out->cr();
3721   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3722   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
3723                  G1PPRL_SUM_ADDR_FORMAT("reserved")
3724                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
3725                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
3726                  HeapRegion::GrainBytes);
3727   _out->print_cr(G1PPRL_LINE_PREFIX);
3728   _out->print_cr(G1PPRL_LINE_PREFIX
3729                 G1PPRL_TYPE_H_FORMAT
3730                 G1PPRL_ADDR_BASE_H_FORMAT
3731                 G1PPRL_BYTE_H_FORMAT
3732                 G1PPRL_BYTE_H_FORMAT
3733                 G1PPRL_BYTE_H_FORMAT
3734                 G1PPRL_DOUBLE_H_FORMAT
3735                 G1PPRL_BYTE_H_FORMAT
3736                 G1PPRL_BYTE_H_FORMAT,
3737                 "type", "address-range",
3738                 "used", "prev-live", "next-live", "gc-eff",
3739                 "remset", "code-roots");
3740   _out->print_cr(G1PPRL_LINE_PREFIX
3741                 G1PPRL_TYPE_H_FORMAT
3742                 G1PPRL_ADDR_BASE_H_FORMAT
3743                 G1PPRL_BYTE_H_FORMAT
3744                 G1PPRL_BYTE_H_FORMAT
3745                 G1PPRL_BYTE_H_FORMAT
3746                 G1PPRL_DOUBLE_H_FORMAT
3747                 G1PPRL_BYTE_H_FORMAT
3748                 G1PPRL_BYTE_H_FORMAT,
3749                 "", "",
3750                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
3751                 "(bytes)", "(bytes)");
3752 }
3753 
3754 // It takes as a parameter a pointer to one of the _hum_* fields,
3755 // deduces the corresponding value for a region in a humongous region
3756 // series (either the region size, or what's left if the _hum_* field
3757 // is < the region size), and updates the _hum_* field accordingly.
3758 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
3759   size_t bytes = 0;
3760   // The > 0 check is to deal with the prev and next live bytes which
3761   // could be 0.
3762   if (*hum_bytes > 0) {
3763     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
3764     *hum_bytes -= bytes;
3765   }
3766   return bytes;
3767 }
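
// Worked example (sizes are illustrative): with a 1 MB region size, a
// humongous object that uses 2.5 MB spans three regions. Starting from
// _hum_used_bytes == 2.5 MB, three successive calls return 1 MB, 1 MB and
// 0.5 MB, leaving the field at zero ready for the next humongous series.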
3768 
3769 // It deduces the values for a region in a humongous region series
3770 // from the _hum_* fields and updates those accordingly. It assumes
3771 // that the _hum_* fields have already been set up from the "starts
3772 // humongous" region and that we visit the regions in address order.
3773 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
3774                                                      size_t* capacity_bytes,
3775                                                      size_t* prev_live_bytes,
3776                                                      size_t* next_live_bytes) {
3777   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
3778   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
3779   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
3780   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
3781   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
3782 }
3783 
3784 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
3785   const char* type       = r->get_type_str();
3786   HeapWord* bottom       = r->bottom();
3787   HeapWord* end          = r->end();
3788   size_t capacity_bytes  = r->capacity();
3789   size_t used_bytes      = r->used();
3790   size_t prev_live_bytes = r->live_bytes();
3791   size_t next_live_bytes = r->next_live_bytes();
3792   double gc_eff          = r->gc_efficiency();
3793   size_t remset_bytes    = r->rem_set()->mem_size();
3794   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
3795 
3796   if (r->is_starts_humongous()) {
3797     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
3798            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
3799            "they should have been zeroed after the last time we used them");
3800     // Set up the _hum_* fields.
3801     _hum_capacity_bytes  = capacity_bytes;
3802     _hum_used_bytes      = used_bytes;
3803     _hum_prev_live_bytes = prev_live_bytes;
3804     _hum_next_live_bytes = next_live_bytes;
3805     get_hum_bytes(&used_bytes, &capacity_bytes,
3806                   &prev_live_bytes, &next_live_bytes);
3807     end = bottom + HeapRegion::GrainWords;
3808   } else if (r->is_continues_humongous()) {
3809     get_hum_bytes(&used_bytes, &capacity_bytes,
3810                   &prev_live_bytes, &next_live_bytes);
3811     assert(end == bottom + HeapRegion::GrainWords, "invariant");
3812   }
3813 
3814   _total_used_bytes      += used_bytes;
3815   _total_capacity_bytes  += capacity_bytes;
3816   _total_prev_live_bytes += prev_live_bytes;
3817   _total_next_live_bytes += next_live_bytes;
3818   _total_remset_bytes    += remset_bytes;
3819   _total_strong_code_roots_bytes += strong_code_roots_bytes;
3820 
3821   // Print a line for this particular region.
3822   _out->print_cr(G1PPRL_LINE_PREFIX
3823                  G1PPRL_TYPE_FORMAT
3824                  G1PPRL_ADDR_BASE_FORMAT
3825                  G1PPRL_BYTE_FORMAT
3826                  G1PPRL_BYTE_FORMAT
3827                  G1PPRL_BYTE_FORMAT
3828                  G1PPRL_DOUBLE_FORMAT
3829                  G1PPRL_BYTE_FORMAT
3830                  G1PPRL_BYTE_FORMAT,
3831                  type, p2i(bottom), p2i(end),
3832                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
3833                  remset_bytes, strong_code_roots_bytes);
3834 
3835   return false;
3836 }
3837 
3838 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3839   // Add static memory usage to the remembered set sizes.
3840   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
3841   // Print the footer of the output.
3842   _out->print_cr(G1PPRL_LINE_PREFIX);
3843   _out->print_cr(G1PPRL_LINE_PREFIX
3844                  " SUMMARY"
3845                  G1PPRL_SUM_MB_FORMAT("capacity")
3846                  G1PPRL_SUM_MB_PERC_FORMAT("used")
3847                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
3848                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
3849                  G1PPRL_SUM_MB_FORMAT("remset")
3850                  G1PPRL_SUM_MB_FORMAT("code-roots"),
3851                  bytes_to_mb(_total_capacity_bytes),
3852                  bytes_to_mb(_total_used_bytes),
3853                  perc(_total_used_bytes, _total_capacity_bytes),
3854                  bytes_to_mb(_total_prev_live_bytes),
3855                  perc(_total_prev_live_bytes, _total_capacity_bytes),
3856                  bytes_to_mb(_total_next_live_bytes),
3857                  perc(_total_next_live_bytes, _total_capacity_bytes),
3858                  bytes_to_mb(_total_remset_bytes),
3859                  bytes_to_mb(_total_strong_code_roots_bytes));
3860   _out->cr();
3861 }