1 /*
   2  * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMark.inline.hpp"
  30 #include "gc/g1/concurrentMarkThread.inline.hpp"
  31 #include "gc/g1/g1CollectedHeap.inline.hpp"
  32 #include "gc/g1/g1CollectorPolicy.hpp"
  33 #include "gc/g1/g1CollectorState.hpp"
  34 #include "gc/g1/g1ErgoVerbose.hpp"
  35 #include "gc/g1/g1Log.hpp"
  36 #include "gc/g1/g1OopClosures.inline.hpp"
  37 #include "gc/g1/g1RemSet.hpp"
  38 #include "gc/g1/g1StringDedup.hpp"
  39 #include "gc/g1/heapRegion.inline.hpp"
  40 #include "gc/g1/heapRegionManager.inline.hpp"
  41 #include "gc/g1/heapRegionRemSet.hpp"
  42 #include "gc/g1/heapRegionSet.inline.hpp"
  43 #include "gc/g1/suspendibleThreadSet.hpp"
  44 #include "gc/shared/gcId.hpp"
  45 #include "gc/shared/gcTimer.hpp"
  46 #include "gc/shared/gcTrace.hpp"
  47 #include "gc/shared/gcTraceTime.hpp"
  48 #include "gc/shared/genOopClosures.inline.hpp"
  49 #include "gc/shared/referencePolicy.hpp"
  50 #include "gc/shared/strongRootsScope.hpp"
  51 #include "gc/shared/taskqueue.inline.hpp"
  52 #include "gc/shared/vmGCOperations.hpp"
  53 #include "memory/allocation.hpp"
  54 #include "memory/resourceArea.hpp"
  55 #include "oops/oop.inline.hpp"
  56 #include "runtime/atomic.inline.hpp"
  57 #include "runtime/handles.inline.hpp"
  58 #include "runtime/java.hpp"
  59 #include "runtime/prefetch.inline.hpp"
  60 #include "services/memTracker.hpp"
  61 
  62 // Concurrent marking bit map wrapper
  63 
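      // Addresses are mapped to bit offsets by shifting the heap-word offset
      // right by _shifter, i.e. one bitmap bit covers 2^_shifter heap words
      // (in practice the minimum object alignment, see CMBitMap::mark_distance()).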
  64 CMBitMapRO::CMBitMapRO(int shifter) :
  65   _bm(),
  66   _shifter(shifter) {
  67   _bmStartWord = 0;
  68   _bmWordSize = 0;
  69 }
  70 
  71 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  72                                                const HeapWord* limit) const {
  73   // First we must round addr *up* to a possible object boundary.
  74   addr = (HeapWord*)align_size_up((intptr_t)addr,
  75                                   HeapWordSize << _shifter);
  76   size_t addrOffset = heapWordToOffset(addr);
  77   if (limit == NULL) {
  78     limit = _bmStartWord + _bmWordSize;
  79   }
  80   size_t limitOffset = heapWordToOffset(limit);
  81   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  82   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  83   assert(nextAddr >= addr, "get_next_one postcondition");
  84   assert(nextAddr == limit || isMarked(nextAddr),
  85          "get_next_one postcondition");
  86   return nextAddr;
  87 }
  88 
  89 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
  90                                                  const HeapWord* limit) const {
  91   size_t addrOffset = heapWordToOffset(addr);
  92   if (limit == NULL) {
  93     limit = _bmStartWord + _bmWordSize;
  94   }
  95   size_t limitOffset = heapWordToOffset(limit);
  96   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  97   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
   98   assert(nextAddr >= addr, "get_next_zero postcondition");
   99   assert(nextAddr == limit || !isMarked(nextAddr),
  100          "get_next_zero postcondition");
 101   return nextAddr;
 102 }
 103 
 104 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 105   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 106   return (int) (diff >> _shifter);
 107 }
 108 
 109 #ifndef PRODUCT
 110 bool CMBitMapRO::covers(MemRegion heap_rs) const {
 111   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 112   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
 113          "size inconsistency");
 114   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
 115          _bmWordSize  == heap_rs.word_size();
 116 }
 117 #endif
 118 
 119 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
 120   _bm.print_on_error(st, prefix);
 121 }
 122 
 123 size_t CMBitMap::compute_size(size_t heap_size) {
 124   return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
 125 }
 126 
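      // The number of heap bytes covered by a single byte of the mark bitmap:
      // each bit covers MinObjAlignmentInBytes bytes of heap, so one bitmap
      // byte covers MinObjAlignmentInBytes * BitsPerByte heap bytes.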
 127 size_t CMBitMap::mark_distance() {
 128   return MinObjAlignmentInBytes * BitsPerByte;
 129 }
 130 
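      // Hook the bitmap up to the backing storage provided by the mapper. That
      // storage is committed lazily, as the corresponding heap regions are
      // committed; the registered listener clears the bitmap range for any newly
      // committed chunk that is not already zero-filled (see on_commit() below).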
 131 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 132   _bmStartWord = heap.start();
 133   _bmWordSize = heap.word_size();
 134 
 135   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
 136   _bm.set_size(_bmWordSize >> _shifter);
 137 
 138   storage->set_mapping_changed_listener(&_listener);
 139 }
 140 
 141 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 142   if (zero_filled) {
 143     return;
 144   }
 145   // We need to clear the bitmap on commit, removing any existing information.
 146   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 147   _bm->clearRange(mr);
 148 }
 149 
 150 // Closure used for clearing the given mark bitmap.
 151 class ClearBitmapHRClosure : public HeapRegionClosure {
 152  private:
 153   ConcurrentMark* _cm;
 154   CMBitMap* _bitmap;
  155   bool _may_yield;      // The closure may yield during iteration. If marking is aborted after a yield, the iteration is aborted as well.
 156  public:
 157   ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
 158     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
 159   }
 160 
 161   virtual bool doHeapRegion(HeapRegion* r) {
 162     size_t const chunk_size_in_words = M / HeapWordSize;
 163 
 164     HeapWord* cur = r->bottom();
 165     HeapWord* const end = r->end();
 166 
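          // Clear the bitmap for this region in chunks corresponding to 1 MB of
          // heap at a time, so that a closure that may yield can react to a
          // pause request between chunks.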
 167     while (cur < end) {
 168       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 169       _bitmap->clearRange(mr);
 170 
 171       cur += chunk_size_in_words;
 172 
  173       // Abort the iteration if, after yielding, the marking has been aborted.
 174       if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
 175         return true;
 176       }
  177       // Repeat the asserts from before the start of the closure. We will do them
  178       // as asserts here to minimize their overhead in the product build. However, we
  179       // will have them as guarantees at the beginning / end of the bitmap
  180       // clearing to get some checking in the product build.
 181       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
 182       assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
 183     }
 184 
 185     return false;
 186   }
 187 };
 188 
 189 class ParClearNextMarkBitmapTask : public AbstractGangTask {
 190   ClearBitmapHRClosure* _cl;
 191   HeapRegionClaimer     _hrclaimer;
 192   bool                  _suspendible; // If the task is suspendible, workers must join the STS.
 193 
 194 public:
 195   ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
  196       AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}
 197 
 198   void work(uint worker_id) {
 199     SuspendibleThreadSetJoiner sts_join(_suspendible);
 200     G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
 201   }
 202 };
 203 
 204 void CMBitMap::clearAll() {
 205   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 206   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
 207   uint n_workers = g1h->workers()->active_workers();
 208   ParClearNextMarkBitmapTask task(&cl, n_workers, false);
 209   g1h->workers()->run_task(&task);
 210   guarantee(cl.complete(), "Must have completed iteration.");
 211   return;
 212 }
 213 
 214 void CMBitMap::markRange(MemRegion mr) {
 215   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 216   assert(!mr.is_empty(), "unexpected empty region");
 217   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 218           ((HeapWord *) mr.end())),
 219          "markRange memory region end is not card aligned");
 220   // convert address range into offset range
 221   _bm.at_put_range(heapWordToOffset(mr.start()),
 222                    heapWordToOffset(mr.end()), true);
 223 }
 224 
 225 void CMBitMap::clearRange(MemRegion mr) {
 226   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 227   assert(!mr.is_empty(), "unexpected empty region");
 228   // convert address range into offset range
 229   _bm.at_put_range(heapWordToOffset(mr.start()),
 230                    heapWordToOffset(mr.end()), false);
 231 }
 232 
 233 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 234                                             HeapWord* end_addr) {
 235   HeapWord* start = getNextMarkedWordAddress(addr);
 236   start = MIN2(start, end_addr);
 237   HeapWord* end   = getNextUnmarkedWordAddress(start);
 238   end = MIN2(end, end_addr);
 239   assert(start <= end, "Consistency check");
 240   MemRegion mr(start, end);
 241   if (!mr.is_empty()) {
 242     clearRange(mr);
 243   }
 244   return mr;
 245 }
 246 
 247 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 248   _base(NULL), _cm(cm)
 249 {}
 250 
 251 bool CMMarkStack::allocate(size_t capacity) {
 252   // allocate a stack of the requisite depth
 253   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 254   if (!rs.is_reserved()) {
 255     warning("ConcurrentMark MarkStack allocation failure");
 256     return false;
 257   }
 258   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 259   if (!_virtual_space.initialize(rs, rs.size())) {
 260     warning("ConcurrentMark MarkStack backing store failure");
 261     // Release the virtual memory reserved for the marking stack
 262     rs.release();
 263     return false;
 264   }
 265   assert(_virtual_space.committed_size() == rs.size(),
 266          "Didn't reserve backing store for all of ConcurrentMark stack?");
 267   _base = (oop*) _virtual_space.low();
 268   setEmpty();
 269   _capacity = (jint) capacity;
 270   _saved_index = -1;
 271   _should_expand = false;
 272   return true;
 273 }
 274 
 275 void CMMarkStack::expand() {
  276   // Called during remark if we've overflowed the marking stack while marking.
  277   assert(isEmpty(), "stack should have been emptied while handling overflow");
 278   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 279   // Clear expansion flag
 280   _should_expand = false;
 281   if (_capacity == (jint) MarkStackSizeMax) {
 282     if (PrintGCDetails && Verbose) {
 283       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
 284     }
 285     return;
 286   }
 287   // Double capacity if possible
 288   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  289   // Do not give up the existing stack until we have managed to
  290   // allocate the doubled capacity that we want.
 291   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 292                                                            sizeof(oop)));
 293   if (rs.is_reserved()) {
 294     // Release the backing store associated with old stack
 295     _virtual_space.release();
 296     // Reinitialize virtual space for new stack
 297     if (!_virtual_space.initialize(rs, rs.size())) {
 298       fatal("Not enough swap for expanded marking stack capacity");
 299     }
 300     _base = (oop*)(_virtual_space.low());
 301     _index = 0;
 302     _capacity = new_capacity;
 303   } else {
 304     if (PrintGCDetails && Verbose) {
  305       // Failed to double the capacity; continue with the current one.
 306       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
 307                           SIZE_FORMAT "K to " SIZE_FORMAT "K",
 308                           _capacity / K, new_capacity / K);
 309     }
 310   }
 311 }
 312 
 313 void CMMarkStack::set_should_expand() {
  314   // If we're resetting the marking state because of a
 315   // marking stack overflow, record that we should, if
 316   // possible, expand the stack.
 317   _should_expand = _cm->has_overflown();
 318 }
 319 
 320 CMMarkStack::~CMMarkStack() {
 321   if (_base != NULL) {
 322     _base = NULL;
 323     _virtual_space.release();
 324   }
 325 }
 326 
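      // Pushes and pops of whole arrays of entries by parallel workers are
      // serialized with ParGCRareEvent_lock. If a push would exceed the stack's
      // capacity we only record the overflow; the entries are not pushed.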
 327 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 328   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 329   jint start = _index;
 330   jint next_index = start + n;
 331   if (next_index > _capacity) {
 332     _overflow = true;
 333     return;
 334   }
 335   // Otherwise.
 336   _index = next_index;
 337   for (int i = 0; i < n; i++) {
 338     int ind = start + i;
 339     assert(ind < _capacity, "By overflow test above.");
 340     _base[ind] = ptr_arr[i];
 341   }
 342 }
 343 
 344 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 345   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 346   jint index = _index;
 347   if (index == 0) {
 348     *n = 0;
 349     return false;
 350   } else {
 351     int k = MIN2(max, index);
 352     jint  new_ind = index - k;
 353     for (int j = 0; j < k; j++) {
 354       ptr_arr[j] = _base[new_ind + j];
 355     }
 356     _index = new_ind;
 357     *n = k;
 358     return true;
 359   }
 360 }
 361 
 362 void CMMarkStack::note_start_of_gc() {
 363   assert(_saved_index == -1,
 364          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 365   _saved_index = _index;
 366 }
 367 
 368 void CMMarkStack::note_end_of_gc() {
 369   // This is intentionally a guarantee, instead of an assert. If we
 370   // accidentally add something to the mark stack during GC, it
  371   // will be a correctness issue, so it's better if we crash. We'll
 372   // only check this once per GC anyway, so it won't be a performance
 373   // issue in any way.
 374   guarantee(_saved_index == _index,
 375             "saved index: %d index: %d", _saved_index, _index);
 376   _saved_index = -1;
 377 }
 378 
 379 CMRootRegions::CMRootRegions() :
 380   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 381   _should_abort(false),  _next_survivor(NULL) { }
 382 
 383 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 384   _young_list = g1h->young_list();
 385   _cm = cm;
 386 }
 387 
 388 void CMRootRegions::prepare_for_scan() {
 389   assert(!scan_in_progress(), "pre-condition");
 390 
 391   // Currently, only survivors can be root regions.
 392   assert(_next_survivor == NULL, "pre-condition");
 393   _next_survivor = _young_list->first_survivor_region();
 394   _scan_in_progress = (_next_survivor != NULL);
 395   _should_abort = false;
 396 }
 397 
 398 HeapRegion* CMRootRegions::claim_next() {
 399   if (_should_abort) {
 400     // If someone has set the should_abort flag, we return NULL to
 401     // force the caller to bail out of their loop.
 402     return NULL;
 403   }
 404 
 405   // Currently, only survivors can be root regions.
 406   HeapRegion* res = _next_survivor;
 407   if (res != NULL) {
 408     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 409     // Read it again in case it changed while we were waiting for the lock.
 410     res = _next_survivor;
 411     if (res != NULL) {
 412       if (res == _young_list->last_survivor_region()) {
 413         // We just claimed the last survivor so store NULL to indicate
 414         // that we're done.
 415         _next_survivor = NULL;
 416       } else {
 417         _next_survivor = res->get_next_young_region();
 418       }
 419     } else {
 420       // Someone else claimed the last survivor while we were trying
  421       // to take the lock, so there is nothing else to do.
 422     }
 423   }
 424   assert(res == NULL || res->is_survivor(), "post-condition");
 425 
 426   return res;
 427 }
 428 
 429 void CMRootRegions::scan_finished() {
 430   assert(scan_in_progress(), "pre-condition");
 431 
 432   // Currently, only survivors can be root regions.
 433   if (!_should_abort) {
 434     assert(_next_survivor == NULL, "we should have claimed all survivors");
 435   }
 436   _next_survivor = NULL;
 437 
 438   {
 439     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 440     _scan_in_progress = false;
 441     RootRegionScan_lock->notify_all();
 442   }
 443 }
 444 
 445 bool CMRootRegions::wait_until_scan_finished() {
 446   if (!scan_in_progress()) return false;
 447 
 448   {
 449     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 450     while (scan_in_progress()) {
 451       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 452     }
 453   }
 454   return true;
 455 }
 456 
 457 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 458 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 459 #endif // _MSC_VER
 460 
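      // Heuristic: use roughly one concurrent marking thread for every four
      // parallel GC threads, and always at least one.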
 461 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 462   return MAX2((n_par_threads + 2) / 4, 1U);
 463 }
 464 
 465 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 466   _g1h(g1h),
 467   _markBitMap1(),
 468   _markBitMap2(),
 469   _parallel_marking_threads(0),
 470   _max_parallel_marking_threads(0),
 471   _sleep_factor(0.0),
 472   _marking_task_overhead(1.0),
 473   _cleanup_sleep_factor(0.0),
 474   _cleanup_task_overhead(1.0),
 475   _cleanup_list("Cleanup List"),
 476   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 477   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
 478             CardTableModRefBS::card_shift,
 479             false /* in_resource_area*/),
 480 
 481   _prevMarkBitMap(&_markBitMap1),
 482   _nextMarkBitMap(&_markBitMap2),
 483 
 484   _markStack(this),
 485   // _finger set in set_non_marking_state
 486 
 487   _max_worker_id(ParallelGCThreads),
 488   // _active_tasks set in set_non_marking_state
 489   // _tasks set inside the constructor
 490   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 491   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 492 
 493   _has_overflown(false),
 494   _concurrent(false),
 495   _has_aborted(false),
 496   _restart_for_overflow(false),
 497   _concurrent_marking_in_progress(false),
 498 
 499   // _verbose_level set below
 500 
 501   _init_times(),
 502   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 503   _cleanup_times(),
 504   _total_counting_time(0.0),
 505   _total_rs_scrub_time(0.0),
 506 
 507   _parallel_workers(NULL),
 508 
 509   _count_card_bitmaps(NULL),
 510   _count_marked_bytes(NULL),
 511   _completed_initialization(false) {
 512 
 513   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 514   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 515 
 516   // Create & start a ConcurrentMark thread.
 517   _cmThread = new ConcurrentMarkThread(this);
 518   assert(cmThread() != NULL, "CM Thread should have been created");
 519   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 520   if (_cmThread->osthread() == NULL) {
 521       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 522   }
 523 
 524   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 525   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 526   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 527 
 528   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 529   satb_qs.set_buffer_size(G1SATBBufferSize);
 530 
 531   _root_regions.init(_g1h, this);
 532 
 533   if (ConcGCThreads > ParallelGCThreads) {
 534     warning("Can't have more ConcGCThreads (%u) "
 535             "than ParallelGCThreads (%u).",
 536             ConcGCThreads, ParallelGCThreads);
 537     return;
 538   }
 539   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 540     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 541     // if both are set
 542     _sleep_factor             = 0.0;
 543     _marking_task_overhead    = 1.0;
 544   } else if (G1MarkingOverheadPercent > 0) {
 545     // We will calculate the number of parallel marking threads based
 546     // on a target overhead with respect to the soft real-time goal
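          // For example (illustrative numbers only): with MaxGCPauseMillis = 200,
          // GCPauseIntervalMillis = 1000 and G1MarkingOverheadPercent = 10, the
          // overall marking budget is 200 * 0.10 / 1000 = 2% of the machine's CPU
          // capacity; the marking thread count and the per-thread sleep factor
          // are then derived from that budget and the processor count below.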
 547     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 548     double overall_cm_overhead =
 549       (double) MaxGCPauseMillis * marking_overhead /
 550       (double) GCPauseIntervalMillis;
 551     double cpu_ratio = 1.0 / (double) os::processor_count();
 552     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 553     double marking_task_overhead =
 554       overall_cm_overhead / marking_thread_num *
 555                                               (double) os::processor_count();
 556     double sleep_factor =
 557                        (1.0 - marking_task_overhead) / marking_task_overhead;
 558 
 559     FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
 560     _sleep_factor             = sleep_factor;
 561     _marking_task_overhead    = marking_task_overhead;
 562   } else {
 563     // Calculate the number of parallel marking threads by scaling
 564     // the number of parallel GC threads.
 565     uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
 566     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 567     _sleep_factor             = 0.0;
 568     _marking_task_overhead    = 1.0;
 569   }
 570 
 571   assert(ConcGCThreads > 0, "Should have been set");
 572   _parallel_marking_threads = ConcGCThreads;
 573   _max_parallel_marking_threads = _parallel_marking_threads;
 574 
 575   if (parallel_marking_threads() > 1) {
 576     _cleanup_task_overhead = 1.0;
 577   } else {
 578     _cleanup_task_overhead = marking_task_overhead();
 579   }
 580   _cleanup_sleep_factor =
 581                    (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 582 
 583 #if 0
 584   gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 585   gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 586   gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 587   gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 588   gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 589 #endif
 590 
 591   _parallel_workers = new WorkGang("G1 Marker",
 592        _max_parallel_marking_threads, false, true);
 593   if (_parallel_workers == NULL) {
 594     vm_exit_during_initialization("Failed necessary allocation.");
 595   } else {
 596     _parallel_workers->initialize_workers();
 597   }
 598 
 599   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 600     size_t mark_stack_size =
 601       MIN2(MarkStackSizeMax,
 602           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 603     // Verify that the calculated value for MarkStackSize is in range.
 604     // It would be nice to use the private utility routine from Arguments.
 605     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 606       warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 607               "must be between 1 and " SIZE_FORMAT,
 608               mark_stack_size, MarkStackSizeMax);
 609       return;
 610     }
 611     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 612   } else {
 613     // Verify MarkStackSize is in range.
 614     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 615       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 616         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 617           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 618                   "must be between 1 and " SIZE_FORMAT,
 619                   MarkStackSize, MarkStackSizeMax);
 620           return;
 621         }
 622       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 623         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 624           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 625                   " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 626                   MarkStackSize, MarkStackSizeMax);
 627           return;
 628         }
 629       }
 630     }
 631   }
 632 
 633   if (!_markStack.allocate(MarkStackSize)) {
 634     warning("Failed to allocate CM marking stack");
 635     return;
 636   }
 637 
 638   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 639   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 640 
 641   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 642   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 643 
 644   BitMap::idx_t card_bm_size = _card_bm.size();
 645 
 646   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 647   _active_tasks = _max_worker_id;
 648 
 649   uint max_regions = _g1h->max_regions();
 650   for (uint i = 0; i < _max_worker_id; ++i) {
 651     CMTaskQueue* task_queue = new CMTaskQueue();
 652     task_queue->initialize();
 653     _task_queues->register_queue(i, task_queue);
 654 
 655     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 656     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 657 
 658     _tasks[i] = new CMTask(i, this,
 659                            _count_marked_bytes[i],
 660                            &_count_card_bitmaps[i],
 661                            task_queue, _task_queues);
 662 
 663     _accum_task_vtime[i] = 0.0;
 664   }
 665 
 666   // Calculate the card number for the bottom of the heap. Used
 667   // in biasing indexes into the accounting card bitmaps.
 668   _heap_bottom_card_num =
 669     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 670                                 CardTableModRefBS::card_shift);
 671 
 672   // Clear all the liveness counting data
 673   clear_all_count_data();
 674 
 675   // so that the call below can read a sensible value
 676   _heap_start = g1h->reserved_region().start();
 677   set_non_marking_state();
 678   _completed_initialization = true;
 679 }
 680 
 681 void ConcurrentMark::reset() {
 682   // Starting values for these two. This should be called in a STW
 683   // phase.
 684   MemRegion reserved = _g1h->g1_reserved();
 685   _heap_start = reserved.start();
 686   _heap_end   = reserved.end();
 687 
 688   // Separated the asserts so that we know which one fires.
 689   assert(_heap_start != NULL, "heap bounds should look ok");
 690   assert(_heap_end != NULL, "heap bounds should look ok");
 691   assert(_heap_start < _heap_end, "heap bounds should look ok");
 692 
 693   // Reset all the marking data structures and any necessary flags
 694   reset_marking_state();
 695 
 696   // We do reset all of them, since different phases will use
 697   // different number of active threads. So, it's easiest to have all
 698   // of them ready.
 699   for (uint i = 0; i < _max_worker_id; ++i) {
 700     _tasks[i]->reset(_nextMarkBitMap);
 701   }
 702 
  703   // We need this to make sure that the flag is on during the evacuation
  704   // pause that has the initial mark piggy-backed on it.
 705   set_concurrent_marking_in_progress();
 706 }
 707 
 708 
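      // Reset the global marking data structures (the mark stack, the global
      // finger and the per-worker task queues) so that a new round of marking
      // can start.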
 709 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 710   _markStack.set_should_expand();
 711   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 712   if (clear_overflow) {
 713     clear_has_overflown();
 714   } else {
 715     assert(has_overflown(), "pre-condition");
 716   }
 717   _finger = _heap_start;
 718 
 719   for (uint i = 0; i < _max_worker_id; ++i) {
 720     CMTaskQueue* queue = _task_queues->queue(i);
 721     queue->set_empty();
 722   }
 723 }
 724 
 725 void ConcurrentMark::set_concurrency(uint active_tasks) {
 726   assert(active_tasks <= _max_worker_id, "we should not have more");
 727 
 728   _active_tasks = active_tasks;
 729   // Need to update the three data structures below according to the
 730   // number of active threads for this phase.
 731   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 732   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 733   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 734 }
 735 
 736 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 737   set_concurrency(active_tasks);
 738 
 739   _concurrent = concurrent;
 740   // We propagate this to all tasks, not just the active ones.
 741   for (uint i = 0; i < _max_worker_id; ++i)
 742     _tasks[i]->set_concurrent(concurrent);
 743 
 744   if (concurrent) {
 745     set_concurrent_marking_in_progress();
 746   } else {
 747     // We currently assume that the concurrent flag has been set to
 748     // false before we start remark. At this point we should also be
 749     // in a STW phase.
 750     assert(!concurrent_marking_in_progress(), "invariant");
 751     assert(out_of_regions(),
 752            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 753            p2i(_finger), p2i(_heap_end));
 754   }
 755 }
 756 
 757 void ConcurrentMark::set_non_marking_state() {
 758   // We set the global marking state to some default values when we're
 759   // not doing marking.
 760   reset_marking_state();
 761   _active_tasks = 0;
 762   clear_concurrent_marking_in_progress();
 763 }
 764 
 765 ConcurrentMark::~ConcurrentMark() {
 766   // The ConcurrentMark instance is never freed.
 767   ShouldNotReachHere();
 768 }
 769 
 770 void ConcurrentMark::clearNextBitmap() {
 771   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 772 
  773   // Make sure that the concurrent mark thread still appears to be in
  774   // the current cycle.
 775   guarantee(cmThread()->during_cycle(), "invariant");
 776 
 777   // We are finishing up the current cycle by clearing the next
 778   // marking bitmap and getting it ready for the next cycle. During
 779   // this time no other cycle can start. So, let's make sure that this
 780   // is the case.
 781   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 782 
 783   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
 784   ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
 785   _parallel_workers->run_task(&task);
 786 
 787   // Clear the liveness counting data. If the marking has been aborted, the abort()
 788   // call already did that.
 789   if (cl.complete()) {
 790     clear_all_count_data();
 791   }
 792 
 793   // Repeat the asserts from above.
 794   guarantee(cmThread()->during_cycle(), "invariant");
 795   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 796 }
 797 
 798 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 799   CMBitMap* _bitmap;
 800   bool _error;
 801  public:
 802   CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
 803   }
 804 
 805   virtual bool doHeapRegion(HeapRegion* r) {
  806     // This closure can be called concurrently with the mutator, so we must make sure
  807     // that the result of the getNextMarkedWordAddress() call is compared against the
  808     // limit value we passed in, in order to detect whether any marked bits were found.
 809     // end never changes in G1.
 810     HeapWord* end = r->end();
 811     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 812   }
 813 };
 814 
 815 bool ConcurrentMark::nextMarkBitmapIsClear() {
 816   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 817   _g1h->heap_region_iterate(&cl);
 818   return cl.complete();
 819 }
 820 
 821 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 822 public:
 823   bool doHeapRegion(HeapRegion* r) {
 824     r->note_start_of_marking();
 825     return false;
 826   }
 827 };
 828 
 829 void ConcurrentMark::checkpointRootsInitialPre() {
 830   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 831   G1CollectorPolicy* g1p = g1h->g1_policy();
 832 
 833   _has_aborted = false;
 834 
 835   // Initialize marking structures. This has to be done in a STW phase.
 836   reset();
 837 
 838   // For each region note start of marking.
 839   NoteStartOfMarkHRClosure startcl;
 840   g1h->heap_region_iterate(&startcl);
 841 }
 842 
 843 
 844 void ConcurrentMark::checkpointRootsInitialPost() {
 845   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 846 
 847   // If we force an overflow during remark, the remark operation will
 848   // actually abort and we'll restart concurrent marking. If we always
 849   // force an overflow during remark we'll never actually complete the
 850   // marking phase. So, we initialize this here, at the start of the
  851   // cycle, so that the number of remaining forced overflows decreases at
  852   // every remark and we'll eventually not need to force one.
 853   force_overflow_stw()->init();
 854 
 855   // Start Concurrent Marking weak-reference discovery.
 856   ReferenceProcessor* rp = g1h->ref_processor_cm();
 857   // enable ("weak") refs discovery
 858   rp->enable_discovery();
 859   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 860 
 861   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  862   // This is the start of the marking cycle. We expect all
  863   // threads to have SATB queues with active set to false.
 864   satb_mq_set.set_active_all_threads(true, /* new active value */
 865                                      false /* expected_active */);
 866 
 867   _root_regions.prepare_for_scan();
 868 
 869   // update_g1_committed() will be called at the end of an evac pause
 870   // when marking is on. So, it's also called at the end of the
 871   // initial-mark pause to update the heap end, if the heap expands
 872   // during it. No need to call it here.
 873 }
 874 
 875 /*
 876  * Notice that in the next two methods, we actually leave the STS
 877  * during the barrier sync and join it immediately afterwards. If we
 878  * do not do this, the following deadlock can occur: one thread could
  879  * be in the barrier sync code, waiting for the other threads to
  880  * sync up, while another thread could be trying to yield and, at the
  881  * same time, also waiting for the other threads to sync up.
 882  *
 883  * Note, however, that this code is also used during remark and in
 884  * this case we should not attempt to leave / enter the STS, otherwise
 885  * we'll either hit an assert (debug / fastdebug) or deadlock
 886  * (product). So we should only leave / enter the STS if we are
 887  * operating concurrently.
 888  *
  889  * Because the thread that does the sync barrier has left the STS, it
  890  * is possible for a Full GC or an evacuation pause to occur while it
  891  * is suspended. This is actually safe, since entering the sync
 892  * barrier is one of the last things do_marking_step() does, and it
 893  * doesn't manipulate any data structures afterwards.
 894  */
 895 
 896 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 897   bool barrier_aborted;
 898   {
 899     SuspendibleThreadSetLeaver sts_leave(concurrent());
 900     barrier_aborted = !_first_overflow_barrier_sync.enter();
 901   }
 902 
 903   // at this point everyone should have synced up and not be doing any
 904   // more work
 905 
 906   if (barrier_aborted) {
 907     // If the barrier aborted we ignore the overflow condition and
 908     // just abort the whole marking phase as quickly as possible.
 909     return;
 910   }
 911 
 912   // If we're executing the concurrent phase of marking, reset the marking
 913   // state; otherwise the marking state is reset after reference processing,
 914   // during the remark pause.
 915   // If we reset here as a result of an overflow during the remark we will
 916   // see assertion failures from any subsequent set_concurrency_and_phase()
 917   // calls.
 918   if (concurrent()) {
  919     // let the task associated with worker 0 do this
 920     if (worker_id == 0) {
 921       // task 0 is responsible for clearing the global data structures
 922       // We should be here because of an overflow. During STW we should
 923       // not clear the overflow flag since we rely on it being true when
 924       // we exit this method to abort the pause and restart concurrent
 925       // marking.
 926       reset_marking_state(true /* clear_overflow */);
 927       force_overflow()->update();
 928 
 929       if (G1Log::fine()) {
 930         gclog_or_tty->gclog_stamp();
 931         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 932       }
 933     }
 934   }
 935 
  936   // after this, each task should reset its own data structures and
  937   // then go into the second barrier
 938 }
 939 
 940 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 941   SuspendibleThreadSetLeaver sts_leave(concurrent());
 942   _second_overflow_barrier_sync.enter();
 943 
 944   // at this point everything should be re-initialized and ready to go
 945 }
 946 
 947 #ifndef PRODUCT
 948 void ForceOverflowSettings::init() {
 949   _num_remaining = G1ConcMarkForceOverflow;
 950   _force = false;
 951   update();
 952 }
 953 
 954 void ForceOverflowSettings::update() {
 955   if (_num_remaining > 0) {
 956     _num_remaining -= 1;
 957     _force = true;
 958   } else {
 959     _force = false;
 960   }
 961 }
 962 
 963 bool ForceOverflowSettings::should_force() {
 964   if (_force) {
 965     _force = false;
 966     return true;
 967   } else {
 968     return false;
 969   }
 970 }
 971 #endif // !PRODUCT
 972 
 973 class CMConcurrentMarkingTask: public AbstractGangTask {
 974 private:
 975   ConcurrentMark*       _cm;
 976   ConcurrentMarkThread* _cmt;
 977 
 978 public:
 979   void work(uint worker_id) {
 980     assert(Thread::current()->is_ConcurrentGC_thread(),
 981            "this should only be done by a conc GC thread");
 982     ResourceMark rm;
 983 
 984     double start_vtime = os::elapsedVTime();
 985 
 986     {
 987       SuspendibleThreadSetJoiner sts_join;
 988 
 989       assert(worker_id < _cm->active_tasks(), "invariant");
 990       CMTask* the_task = _cm->task(worker_id);
 991       the_task->record_start_time();
 992       if (!_cm->has_aborted()) {
 993         do {
 994           double start_vtime_sec = os::elapsedVTime();
 995           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 996 
 997           the_task->do_marking_step(mark_step_duration_ms,
 998                                     true  /* do_termination */,
 999                                     false /* is_serial*/);
1000 
1001           double end_vtime_sec = os::elapsedVTime();
1002           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1003           _cm->clear_has_overflown();
1004 
1005           _cm->do_yield_check(worker_id);
1006 
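                // If this task had to abort (e.g. because its time budget for the
                // step expired) while marking as a whole has not aborted, throttle
                // the worker by sleeping for a period proportional to the virtual
                // time just spent, scaled by the configured sleep factor.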
1007           jlong sleep_time_ms;
1008           if (!_cm->has_aborted() && the_task->has_aborted()) {
1009             sleep_time_ms =
1010               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1011             {
1012               SuspendibleThreadSetLeaver sts_leave;
1013               os::sleep(Thread::current(), sleep_time_ms, false);
1014             }
1015           }
1016         } while (!_cm->has_aborted() && the_task->has_aborted());
1017       }
1018       the_task->record_end_time();
1019       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1020     }
1021 
1022     double end_vtime = os::elapsedVTime();
1023     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1024   }
1025 
1026   CMConcurrentMarkingTask(ConcurrentMark* cm,
1027                           ConcurrentMarkThread* cmt) :
1028       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1029 
1030   ~CMConcurrentMarkingTask() { }
1031 };
1032 
1033 // Calculates the number of active workers for a concurrent
1034 // phase.
1035 uint ConcurrentMark::calc_parallel_marking_threads() {
1036   uint n_conc_workers = 0;
1037   if (!UseDynamicNumberOfGCThreads ||
1038       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1039        !ForceDynamicNumberOfGCThreads)) {
1040     n_conc_workers = max_parallel_marking_threads();
1041   } else {
1042     n_conc_workers =
1043       AdaptiveSizePolicy::calc_default_active_workers(
1044                                    max_parallel_marking_threads(),
1045                                    1, /* Minimum workers */
1046                                    parallel_marking_threads(),
1047                                    Threads::number_of_non_daemon_threads());
1048     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1049     // that scaling has already gone into "_max_parallel_marking_threads".
1050   }
1051   assert(n_conc_workers > 0, "Always need at least 1");
1052   return n_conc_workers;
1053 }
1054 
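      // Scan a single root region (currently always a survivor region) from
      // bottom to top, marking every object referenced from it so that those
      // referents are treated as live by the rest of concurrent marking.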
1055 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1056   // Currently, only survivors can be root regions.
1057   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1058   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1059 
1060   const uintx interval = PrefetchScanIntervalInBytes;
1061   HeapWord* curr = hr->bottom();
1062   const HeapWord* end = hr->top();
1063   while (curr < end) {
1064     Prefetch::read(curr, interval);
1065     oop obj = oop(curr);
1066     int size = obj->oop_iterate_size(&cl);
1067     assert(size == obj->size(), "sanity");
1068     curr += size;
1069   }
1070 }
1071 
1072 class CMRootRegionScanTask : public AbstractGangTask {
1073 private:
1074   ConcurrentMark* _cm;
1075 
1076 public:
1077   CMRootRegionScanTask(ConcurrentMark* cm) :
1078     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1079 
1080   void work(uint worker_id) {
1081     assert(Thread::current()->is_ConcurrentGC_thread(),
1082            "this should only be done by a conc GC thread");
1083 
1084     CMRootRegions* root_regions = _cm->root_regions();
1085     HeapRegion* hr = root_regions->claim_next();
1086     while (hr != NULL) {
1087       _cm->scanRootRegion(hr, worker_id);
1088       hr = root_regions->claim_next();
1089     }
1090   }
1091 };
1092 
1093 void ConcurrentMark::scanRootRegions() {
1094   double scan_start = os::elapsedTime();
1095 
1096   // Start of concurrent marking.
1097   ClassLoaderDataGraph::clear_claimed_marks();
1098 
1099   // scan_in_progress() will have been set to true only if there was
1100   // at least one root region to scan. So, if it's false, we
1101   // should not attempt to do any further work.
1102   if (root_regions()->scan_in_progress()) {
1103     if (G1Log::fine()) {
1104       gclog_or_tty->gclog_stamp();
1105       gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
1106     }
1107 
1108     _parallel_marking_threads = calc_parallel_marking_threads();
1109     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1110            "Maximum number of marking threads exceeded");
1111     uint active_workers = MAX2(1U, parallel_marking_threads());
1112 
1113     CMRootRegionScanTask task(this);
1114     _parallel_workers->set_active_workers(active_workers);
1115     _parallel_workers->run_task(&task);
1116 
1117     if (G1Log::fine()) {
1118       gclog_or_tty->gclog_stamp();
1119       gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf secs]", os::elapsedTime() - scan_start);
1120     }
1121 
1122     // It's possible that has_aborted() is true here without actually
1123     // aborting the survivor scan earlier. This is OK as it's
1124     // mainly used for sanity checking.
1125     root_regions()->scan_finished();
1126   }
1127 }
1128 
1129 void ConcurrentMark::markFromRoots() {
1130   // we might be tempted to assert that:
1131   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1132   //        "inconsistent argument?");
1133   // However that wouldn't be right, because it's possible that
1134   // a safepoint is indeed in progress as a younger generation
1135   // stop-the-world GC happens even as we mark in this generation.
1136 
1137   _restart_for_overflow = false;
1138   force_overflow_conc()->init();
1139 
1140   // _g1h has _n_par_threads
1141   _parallel_marking_threads = calc_parallel_marking_threads();
1142   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1143     "Maximum number of marking threads exceeded");
1144 
1145   uint active_workers = MAX2(1U, parallel_marking_threads());
1146   assert(active_workers > 0, "Should have been set");
1147 
1148   // Parallel task terminator is set in "set_concurrency_and_phase()"
1149   set_concurrency_and_phase(active_workers, true /* concurrent */);
1150 
1151   CMConcurrentMarkingTask markingTask(this, cmThread());
1152   _parallel_workers->set_active_workers(active_workers);
1153   _parallel_workers->run_task(&markingTask);
1154   print_stats();
1155 }
1156 
1157 // Helper class to get rid of some boilerplate code.
1158 class G1CMTraceTime : public StackObj {
1159   GCTraceTimeImpl _gc_trace_time;
1160   static bool doit_and_prepend(bool doit) {
1161     if (doit) {
1162       gclog_or_tty->put(' ');
1163     }
1164     return doit;
1165   }
1166 
1167  public:
1168   G1CMTraceTime(const char* title, bool doit)
1169     : _gc_trace_time(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm()) {
1170   }
1171 };
1172 
1173 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1174   // world is stopped at this checkpoint
1175   assert(SafepointSynchronize::is_at_safepoint(),
1176          "world should be stopped");
1177 
1178   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1179 
1180   // If a full collection has happened, we shouldn't do this.
1181   if (has_aborted()) {
1182     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1183     return;
1184   }
1185 
1186   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1187 
1188   if (VerifyDuringGC) {
1189     HandleMark hm;  // handle scope
1190     g1h->prepare_for_verify();
1191     Universe::verify(VerifyOption_G1UsePrevMarking,
1192                      " VerifyDuringGC:(before)");
1193   }
1194   g1h->check_bitmaps("Remark Start");
1195 
1196   G1CollectorPolicy* g1p = g1h->g1_policy();
1197   g1p->record_concurrent_mark_remark_start();
1198 
1199   double start = os::elapsedTime();
1200 
1201   checkpointRootsFinalWork();
1202 
1203   double mark_work_end = os::elapsedTime();
1204 
1205   weakRefsWork(clear_all_soft_refs);
1206 
1207   if (has_overflown()) {
1208     // Oops.  We overflowed.  Restart concurrent marking.
1209     _restart_for_overflow = true;
1210     if (G1TraceMarkStackOverflow) {
1211       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1212     }
1213 
1214     // Verify the heap w.r.t. the previous marking bitmap.
1215     if (VerifyDuringGC) {
1216       HandleMark hm;  // handle scope
1217       g1h->prepare_for_verify();
1218       Universe::verify(VerifyOption_G1UsePrevMarking,
1219                        " VerifyDuringGC:(overflow)");
1220     }
1221 
1222     // Clear the marking state because we will be restarting
1223     // marking due to overflowing the global mark stack.
1224     reset_marking_state();
1225   } else {
1226     {
1227       G1CMTraceTime trace("GC aggregate-data", G1Log::finer());
1228 
1229       // Aggregate the per-task counting data that we have accumulated
1230       // while marking.
1231       aggregate_count_data();
1232     }
1233 
1234     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1235     // We're done with marking.
 1236     // This is the end of the marking cycle. We expect all
 1237     // threads to have SATB queues with active set to true.
1238     satb_mq_set.set_active_all_threads(false, /* new active value */
1239                                        true /* expected_active */);
1240 
1241     if (VerifyDuringGC) {
1242       HandleMark hm;  // handle scope
1243       g1h->prepare_for_verify();
1244       Universe::verify(VerifyOption_G1UseNextMarking,
1245                        " VerifyDuringGC:(after)");
1246     }
1247     g1h->check_bitmaps("Remark End");
1248     assert(!restart_for_overflow(), "sanity");
1249     // Completely reset the marking state since marking completed
1250     set_non_marking_state();
1251   }
1252 
1253   // Expand the marking stack, if we have to and if we can.
1254   if (_markStack.should_expand()) {
1255     _markStack.expand();
1256   }
1257 
1258   // Statistics
1259   double now = os::elapsedTime();
1260   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1261   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1262   _remark_times.add((now - start) * 1000.0);
1263 
1264   g1p->record_concurrent_mark_remark_end();
1265 
1266   G1CMIsAliveClosure is_alive(g1h);
1267   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1268 }
1269 
1270 // Base class of the closures that finalize and verify the
1271 // liveness counting data.
1272 class CMCountDataClosureBase: public HeapRegionClosure {
1273 protected:
1274   G1CollectedHeap* _g1h;
1275   ConcurrentMark* _cm;
1276   CardTableModRefBS* _ct_bs;
1277 
1278   BitMap* _region_bm;
1279   BitMap* _card_bm;
1280 
1281   // Takes a region that's not empty (i.e., it has at least one
 1282   // live object in it) and sets its corresponding bit in the region
1283   // bitmap to 1.
1284   void set_bit_for_region(HeapRegion* hr) {
1285     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1286     _region_bm->par_at_put(index, true);
1287   }
1288 
1289 public:
1290   CMCountDataClosureBase(G1CollectedHeap* g1h,
1291                          BitMap* region_bm, BitMap* card_bm):
1292     _g1h(g1h), _cm(g1h->concurrent_mark()),
1293     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
1294     _region_bm(region_bm), _card_bm(card_bm) { }
1295 };
1296 
1297 // Closure that calculates the # live objects per region. Used
1298 // for verification purposes during the cleanup pause.
1299 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1300   CMBitMapRO* _bm;
1301   size_t _region_marked_bytes;
1302 
1303 public:
1304   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1305                          BitMap* region_bm, BitMap* card_bm) :
1306     CMCountDataClosureBase(g1h, region_bm, card_bm),
1307     _bm(bm), _region_marked_bytes(0) { }
1308 
1309   bool doHeapRegion(HeapRegion* hr) {
1310     HeapWord* ntams = hr->next_top_at_mark_start();
1311     HeapWord* start = hr->bottom();
1312 
1313     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1314            "Preconditions not met - "
1315            "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
1316            p2i(start), p2i(ntams), p2i(hr->end()));
1317 
1318     // Find the first marked object at or after "start".
1319     start = _bm->getNextMarkedWordAddress(start, ntams);
1320 
1321     size_t marked_bytes = 0;
1322 
1323     while (start < ntams) {
1324       oop obj = oop(start);
1325       int obj_sz = obj->size();
1326       HeapWord* obj_end = start + obj_sz;
1327 
1328       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1329       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1330 
 1331       // Note: if we're looking at the last region in the heap, obj_end
 1332       // could actually be just beyond the end of the heap; end_idx
1333       // will then correspond to a (non-existent) card that is also
1334       // just beyond the heap.
1335       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1336         // end of object is not card aligned - increment to cover
1337         // all the cards spanned by the object
1338         end_idx += 1;
1339       }
1340 
1341       // Set the bits in the card BM for the cards spanned by this object.
1342       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1343 
1344       // Add the size of this object to the number of marked bytes.
1345       marked_bytes += (size_t)obj_sz * HeapWordSize;
1346 
1347       // This will happen if we are handling a humongous object that spans
1348       // several heap regions.
1349       if (obj_end > hr->end()) {
1350         break;
1351       }
1352       // Find the next marked object after this one.
1353       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1354     }
1355 
1356     // Mark the allocated-since-marking portion...
1357     HeapWord* top = hr->top();
1358     if (ntams < top) {
1359       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1360       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1361 
 1362       // Note: if we're looking at the last region in the heap, top
 1363       // could actually be just beyond the end of the heap; end_idx
1364       // will then correspond to a (non-existent) card that is also
1365       // just beyond the heap.
1366       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1367         // end of object is not card aligned - increment to cover
1368         // all the cards spanned by the object
1369         end_idx += 1;
1370       }
1371       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1372 
1373       // This definitely means the region has live objects.
1374       set_bit_for_region(hr);
1375     }
1376 
1377     // Update the live region bitmap.
1378     if (marked_bytes > 0) {
1379       set_bit_for_region(hr);
1380     }
1381 
1382     // Set the marked bytes for the current region so that
1383     // it can be queried by a calling verification routine
1384     _region_marked_bytes = marked_bytes;
1385 
1386     return false;
1387   }
1388 
1389   size_t region_marked_bytes() const { return _region_marked_bytes; }
1390 };
1391 
1392 // Heap region closure used for verifying the counting data
1393 // that was accumulated concurrently and aggregated during
1394 // the remark pause. This closure is applied to the heap
1395 // regions during the STW cleanup pause.
1396 
1397 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1398   G1CollectedHeap* _g1h;
1399   ConcurrentMark* _cm;
1400   CalcLiveObjectsClosure _calc_cl;
1401   BitMap* _region_bm;   // Region BM to be verified
1402   BitMap* _card_bm;     // Card BM to be verified
1403 
1404   BitMap* _exp_region_bm; // Expected Region BM values
1405   BitMap* _exp_card_bm;   // Expected card BM values
1406 
1407   int _failures;
1408 
1409 public:
1410   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1411                                 BitMap* region_bm,
1412                                 BitMap* card_bm,
1413                                 BitMap* exp_region_bm,
1414                                 BitMap* exp_card_bm) :
1415     _g1h(g1h), _cm(g1h->concurrent_mark()),
1416     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1417     _region_bm(region_bm), _card_bm(card_bm),
1418     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1419     _failures(0) { }
1420 
1421   int failures() const { return _failures; }
1422 
1423   bool doHeapRegion(HeapRegion* hr) {
1424     int failures = 0;
1425 
1426     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1427     // this region and set the corresponding bits in the expected region
1428     // and card bitmaps.
1429     bool res = _calc_cl.doHeapRegion(hr);
1430     assert(res == false, "should be continuing");
1431 
1432     // Verify the marked bytes for this region.
1433     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1434     size_t act_marked_bytes = hr->next_marked_bytes();
1435 
1436     // We're not OK if expected marked bytes > actual marked bytes. It means
1437     // we have missed accounting for some objects during the actual marking.
1438     // For start_humongous regions, the size of the whole object will be
1439     // in exp_marked_bytes, so this check does not apply in this case.
1440     if (exp_marked_bytes > act_marked_bytes && !hr->is_starts_humongous()) {
1441       failures += 1;
1442     }
1443 
1444     // Verify the bit, for this region, in the actual and expected
1445     // (which was just calculated) region bit maps.
1446     // We're not OK if the bit in the calculated expected region
1447     // bitmap is set and the bit in the actual region bitmap is not.
1448     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1449 
1450     bool expected = _exp_region_bm->at(index);
1451     bool actual = _region_bm->at(index);
1452     if (expected && !actual) {
1453       failures += 1;
1454     }
1455 
1456     // Verify that the card bit maps for the cards spanned by the current
1457     // region match. We have an error if we have a set bit in the expected
1458     // bit map and the corresponding bit in the actual bitmap is not set.
1459 
1460     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1461     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1462 
1463     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1464       expected = _exp_card_bm->at(i);
1465       actual = _card_bm->at(i);
1466 
1467       if (expected && !actual) {
1468         failures += 1;
1469       }
1470     }
1471 
1472     _failures += failures;
1473 
1474     // We could stop iteration over the heap when we
1475     // find the first violating region by returning true.
1476     return false;
1477   }
1478 };
1479 
1480 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1481 protected:
1482   G1CollectedHeap* _g1h;
1483   ConcurrentMark* _cm;
1484   BitMap* _actual_region_bm;
1485   BitMap* _actual_card_bm;
1486 
1487   uint    _n_workers;
1488 
1489   BitMap* _expected_region_bm;
1490   BitMap* _expected_card_bm;
1491 
1492   int  _failures;
1493 
1494   HeapRegionClaimer _hrclaimer;
1495 
1496 public:
1497   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1498                             BitMap* region_bm, BitMap* card_bm,
1499                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1500     : AbstractGangTask("G1 verify final counting"),
1501       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1502       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1503       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1504       _failures(0),
1505       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1506     assert(VerifyDuringGC, "don't call this otherwise");
1507     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1508     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1509   }
1510 
1511   void work(uint worker_id) {
1512     assert(worker_id < _n_workers, "invariant");
1513 
1514     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1515                                             _actual_region_bm, _actual_card_bm,
1516                                             _expected_region_bm,
1517                                             _expected_card_bm);
1518 
1519     _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
1520 
1521     Atomic::add(verify_cl.failures(), &_failures);
1522   }
1523 
1524   int failures() const { return _failures; }
1525 };
1526 
1527 // Closure that finalizes the liveness counting data.
1528 // Used during the cleanup pause.
1529 // Sets the bits corresponding to the interval [NTAMS, top)
1530 // (which contains the implicitly live objects) in the
1531 // card liveness bitmap. Also sets the bit, in the region
1532 // liveness bitmap, for each region containing live data.
1533 
1534 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1535  public:
1536   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1537                               BitMap* region_bm,
1538                               BitMap* card_bm) :
1539     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1540 
1541   bool doHeapRegion(HeapRegion* hr) {
1542     HeapWord* ntams = hr->next_top_at_mark_start();
1543     HeapWord* top   = hr->top();
1544 
1545     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1546 
1547     // Mark the allocated-since-marking portion...
1548     if (ntams < top) {
1549       // This definitely means the region has live objects.
1550       set_bit_for_region(hr);
1551 
1552       // Now set the bits in the card bitmap for [ntams, top)
1553       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1554       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1555 
1556       // Note: if we're looking at the last region in the heap, top
1557       // could actually be just beyond the end of the heap; end_idx
1558       // will then correspond to a (non-existent) card that is also
1559       // just beyond the heap.
1560       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1561         // top is not card aligned - increment end_idx to cover
1562         // all the cards spanned by the range [ntams, top)
1563         end_idx += 1;
1564       }
1565 
1566       assert(end_idx <= _card_bm->size(),
1567              "oob: end_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1568              end_idx, _card_bm->size());
1569       assert(start_idx < _card_bm->size(),
1570              "oob: start_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1571              start_idx, _card_bm->size());
1572 
1573       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1574     }
1575 
1576     // Set the bit for the region if it contains live data
1577     if (hr->next_marked_bytes() > 0) {
1578       set_bit_for_region(hr);
1579     }
1580 
1581     return false;
1582   }
1583 };
1584 
1585 class G1ParFinalCountTask: public AbstractGangTask {
1586 protected:
1587   G1CollectedHeap* _g1h;
1588   ConcurrentMark* _cm;
1589   BitMap* _actual_region_bm;
1590   BitMap* _actual_card_bm;
1591 
1592   uint    _n_workers;
1593   HeapRegionClaimer _hrclaimer;
1594 
1595 public:
1596   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1597     : AbstractGangTask("G1 final counting"),
1598       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1599       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1600       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1601   }
1602 
1603   void work(uint worker_id) {
1604     assert(worker_id < _n_workers, "invariant");
1605 
1606     FinalCountDataUpdateClosure final_update_cl(_g1h,
1607                                                 _actual_region_bm,
1608                                                 _actual_card_bm);
1609 
1610     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1611   }
1612 };
1613 
1614 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1615   G1CollectedHeap* _g1;
1616   size_t _freed_bytes;
1617   FreeRegionList* _local_cleanup_list;
1618   HeapRegionSetCount _old_regions_removed;
1619   HeapRegionSetCount _humongous_regions_removed;
1620   HRRSCleanupTask* _hrrs_cleanup_task;
1621 
1622 public:
1623   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1624                              FreeRegionList* local_cleanup_list,
1625                              HRRSCleanupTask* hrrs_cleanup_task) :
1626     _g1(g1),
1627     _freed_bytes(0),
1628     _local_cleanup_list(local_cleanup_list),
1629     _old_regions_removed(),
1630     _humongous_regions_removed(),
1631     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1632 
1633   size_t freed_bytes() { return _freed_bytes; }
1634   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1635   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1636 
1637   bool doHeapRegion(HeapRegion *hr) {
1638     if (hr->is_archive()) {
1639       return false;
1640     }
1641     // We use a claim value of zero here because all regions
1642     // were claimed with value 1 in the FinalCount task.
1643     _g1->reset_gc_time_stamps(hr);
1644     hr->note_end_of_marking();
1645 
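         // A non-young region that has allocated space but no live bytes after
         // marking is entirely garbage, so it can be reclaimed right here in the
         // cleanup pause instead of waiting for an evacuation pause. Young
         // regions are left alone; they are reclaimed by the next young collection.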
1646     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1647       _freed_bytes += hr->used();
1648       hr->set_containing_set(NULL);
1649       if (hr->is_humongous()) {
1650         _humongous_regions_removed.increment(1u, hr->capacity());
1651         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1652       } else {
1653         _old_regions_removed.increment(1u, hr->capacity());
1654         _g1->free_region(hr, _local_cleanup_list, true);
1655       }
1656     } else {
1657       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1658     }
1659 
1660     return false;
1661   }
1662 };
1663 
1664 class G1ParNoteEndTask: public AbstractGangTask {
1665   friend class G1NoteEndOfConcMarkClosure;
1666 
1667 protected:
1668   G1CollectedHeap* _g1h;
1669   FreeRegionList* _cleanup_list;
1670   HeapRegionClaimer _hrclaimer;
1671 
1672 public:
1673   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1674       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1675   }
1676 
1677   void work(uint worker_id) {
1678     FreeRegionList local_cleanup_list("Local Cleanup List");
1679     HRRSCleanupTask hrrs_cleanup_task;
1680     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1681                                            &hrrs_cleanup_task);
1682     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1683     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1684 
1685     // Now update the lists
1686     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1687     {
1688       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1689       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1690 
1691       // If we iterate over the global cleanup list at the end of
1692       // cleanup to do this printing we will not guarantee to only
1693       // generate output for the newly-reclaimed regions (the list
1694       // might not be empty at the beginning of cleanup; we might
1695       // still be working on its previous contents). So we do the
1696       // printing here, before we append the new regions to the global
1697       // cleanup list.
1698 
1699       G1HRPrinter* hr_printer = _g1h->hr_printer();
1700       if (hr_printer->is_active()) {
1701         FreeRegionListIterator iter(&local_cleanup_list);
1702         while (iter.more_available()) {
1703           HeapRegion* hr = iter.get_next();
1704           hr_printer->cleanup(hr);
1705         }
1706       }
1707 
1708       _cleanup_list->add_ordered(&local_cleanup_list);
1709       assert(local_cleanup_list.is_empty(), "post-condition");
1710 
1711       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1712     }
1713   }
1714 };
1715 
1716 class G1ParScrubRemSetTask: public AbstractGangTask {
1717 protected:
1718   G1RemSet* _g1rs;
1719   BitMap* _region_bm;
1720   BitMap* _card_bm;
1721   HeapRegionClaimer _hrclaimer;
1722 
1723 public:
1724   G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
1725       AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) {
1726   }
1727 
1728   void work(uint worker_id) {
1729     _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
1730   }
1731 
1732 };
1733 
1734 void ConcurrentMark::cleanup() {
1735   // world is stopped at this checkpoint
1736   assert(SafepointSynchronize::is_at_safepoint(),
1737          "world should be stopped");
1738   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1739 
1740   // If a full collection has happened, we shouldn't do this.
1741   if (has_aborted()) {
1742     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1743     return;
1744   }
1745 
1746   g1h->verify_region_sets_optional();
1747 
1748   if (VerifyDuringGC) {
1749     HandleMark hm;  // handle scope
1750     g1h->prepare_for_verify();
1751     Universe::verify(VerifyOption_G1UsePrevMarking,
1752                      " VerifyDuringGC:(before)");
1753   }
1754   g1h->check_bitmaps("Cleanup Start");
1755 
1756   G1CollectorPolicy* g1p = g1h->g1_policy();
1757   g1p->record_concurrent_mark_cleanup_start();
1758 
1759   double start = os::elapsedTime();
1760 
1761   HeapRegionRemSet::reset_for_cleanup_tasks();
1762 
1763   // Do counting once more with the world stopped for good measure.
1764   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1765 
1766   g1h->workers()->run_task(&g1_par_count_task);
1767 
1768   if (VerifyDuringGC) {
1769     // Verify that the counting data accumulated during marking matches
1770     // that calculated by walking the marking bitmap.
1771 
1772     // Bitmaps to hold expected values
1773     BitMap expected_region_bm(_region_bm.size(), true);
1774     BitMap expected_card_bm(_card_bm.size(), true);
1775 
1776     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1777                                                  &_region_bm,
1778                                                  &_card_bm,
1779                                                  &expected_region_bm,
1780                                                  &expected_card_bm);
1781 
1782     g1h->workers()->run_task(&g1_par_verify_task);
1783 
1784     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1785   }
1786 
1787   size_t start_used_bytes = g1h->used();
1788   g1h->collector_state()->set_mark_in_progress(false);
1789 
1790   double count_end = os::elapsedTime();
1791   double this_final_counting_time = (count_end - start);
1792   _total_counting_time += this_final_counting_time;
1793 
1794   if (G1PrintRegionLivenessInfo) {
1795     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1796     _g1h->heap_region_iterate(&cl);
1797   }
1798 
1799   // Install newly created mark bitMap as "prev".
1800   swapMarkBitMaps();
1801 
1802   g1h->reset_gc_time_stamp();
1803 
1804   uint n_workers = _g1h->workers()->active_workers();
1805 
1806   // Note end of marking in all heap regions.
1807   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1808   g1h->workers()->run_task(&g1_par_note_end_task);
1809   g1h->check_gc_time_stamps();
1810 
1811   if (!cleanup_list_is_empty()) {
1812     // The cleanup list is not empty, so we'll have to process it
1813     // concurrently. Notify anyone else that might be wanting free
1814     // regions that there will be more free regions coming soon.
1815     g1h->set_free_regions_coming();
1816   }
1817 
1818   // Scrub the rem sets before the record_concurrent_mark_cleanup_end() call
1819   // below, since scrubbing affects the metric by which we sort the heap regions.
1820   if (G1ScrubRemSets) {
1821     double rs_scrub_start = os::elapsedTime();
1822     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
1823     g1h->workers()->run_task(&g1_par_scrub_rs_task);
1824 
1825     double rs_scrub_end = os::elapsedTime();
1826     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1827     _total_rs_scrub_time += this_rs_scrub_time;
1828   }
1829 
1830   // this will also free any regions totally full of garbage objects,
1831   // and sort the regions.
1832   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1833 
1834   // Statistics.
1835   double end = os::elapsedTime();
1836   _cleanup_times.add((end - start) * 1000.0);
1837 
1838   if (G1Log::fine()) {
1839     g1h->g1_policy()->print_heap_transition(start_used_bytes);
1840   }
1841 
1842   // Clean up will have freed any regions completely full of garbage.
1843   // Update the soft reference policy with the new heap occupancy.
1844   Universe::update_heap_info_at_gc();
1845 
1846   if (VerifyDuringGC) {
1847     HandleMark hm;  // handle scope
1848     g1h->prepare_for_verify();
1849     Universe::verify(VerifyOption_G1UsePrevMarking,
1850                      " VerifyDuringGC:(after)");
1851   }
1852 
1853   g1h->check_bitmaps("Cleanup End");
1854 
1855   g1h->verify_region_sets_optional();
1856 
1857   // We need to make this be a "collection" so any collection pause that
1858   // races with it goes around and waits for completeCleanup to finish.
1859   g1h->increment_total_collections();
1860 
1861   // Clean out dead classes and update Metaspace sizes.
1862   if (ClassUnloadingWithConcurrentMark) {
1863     ClassLoaderDataGraph::purge();
1864   }
1865   MetaspaceGC::compute_new_size();
1866 
1867   // We reclaimed old regions so we should calculate the sizes to make
1868   // sure we update the old gen/space data.
1869   g1h->g1mm()->update_sizes();
1870   g1h->allocation_context_stats().update_after_mark();
1871 
1872   g1h->trace_heap_after_concurrent_cycle();
1873 }
1874 
1875 void ConcurrentMark::completeCleanup() {
1876   if (has_aborted()) return;
1877 
1878   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1879 
1880   _cleanup_list.verify_optional();
1881   FreeRegionList tmp_free_list("Tmp Free List");
1882 
1883   if (G1ConcRegionFreeingVerbose) {
1884     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1885                            "cleanup list has %u entries",
1886                            _cleanup_list.length());
1887   }
1888 
1889   // No one else should be accessing the _cleanup_list at this point,
1890   // so it is not necessary to take any locks
1891   while (!_cleanup_list.is_empty()) {
1892     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1893     assert(hr != NULL, "Got NULL from a non-empty list");
1894     hr->par_clear();
1895     tmp_free_list.add_ordered(hr);
1896 
1897     // Instead of adding one region at a time to the secondary_free_list,
1898     // we accumulate them in the local list and move them a few at a
1899     // time. This also cuts down on the number of notify_all() calls
1900     // we do during this process. We'll also append the local list when
1901     // _cleanup_list is empty (which means we just removed the last
1902     // region from the _cleanup_list).
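         // (G1SecondaryFreeListAppendLength controls the batch size, so the lock
         // below ends up being taken roughly once per that many freed regions
         // rather than once per region.)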
1903     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1904         _cleanup_list.is_empty()) {
1905       if (G1ConcRegionFreeingVerbose) {
1906         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1907                                "appending %u entries to the secondary_free_list, "
1908                                "cleanup list still has %u entries",
1909                                tmp_free_list.length(),
1910                                _cleanup_list.length());
1911       }
1912 
1913       {
1914         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1915         g1h->secondary_free_list_add(&tmp_free_list);
1916         SecondaryFreeList_lock->notify_all();
1917       }
1918 #ifndef PRODUCT
1919       if (G1StressConcRegionFreeing) {
1920         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1921           os::sleep(Thread::current(), (jlong) 1, false);
1922         }
1923       }
1924 #endif
1925     }
1926   }
1927   assert(tmp_free_list.is_empty(), "post-condition");
1928 }
1929 
1930 // Supporting Object and Oop closures for reference discovery
1931 // and processing during marking
1932 
1933 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1934   HeapWord* addr = (HeapWord*)obj;
1935   return addr != NULL &&
1936          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1937 }
1938 
1939 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1940 // Uses the CMTask associated with a worker thread (for serial reference
1941 // processing the CMTask for worker 0 is used) to preserve (mark) and
1942 // trace referent objects.
1943 //
1944 // Using the CMTask and embedded local queues avoids having the worker
1945 // threads operate on the global mark stack. This reduces the risk
1946 // of overflowing the stack - which we would rather avoid at this late
1947 // stage. Also, using the tasks' local queues removes the potential
1948 // for the workers to interfere with each other, which could occur if
1949 // they operated on the global stack.
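     // The counter below ticks down from G1RefProcDrainInterval; each time it
     // reaches zero the closure pauses reference handling and calls
     // CMTask::do_marking_step() to drain what has been pushed so far.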
1950 
1951 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1952   ConcurrentMark* _cm;
1953   CMTask*         _task;
1954   int             _ref_counter_limit;
1955   int             _ref_counter;
1956   bool            _is_serial;
1957  public:
1958   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
1959     _cm(cm), _task(task), _is_serial(is_serial),
1960     _ref_counter_limit(G1RefProcDrainInterval) {
1961     assert(_ref_counter_limit > 0, "sanity");
1962     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1963     _ref_counter = _ref_counter_limit;
1964   }
1965 
1966   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1967   virtual void do_oop(      oop* p) { do_oop_work(p); }
1968 
1969   template <class T> void do_oop_work(T* p) {
1970     if (!_cm->has_overflown()) {
1971       oop obj = oopDesc::load_decode_heap_oop(p);
1972       _task->deal_with_reference(obj);
1973       _ref_counter--;
1974 
1975       if (_ref_counter == 0) {
1976         // We have dealt with _ref_counter_limit references, pushing them
1977         // and objects reachable from them on to the local stack (and
1978         // possibly the global stack). Call CMTask::do_marking_step() to
1979         // process these entries.
1980         //
1981         // We call CMTask::do_marking_step() in a loop, which we'll exit if
1982         // there's nothing more to do (i.e. we're done with the entries that
1983         // were pushed as a result of the CMTask::deal_with_reference() calls
1984         // above) or we overflow.
1985         //
1986         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
1987         // flag while there may still be some work to do. (See the comment at
1988         // the beginning of CMTask::do_marking_step() for those conditions -
1989         // one of which is reaching the specified time target.) It is only
1990         // when CMTask::do_marking_step() returns without setting the
1991         // has_aborted() flag that the marking step has completed.
1992         do {
1993           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1994           _task->do_marking_step(mark_step_duration_ms,
1995                                  false      /* do_termination */,
1996                                  _is_serial);
1997         } while (_task->has_aborted() && !_cm->has_overflown());
1998         _ref_counter = _ref_counter_limit;
1999       }
2000     }
2001   }
2002 };
2003 
2004 // 'Drain' oop closure used by both serial and parallel reference processing.
2005 // Uses the CMTask associated with a given worker thread (for serial
2006 // reference processing the CMTask for worker 0 is used). Calls the
2007 // do_marking_step routine, with an unbelievably large timeout value,
2008 // to drain the marking data structures of the remaining entries
2009 // added by the 'keep alive' oop closure above.
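     // Because the time target is effectively infinite, do_marking_step() never
     // aborts on time; the loop in do_void() therefore only terminates once the
     // stacks are fully drained or the global mark stack has overflowed.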
2010 
2011 class G1CMDrainMarkingStackClosure: public VoidClosure {
2012   ConcurrentMark* _cm;
2013   CMTask*         _task;
2014   bool            _is_serial;
2015  public:
2016   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2017     _cm(cm), _task(task), _is_serial(is_serial) {
2018     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2019   }
2020 
2021   void do_void() {
2022     do {
2023       // We call CMTask::do_marking_step() to completely drain the local
2024       // and global marking stacks of entries pushed by the 'keep alive'
2025       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2026       //
2027       // CMTask::do_marking_step() is called in a loop, which we'll exit
2028       // if there's nothing more to do (i.e. we've completely drained the
2029       // entries that were pushed as a result of applying the 'keep alive'
2030       // closure to the entries on the discovered ref lists) or we overflow
2031       // the global marking stack.
2032       //
2033       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2034       // flag while there may still be some work to do. (See the comment at
2035       // the beginning of CMTask::do_marking_step() for those conditions -
2036       // one of which is reaching the specified time target.) It is only
2037       // when CMTask::do_marking_step() returns without setting the
2038       // has_aborted() flag that the marking step has completed.
2039 
2040       _task->do_marking_step(1000000000.0 /* something very large */,
2041                              true         /* do_termination */,
2042                              _is_serial);
2043     } while (_task->has_aborted() && !_cm->has_overflown());
2044   }
2045 };
2046 
2047 // Implementation of AbstractRefProcTaskExecutor for parallel
2048 // reference processing at the end of G1 concurrent marking
2049 
2050 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2051 private:
2052   G1CollectedHeap* _g1h;
2053   ConcurrentMark*  _cm;
2054   WorkGang*        _workers;
2055   uint             _active_workers;
2056 
2057 public:
2058   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2059                           ConcurrentMark* cm,
2060                           WorkGang* workers,
2061                           uint n_workers) :
2062     _g1h(g1h), _cm(cm),
2063     _workers(workers), _active_workers(n_workers) { }
2064 
2065   // Executes the given task using concurrent marking worker threads.
2066   virtual void execute(ProcessTask& task);
2067   virtual void execute(EnqueueTask& task);
2068 };
2069 
2070 class G1CMRefProcTaskProxy: public AbstractGangTask {
2071   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2072   ProcessTask&     _proc_task;
2073   G1CollectedHeap* _g1h;
2074   ConcurrentMark*  _cm;
2075 
2076 public:
2077   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2078                      G1CollectedHeap* g1h,
2079                      ConcurrentMark* cm) :
2080     AbstractGangTask("Process reference objects in parallel"),
2081     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2082     ReferenceProcessor* rp = _g1h->ref_processor_cm();
2083     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2084   }
2085 
2086   virtual void work(uint worker_id) {
2087     ResourceMark rm;
2088     HandleMark hm;
2089     CMTask* task = _cm->task(worker_id);
2090     G1CMIsAliveClosure g1_is_alive(_g1h);
2091     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2092     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2093 
2094     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2095   }
2096 };
2097 
2098 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2099   assert(_workers != NULL, "Need parallel worker threads.");
2100   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2101 
2102   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2103 
2104   // We need to reset the concurrency level before each
2105   // proxy task execution, so that the termination protocol
2106   // and overflow handling in CMTask::do_marking_step() know
2107   // how many workers to wait for.
2108   _cm->set_concurrency(_active_workers);
2109   _workers->run_task(&proc_task_proxy);
2110 }
2111 
2112 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2113   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2114   EnqueueTask& _enq_task;
2115 
2116 public:
2117   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2118     AbstractGangTask("Enqueue reference objects in parallel"),
2119     _enq_task(enq_task) { }
2120 
2121   virtual void work(uint worker_id) {
2122     _enq_task.work(worker_id);
2123   }
2124 };
2125 
2126 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2127   assert(_workers != NULL, "Need parallel worker threads.");
2128   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2129 
2130   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2131 
2132   // Not strictly necessary but...
2133   //
2134   // We need to reset the concurrency level before each
2135   // proxy task execution, so that the termination protocol
2136   // and overflow handling in CMTask::do_marking_step() know
2137   // how many workers to wait for.
2138   _cm->set_concurrency(_active_workers);
2139   _workers->run_task(&enq_task_proxy);
2140 }
2141 
2142 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2143   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2144 }
2145 
2146 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2147   if (has_overflown()) {
2148     // Skip processing the discovered references if we have
2149     // overflown the global marking stack. Reference objects
2150     // only get discovered once so it is OK not to
2151     // de-populate the discovered reference lists. We could have
2152     // done so, but the only benefit would be that, when marking
2153     // restarts, fewer reference objects are discovered.
2154     return;
2155   }
2156 
2157   ResourceMark rm;
2158   HandleMark   hm;
2159 
2160   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2161 
2162   // Is alive closure.
2163   G1CMIsAliveClosure g1_is_alive(g1h);
2164 
2165   // Inner scope to exclude the cleaning of the string and symbol
2166   // tables from the displayed time.
2167   {
2168     G1CMTraceTime t("GC ref-proc", G1Log::finer());
2169 
2170     ReferenceProcessor* rp = g1h->ref_processor_cm();
2171 
2172     // See the comment in G1CollectedHeap::ref_processing_init()
2173     // about how reference processing currently works in G1.
2174 
2175     // Set the soft reference policy
2176     rp->setup_policy(clear_all_soft_refs);
2177     assert(_markStack.isEmpty(), "mark stack should be empty");
2178 
2179     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2180     // in serial reference processing. Note these closures are also
2181     // used for serially processing (by the current thread) the
2182     // JNI references during parallel reference processing.
2183     //
2184     // These closures do not need to synchronize with the worker
2185     // threads involved in parallel reference processing as these
2186     // instances are executed serially by the current thread (e.g.
2187     // reference processing is not multi-threaded and is thus
2188     // performed by the current thread instead of a gang worker).
2189     //
2190     // The gang tasks involved in parallel reference processing create
2191     // their own instances of these closures, which do their own
2192     // synchronization among themselves.
2193     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2194     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2195 
2196     // We need at least one active thread. If reference processing
2197     // is not multi-threaded we use the current (VMThread) thread,
2198     // otherwise we use the work gang from the G1CollectedHeap and
2199     // we utilize all the worker threads we can.
2200     bool processing_is_mt = rp->processing_is_mt();
2201     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2202     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2203 
2204     // Parallel processing task executor.
2205     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2206                                               g1h->workers(), active_workers);
2207     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2208 
2209     // Set the concurrency level. The phase was already set prior to
2210     // executing the remark task.
2211     set_concurrency(active_workers);
2212 
2213     // Set the degree of MT processing here.  If the discovery was done MT,
2214     // the number of threads involved during discovery could differ from
2215     // the number of active workers.  This is OK as long as the discovered
2216     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2217     rp->set_active_mt_degree(active_workers);
2218 
2219     // Process the weak references.
2220     const ReferenceProcessorStats& stats =
2221         rp->process_discovered_references(&g1_is_alive,
2222                                           &g1_keep_alive,
2223                                           &g1_drain_mark_stack,
2224                                           executor,
2225                                           g1h->gc_timer_cm());
2226     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2227 
2228     // The do_oop work routines of the keep_alive and drain_marking_stack
2229     // oop closures will set the has_overflown flag if we overflow the
2230     // global marking stack.
2231 
2232     assert(_markStack.overflow() || _markStack.isEmpty(),
2233             "mark stack should be empty (unless it overflowed)");
2234 
2235     if (_markStack.overflow()) {
2236       // This should have been done already when we tried to push an
2237       // entry on to the global mark stack. But let's do it again.
2238       set_has_overflown();
2239     }
2240 
2241     assert(rp->num_q() == active_workers, "why not");
2242 
2243     rp->enqueue_discovered_references(executor);
2244 
2245     rp->verify_no_references_recorded();
2246     assert(!rp->discovery_enabled(), "Post condition");
2247   }
2248 
2249   if (has_overflown()) {
2250     // We can not trust g1_is_alive if the marking stack overflowed
2251     return;
2252   }
2253 
2254   assert(_markStack.isEmpty(), "Marking should have completed");
2255 
2256   // Unload Klasses, String, Symbols, Code Cache, etc.
2257   {
2258     G1CMTraceTime trace("Unloading", G1Log::finer());
2259 
2260     if (ClassUnloadingWithConcurrentMark) {
2261       bool purged_classes;
2262 
2263       {
2264         G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest());
2265         purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2266       }
2267 
2268       {
2269         G1CMTraceTime trace("Parallel Unloading", G1Log::finest());
2270         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2271       }
2272     }
2273 
2274     if (G1StringDedup::is_enabled()) {
2275       G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest());
2276       G1StringDedup::unlink(&g1_is_alive);
2277     }
2278   }
2279 }
2280 
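     // Called from the cleanup pause: the fully-built "next" bitmap becomes the
     // "prev" bitmap (the authoritative liveness information for the last
     // completed marking), while the old "prev" bitmap becomes "next" and will
     // be cleared before the following marking cycle starts.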
2281 void ConcurrentMark::swapMarkBitMaps() {
2282   CMBitMapRO* temp = _prevMarkBitMap;
2283   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2284   _nextMarkBitMap  = (CMBitMap*)  temp;
2285 }
2286 
2287 // Closure for marking entries in SATB buffers.
2288 class CMSATBBufferClosure : public SATBBufferClosure {
2289 private:
2290   CMTask* _task;
2291   G1CollectedHeap* _g1h;
2292 
2293   // This is very similar to CMTask::deal_with_reference, but with
2294   // more relaxed requirements for the argument, so this must be more
2295   // circumspect about treating the argument as an object.
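       // Entries at or above NTAMS refer to objects allocated since marking
       // started; those are implicitly live and never need to be greyed, so only
       // entries below NTAMS are examined further.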
2296   void do_entry(void* entry) const {
2297     _task->increment_refs_reached();
2298     HeapRegion* hr = _g1h->heap_region_containing(entry);
2299     if (entry < hr->next_top_at_mark_start()) {
2300       // Until we get here, we don't know whether entry refers to a valid
2301       // object; it could instead have been a stale reference.
2302       oop obj = static_cast<oop>(entry);
2303       assert(obj->is_oop(true /* ignore mark word */),
2304              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2305       _task->make_reference_grey(obj, hr);
2306     }
2307   }
2308 
2309 public:
2310   CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2311     : _task(task), _g1h(g1h) { }
2312 
2313   virtual void do_buffer(void** buffer, size_t size) {
2314     for (size_t i = 0; i < size; ++i) {
2315       do_entry(buffer[i]);
2316     }
2317   }
2318 };
2319 
2320 class G1RemarkThreadsClosure : public ThreadClosure {
2321   CMSATBBufferClosure _cm_satb_cl;
2322   G1CMOopClosure _cm_cl;
2323   MarkingCodeBlobClosure _code_cl;
2324   int _thread_parity;
2325 
2326  public:
2327   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2328     _cm_satb_cl(task, g1h),
2329     _cm_cl(g1h, g1h->concurrent_mark(), task),
2330     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2331     _thread_parity(Threads::thread_claim_parity()) {}
2332 
2333   void do_thread(Thread* thread) {
2334     if (thread->is_Java_thread()) {
2335       if (thread->claim_oops_do(true, _thread_parity)) {
2336         JavaThread* jt = (JavaThread*)thread;
2337 
2338         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2339         // however, oops reachable from nmethods have very complex lifecycles:
2340         // * Alive if on the stack of an executing method
2341         // * Weakly reachable otherwise
2342         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2343         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2344         jt->nmethods_do(&_code_cl);
2345 
2346         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2347       }
2348     } else if (thread->is_VM_thread()) {
2349       if (thread->claim_oops_do(true, _thread_parity)) {
2350         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2351       }
2352     }
2353   }
2354 };
2355 
2356 class CMRemarkTask: public AbstractGangTask {
2357 private:
2358   ConcurrentMark* _cm;
2359 public:
2360   void work(uint worker_id) {
2361     // Since all available tasks are actually started, we should
2362     // only proceed if we're supposed to be active.
2363     if (worker_id < _cm->active_tasks()) {
2364       CMTask* task = _cm->task(worker_id);
2365       task->record_start_time();
2366       {
2367         ResourceMark rm;
2368         HandleMark hm;
2369 
2370         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2371         Threads::threads_do(&threads_f);
2372       }
2373 
2374       do {
2375         task->do_marking_step(1000000000.0 /* something very large */,
2376                               true         /* do_termination       */,
2377                               false        /* is_serial            */);
2378       } while (task->has_aborted() && !_cm->has_overflown());
2379       // If we overflow, then we do not want to restart. We instead
2380       // want to abort remark and do concurrent marking again.
2381       task->record_end_time();
2382     }
2383   }
2384 
2385   CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2386     AbstractGangTask("Par Remark"), _cm(cm) {
2387     _cm->terminator()->reset_for_reuse(active_workers);
2388   }
2389 };
2390 
2391 void ConcurrentMark::checkpointRootsFinalWork() {
2392   ResourceMark rm;
2393   HandleMark   hm;
2394   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2395 
2396   G1CMTraceTime trace("Finalize Marking", G1Log::finer());
2397 
2398   g1h->ensure_parsability(false);
2399 
2400   // this is remark, so we'll use up all active threads
2401   uint active_workers = g1h->workers()->active_workers();
2402   set_concurrency_and_phase(active_workers, false /* concurrent */);
2403   // Leave _parallel_marking_threads at its
2404   // value originally calculated in the ConcurrentMark
2405   // constructor and pass values of the active workers
2406   // through the gang in the task.
2407 
2408   {
2409     StrongRootsScope srs(active_workers);
2410 
2411     CMRemarkTask remarkTask(this, active_workers);
2412     // We will start all available threads, even if we decide that the
2413     // active_workers will be fewer. The extra ones will just bail out
2414     // immediately.
2415     g1h->workers()->run_task(&remarkTask);
2416   }
2417 
2418   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2419   guarantee(has_overflown() ||
2420             satb_mq_set.completed_buffers_num() == 0,
2421             "Invariant: has_overflown = %s, num buffers = %d",
2422             BOOL_TO_STR(has_overflown()),
2423             satb_mq_set.completed_buffers_num());
2424 
2425   print_stats();
2426 }
2427 
2428 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2429   // Note we are overriding the read-only view of the prev map here, via
2430   // the cast.
2431   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2432 }
2433 
2434 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2435   _nextMarkBitMap->clearRange(mr);
2436 }
2437 
2438 HeapRegion*
2439 ConcurrentMark::claim_region(uint worker_id) {
2440   // "checkpoint" the finger
2441   HeapWord* finger = _finger;
2442 
2443   // _heap_end will not change underneath our feet; it only changes at
2444   // yield points.
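       // The claiming protocol, roughly: read the global finger, compute the end
       // of the region it points into, and try to CAS the finger from the value
       // we read to that end. The worker whose CAS succeeds owns the region;
       // everyone else re-reads the finger and retries on the next region.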
2445   while (finger < _heap_end) {
2446     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2447 
2448     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2449 
2450     // heap_region_containing() above may return NULL as we always scan and
2451     // claim up to the end of the heap. In this case, just jump to the next region.
2452     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2453 
2454     // Is the gap between reading the finger and doing the CAS too long?
2455     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2456     if (res == finger && curr_region != NULL) {
2457       // we succeeded
2458       HeapWord*   bottom        = curr_region->bottom();
2459       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2460 
2461       // notice that _finger == end cannot be guaranteed here since
2462       // someone else might have moved the finger even further
2463       assert(_finger >= end, "the finger should have moved forward");
2464 
2465       if (limit > bottom) {
2466         return curr_region;
2467       } else {
2468         assert(limit == bottom,
2469                "the region limit should be at bottom");
2470         // we return NULL and the caller should try calling
2471         // claim_region() again.
2472         return NULL;
2473       }
2474     } else {
2475       assert(_finger > finger, "the finger should have moved forward");
2476       // read it again
2477       finger = _finger;
2478     }
2479   }
2480 
2481   return NULL;
2482 }
2483 
2484 #ifndef PRODUCT
2485 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
2486 private:
2487   G1CollectedHeap* _g1h;
2488   const char* _phase;
2489   int _info;
2490 
2491 public:
2492   VerifyNoCSetOops(const char* phase, int info = -1) :
2493     _g1h(G1CollectedHeap::heap()),
2494     _phase(phase),
2495     _info(info)
2496   { }
2497 
2498   void operator()(oop obj) const {
2499     guarantee(obj->is_oop(),
2500               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2501               p2i(obj), _phase, _info);
2502     guarantee(!_g1h->obj_in_cs(obj),
2503               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2504               p2i(obj), _phase, _info);
2505   }
2506 };
2507 
2508 void ConcurrentMark::verify_no_cset_oops() {
2509   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2510   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2511     return;
2512   }
2513 
2514   // Verify entries on the global mark stack
2515   _markStack.iterate(VerifyNoCSetOops("Stack"));
2516 
2517   // Verify entries on the task queues
2518   for (uint i = 0; i < _max_worker_id; ++i) {
2519     CMTaskQueue* queue = _task_queues->queue(i);
2520     queue->iterate(VerifyNoCSetOops("Queue", i));
2521   }
2522 
2523   // Verify the global finger
2524   HeapWord* global_finger = finger();
2525   if (global_finger != NULL && global_finger < _heap_end) {
2526     // Since we always iterate over all regions, we might get a NULL HeapRegion
2527     // here.
2528     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
2529     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2530               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
2531               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
2532   }
2533 
2534   // Verify the task fingers
2535   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2536   for (uint i = 0; i < parallel_marking_threads(); ++i) {
2537     CMTask* task = _tasks[i];
2538     HeapWord* task_finger = task->finger();
2539     if (task_finger != NULL && task_finger < _heap_end) {
2540       // See above note on the global finger verification.
2541       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
2542       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2543                 !task_hr->in_collection_set(),
2544                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2545                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
2546     }
2547   }
2548 }
2549 #endif // PRODUCT
2550 
2551 // Aggregate the counting data that was constructed concurrently
2552 // with marking.
2553 class AggregateCountDataHRClosure: public HeapRegionClosure {
2554   G1CollectedHeap* _g1h;
2555   ConcurrentMark* _cm;
2556   CardTableModRefBS* _ct_bs;
2557   BitMap* _cm_card_bm;
2558   uint _max_worker_id;
2559 
2560  public:
2561   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2562                               BitMap* cm_card_bm,
2563                               uint max_worker_id) :
2564     _g1h(g1h), _cm(g1h->concurrent_mark()),
2565     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2566     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2567 
2568   bool doHeapRegion(HeapRegion* hr) {
2569     HeapWord* start = hr->bottom();
2570     HeapWord* limit = hr->next_top_at_mark_start();
2571     HeapWord* end = hr->end();
2572 
2573     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2574            "Preconditions not met - "
2575            "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2576            "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2577            p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2578 
2579     assert(hr->next_marked_bytes() == 0, "Precondition");
2580 
2581     if (start == limit) {
2582       // NTAMS of this region has not been set so nothing to do.
2583       return false;
2584     }
2585 
2586     // 'start' should be in the heap.
2587     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2588     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2589     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2590 
2591     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2592     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2593     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2594 
2595     // If ntams is not card aligned then we bump the card bitmap index
2596     // for limit so that we get all the cards spanned by
2597     // the object ending at ntams.
2598     // Note: if this is the last region in the heap then ntams
2599     // could actually be just beyond the end of the heap;
2600     // limit_idx will then correspond to a (non-existent) card
2601     // that is also outside the heap.
2602     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2603       limit_idx += 1;
2604     }
2605 
2606     assert(limit_idx <= end_idx, "or else use atomics");
2607 
2608     // Aggregate the "stripe" in the count data associated with hr.
2609     uint hrm_index = hr->hrm_index();
2610     size_t marked_bytes = 0;
2611 
2612     for (uint i = 0; i < _max_worker_id; i += 1) {
2613       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2614       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2615 
2616       // Fetch the marked_bytes in this region for task i and
2617       // add it to the running total for this region.
2618       marked_bytes += marked_bytes_array[hrm_index];
2619 
2620       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2621       // into the global card bitmap.
2622       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2623 
2624       while (scan_idx < limit_idx) {
2625         assert(task_card_bm->at(scan_idx) == true, "should be");
2626         _cm_card_bm->set_bit(scan_idx);
2627         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2628 
2629         // BitMap::get_next_one_offset() can handle the case when
2630         // its left_offset parameter is greater than its right_offset
2631         // parameter. It does, however, have an early exit if
2632         // left_offset == right_offset. So let's limit the value
2633         // passed in for left offset here.
2634         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2635         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2636       }
2637     }
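         // At this point marked_bytes holds the sum of the per-worker counts for
         // this region, and _cm_card_bm holds the union of the per-worker card
         // bitmaps over [start_idx, limit_idx).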
2638 
2639     // Update the marked bytes for this region.
2640     hr->add_to_marked_bytes(marked_bytes);
2641 
2642     // Next heap region
2643     return false;
2644   }
2645 };
2646 
2647 class G1AggregateCountDataTask: public AbstractGangTask {
2648 protected:
2649   G1CollectedHeap* _g1h;
2650   ConcurrentMark* _cm;
2651   BitMap* _cm_card_bm;
2652   uint _max_worker_id;
2653   uint _active_workers;
2654   HeapRegionClaimer _hrclaimer;
2655 
2656 public:
2657   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2658                            ConcurrentMark* cm,
2659                            BitMap* cm_card_bm,
2660                            uint max_worker_id,
2661                            uint n_workers) :
2662       AbstractGangTask("Count Aggregation"),
2663       _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2664       _max_worker_id(max_worker_id),
2665       _active_workers(n_workers),
2666       _hrclaimer(_active_workers) {
2667   }
2668 
2669   void work(uint worker_id) {
2670     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
2671 
2672     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
2673   }
2674 };
2675 
2676 
2677 void ConcurrentMark::aggregate_count_data() {
2678   uint n_workers = _g1h->workers()->active_workers();
2679 
2680   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
2681                                            _max_worker_id, n_workers);
2682 
2683   _g1h->workers()->run_task(&g1_par_agg_task);
2684 }
2685 
2686 // Clear the per-worker arrays used to store the per-region counting data
2687 void ConcurrentMark::clear_all_count_data() {
2688   // Clear the global card bitmap - it will be filled during
2689   // liveness count aggregation (during remark) and the
2690   // final counting task.
2691   _card_bm.clear();
2692 
2693   // Clear the global region bitmap - it will be filled as part
2694   // of the final counting task.
2695   _region_bm.clear();
2696 
2697   uint max_regions = _g1h->max_regions();
2698   assert(_max_worker_id > 0, "uninitialized");
2699 
2700   for (uint i = 0; i < _max_worker_id; i += 1) {
2701     BitMap* task_card_bm = count_card_bitmap_for(i);
2702     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
2703 
2704     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
2705     assert(marked_bytes_array != NULL, "uninitialized");
2706 
2707     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
2708     task_card_bm->clear();
2709   }
2710 }
2711 
2712 void ConcurrentMark::print_stats() {
2713   if (G1MarkingVerboseLevel > 0) {
2714     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2715     for (size_t i = 0; i < _active_tasks; ++i) {
2716       _tasks[i]->print_stats();
2717       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2718     }
2719   }
2720 }
2721 
2722 // abandon current marking iteration due to a Full GC
2723 void ConcurrentMark::abort() {
2724   if (!cmThread()->during_cycle() || _has_aborted) {
2725     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2726     return;
2727   }
2728 
2729   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2730   // concurrent bitmap clearing.
2731   _nextMarkBitMap->clearAll();
2732 
2733   // Note we cannot clear the previous marking bitmap here
2734   // since VerifyDuringGC verifies the objects marked during
2735   // a full GC against the previous bitmap.
2736 
2737   // Clear the liveness counting data
2738   clear_all_count_data();
2739   // Empty mark stack
2740   reset_marking_state();
2741   for (uint i = 0; i < _max_worker_id; ++i) {
2742     _tasks[i]->clear_region_fields();
2743   }
2744   _first_overflow_barrier_sync.abort();
2745   _second_overflow_barrier_sync.abort();
2746   _has_aborted = true;
2747 
2748   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2749   satb_mq_set.abandon_partial_marking();
2750   // This can be called either during or outside marking; we'll read
2751   // the expected_active value from the SATB queue set.
2752   satb_mq_set.set_active_all_threads(
2753                                  false, /* new active value */
2754                                  satb_mq_set.is_active() /* expected_active */);
2755 
2756   _g1h->trace_heap_after_concurrent_cycle();
2757   _g1h->register_concurrent_cycle_end();
2758 }
2759 
2760 static void print_ms_time_info(const char* prefix, const char* name,
2761                                NumberSeq& ns) {
2762   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2763                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2764   if (ns.num() > 0) {
2765     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2766                            prefix, ns.sd(), ns.maximum());
2767   }
2768 }
2769 
2770 void ConcurrentMark::print_summary_info() {
2771   gclog_or_tty->print_cr(" Concurrent marking:");
2772   print_ms_time_info("  ", "init marks", _init_times);
2773   print_ms_time_info("  ", "remarks", _remark_times);
2774   {
2775     print_ms_time_info("     ", "final marks", _remark_mark_times);
2776     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2777 
2778   }
2779   print_ms_time_info("  ", "cleanups", _cleanup_times);
2780   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
2781                          _total_counting_time,
2782                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2783                           (double)_cleanup_times.num()
2784                          : 0.0));
2785   if (G1ScrubRemSets) {
2786     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2787                            _total_rs_scrub_time,
2788                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2789                             (double)_cleanup_times.num()
2790                            : 0.0));
2791   }
2792   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
2793                          (_init_times.sum() + _remark_times.sum() +
2794                           _cleanup_times.sum())/1000.0);
2795   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
2796                 "(%8.2f s marking).",
2797                 cmThread()->vtime_accum(),
2798                 cmThread()->vtime_mark_accum());
2799 }
2800 
2801 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2802   _parallel_workers->print_worker_threads_on(st);
2803 }
2804 
2805 void ConcurrentMark::print_on_error(outputStream* st) const {
2806   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2807       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
2808   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
2809   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
2810 }
2811 
2812 // We take a break if someone is trying to stop the world.
2813 bool ConcurrentMark::do_yield_check(uint worker_id) {
2814   if (SuspendibleThreadSet::should_yield()) {
2815     if (worker_id == 0) {
2816       _g1h->g1_policy()->record_concurrent_pause();
2817     }
2818     SuspendibleThreadSet::yield();
2819     return true;
2820   } else {
2821     return false;
2822   }
2823 }
2824 
2825 // Closure for iteration over bitmaps
2826 class CMBitMapClosure : public BitMapClosure {
2827 private:
2828   // the bitmap that is being iterated over
2829   CMBitMap*                   _nextMarkBitMap;
2830   ConcurrentMark*             _cm;
2831   CMTask*                     _task;
2832 
2833 public:
2834   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
2835     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2836 
2837   bool do_bit(size_t offset) {
2838     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2839     assert(_nextMarkBitMap->isMarked(addr), "invariant");
2840     assert(addr < _cm->finger(), "invariant");
2841     assert(addr >= _task->finger(), "invariant");
2842 
2843     // We move that task's local finger along.
2844     _task->move_finger_to(addr);
2845 
2846     _task->scan_object(oop(addr));
2847     // we only partially drain the local queue and global stack
2848     _task->drain_local_queue(true);
2849     _task->drain_global_stack(true);
2850 
2851     // if the has_aborted flag has been raised, we need to bail out of
2852     // the iteration
2853     return !_task->has_aborted();
2854   }
2855 };
2856 
2857 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2858   ReferenceProcessor* result = NULL;
2859   if (G1UseConcMarkReferenceProcessing) {
2860     result = g1h->ref_processor_cm();
2861     assert(result != NULL, "should not be NULL");
2862   }
2863   return result;
2864 }
2865 
2866 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2867                                ConcurrentMark* cm,
2868                                CMTask* task)
2869   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2870     _g1h(g1h), _cm(cm), _task(task)
2871 { }
2872 
2873 void CMTask::setup_for_region(HeapRegion* hr) {
2874   assert(hr != NULL,
2875         "claim_region() should have filtered out NULL regions");
2876   _curr_region  = hr;
2877   _finger       = hr->bottom();
2878   update_region_limit();
2879 }
2880 
2881 void CMTask::update_region_limit() {
2882   HeapRegion* hr            = _curr_region;
2883   HeapWord* bottom          = hr->bottom();
2884   HeapWord* limit           = hr->next_top_at_mark_start();
2885 
2886   if (limit == bottom) {
2887     // The region was collected underneath our feet.
2888     // We set the finger to bottom to ensure that the bitmap
2889     // iteration that will follow this will not do anything.
2890     // (this is not a condition that holds when we set the region up,
2891     // as the region is not supposed to be empty in the first place)
2892     _finger = bottom;
2893   } else if (limit >= _region_limit) {
2894     assert(limit >= _finger, "peace of mind");
2895   } else {
2896     assert(limit < _region_limit, "only way to get here");
2897     // This can happen under some pretty unusual circumstances.  An
2898     // evacuation pause empties the region underneath our feet (NTAMS
2899     // at bottom). We then do some allocation in the region (NTAMS
2900     // stays at bottom), followed by the region being used as a GC
2901     // alloc region (NTAMS will move to top() and the objects
2902     // originally below it will be grayed). All objects now marked in
2903     // the region are explicitly grayed if they are below the global
2904     // finger, so in fact we do not need to scan anything else. We
2905     // simply set _finger to limit to ensure that the bitmap iteration
2906     // doesn't do anything.
2907     _finger = limit;
2908   }
2909 
2910   _region_limit = limit;
2911 }
2912 
2913 void CMTask::giveup_current_region() {
2914   assert(_curr_region != NULL, "invariant");
2915   clear_region_fields();
2916 }
2917 
2918 void CMTask::clear_region_fields() {
2919   // Values for these three fields that indicate that we're not
2920   // holding on to a region.
2921   _curr_region   = NULL;
2922   _finger        = NULL;
2923   _region_limit  = NULL;
2924 }
2925 
2926 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2927   if (cm_oop_closure == NULL) {
2928     assert(_cm_oop_closure != NULL, "invariant");
2929   } else {
2930     assert(_cm_oop_closure == NULL, "invariant");
2931   }
2932   _cm_oop_closure = cm_oop_closure;
2933 }
2934 
2935 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2936   guarantee(nextMarkBitMap != NULL, "invariant");
2937   _nextMarkBitMap                = nextMarkBitMap;
2938   clear_region_fields();
2939 
2940   _calls                         = 0;
2941   _elapsed_time_ms               = 0.0;
2942   _termination_time_ms           = 0.0;
2943   _termination_start_time_ms     = 0.0;
2944 }
2945 
2946 bool CMTask::should_exit_termination() {
2947   regular_clock_call();
2948   // This is called when we are in the termination protocol. We should
2949   // quit if, for some reason, this task wants to abort or the global
2950   // stack is not empty (this means that we can get work from it).
2951   return !_cm->mark_stack_empty() || has_aborted();
2952 }
2953 
2954 void CMTask::reached_limit() {
2955   assert(_words_scanned >= _words_scanned_limit ||
2956          _refs_reached >= _refs_reached_limit,
2957          "shouldn't have been called otherwise");
2958   regular_clock_call();
2959 }
2960 
2961 void CMTask::regular_clock_call() {
2962   if (has_aborted()) return;
2963 
2964   // First, we need to recalculate the words scanned and refs reached
2965   // limits for the next clock call.
2966   recalculate_limits();
2967 
2968   // During the regular clock call we do the following
2969 
2970   // (1) If an overflow has been flagged, then we abort.
2971   if (_cm->has_overflown()) {
2972     set_has_aborted();
2973     return;
2974   }
2975 
2976   // If we are not concurrent (i.e. we're doing remark) we don't need
2977   // to check anything else. The other steps are only needed during
2978   // the concurrent marking phase.
2979   if (!concurrent()) return;
2980 
2981   // (2) If marking has been aborted for Full GC, then we also abort.
2982   if (_cm->has_aborted()) {
2983     set_has_aborted();
2984     return;
2985   }
2986 
2987   double curr_time_ms = os::elapsedVTime() * 1000.0;
2988 
2989   // (3) We check whether we should yield. If we have to, then we abort.
2990   if (SuspendibleThreadSet::should_yield()) {
2991     // We should yield. To do this we abort the task. The caller is
2992     // responsible for yielding.
2993     set_has_aborted();
2994     return;
2995   }
2996 
2997   // (4) We check whether we've reached our time quota. If we have,
2998   // then we abort.
2999   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3000   if (elapsed_time_ms > _time_target_ms) {
3001     set_has_aborted();
3002     _has_timed_out = true;
3003     return;
3004   }
3005 
3006   // (5) Finally, we check whether there are enough completed SATB
3007   // buffers available for processing. If there are, we abort.
3008   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3009   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3010     // We do need to process SATB buffers; we'll abort and restart
3011     // the marking task to do so.
3012     set_has_aborted();
3013     return;
3014   }
3015 }
3016 
3017 void CMTask::recalculate_limits() {
3018   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3019   _words_scanned_limit      = _real_words_scanned_limit;
3020 
3021   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3022   _refs_reached_limit       = _real_refs_reached_limit;
3023 }
3024 
3025 void CMTask::decrease_limits() {
3026   // This is called when we believe that we're about to do an infrequent
3027   // operation which will increase the per-byte scanned cost (i.e. move
3028   // entries to/from the global stack). It decreases the scanning limits
3029   // so that the clock is called earlier.
3030 
3031   _words_scanned_limit = _real_words_scanned_limit -
3032     3 * words_scanned_period / 4;
3033   _refs_reached_limit  = _real_refs_reached_limit -
3034     3 * refs_reached_period / 4;
3035 }
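
     // A minimal numeric sketch of the adjustment above (the period value is
     // an assumed figure for illustration, not the real constant):
     //
     //   words_scanned_period      = 12 * 1024                       (assumed)
     //   _real_words_scanned_limit = _words_scanned + 12 * 1024
     //   _words_scanned_limit      = _real_words_scanned_limit - 9 * 1024
     //
     // i.e. only a quarter of the usual scanning budget remains before
     // reached_limit() triggers the next regular_clock_call().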
3036 
3037 void CMTask::move_entries_to_global_stack() {
3038   // local array where we'll store the entries that will be popped
3039   // from the local queue
3040   oop buffer[global_stack_transfer_size];
3041 
3042   int n = 0;
3043   oop obj;
3044   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3045     buffer[n] = obj;
3046     ++n;
3047   }
3048 
3049   if (n > 0) {
3050     // we popped at least one entry from the local queue
3051 
3052     if (!_cm->mark_stack_push(buffer, n)) {
3053       set_has_aborted();
3054     }
3055   }
3056 
3057   // this operation was quite expensive, so decrease the limits
3058   decrease_limits();
3059 }
3060 
3061 void CMTask::get_entries_from_global_stack() {
3062   // local array where we'll store the entries that will be popped
3063   // from the global stack.
3064   oop buffer[global_stack_transfer_size];
3065   int n;
3066   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3067   assert(n <= global_stack_transfer_size,
3068          "we should not pop more than the given limit");
3069   if (n > 0) {
3070     // yes, we did actually pop at least one entry
3071     for (int i = 0; i < n; ++i) {
3072       bool success = _task_queue->push(buffer[i]);
3073       // We only call this when the local queue is empty or under a
3074       // given target limit. So, we do not expect this push to fail.
3075       assert(success, "invariant");
3076     }
3077   }
3078 
3079   // this operation was quite expensive, so decrease the limits
3080   decrease_limits();
3081 }
3082 
3083 void CMTask::drain_local_queue(bool partially) {
3084   if (has_aborted()) return;
3085 
3086   // Decide what the target size is, depending on whether we're going to
3087   // drain it partially (so that other tasks can steal if they run out
3088   // of things to do) or totally (at the very end).
3089   size_t target_size;
3090   if (partially) {
3091     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3092   } else {
3093     target_size = 0;
3094   }
3095 
3096   if (_task_queue->size() > target_size) {
3097     oop obj;
3098     bool ret = _task_queue->pop_local(obj);
3099     while (ret) {
3100       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3101       assert(!_g1h->is_on_master_free_list(
3102                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3103 
3104       scan_object(obj);
3105 
3106       if (_task_queue->size() <= target_size || has_aborted()) {
3107         ret = false;
3108       } else {
3109         ret = _task_queue->pop_local(obj);
3110       }
3111     }
3112   }
3113 }
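
     // Illustrative target-size arithmetic for the partial drain above (the
     // queue capacity and flag value are assumptions, not the real defaults):
     // with max_elems() == 16384 and GCDrainStackTargetSize == 64, the target
     // is MIN2(16384 / 3, 64) == 64, i.e. the task keeps popping and scanning
     // entries until at most 64 remain available for other tasks to steal.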
3114 
3115 void CMTask::drain_global_stack(bool partially) {
3116   if (has_aborted()) return;
3117 
3118   // We have a policy to drain the local queue before we attempt to
3119   // drain the global stack.
3120   assert(partially || _task_queue->size() == 0, "invariant");
3121 
3122   // Decide what the target size is, depending on whether we're going to
3123   // drain it partially (so that other tasks can steal if they run out
3124   // of things to do) or totally (at the very end).  Notice that,
3125   // because we move entries from the global stack in chunks or
3126   // because another task might be doing the same, we might in fact
3127   // drop below the target. But, this is not a problem.
3128   size_t target_size;
3129   if (partially) {
3130     target_size = _cm->partial_mark_stack_size_target();
3131   } else {
3132     target_size = 0;
3133   }
3134 
3135   if (_cm->mark_stack_size() > target_size) {
3136     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3137       get_entries_from_global_stack();
3138       drain_local_queue(partially);
3139     }
3140   }
3141 }
3142 
3143 // The SATB queue code makes several assumptions about whether to call
3144 // the par or non-par versions of the methods, which is why some of the
3145 // code is replicated. We should really get rid of the single-threaded
3146 // version of the code to simplify things.
3147 void CMTask::drain_satb_buffers() {
3148   if (has_aborted()) return;
3149 
3150   // We set this so that the regular clock knows that we're in the
3151   // middle of draining buffers and doesn't set the abort flag when it
3152   // notices that SATB buffers are available for draining. It'd be
3153   // very counterproductive if it did that. :-)
3154   _draining_satb_buffers = true;
3155 
3156   CMSATBBufferClosure satb_cl(this, _g1h);
3157   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3158 
3159   // This keeps claiming and applying the closure to completed buffers
3160   // until we run out of buffers or we need to abort.
3161   while (!has_aborted() &&
3162          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3163     regular_clock_call();
3164   }
3165 
3166   _draining_satb_buffers = false;
3167 
3168   assert(has_aborted() ||
3169          concurrent() ||
3170          satb_mq_set.completed_buffers_num() == 0, "invariant");
3171 
3172   // Again, this was a potentially expensive operation, so decrease the
3173   // limits to get the regular clock call early.
3174   decrease_limits();
3175 }
3176 
3177 void CMTask::print_stats() {
3178   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3179                          _worker_id, _calls);
3180   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3181                          _elapsed_time_ms, _termination_time_ms);
3182   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3183                          _step_times_ms.num(), _step_times_ms.avg(),
3184                          _step_times_ms.sd());
3185   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3186                          _step_times_ms.maximum(), _step_times_ms.sum());
3187 }
3188 
3189 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3190   return _task_queues->steal(worker_id, hash_seed, obj);
3191 }
3192 
3193 /*****************************************************************************
3194 
3195     The do_marking_step(time_target_ms, ...) method is the building
3196     block of the parallel marking framework. It can be called in parallel
3197     with other invocations of do_marking_step() on different tasks
3198     (but only one per task, obviously) and concurrently with the
3199     mutator threads, or during remark, hence it eliminates the need
3200     for two versions of the code. When called during remark, it will
3201     pick up from where the task left off during the concurrent marking
3202     phase. Interestingly, tasks are also claimable during evacuation
3203     pauses, since do_marking_step() ensures that it aborts before
3204     it needs to yield.
3205 
3206     The data structures that it uses to do marking work are the
3207     following:
3208 
3209       (1) Marking Bitmap. If there are gray objects that appear only
3210       on the bitmap (this happens either when dealing with an overflow
3211       or when the initial marking phase has simply marked the roots
3212       and didn't push them on the stack), then tasks claim heap
3213       regions whose bitmap they then scan to find gray objects. A
3214       global finger indicates where the end of the last claimed region
3215       is. A local finger indicates how far into the region a task has
3216       scanned. The two fingers are used to determine how to gray an
3217       object (i.e. whether simply marking it is OK, as it will be
3218       visited by a task in the future, or whether it also needs to be
3219       pushed on a stack).
3220 
3221       (2) Local Queue. The local queue of the task, which the task can
3222       access reasonably efficiently. Other tasks can steal from
3223       it when they run out of work. Throughout the marking phase, a
3224       task attempts to keep its local queue short but not totally
3225       empty, so that entries are available for stealing by other
3226       tasks. Only when there is no more work will a task totally
3227       drain its local queue.
3228 
3229       (3) Global Mark Stack. This handles local queue overflow. During
3230       marking, entries are moved between it and the local queues only
3231       in batches, as access to it requires a mutex, and finer-grained
3232       interaction with it might cause contention. If it
3233       overflows, then the marking phase should restart and iterate
3234       over the bitmap to identify gray objects. Throughout the marking
3235       phase, tasks attempt to keep the global mark stack short but
3236       not totally empty, so that entries are available for
3237       popping by other tasks. Only when there is no more work will
3238       tasks totally drain the global mark stack.
3239 
3240       (4) SATB Buffer Queue. This is where completed SATB buffers are
3241       made available. Buffers are regularly removed from this queue
3242       and scanned for roots, so that the queue doesn't get too
3243       long. During remark, all completed buffers are processed, as
3244       well as the filled in parts of any uncompleted buffers.
3245 
3246     The do_marking_step() method tries to abort when the time target
3247     has been reached. There are a few other cases when the
3248     do_marking_step() method also aborts:
3249 
3250       (1) When the marking phase has been aborted (after a Full GC).
3251 
3252       (2) When a global overflow (on the global stack) has been
3253       triggered. Before the task aborts, it will actually sync up with
3254       the other tasks to ensure that all the marking data structures
3255       (local queues, stacks, fingers etc.)  are re-initialized so that
3256       when do_marking_step() completes, the marking phase can
3257       immediately restart.
3258 
3259       (3) When enough completed SATB buffers are available. The
3260       do_marking_step() method only tries to drain SATB buffers right
3261       at the beginning. So, if enough buffers are available, the
3262       marking step aborts and the SATB buffers are processed at
3263       the beginning of the next invocation.
3264 
3265       (4) To yield. When we have to yield, we abort and do the yield
3266       right at the end of do_marking_step(). This saves us a lot
3267       of hassle because, by yielding, we might allow a Full GC. If that
3268       happens, objects will be compacted underneath our feet, the
3269       heap might shrink, etc. We avoid having to check for all this by
3270       just aborting and doing the yield right at the end.
3271 
3272     From the above it follows that the do_marking_step() method should
3273     be called in a loop (or, otherwise, regularly) until it completes.
3274 
3275     If a marking step completes without its has_aborted() flag being
3276     true, it means it has completed the current marking phase (and
3277     also all other marking tasks have done so and have all synced up).
3278 
3279     A method called regular_clock_call() is invoked "regularly" (at
3280     sub-millisecond intervals) throughout marking. It is this clock
3281     method that checks all the abort conditions mentioned above and
3282     decides when the task should abort. A work-based scheme is used to
3283     trigger this clock method: it is called when the number of object
3284     words the marking phase has scanned or the number of references the
3285     marking phase has visited reaches a given limit. Additional
3286     invocations of the clock method have been planted in a few other
3287     strategic places too. The initial reason for the clock method was to
3288     avoid calling vtime too often, as it is quite expensive. So, once it
3289     was in place, it was natural to piggy-back all the other conditions
3290     on it too and not constantly check them throughout the code.
3291 
3292     If do_termination is true then do_marking_step will enter its
3293     termination protocol.
3294 
3295     The value of is_serial must be true when do_marking_step is being
3296     called serially (i.e. by the VMThread) and do_marking_step should
3297     skip any synchronization in the termination and overflow code.
3298     Examples include the serial remark code and the serial reference
3299     processing closures.
3300 
3301     The value of is_serial must be false when do_marking_step is
3302     being called by any of the worker threads in a work gang.
3303     Examples include the concurrent marking code (CMMarkingTask),
3304     the MT remark code, and the MT reference processing closures.
3305 
3306  *****************************************************************************/
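
     // Illustrative driver sketch (the names, the time slice and the loop
     // below are assumptions for explanation only; the real callers live in
     // the concurrent marking, remark and reference processing code). A
     // parallel worker would drive a claimed task roughly like this:
     //
     //   ConcurrentMark* cm = ...;        // the shared ConcurrentMark instance
     //   CMTask* task = ...;              // the task claimed by this worker
     //   double time_target_ms = 10.0;    // illustrative time slice
     //   do {
     //     task->do_marking_step(time_target_ms,
     //                           true  /* do_termination */,
     //                           false /* is_serial */);
     //     // If the task aborted in order to yield, the caller is
     //     // responsible for actually yielding (e.g. via the
     //     // SuspendibleThreadSet) before retrying.
     //   } while (task->has_aborted() && !cm->has_aborted());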
3307 
3308 void CMTask::do_marking_step(double time_target_ms,
3309                              bool do_termination,
3310                              bool is_serial) {
3311   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3312   assert(concurrent() == _cm->concurrent(), "they should be the same");
3313 
3314   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3315   assert(_task_queues != NULL, "invariant");
3316   assert(_task_queue != NULL, "invariant");
3317   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3318 
3319   assert(!_claimed,
3320          "only one thread should claim this task at any one time");
3321 
3322   // OK, this doesn't safeguard against all possible scenarios, as it is
3323   // possible for two threads to set the _claimed flag at the same
3324   // time. But it is only for debugging purposes anyway and it will
3325   // catch most problems.
3326   _claimed = true;
3327 
3328   _start_time_ms = os::elapsedVTime() * 1000.0;
3329 
3330   // If do_stealing is true then do_marking_step will attempt to
3331   // steal work from the other CMTasks. It only makes sense to
3332   // enable stealing when the termination protocol is enabled
3333   // and do_marking_step() is not being called serially.
3334   bool do_stealing = do_termination && !is_serial;
3335 
3336   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
3337   _time_target_ms = time_target_ms - diff_prediction_ms;
3338 
3339   // set up the variables that are used in the work-based scheme to
3340   // call the regular clock method
3341   _words_scanned = 0;
3342   _refs_reached  = 0;
3343   recalculate_limits();
3344 
3345   // clear all flags
3346   clear_has_aborted();
3347   _has_timed_out = false;
3348   _draining_satb_buffers = false;
3349 
3350   ++_calls;
3351 
3352   // Set up the bitmap and oop closures. Anything that uses them is
3353   // eventually called from this method, so it is OK to allocate these
3354   // on the stack.
3355   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3356   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3357   set_cm_oop_closure(&cm_oop_closure);
3358 
3359   if (_cm->has_overflown()) {
3360     // This can happen if the mark stack overflows during a GC pause
3361     // and this task, after a yield point, restarts. We have to abort
3362     // as we need to get into the overflow protocol which happens
3363     // right at the end of this task.
3364     set_has_aborted();
3365   }
3366 
3367   // First drain any available SATB buffers. After this, we will not
3368   // look at SATB buffers before the next invocation of this method.
3369   // If enough completed SATB buffers are queued up, the regular clock
3370   // will abort this task so that it restarts.
3371   drain_satb_buffers();
3372   // ...then partially drain the local queue and the global stack
3373   drain_local_queue(true);
3374   drain_global_stack(true);
3375 
3376   do {
3377     if (!has_aborted() && _curr_region != NULL) {
3378       // This means that we're already holding on to a region.
3379       assert(_finger != NULL, "if region is not NULL, then the finger "
3380              "should not be NULL either");
3381 
3382       // We might have restarted this task after an evacuation pause
3383       // which might have evacuated the region we're holding on to
3384       // underneath our feet. Let's read its limit again to make sure
3385       // that we do not iterate over a region of the heap that
3386       // contains garbage (update_region_limit() will also move
3387       // _finger to the start of the region if it is found empty).
3388       update_region_limit();
3389       // We will start from _finger not from the start of the region,
3390       // as we might be restarting this task after aborting half-way
3391       // through scanning this region. In this case, _finger points to
3392       // the address where we last found a marked object. If this is a
3393       // fresh region, _finger points to start().
3394       MemRegion mr = MemRegion(_finger, _region_limit);
3395 
3396       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
3397              "humongous regions should go around loop once only");
3398 
3399       // Some special cases:
3400       // If the memory region is empty, we can just give up the region.
3401       // If the current region is humongous then we only need to check
3402       // the bitmap for the bit associated with the start of the object,
3403       // scan the object if it's live, and give up the region.
3404       // Otherwise, let's iterate over the bitmap of the part of the region
3405       // that is left.
3406       // If the iteration is successful, give up the region.
3407       if (mr.is_empty()) {
3408         giveup_current_region();
3409         regular_clock_call();
3410       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
3411         if (_nextMarkBitMap->isMarked(mr.start())) {
3412           // The object is marked - apply the closure
3413           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
3414           bitmap_closure.do_bit(offset);
3415         }
3416         // Even if this task aborted while scanning the humongous object
3417         // we can (and should) give up the current region.
3418         giveup_current_region();
3419         regular_clock_call();
3420       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3421         giveup_current_region();
3422         regular_clock_call();
3423       } else {
3424         assert(has_aborted(), "currently the only way to do so");
3425         // The only way to abort the bitmap iteration is to return
3426         // false from the do_bit() method. However, inside the
3427         // do_bit() method we move the _finger to point to the
3428         // object currently being looked at. So, if we bail out, we
3429         // have definitely set _finger to something non-null.
3430         assert(_finger != NULL, "invariant");
3431 
3432         // Region iteration was actually aborted. So now _finger
3433         // points to the address of the object we last scanned. If we
3434         // leave it there, when we restart this task, we will rescan
3435         // the object. It is easy to avoid this. We move the finger by
3436         // enough to point to the next possible object header (the
3437         // bitmap knows by how much we need to move it as it knows its
3438         // granularity).
3439         assert(_finger < _region_limit, "invariant");
3440         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
3441         // Check if bitmap iteration was aborted while scanning the last object
3442         if (new_finger >= _region_limit) {
3443           giveup_current_region();
3444         } else {
3445           move_finger_to(new_finger);
3446         }
3447       }
3448     }
3449     // At this point we have either completed iterating over the
3450     // region we were holding on to, or we have aborted.
3451 
3452     // We then partially drain the local queue and the global stack.
3453     // (Do we really need this?)
3454     drain_local_queue(true);
3455     drain_global_stack(true);
3456 
3457     // Read the note on the claim_region() method on why it might
3458     // return NULL with potentially more regions available for
3459     // claiming and why we have to check out_of_regions() to determine
3460     // whether we're done or not.
3461     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3462       // We are going to try to claim a new region. We should have
3463       // given up on the previous one.
3464       // Separated the asserts so that we know which one fires.
3465       assert(_curr_region  == NULL, "invariant");
3466       assert(_finger       == NULL, "invariant");
3467       assert(_region_limit == NULL, "invariant");
3468       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3469       if (claimed_region != NULL) {
3470         // Yes, we managed to claim one
3471         setup_for_region(claimed_region);
3472         assert(_curr_region == claimed_region, "invariant");
3473       }
3474       // It is important to call the regular clock here. It might take
3475       // a while to claim a region if, for example, we hit a large
3476       // block of empty regions. So we need to call the regular clock
3477       // method once round the loop to make sure it's called
3478       // frequently enough.
3479       regular_clock_call();
3480     }
3481 
3482     if (!has_aborted() && _curr_region == NULL) {
3483       assert(_cm->out_of_regions(),
3484              "at this point we should be out of regions");
3485     }
3486   } while ( _curr_region != NULL && !has_aborted());
3487 
3488   if (!has_aborted()) {
3489     // We cannot check whether the global stack is empty, since other
3490     // tasks might be pushing objects to it concurrently.
3491     assert(_cm->out_of_regions(),
3492            "at this point we should be out of regions");
3493     // Try to reduce the number of available SATB buffers so that
3494     // remark has less work to do.
3495     drain_satb_buffers();
3496   }
3497 
3498   // Since we've done everything else, we can now totally drain the
3499   // local queue and global stack.
3500   drain_local_queue(false);
3501   drain_global_stack(false);
3502 
3503   // Attempt to steal work from other tasks' queues.
3504   if (do_stealing && !has_aborted()) {
3505     // We have not aborted. This means that we have finished all that
3506     // we could. Let's try to do some stealing...
3507 
3508     // We cannot check whether the global stack is empty, since other
3509     // tasks might be pushing objects to it concurrently.
3510     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3511            "only way to reach here");
3512     while (!has_aborted()) {
3513       oop obj;
3514       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
3515         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3516                "any stolen object should be marked");
3517         scan_object(obj);
3518 
3519         // And since we're towards the end, let's totally drain the
3520         // local queue and global stack.
3521         drain_local_queue(false);
3522         drain_global_stack(false);
3523       } else {
3524         break;
3525       }
3526     }
3527   }
3528 
3529   // If we are about to wrap up and go into termination, check if we
3530   // should raise the overflow flag.
3531   if (do_termination && !has_aborted()) {
3532     if (_cm->force_overflow()->should_force()) {
3533       _cm->set_has_overflown();
3534       regular_clock_call();
3535     }
3536   }
3537 
3538   // We still haven't aborted. Now, let's try to get into the
3539   // termination protocol.
3540   if (do_termination && !has_aborted()) {
3541     // We cannot check whether the global stack is empty, since other
3542     // tasks might be concurrently pushing objects on it.
3543     // Separated the asserts so that we know which one fires.
3544     assert(_cm->out_of_regions(), "only way to reach here");
3545     assert(_task_queue->size() == 0, "only way to reach here");
3546     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3547 
3548     // The CMTask class also extends the TerminatorTerminator class,
3549     // hence its should_exit_termination() method will also decide
3550     // whether to exit the termination protocol or not.
3551     bool finished = (is_serial ||
3552                      _cm->terminator()->offer_termination(this));
3553     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3554     _termination_time_ms +=
3555       termination_end_time_ms - _termination_start_time_ms;
3556 
3557     if (finished) {
3558       // We're all done.
3559 
3560       if (_worker_id == 0) {
3561         // let's allow task 0 to do this
3562         if (concurrent()) {
3563           assert(_cm->concurrent_marking_in_progress(), "invariant");
3564           // we need to set this to false before the next
3565           // safepoint. This way we ensure that the marking phase
3566           // doesn't observe any more heap expansions.
3567           _cm->clear_concurrent_marking_in_progress();
3568         }
3569       }
3570 
3571       // We can now guarantee that the global stack is empty, since
3572       // all other tasks have finished. We separated the guarantees so
3573       // that, if a condition is false, we can immediately find out
3574       // which one.
3575       guarantee(_cm->out_of_regions(), "only way to reach here");
3576       guarantee(_cm->mark_stack_empty(), "only way to reach here");
3577       guarantee(_task_queue->size() == 0, "only way to reach here");
3578       guarantee(!_cm->has_overflown(), "only way to reach here");
3579       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3580     } else {
3581       // Apparently there's more work to do. Let's abort this task. The
3582       // caller will restart it and we can hopefully find more things to do.
3583       set_has_aborted();
3584     }
3585   }
3586 
3587   // Mainly for debugging purposes, to make sure that a pointer to the
3588   // closure which was stack-allocated in this frame doesn't
3589   // escape it by accident.
3590   set_cm_oop_closure(NULL);
3591   double end_time_ms = os::elapsedVTime() * 1000.0;
3592   double elapsed_time_ms = end_time_ms - _start_time_ms;
3593   // Update the step history.
3594   _step_times_ms.add(elapsed_time_ms);
3595 
3596   if (has_aborted()) {
3597     // The task was aborted for some reason.
3598     if (_has_timed_out) {
3599       double diff_ms = elapsed_time_ms - _time_target_ms;
3600       // Keep statistics of how well we did with respect to hitting
3601       // our target only if we actually timed out (if we aborted for
3602       // other reasons, then the results might get skewed).
3603       _marking_step_diffs_ms.add(diff_ms);
3604     }
3605 
3606     if (_cm->has_overflown()) {
3607       // This is the interesting one. We aborted because a global
3608       // overflow was raised. This means we have to restart the
3609       // marking phase and start iterating over regions. However, in
3610       // order to do this we have to make sure that all tasks stop
3611       // what they are doing and re-initialize in a safe manner. We
3612       // will achieve this with the use of two barrier sync points.
3613 
3614       if (!is_serial) {
3615         // We only need to enter the sync barrier if being called
3616         // from a parallel context
3617         _cm->enter_first_sync_barrier(_worker_id);
3618 
3619         // When we exit this sync barrier we know that all tasks have
3620         // stopped doing marking work. So, it's now safe to
3621         // re-initialize our data structures. At the end of this method,
3622         // task 0 will clear the global data structures.
3623       }
3624 
3625       // We clear the local state of this task...
3626       clear_region_fields();
3627 
3628       if (!is_serial) {
3629         // ...and enter the second barrier.
3630         _cm->enter_second_sync_barrier(_worker_id);
3631       }
3632       // At this point, if we're in the concurrent phase of
3633       // marking, everything has been re-initialized and we're
3634       // ready to restart.
3635     }
3636   }
3637 
3638   _claimed = false;
3639 }
3640 
3641 CMTask::CMTask(uint worker_id,
3642                ConcurrentMark* cm,
3643                size_t* marked_bytes,
3644                BitMap* card_bm,
3645                CMTaskQueue* task_queue,
3646                CMTaskQueueSet* task_queues)
3647   : _g1h(G1CollectedHeap::heap()),
3648     _worker_id(worker_id), _cm(cm),
3649     _claimed(false),
3650     _nextMarkBitMap(NULL), _hash_seed(17),
3651     _task_queue(task_queue),
3652     _task_queues(task_queues),
3653     _cm_oop_closure(NULL),
3654     _marked_bytes_array(marked_bytes),
3655     _card_bm(card_bm) {
3656   guarantee(task_queue != NULL, "invariant");
3657   guarantee(task_queues != NULL, "invariant");
3658 
3659   _marking_step_diffs_ms.add(0.5);
3660 }
3661 
3662 // These are formatting macros that are used below to ensure
3663 // consistent formatting. The *_H_* versions are used to format the
3664 // header for a particular value and should be kept consistent with
3665 // the corresponding value macro. Also note that most of the macros
3666 // add the necessary white space (as a prefix), which makes them a bit
3667 // easier to compose; see the illustrative pairing after the macro definitions.
3668 
3669 // All the output lines are prefixed with this string to be able to
3670 // identify them easily in a large log file.
3671 #define G1PPRL_LINE_PREFIX            "###"
3672 
3673 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
3674 #ifdef _LP64
3675 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
3676 #else // _LP64
3677 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
3678 #endif // _LP64
3679 
3680 // For per-region info
3681 #define G1PPRL_TYPE_FORMAT            "   %-4s"
3682 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
3683 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
3684 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
3685 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
3686 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
3687 
3688 // For summary info
3689 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
3690 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
3691 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
3692 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
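
     // Illustrative pairing (an assumed snippet mirroring the constructor
     // below): a *_H_* header macro and its value macro are meant to be used
     // on matching columns, e.g.
     //
     //   _out->print_cr(G1PPRL_LINE_PREFIX G1PPRL_BYTE_H_FORMAT, "used");
     //   _out->print_cr(G1PPRL_LINE_PREFIX G1PPRL_BYTE_FORMAT,   used_bytes);
     //
     // so that the right-aligned "used" header occupies the same 9-character
     // column as the SIZE_FORMAT_W(9) value printed beneath it.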
3693 
3694 G1PrintRegionLivenessInfoClosure::
3695 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
3696   : _out(out),
3697     _total_used_bytes(0), _total_capacity_bytes(0),
3698     _total_prev_live_bytes(0), _total_next_live_bytes(0),
3699     _hum_used_bytes(0), _hum_capacity_bytes(0),
3700     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
3701     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
3702   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3703   MemRegion g1_reserved = g1h->g1_reserved();
3704   double now = os::elapsedTime();
3705 
3706   // Print the header of the output.
3707   _out->cr();
3708   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3709   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
3710                  G1PPRL_SUM_ADDR_FORMAT("reserved")
3711                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
3712                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
3713                  HeapRegion::GrainBytes);
3714   _out->print_cr(G1PPRL_LINE_PREFIX);
3715   _out->print_cr(G1PPRL_LINE_PREFIX
3716                 G1PPRL_TYPE_H_FORMAT
3717                 G1PPRL_ADDR_BASE_H_FORMAT
3718                 G1PPRL_BYTE_H_FORMAT
3719                 G1PPRL_BYTE_H_FORMAT
3720                 G1PPRL_BYTE_H_FORMAT
3721                 G1PPRL_DOUBLE_H_FORMAT
3722                 G1PPRL_BYTE_H_FORMAT
3723                 G1PPRL_BYTE_H_FORMAT,
3724                 "type", "address-range",
3725                 "used", "prev-live", "next-live", "gc-eff",
3726                 "remset", "code-roots");
3727   _out->print_cr(G1PPRL_LINE_PREFIX
3728                 G1PPRL_TYPE_H_FORMAT
3729                 G1PPRL_ADDR_BASE_H_FORMAT
3730                 G1PPRL_BYTE_H_FORMAT
3731                 G1PPRL_BYTE_H_FORMAT
3732                 G1PPRL_BYTE_H_FORMAT
3733                 G1PPRL_DOUBLE_H_FORMAT
3734                 G1PPRL_BYTE_H_FORMAT
3735                 G1PPRL_BYTE_H_FORMAT,
3736                 "", "",
3737                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
3738                 "(bytes)", "(bytes)");
3739 }
3740 
3741 // It takes as a parameter a pointer to one of the _hum_* fields,
3742 // deduces the corresponding value for a region in a humongous region
3743 // series (either the region size, or what's left if the _hum_* field
3744 // is < the region size), and updates the _hum_* field accordingly.
3745 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
3746   size_t bytes = 0;
3747   // The > 0 check is to deal with the prev and next live bytes which
3748   // could be 0.
3749   if (*hum_bytes > 0) {
3750     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
3751     *hum_bytes -= bytes;
3752   }
3753   return bytes;
3754 }
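
     // Worked example (the region size is assumed to be 1M for illustration):
     // for a humongous series with _hum_used_bytes == 2560K, successive calls
     // return 1024K, 1024K and 512K, leaving the field at zero once the whole
     // series has been attributed to its regions.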
3755 
3756 // It deduces the values for a region in a humongous region series
3757 // from the _hum_* fields and updates those accordingly. It assumes
3758 // that the _hum_* fields have already been set up from the "starts
3759 // humongous" region and that we visit the regions in address order.
3760 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
3761                                                      size_t* capacity_bytes,
3762                                                      size_t* prev_live_bytes,
3763                                                      size_t* next_live_bytes) {
3764   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
3765   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
3766   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
3767   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
3768   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
3769 }
3770 
3771 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
3772   const char* type       = r->get_type_str();
3773   HeapWord* bottom       = r->bottom();
3774   HeapWord* end          = r->end();
3775   size_t capacity_bytes  = r->capacity();
3776   size_t used_bytes      = r->used();
3777   size_t prev_live_bytes = r->live_bytes();
3778   size_t next_live_bytes = r->next_live_bytes();
3779   double gc_eff          = r->gc_efficiency();
3780   size_t remset_bytes    = r->rem_set()->mem_size();
3781   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
3782 
3783   if (r->is_starts_humongous()) {
3784     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
3785            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
3786            "they should have been zeroed after the last time we used them");
3787     // Set up the _hum_* fields.
3788     _hum_capacity_bytes  = capacity_bytes;
3789     _hum_used_bytes      = used_bytes;
3790     _hum_prev_live_bytes = prev_live_bytes;
3791     _hum_next_live_bytes = next_live_bytes;
3792     get_hum_bytes(&used_bytes, &capacity_bytes,
3793                   &prev_live_bytes, &next_live_bytes);
3794     end = bottom + HeapRegion::GrainWords;
3795   } else if (r->is_continues_humongous()) {
3796     get_hum_bytes(&used_bytes, &capacity_bytes,
3797                   &prev_live_bytes, &next_live_bytes);
3798     assert(end == bottom + HeapRegion::GrainWords, "invariant");
3799   }
3800 
3801   _total_used_bytes      += used_bytes;
3802   _total_capacity_bytes  += capacity_bytes;
3803   _total_prev_live_bytes += prev_live_bytes;
3804   _total_next_live_bytes += next_live_bytes;
3805   _total_remset_bytes    += remset_bytes;
3806   _total_strong_code_roots_bytes += strong_code_roots_bytes;
3807 
3808   // Print a line for this particular region.
3809   _out->print_cr(G1PPRL_LINE_PREFIX
3810                  G1PPRL_TYPE_FORMAT
3811                  G1PPRL_ADDR_BASE_FORMAT
3812                  G1PPRL_BYTE_FORMAT
3813                  G1PPRL_BYTE_FORMAT
3814                  G1PPRL_BYTE_FORMAT
3815                  G1PPRL_DOUBLE_FORMAT
3816                  G1PPRL_BYTE_FORMAT
3817                  G1PPRL_BYTE_FORMAT,
3818                  type, p2i(bottom), p2i(end),
3819                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
3820                  remset_bytes, strong_code_roots_bytes);
3821 
3822   return false;
3823 }
3824 
3825 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3826   // add static memory usages to remembered set sizes
3827   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
3828   // Print the footer of the output.
3829   _out->print_cr(G1PPRL_LINE_PREFIX);
3830   _out->print_cr(G1PPRL_LINE_PREFIX
3831                  " SUMMARY"
3832                  G1PPRL_SUM_MB_FORMAT("capacity")
3833                  G1PPRL_SUM_MB_PERC_FORMAT("used")
3834                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
3835                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
3836                  G1PPRL_SUM_MB_FORMAT("remset")
3837                  G1PPRL_SUM_MB_FORMAT("code-roots"),
3838                  bytes_to_mb(_total_capacity_bytes),
3839                  bytes_to_mb(_total_used_bytes),
3840                  perc(_total_used_bytes, _total_capacity_bytes),
3841                  bytes_to_mb(_total_prev_live_bytes),
3842                  perc(_total_prev_live_bytes, _total_capacity_bytes),
3843                  bytes_to_mb(_total_next_live_bytes),
3844                  perc(_total_next_live_bytes, _total_capacity_bytes),
3845                  bytes_to_mb(_total_remset_bytes),
3846                  bytes_to_mb(_total_strong_code_roots_bytes));
3847   _out->cr();
3848 }