1 /*
   2  * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMark.inline.hpp"
  30 #include "gc/g1/concurrentMarkThread.inline.hpp"
  31 #include "gc/g1/g1CollectedHeap.inline.hpp"
  32 #include "gc/g1/g1CollectorPolicy.hpp"
  33 #include "gc/g1/g1CollectorState.hpp"
  34 #include "gc/g1/g1ErgoVerbose.hpp"
  35 #include "gc/g1/g1Log.hpp"
  36 #include "gc/g1/g1OopClosures.inline.hpp"
  37 #include "gc/g1/g1RemSet.hpp"
  38 #include "gc/g1/g1StringDedup.hpp"
  39 #include "gc/g1/heapRegion.inline.hpp"
  40 #include "gc/g1/heapRegionManager.inline.hpp"
  41 #include "gc/g1/heapRegionRemSet.hpp"
  42 #include "gc/g1/heapRegionSet.inline.hpp"
  43 #include "gc/g1/suspendibleThreadSet.hpp"
  44 #include "gc/shared/gcId.hpp"
  45 #include "gc/shared/gcTimer.hpp"
  46 #include "gc/shared/gcTrace.hpp"
  47 #include "gc/shared/gcTraceTime.hpp"
  48 #include "gc/shared/genOopClosures.inline.hpp"
  49 #include "gc/shared/referencePolicy.hpp"
  50 #include "gc/shared/strongRootsScope.hpp"
  51 #include "gc/shared/taskqueue.inline.hpp"
  52 #include "gc/shared/vmGCOperations.hpp"
  53 #include "memory/allocation.hpp"
  54 #include "memory/resourceArea.hpp"
  55 #include "oops/oop.inline.hpp"
  56 #include "runtime/atomic.inline.hpp"
  57 #include "runtime/handles.inline.hpp"
  58 #include "runtime/java.hpp"
  59 #include "runtime/prefetch.inline.hpp"
  60 #include "services/memTracker.hpp"
  61 
  62 // Concurrent marking bit map wrapper
  63 
  64 CMBitMapRO::CMBitMapRO(int shifter) :
  65   _bm(),
  66   _shifter(shifter) {
  67   _bmStartWord = 0;
  68   _bmWordSize = 0;
  69 }
  70 
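// Return the address of the first marked word at or after addr (rounded up
// to a possible object boundary), or limit if no marked word is found before
// it. A NULL limit means the end of the heap range covered by this bitmap.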
  71 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  72                                                const HeapWord* limit) const {
  73   // First we must round addr *up* to a possible object boundary.
  74   addr = (HeapWord*)align_size_up((intptr_t)addr,
  75                                   HeapWordSize << _shifter);
  76   size_t addrOffset = heapWordToOffset(addr);
  77   if (limit == NULL) {
  78     limit = _bmStartWord + _bmWordSize;
  79   }
  80   size_t limitOffset = heapWordToOffset(limit);
  81   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  82   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  83   assert(nextAddr >= addr, "get_next_one postcondition");
  84   assert(nextAddr == limit || isMarked(nextAddr),
  85          "get_next_one postcondition");
  86   return nextAddr;
  87 }
  88 
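// Return the address of the first unmarked word at or after addr, or limit
// if every word up to it is marked. A NULL limit means the end of the heap
// range covered by this bitmap.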
  89 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
  90                                                  const HeapWord* limit) const {
  91   size_t addrOffset = heapWordToOffset(addr);
  92   if (limit == NULL) {
  93     limit = _bmStartWord + _bmWordSize;
  94   }
  95   size_t limitOffset = heapWordToOffset(limit);
  96   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  97   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
 101   return nextAddr;
 102 }
 103 
 104 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 105   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 106   return (int) (diff >> _shifter);
 107 }
 108 
 109 #ifndef PRODUCT
 110 bool CMBitMapRO::covers(MemRegion heap_rs) const {
 111   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 112   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
 113          "size inconsistency");
 114   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
 115          _bmWordSize  == heap_rs.word_size();
 116 }
 117 #endif
 118 
 119 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
 120   _bm.print_on_error(st, prefix);
 121 }
 122 
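// Sizing example (assuming the default 8-byte object alignment): each bitmap
// bit covers one 8-byte aligned heap word, so a bitmap byte covers
// mark_distance() = 8 * 8 = 64 heap bytes and compute_size() returns roughly
// heap_size / 64 bytes, rounded up to the reservation granularity.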
 123 size_t CMBitMap::compute_size(size_t heap_size) {
 124   return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
 125 }
 126 
 127 size_t CMBitMap::mark_distance() {
 128   return MinObjAlignmentInBytes * BitsPerByte;
 129 }
 130 
 131 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 132   _bmStartWord = heap.start();
 133   _bmWordSize = heap.word_size();
 134 
 135   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
 136   _bm.set_size(_bmWordSize >> _shifter);
 137 
 138   storage->set_mapping_changed_listener(&_listener);
 139 }
 140 
 141 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 142   if (zero_filled) {
 143     return;
 144   }
 145   // We need to clear the bitmap on commit, removing any existing information.
 146   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 147   _bm->clearRange(mr);
 148 }
 149 
 150 // Closure used for clearing the given mark bitmap.
 151 class ClearBitmapHRClosure : public HeapRegionClosure {
 152  private:
 153   ConcurrentMark* _cm;
 154   CMBitMap* _bitmap;
  bool _may_yield;      // The closure may yield during iteration. If it yields and marking has been aborted, the iteration is aborted as well.
 156  public:
 157   ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
 158     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
 159   }
 160 
 161   virtual bool doHeapRegion(HeapRegion* r) {
 162     size_t const chunk_size_in_words = M / HeapWordSize;
 163 
 164     HeapWord* cur = r->bottom();
 165     HeapWord* const end = r->end();
 166 
 167     while (cur < end) {
 168       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 169       _bitmap->clearRange(mr);
 170 
 171       cur += chunk_size_in_words;
 172 
 173       // Abort iteration if after yielding the marking has been aborted.
 174       if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
 175         return true;
 176       }
      // Repeat the asserts from before the start of the closure. We will do them
      // as asserts here to minimize their overhead in product builds. However, we
      // will have them as guarantees at the beginning / end of the bitmap
      // clearing to get some checking in product builds.
 181       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
 182       assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
 183     }
 184 
 185     return false;
 186   }
 187 };
 188 
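// Parallel task that applies the given ClearBitmapHRClosure to the heap
// regions, using a HeapRegionClaimer to split the work among workers. If the
// task is suspendible the workers join the suspendible thread set so that
// the iteration can yield to safepoints.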
 189 class ParClearNextMarkBitmapTask : public AbstractGangTask {
 190   ClearBitmapHRClosure* _cl;
 191   HeapRegionClaimer     _hrclaimer;
 192   bool                  _suspendible; // If the task is suspendible, workers must join the STS.
 193 
 194 public:
  ParClearNextMarkBitmapTask(ClearBitmapHRClosure* cl, uint n_workers, bool suspendible) :
      AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}
 197 
 198   void work(uint worker_id) {
 199     SuspendibleThreadSetJoiner sts_join(_suspendible);
 200     G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
 201   }
 202 };
 203 
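// Clear the whole bitmap using the parallel workers. The closure is created
// with may_yield == false, so the iteration is expected to run to completion.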
 204 void CMBitMap::clearAll() {
 205   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 206   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
 207   uint n_workers = g1h->workers()->active_workers();
 208   ParClearNextMarkBitmapTask task(&cl, n_workers, false);
 209   g1h->workers()->run_task(&task);
 210   guarantee(cl.complete(), "Must have completed iteration.");
 212 }
 213 
 214 void CMBitMap::markRange(MemRegion mr) {
  // Clip the range to the part of the heap covered by this bitmap.
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 216   assert(!mr.is_empty(), "unexpected empty region");
 217   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 218           ((HeapWord *) mr.end())),
 219          "markRange memory region end is not card aligned");
 220   // convert address range into offset range
 221   _bm.at_put_range(heapWordToOffset(mr.start()),
 222                    heapWordToOffset(mr.end()), true);
 223 }
 224 
 225 void CMBitMap::clearRange(MemRegion mr) {
  // Clip the range to the part of the heap covered by this bitmap.
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 227   assert(!mr.is_empty(), "unexpected empty region");
 228   // convert address range into offset range
 229   _bm.at_put_range(heapWordToOffset(mr.start()),
 230                    heapWordToOffset(mr.end()), false);
 231 }
 232 
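// Find the first marked range at or after addr (clipped to end_addr), clear
// it in the bitmap and return it. The returned region is empty if no marked
// word was found below end_addr.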
 233 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 234                                             HeapWord* end_addr) {
 235   HeapWord* start = getNextMarkedWordAddress(addr);
 236   start = MIN2(start, end_addr);
 237   HeapWord* end   = getNextUnmarkedWordAddress(start);
 238   end = MIN2(end, end_addr);
 239   assert(start <= end, "Consistency check");
 240   MemRegion mr(start, end);
 241   if (!mr.is_empty()) {
 242     clearRange(mr);
 243   }
 244   return mr;
 245 }
 246 
 247 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 248   _base(NULL), _cm(cm)
 249 {}
 250 
 251 bool CMMarkStack::allocate(size_t capacity) {
 252   // allocate a stack of the requisite depth
 253   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 254   if (!rs.is_reserved()) {
 255     warning("ConcurrentMark MarkStack allocation failure");
 256     return false;
 257   }
 258   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 259   if (!_virtual_space.initialize(rs, rs.size())) {
 260     warning("ConcurrentMark MarkStack backing store failure");
 261     // Release the virtual memory reserved for the marking stack
 262     rs.release();
 263     return false;
 264   }
 265   assert(_virtual_space.committed_size() == rs.size(),
 266          "Didn't reserve backing store for all of ConcurrentMark stack?");
 267   _base = (oop*) _virtual_space.low();
 268   setEmpty();
 269   _capacity = (jint) capacity;
 270   _saved_index = -1;
 271   _should_expand = false;
 272   return true;
 273 }
 274 
 275 void CMMarkStack::expand() {
  // Called, during remark, if we've overflowed the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
 278   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 279   // Clear expansion flag
 280   _should_expand = false;
 281   if (_capacity == (jint) MarkStackSizeMax) {
 282     if (PrintGCDetails && Verbose) {
 283       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
 284     }
 285     return;
 286   }
 287   // Double capacity if possible
 288   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up the existing stack until we have managed to
  // get the doubled capacity that we desire.
 291   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 292                                                            sizeof(oop)));
 293   if (rs.is_reserved()) {
 294     // Release the backing store associated with old stack
 295     _virtual_space.release();
 296     // Reinitialize virtual space for new stack
 297     if (!_virtual_space.initialize(rs, rs.size())) {
 298       fatal("Not enough swap for expanded marking stack capacity");
 299     }
 300     _base = (oop*)(_virtual_space.low());
 301     _index = 0;
 302     _capacity = new_capacity;
 303   } else {
 304     if (PrintGCDetails && Verbose) {
      // Failed to double the capacity; continue with the existing stack.
 306       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
 307                           SIZE_FORMAT "K to " SIZE_FORMAT "K",
 308                           _capacity / K, new_capacity / K);
 309     }
 310   }
 311 }
 312 
 313 void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
 317   _should_expand = _cm->has_overflown();
 318 }
 319 
 320 CMMarkStack::~CMMarkStack() {
 321   if (_base != NULL) {
 322     _base = NULL;
 323     _virtual_space.release();
 324   }
 325 }
 326 
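// Push n entries from ptr_arr onto the stack under ParGCRareEvent_lock. If
// there is not enough room, set the overflow flag and push nothing.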
 327 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 328   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 329   jint start = _index;
 330   jint next_index = start + n;
 331   if (next_index > _capacity) {
 332     _overflow = true;
 333     return;
 334   }
  // Otherwise claim the slots and copy in the entries.
 336   _index = next_index;
 337   for (int i = 0; i < n; i++) {
 338     int ind = start + i;
 339     assert(ind < _capacity, "By overflow test above.");
 340     _base[ind] = ptr_arr[i];
 341   }
 342 }
 343 
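// Pop up to max entries into ptr_arr under ParGCRareEvent_lock. Returns
// false (and sets *n to 0) if the stack is empty.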
 344 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 345   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 346   jint index = _index;
 347   if (index == 0) {
 348     *n = 0;
 349     return false;
 350   } else {
 351     int k = MIN2(max, index);
 352     jint  new_ind = index - k;
 353     for (int j = 0; j < k; j++) {
 354       ptr_arr[j] = _base[new_ind + j];
 355     }
 356     _index = new_ind;
 357     *n = k;
 358     return true;
 359   }
 360 }
 361 
 362 void CMMarkStack::note_start_of_gc() {
 363   assert(_saved_index == -1,
 364          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 365   _saved_index = _index;
 366 }
 367 
 368 void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue, so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
 374   guarantee(_saved_index == _index,
 375             "saved index: %d index: %d", _saved_index, _index);
 376   _saved_index = -1;
 377 }
 378 
 379 CMRootRegions::CMRootRegions() :
 380   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 381   _should_abort(false),  _next_survivor(NULL) { }
 382 
 383 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 384   _young_list = g1h->young_list();
 385   _cm = cm;
 386 }
 387 
 388 void CMRootRegions::prepare_for_scan() {
 389   assert(!scan_in_progress(), "pre-condition");
 390 
 391   // Currently, only survivors can be root regions.
 392   assert(_next_survivor == NULL, "pre-condition");
 393   _next_survivor = _young_list->first_survivor_region();
 394   _scan_in_progress = (_next_survivor != NULL);
 395   _should_abort = false;
 396 }
 397 
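// Claim the next root region to scan (currently always a survivor region),
// or return NULL if all root regions have been claimed or the scan should
// abort.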
 398 HeapRegion* CMRootRegions::claim_next() {
 399   if (_should_abort) {
 400     // If someone has set the should_abort flag, we return NULL to
 401     // force the caller to bail out of their loop.
 402     return NULL;
 403   }
 404 
 405   // Currently, only survivors can be root regions.
 406   HeapRegion* res = _next_survivor;
 407   if (res != NULL) {
 408     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 409     // Read it again in case it changed while we were waiting for the lock.
 410     res = _next_survivor;
 411     if (res != NULL) {
 412       if (res == _young_list->last_survivor_region()) {
 413         // We just claimed the last survivor so store NULL to indicate
 414         // that we're done.
 415         _next_survivor = NULL;
 416       } else {
 417         _next_survivor = res->get_next_young_region();
 418       }
 419     } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock, so there is nothing else to do.
 422     }
 423   }
 424   assert(res == NULL || res->is_survivor(), "post-condition");
 425 
 426   return res;
 427 }
 428 
 429 void CMRootRegions::scan_finished() {
 430   assert(scan_in_progress(), "pre-condition");
 431 
 432   // Currently, only survivors can be root regions.
 433   if (!_should_abort) {
 434     assert(_next_survivor == NULL, "we should have claimed all survivors");
 435   }
 436   _next_survivor = NULL;
 437 
 438   {
 439     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 440     _scan_in_progress = false;
 441     RootRegionScan_lock->notify_all();
 442   }
 443 }
 444 
 445 bool CMRootRegions::wait_until_scan_finished() {
 446   if (!scan_in_progress()) return false;
 447 
 448   {
 449     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 450     while (scan_in_progress()) {
 451       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 452     }
 453   }
 454   return true;
 455 }
 456 
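// Returns roughly a quarter of the given number of parallel GC threads, with
// a minimum of one. For example, ParallelGCThreads == 8 gives (8 + 2) / 4 = 2
// marking threads, while 1 to 5 parallel threads give 1.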
 457 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 458   return MAX2((n_par_threads + 2) / 4, 1U);
 459 }
 460 
 461 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 462   _g1h(g1h),
 463   _markBitMap1(),
 464   _markBitMap2(),
 465   _parallel_marking_threads(0),
 466   _max_parallel_marking_threads(0),
 467   _sleep_factor(0.0),
 468   _marking_task_overhead(1.0),
 469   _cleanup_sleep_factor(0.0),
 470   _cleanup_task_overhead(1.0),
 471   _cleanup_list("Cleanup List"),
 472   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 473   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
 474             CardTableModRefBS::card_shift,
 475             false /* in_resource_area*/),
 476 
 477   _prevMarkBitMap(&_markBitMap1),
 478   _nextMarkBitMap(&_markBitMap2),
 479 
 480   _markStack(this),
 481   // _finger set in set_non_marking_state
 482 
 483   _max_worker_id(ParallelGCThreads),
 484   // _active_tasks set in set_non_marking_state
 485   // _tasks set inside the constructor
 486   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 487   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 488 
 489   _has_overflown(false),
 490   _concurrent(false),
 491   _has_aborted(false),
 492   _restart_for_overflow(false),
 493   _concurrent_marking_in_progress(false),
 494 
 495   // _verbose_level set below
 496 
 497   _init_times(),
 498   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 499   _cleanup_times(),
 500   _total_counting_time(0.0),
 501   _total_rs_scrub_time(0.0),
 502 
 503   _parallel_workers(NULL),
 504 
 505   _count_card_bitmaps(NULL),
 506   _count_marked_bytes(NULL),
 507   _completed_initialization(false) {
 508 
 509   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 510   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 511 
 512   // Create & start a ConcurrentMark thread.
 513   _cmThread = new ConcurrentMarkThread(this);
 514   assert(cmThread() != NULL, "CM Thread should have been created");
 515   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 516   if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 518   }
 519 
 520   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 521   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 522   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 523 
 524   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 525   satb_qs.set_buffer_size(G1SATBBufferSize);
 526 
 527   _root_regions.init(_g1h, this);
 528 
 529   if (ConcGCThreads > ParallelGCThreads) {
 530     warning("Can't have more ConcGCThreads (%u) "
 531             "than ParallelGCThreads (%u).",
 532             ConcGCThreads, ParallelGCThreads);
 533     return;
 534   }
 535   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 536     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 537     // if both are set
 538     _sleep_factor             = 0.0;
 539     _marking_task_overhead    = 1.0;
 540   } else if (G1MarkingOverheadPercent > 0) {
 541     // We will calculate the number of parallel marking threads based
 542     // on a target overhead with respect to the soft real-time goal
 543     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 544     double overall_cm_overhead =
 545       (double) MaxGCPauseMillis * marking_overhead /
 546       (double) GCPauseIntervalMillis;
 547     double cpu_ratio = 1.0 / (double) os::processor_count();
 548     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 549     double marking_task_overhead =
 550       overall_cm_overhead / marking_thread_num *
 551                                               (double) os::processor_count();
 552     double sleep_factor =
 553                        (1.0 - marking_task_overhead) / marking_task_overhead;
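    // Worked example (hypothetical settings): with G1MarkingOverheadPercent == 10,
    // MaxGCPauseMillis == 200, GCPauseIntervalMillis == 1000 and 8 processors,
    // overall_cm_overhead = 200 * 0.10 / 1000 = 0.02 and cpu_ratio = 0.125, so
    // marking_thread_num = ceil(0.02 / 0.125) = 1, marking_task_overhead =
    // 0.02 / 1 * 8 = 0.16 and sleep_factor = (1 - 0.16) / 0.16 = 5.25.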
 554 
 555     FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
 556     _sleep_factor             = sleep_factor;
 557     _marking_task_overhead    = marking_task_overhead;
 558   } else {
 559     // Calculate the number of parallel marking threads by scaling
 560     // the number of parallel GC threads.
 561     uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
 562     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 563     _sleep_factor             = 0.0;
 564     _marking_task_overhead    = 1.0;
 565   }
 566 
 567   assert(ConcGCThreads > 0, "Should have been set");
 568   _parallel_marking_threads = ConcGCThreads;
 569   _max_parallel_marking_threads = _parallel_marking_threads;
 570 
 571   if (parallel_marking_threads() > 1) {
 572     _cleanup_task_overhead = 1.0;
 573   } else {
 574     _cleanup_task_overhead = marking_task_overhead();
 575   }
 576   _cleanup_sleep_factor =
 577                    (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 578 
 579 #if 0
 580   gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 581   gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 582   gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 583   gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 584   gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 585 #endif
 586 
 587   _parallel_workers = new WorkGang("G1 Marker",
 588        _max_parallel_marking_threads, false, true);
 589   if (_parallel_workers == NULL) {
 590     vm_exit_during_initialization("Failed necessary allocation.");
 591   } else {
 592     _parallel_workers->initialize_workers();
 593   }
 594 
 595   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 596     size_t mark_stack_size =
 597       MIN2(MarkStackSizeMax,
 598           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 599     // Verify that the calculated value for MarkStackSize is in range.
 600     // It would be nice to use the private utility routine from Arguments.
 601     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 602       warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 603               "must be between 1 and " SIZE_FORMAT,
 604               mark_stack_size, MarkStackSizeMax);
 605       return;
 606     }
 607     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 608   } else {
 609     // Verify MarkStackSize is in range.
 610     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 611       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 612         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 613           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 614                   "must be between 1 and " SIZE_FORMAT,
 615                   MarkStackSize, MarkStackSizeMax);
 616           return;
 617         }
 618       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 619         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 620           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 621                   " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 622                   MarkStackSize, MarkStackSizeMax);
 623           return;
 624         }
 625       }
 626     }
 627   }
 628 
 629   if (!_markStack.allocate(MarkStackSize)) {
 630     warning("Failed to allocate CM marking stack");
 631     return;
 632   }
 633 
 634   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 635   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 636 
 637   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 638   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 639 
 640   BitMap::idx_t card_bm_size = _card_bm.size();
 641 
 642   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 643   _active_tasks = _max_worker_id;
 644 
 645   uint max_regions = _g1h->max_regions();
 646   for (uint i = 0; i < _max_worker_id; ++i) {
 647     CMTaskQueue* task_queue = new CMTaskQueue();
 648     task_queue->initialize();
 649     _task_queues->register_queue(i, task_queue);
 650 
 651     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 652     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 653 
 654     _tasks[i] = new CMTask(i, this,
 655                            _count_marked_bytes[i],
 656                            &_count_card_bitmaps[i],
 657                            task_queue, _task_queues);
 658 
 659     _accum_task_vtime[i] = 0.0;
 660   }
 661 
 662   // Calculate the card number for the bottom of the heap. Used
 663   // in biasing indexes into the accounting card bitmaps.
 664   _heap_bottom_card_num =
 665     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 666                                 CardTableModRefBS::card_shift);
 667 
 668   // Clear all the liveness counting data
 669   clear_all_count_data();
 670 
 671   // so that the call below can read a sensible value
 672   _heap_start = g1h->reserved_region().start();
 673   set_non_marking_state();
 674   _completed_initialization = true;
 675 }
 676 
 677 void ConcurrentMark::reset() {
 678   // Starting values for these two. This should be called in a STW
 679   // phase.
 680   MemRegion reserved = _g1h->g1_reserved();
 681   _heap_start = reserved.start();
 682   _heap_end   = reserved.end();
 683 
 684   // Separated the asserts so that we know which one fires.
 685   assert(_heap_start != NULL, "heap bounds should look ok");
 686   assert(_heap_end != NULL, "heap bounds should look ok");
 687   assert(_heap_start < _heap_end, "heap bounds should look ok");
 688 
 689   // Reset all the marking data structures and any necessary flags
 690   reset_marking_state();
 691 
 692   // We do reset all of them, since different phases will use
 693   // different number of active threads. So, it's easiest to have all
 694   // of them ready.
 695   for (uint i = 0; i < _max_worker_id; ++i) {
 696     _tasks[i]->reset(_nextMarkBitMap);
 697   }
 698 
  // We need this to make sure that the flag is on during the evacuation
  // pause that has the initial mark piggy-backed on it.
 701   set_concurrent_marking_in_progress();
 702 }
 703 
 704 
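// Reset the global mark stack, the per-worker task queues and the global
// finger. The overflow flag is cleared only if clear_overflow is true;
// otherwise the caller relies on it still being set.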
 705 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 706   _markStack.set_should_expand();
 707   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 708   if (clear_overflow) {
 709     clear_has_overflown();
 710   } else {
 711     assert(has_overflown(), "pre-condition");
 712   }
 713   _finger = _heap_start;
 714 
 715   for (uint i = 0; i < _max_worker_id; ++i) {
 716     CMTaskQueue* queue = _task_queues->queue(i);
 717     queue->set_empty();
 718   }
 719 }
 720 
 721 void ConcurrentMark::set_concurrency(uint active_tasks) {
 722   assert(active_tasks <= _max_worker_id, "we should not have more");
 723 
 724   _active_tasks = active_tasks;
 725   // Need to update the three data structures below according to the
 726   // number of active threads for this phase.
 727   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 728   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 729   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 730 }
 731 
 732 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 733   set_concurrency(active_tasks);
 734 
 735   _concurrent = concurrent;
 736   // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->set_concurrent(concurrent);
  }
 739 
 740   if (concurrent) {
 741     set_concurrent_marking_in_progress();
 742   } else {
 743     // We currently assume that the concurrent flag has been set to
 744     // false before we start remark. At this point we should also be
 745     // in a STW phase.
 746     assert(!concurrent_marking_in_progress(), "invariant");
 747     assert(out_of_regions(),
 748            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 749            p2i(_finger), p2i(_heap_end));
 750   }
 751 }
 752 
 753 void ConcurrentMark::set_non_marking_state() {
 754   // We set the global marking state to some default values when we're
 755   // not doing marking.
 756   reset_marking_state();
 757   _active_tasks = 0;
 758   clear_concurrent_marking_in_progress();
 759 }
 760 
 761 ConcurrentMark::~ConcurrentMark() {
 762   // The ConcurrentMark instance is never freed.
 763   ShouldNotReachHere();
 764 }
 765 
 766 void ConcurrentMark::clearNextBitmap() {
 767   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 768 
  // Make sure that the concurrent mark thread still appears to be in
  // the current cycle.
 771   guarantee(cmThread()->during_cycle(), "invariant");
 772 
 773   // We are finishing up the current cycle by clearing the next
 774   // marking bitmap and getting it ready for the next cycle. During
 775   // this time no other cycle can start. So, let's make sure that this
 776   // is the case.
 777   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 778 
 779   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
 780   ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
 781   _parallel_workers->run_task(&task);
 782 
 783   // Clear the liveness counting data. If the marking has been aborted, the abort()
 784   // call already did that.
 785   if (cl.complete()) {
 786     clear_all_count_data();
 787   }
 788 
 789   // Repeat the asserts from above.
 790   guarantee(cmThread()->during_cycle(), "invariant");
 791   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 792 }
 793 
 794 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 795   CMBitMap* _bitmap;
 796   bool _error;
 797  public:
 798   CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
 799   }
 800 
 801   virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently with the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // value passed to it as the limit to detect any found bits.
    // We can use the region's orig_end() both as the limit and as the comparison value,
    // as it always contains the "real" end of the region, which never changes and
    // has no side effects.
    // Because of the latter, there is also no problem with the compiler generating
    // reloads of the orig_end() call.
 810     HeapWord* end = r->orig_end();
 811     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 812   }
 813 };
 814 
 815 bool ConcurrentMark::nextMarkBitmapIsClear() {
 816   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 817   _g1h->heap_region_iterate(&cl);
 818   return cl.complete();
 819 }
 820 
 821 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 822 public:
 823   bool doHeapRegion(HeapRegion* r) {
 824     if (!r->is_continues_humongous()) {
 825       r->note_start_of_marking();
 826     }
 827     return false;
 828   }
 829 };
 830 
 831 void ConcurrentMark::checkpointRootsInitialPre() {
 832   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 833   G1CollectorPolicy* g1p = g1h->g1_policy();
 834 
 835   _has_aborted = false;
 836 
 837   // Initialize marking structures. This has to be done in a STW phase.
 838   reset();
 839 
 840   // For each region note start of marking.
 841   NoteStartOfMarkHRClosure startcl;
 842   g1h->heap_region_iterate(&startcl);
 843 }
 844 
 845 
 846 void ConcurrentMark::checkpointRootsInitialPost() {
 847   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 848 
  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow count will decrease at
  // every remark and we'll eventually not need to force one.
 855   force_overflow_stw()->init();
 856 
 857   // Start Concurrent Marking weak-reference discovery.
 858   ReferenceProcessor* rp = g1h->ref_processor_cm();
 859   // enable ("weak") refs discovery
 860   rp->enable_discovery();
 861   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 862 
 863   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
 866   satb_mq_set.set_active_all_threads(true, /* new active value */
 867                                      false /* expected_active */);
 868 
 869   _root_regions.prepare_for_scan();
 870 
 871   // update_g1_committed() will be called at the end of an evac pause
 872   // when marking is on. So, it's also called at the end of the
 873   // initial-mark pause to update the heap end, if the heap expands
 874   // during it. No need to call it here.
 875 }
 876 
 877 /*
 878  * Notice that in the next two methods, we actually leave the STS
 879  * during the barrier sync and join it immediately afterwards. If we
 880  * do not do this, the following deadlock can occur: one thread could
 881  * be in the barrier sync code, waiting for the other thread to also
 882  * sync up, whereas another one could be trying to yield, while also
 883  * waiting for the other threads to sync up too.
 884  *
 885  * Note, however, that this code is also used during remark and in
 886  * this case we should not attempt to leave / enter the STS, otherwise
 887  * we'll either hit an assert (debug / fastdebug) or deadlock
 888  * (product). So we should only leave / enter the STS if we are
 889  * operating concurrently.
 890  *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible that it gets suspended for a Full GC or that an
 * evacuation pause occurs. This is actually safe, since entering the
 * sync barrier is one of the last things do_marking_step() does, and
 * it doesn't manipulate any data structures afterwards.
 896  */
 897 
 898 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 899   bool barrier_aborted;
 900   {
 901     SuspendibleThreadSetLeaver sts_leave(concurrent());
 902     barrier_aborted = !_first_overflow_barrier_sync.enter();
 903   }
 904 
 905   // at this point everyone should have synced up and not be doing any
 906   // more work
 907 
 908   if (barrier_aborted) {
 909     // If the barrier aborted we ignore the overflow condition and
 910     // just abort the whole marking phase as quickly as possible.
 911     return;
 912   }
 913 
 914   // If we're executing the concurrent phase of marking, reset the marking
 915   // state; otherwise the marking state is reset after reference processing,
 916   // during the remark pause.
 917   // If we reset here as a result of an overflow during the remark we will
 918   // see assertion failures from any subsequent set_concurrency_and_phase()
 919   // calls.
 920   if (concurrent()) {
    // let the task associated with worker 0 do this
 922     if (worker_id == 0) {
 923       // task 0 is responsible for clearing the global data structures
 924       // We should be here because of an overflow. During STW we should
 925       // not clear the overflow flag since we rely on it being true when
 926       // we exit this method to abort the pause and restart concurrent
 927       // marking.
 928       reset_marking_state(true /* clear_overflow */);
 929       force_overflow()->update();
 930 
 931       if (G1Log::fine()) {
 932         gclog_or_tty->gclog_stamp();
 933         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 934       }
 935     }
 936   }
 937 
  // after this, each task should reset its own data structures and
  // then go into the second barrier
 940 }
 941 
 942 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 943   SuspendibleThreadSetLeaver sts_leave(concurrent());
 944   _second_overflow_barrier_sync.enter();
 945 
 946   // at this point everything should be re-initialized and ready to go
 947 }
 948 
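// Debug-only (non-product) support for deliberately forcing marking stack
// overflows via G1ConcMarkForceOverflow, used to exercise the overflow and
// restart-for-overflow paths.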
 949 #ifndef PRODUCT
 950 void ForceOverflowSettings::init() {
 951   _num_remaining = G1ConcMarkForceOverflow;
 952   _force = false;
 953   update();
 954 }
 955 
 956 void ForceOverflowSettings::update() {
 957   if (_num_remaining > 0) {
 958     _num_remaining -= 1;
 959     _force = true;
 960   } else {
 961     _force = false;
 962   }
 963 }
 964 
 965 bool ForceOverflowSettings::should_force() {
 966   if (_force) {
 967     _force = false;
 968     return true;
 969   } else {
 970     return false;
 971   }
 972 }
 973 #endif // !PRODUCT
 974 
 975 class CMConcurrentMarkingTask: public AbstractGangTask {
 976 private:
 977   ConcurrentMark*       _cm;
 978   ConcurrentMarkThread* _cmt;
 979 
 980 public:
 981   void work(uint worker_id) {
 982     assert(Thread::current()->is_ConcurrentGC_thread(),
 983            "this should only be done by a conc GC thread");
 984     ResourceMark rm;
 985 
 986     double start_vtime = os::elapsedVTime();
 987 
 988     {
 989       SuspendibleThreadSetJoiner sts_join;
 990 
 991       assert(worker_id < _cm->active_tasks(), "invariant");
 992       CMTask* the_task = _cm->task(worker_id);
 993       the_task->record_start_time();
 994       if (!_cm->has_aborted()) {
 995         do {
 996           double start_vtime_sec = os::elapsedVTime();
 997           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 998 
 999           the_task->do_marking_step(mark_step_duration_ms,
1000                                     true  /* do_termination */,
1001                                     false /* is_serial*/);
1002 
1003           double end_vtime_sec = os::elapsedVTime();
1004           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1005           _cm->clear_has_overflown();
1006 
1007           _cm->do_yield_check(worker_id);
1008 
1009           jlong sleep_time_ms;
1010           if (!_cm->has_aborted() && the_task->has_aborted()) {
1011             sleep_time_ms =
1012               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1013             {
1014               SuspendibleThreadSetLeaver sts_leave;
1015               os::sleep(Thread::current(), sleep_time_ms, false);
1016             }
1017           }
1018         } while (!_cm->has_aborted() && the_task->has_aborted());
1019       }
1020       the_task->record_end_time();
1021       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1022     }
1023 
1024     double end_vtime = os::elapsedVTime();
1025     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1026   }
1027 
1028   CMConcurrentMarkingTask(ConcurrentMark* cm,
1029                           ConcurrentMarkThread* cmt) :
1030       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1031 
1032   ~CMConcurrentMarkingTask() { }
1033 };
1034 
1035 // Calculates the number of active workers for a concurrent
1036 // phase.
1037 uint ConcurrentMark::calc_parallel_marking_threads() {
1038   uint n_conc_workers = 0;
1039   if (!UseDynamicNumberOfGCThreads ||
1040       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1041        !ForceDynamicNumberOfGCThreads)) {
1042     n_conc_workers = max_parallel_marking_threads();
1043   } else {
1044     n_conc_workers =
1045       AdaptiveSizePolicy::calc_default_active_workers(
1046                                    max_parallel_marking_threads(),
1047                                    1, /* Minimum workers */
1048                                    parallel_marking_threads(),
1049                                    Threads::number_of_non_daemon_threads());
1050     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1051     // that scaling has already gone into "_max_parallel_marking_threads".
1052   }
1053   assert(n_conc_workers > 0, "Always need at least 1");
1054   return n_conc_workers;
1055 }
1056 
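// Scan a single root region by applying G1RootRegionScanClosure to every
// object between the region's bottom() and top().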
1057 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1058   // Currently, only survivors can be root regions.
1059   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1060   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1061 
1062   const uintx interval = PrefetchScanIntervalInBytes;
1063   HeapWord* curr = hr->bottom();
1064   const HeapWord* end = hr->top();
1065   while (curr < end) {
1066     Prefetch::read(curr, interval);
1067     oop obj = oop(curr);
1068     int size = obj->oop_iterate_size(&cl);
1069     assert(size == obj->size(), "sanity");
1070     curr += size;
1071   }
1072 }
1073 
1074 class CMRootRegionScanTask : public AbstractGangTask {
1075 private:
1076   ConcurrentMark* _cm;
1077 
1078 public:
1079   CMRootRegionScanTask(ConcurrentMark* cm) :
1080     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1081 
1082   void work(uint worker_id) {
1083     assert(Thread::current()->is_ConcurrentGC_thread(),
1084            "this should only be done by a conc GC thread");
1085 
1086     CMRootRegions* root_regions = _cm->root_regions();
1087     HeapRegion* hr = root_regions->claim_next();
1088     while (hr != NULL) {
1089       _cm->scanRootRegion(hr, worker_id);
1090       hr = root_regions->claim_next();
1091     }
1092   }
1093 };
1094 
1095 void ConcurrentMark::scanRootRegions() {
1096   double scan_start = os::elapsedTime();
1097 
1098   // Start of concurrent marking.
1099   ClassLoaderDataGraph::clear_claimed_marks();
1100 
1101   // scan_in_progress() will have been set to true only if there was
1102   // at least one root region to scan. So, if it's false, we
1103   // should not attempt to do any further work.
1104   if (root_regions()->scan_in_progress()) {
1105     if (G1Log::fine()) {
1106       gclog_or_tty->gclog_stamp();
1107       gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
1108     }
1109 
1110     _parallel_marking_threads = calc_parallel_marking_threads();
1111     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1112            "Maximum number of marking threads exceeded");
1113     uint active_workers = MAX2(1U, parallel_marking_threads());
1114 
1115     CMRootRegionScanTask task(this);
1116     _parallel_workers->set_active_workers(active_workers);
1117     _parallel_workers->run_task(&task);
1118 
1119     if (G1Log::fine()) {
1120       gclog_or_tty->gclog_stamp();
1121       gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf secs]", os::elapsedTime() - scan_start);
1122     }
1123 
1124     // It's possible that has_aborted() is true here without actually
1125     // aborting the survivor scan earlier. This is OK as it's
1126     // mainly used for sanity checking.
1127     root_regions()->scan_finished();
1128   }
1129 }
1130 
1131 void ConcurrentMark::markFromRoots() {
1132   // we might be tempted to assert that:
1133   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1134   //        "inconsistent argument?");
1135   // However that wouldn't be right, because it's possible that
1136   // a safepoint is indeed in progress as a younger generation
1137   // stop-the-world GC happens even as we mark in this generation.
1138 
1139   _restart_for_overflow = false;
1140   force_overflow_conc()->init();
1141 
1142   // _g1h has _n_par_threads
1143   _parallel_marking_threads = calc_parallel_marking_threads();
1144   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1145     "Maximum number of marking threads exceeded");
1146 
1147   uint active_workers = MAX2(1U, parallel_marking_threads());
1148   assert(active_workers > 0, "Should have been set");
1149 
1150   // Parallel task terminator is set in "set_concurrency_and_phase()"
1151   set_concurrency_and_phase(active_workers, true /* concurrent */);
1152 
1153   CMConcurrentMarkingTask markingTask(this, cmThread());
1154   _parallel_workers->set_active_workers(active_workers);
1155   _parallel_workers->run_task(&markingTask);
1156   print_stats();
1157 }
1158 
1159 // Helper class to get rid of some boilerplate code.
1160 class G1CMTraceTime : public StackObj {
1161   GCTraceTimeImpl _gc_trace_time;
1162   static bool doit_and_prepend(bool doit) {
1163     if (doit) {
1164       gclog_or_tty->put(' ');
1165     }
1166     return doit;
1167   }
1168 
1169  public:
1170   G1CMTraceTime(const char* title, bool doit)
1171     : _gc_trace_time(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm()) {
1172   }
1173 };
1174 
1175 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1176   // world is stopped at this checkpoint
1177   assert(SafepointSynchronize::is_at_safepoint(),
1178          "world should be stopped");
1179 
1180   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1181 
1182   // If a full collection has happened, we shouldn't do this.
1183   if (has_aborted()) {
1184     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1185     return;
1186   }
1187 
1188   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1189 
1190   if (VerifyDuringGC) {
1191     HandleMark hm;  // handle scope
1192     g1h->prepare_for_verify();
1193     Universe::verify(VerifyOption_G1UsePrevMarking,
1194                      " VerifyDuringGC:(before)");
1195   }
1196   g1h->check_bitmaps("Remark Start");
1197 
1198   G1CollectorPolicy* g1p = g1h->g1_policy();
1199   g1p->record_concurrent_mark_remark_start();
1200 
1201   double start = os::elapsedTime();
1202 
1203   checkpointRootsFinalWork();
1204 
1205   double mark_work_end = os::elapsedTime();
1206 
1207   weakRefsWork(clear_all_soft_refs);
1208 
1209   if (has_overflown()) {
1210     // Oops.  We overflowed.  Restart concurrent marking.
1211     _restart_for_overflow = true;
1212     if (G1TraceMarkStackOverflow) {
1213       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1214     }
1215 
1216     // Verify the heap w.r.t. the previous marking bitmap.
1217     if (VerifyDuringGC) {
1218       HandleMark hm;  // handle scope
1219       g1h->prepare_for_verify();
1220       Universe::verify(VerifyOption_G1UsePrevMarking,
1221                        " VerifyDuringGC:(overflow)");
1222     }
1223 
1224     // Clear the marking state because we will be restarting
1225     // marking due to overflowing the global mark stack.
1226     reset_marking_state();
1227   } else {
1228     {
1229       G1CMTraceTime trace("GC aggregate-data", G1Log::finer());
1230 
1231       // Aggregate the per-task counting data that we have accumulated
1232       // while marking.
1233       aggregate_count_data();
1234     }
1235 
1236     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1237     // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
1240     satb_mq_set.set_active_all_threads(false, /* new active value */
1241                                        true /* expected_active */);
1242 
1243     if (VerifyDuringGC) {
1244       HandleMark hm;  // handle scope
1245       g1h->prepare_for_verify();
1246       Universe::verify(VerifyOption_G1UseNextMarking,
1247                        " VerifyDuringGC:(after)");
1248     }
1249     g1h->check_bitmaps("Remark End");
1250     assert(!restart_for_overflow(), "sanity");
1251     // Completely reset the marking state since marking completed
1252     set_non_marking_state();
1253   }
1254 
1255   // Expand the marking stack, if we have to and if we can.
1256   if (_markStack.should_expand()) {
1257     _markStack.expand();
1258   }
1259 
1260   // Statistics
1261   double now = os::elapsedTime();
1262   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1263   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1264   _remark_times.add((now - start) * 1000.0);
1265 
1266   g1p->record_concurrent_mark_remark_end();
1267 
1268   G1CMIsAliveClosure is_alive(g1h);
1269   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1270 }
1271 
1272 // Base class of the closures that finalize and verify the
1273 // liveness counting data.
1274 class CMCountDataClosureBase: public HeapRegionClosure {
1275 protected:
1276   G1CollectedHeap* _g1h;
1277   ConcurrentMark* _cm;
1278   CardTableModRefBS* _ct_bs;
1279 
1280   BitMap* _region_bm;
1281   BitMap* _card_bm;
1282 
  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
1288   void set_bit_for_region(HeapRegion* hr) {
1289     assert(!hr->is_continues_humongous(), "should have filtered those out");
1290 
1291     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1292     if (!hr->is_starts_humongous()) {
1293       // Normal (non-humongous) case: just set the bit.
1294       _region_bm->par_at_put(index, true);
1295     } else {
1296       // Starts humongous case: calculate how many regions are part of
1297       // this humongous region and then set the bit range.
1298       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1299       _region_bm->par_at_put_range(index, end_index, true);
1300     }
1301   }
1302 
1303 public:
1304   CMCountDataClosureBase(G1CollectedHeap* g1h,
1305                          BitMap* region_bm, BitMap* card_bm):
1306     _g1h(g1h), _cm(g1h->concurrent_mark()),
1307     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
1308     _region_bm(region_bm), _card_bm(card_bm) { }
1309 };
1310 
1311 // Closure that calculates the # live objects per region. Used
1312 // for verification purposes during the cleanup pause.
1313 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1314   CMBitMapRO* _bm;
1315   size_t _region_marked_bytes;
1316 
1317 public:
1318   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1319                          BitMap* region_bm, BitMap* card_bm) :
1320     CMCountDataClosureBase(g1h, region_bm, card_bm),
1321     _bm(bm), _region_marked_bytes(0) { }
1322 
1323   bool doHeapRegion(HeapRegion* hr) {
1324 
1325     if (hr->is_continues_humongous()) {
1326       // We will ignore these here and process them when their
1327       // associated "starts humongous" region is processed (see
1328       // set_bit_for_heap_region()). Note that we cannot rely on their
1329       // associated "starts humongous" region to have their bit set to
1330       // 1 since, due to the region chunking in the parallel region
1331       // iteration, a "continues humongous" region might be visited
1332       // before its associated "starts humongous".
1333       return false;
1334     }
1335 
1336     HeapWord* ntams = hr->next_top_at_mark_start();
1337     HeapWord* start = hr->bottom();
1338 
1339     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1340            "Preconditions not met - "
1341            "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
1342            p2i(start), p2i(ntams), p2i(hr->end()));
1343 
1344     // Find the first marked object at or after "start".
1345     start = _bm->getNextMarkedWordAddress(start, ntams);
1346 
1347     size_t marked_bytes = 0;
1348 
1349     while (start < ntams) {
1350       oop obj = oop(start);
1351       int obj_sz = obj->size();
1352       HeapWord* obj_end = start + obj_sz;
1353 
1354       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1355       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1356 
1357       // Note: if we're looking at the last region in heap - obj_end
1358       // could be actually just beyond the end of the heap; end_idx
1359       // will then correspond to a (non-existent) card that is also
1360       // just beyond the heap.
1361       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1362         // end of object is not card aligned - increment to cover
1363         // all the cards spanned by the object
1364         end_idx += 1;
1365       }
1366 
1367       // Set the bits in the card BM for the cards spanned by this object.
1368       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1369 
1370       // Add the size of this object to the number of marked bytes.
1371       marked_bytes += (size_t)obj_sz * HeapWordSize;
1372 
1373       // Find the next marked object after this one.
1374       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1375     }
1376 
1377     // Mark the allocated-since-marking portion...
1378     HeapWord* top = hr->top();
1379     if (ntams < top) {
1380       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1381       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1382 
1383       // Note: if we're looking at the last region in heap - top
1384       // could be actually just beyond the end of the heap; end_idx
1385       // will then correspond to a (non-existent) card that is also
1386       // just beyond the heap.
1387       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // top is not card aligned - increment to cover
        // all the cards spanned by the allocated-since-marking portion
1390         end_idx += 1;
1391       }
1392       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1393 
1394       // This definitely means the region has live objects.
1395       set_bit_for_region(hr);
1396     }
1397 
1398     // Update the live region bitmap.
1399     if (marked_bytes > 0) {
1400       set_bit_for_region(hr);
1401     }
1402 
1403     // Set the marked bytes for the current region so that
1404     // it can be queried by a calling verification routine
1405     _region_marked_bytes = marked_bytes;
1406 
1407     return false;
1408   }
1409 
1410   size_t region_marked_bytes() const { return _region_marked_bytes; }
1411 };
1412 
1413 // Heap region closure used for verifying the counting data
1414 // that was accumulated concurrently and aggregated during
1415 // the remark pause. This closure is applied to the heap
1416 // regions during the STW cleanup pause.
1417 
1418 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1419   G1CollectedHeap* _g1h;
1420   ConcurrentMark* _cm;
1421   CalcLiveObjectsClosure _calc_cl;
1422   BitMap* _region_bm;   // Region BM to be verified
1423   BitMap* _card_bm;     // Card BM to be verified
1424 
1425   BitMap* _exp_region_bm; // Expected Region BM values
1426   BitMap* _exp_card_bm;   // Expected card BM values
1427 
1428   int _failures;
1429 
1430 public:
1431   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1432                                 BitMap* region_bm,
1433                                 BitMap* card_bm,
1434                                 BitMap* exp_region_bm,
1435                                 BitMap* exp_card_bm) :
1436     _g1h(g1h), _cm(g1h->concurrent_mark()),
1437     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1438     _region_bm(region_bm), _card_bm(card_bm),
1439     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1440     _failures(0) { }
1441 
1442   int failures() const { return _failures; }
1443 
1444   bool doHeapRegion(HeapRegion* hr) {
1445     if (hr->is_continues_humongous()) {
1446       // We will ignore these here and process them when their
1447       // associated "starts humongous" region is processed (see
1448       // set_bit_for_heap_region()). Note that we cannot rely on their
1449       // associated "starts humongous" region to have its bit set to
1450       // 1 since, due to the region chunking in the parallel region
1451       // iteration, a "continues humongous" region might be visited
1452       // before its associated "starts humongous".
1453       return false;
1454     }
1455 
1456     int failures = 0;
1457 
1458     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1459     // this region and set the corresponding bits in the expected region
1460     // and card bitmaps.
1461     bool res = _calc_cl.doHeapRegion(hr);
1462     assert(res == false, "should be continuing");
1463 
1464     // Verify the marked bytes for this region.
1465     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1466     size_t act_marked_bytes = hr->next_marked_bytes();
1467 
1468     // We're not OK if expected marked bytes > actual marked bytes. It means
1469     // we have missed accounting for some objects during the actual marking.
1470     if (exp_marked_bytes > act_marked_bytes) {
1471       failures += 1;
1472     }
1473 
1474     // Verify the bit, for this region, in the actual and expected
1475     // (which was just calculated) region bit maps.
1476     // We're not OK if the bit in the calculated expected region
1477     // bitmap is set and the bit in the actual region bitmap is not.
1478     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1479 
1480     bool expected = _exp_region_bm->at(index);
1481     bool actual = _region_bm->at(index);
1482     if (expected && !actual) {
1483       failures += 1;
1484     }
1485 
1486     // Verify that the card bit maps for the cards spanned by the current
1487     // region match. We have an error if we have a set bit in the expected
1488     // bit map and the corresponding bit in the actual bitmap is not set.
1489 
1490     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1491     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1492 
1493     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1494       expected = _exp_card_bm->at(i);
1495       actual = _card_bm->at(i);
1496 
1497       if (expected && !actual) {
1498         failures += 1;
1499       }
1500     }
1501 
1502     _failures += failures;
1503 
1504     // We could stop iteration over the heap when we
1505     // find the first violating region by returning true.
1506     return false;
1507   }
1508 };
1509 
1510 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1511 protected:
1512   G1CollectedHeap* _g1h;
1513   ConcurrentMark* _cm;
1514   BitMap* _actual_region_bm;
1515   BitMap* _actual_card_bm;
1516 
1517   uint    _n_workers;
1518 
1519   BitMap* _expected_region_bm;
1520   BitMap* _expected_card_bm;
1521 
1522   int  _failures;
1523 
1524   HeapRegionClaimer _hrclaimer;
1525 
1526 public:
1527   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1528                             BitMap* region_bm, BitMap* card_bm,
1529                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1530     : AbstractGangTask("G1 verify final counting"),
1531       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1532       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1533       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1534       _failures(0),
1535       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1536     assert(VerifyDuringGC, "don't call this otherwise");
1537     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1538     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1539   }
1540 
1541   void work(uint worker_id) {
1542     assert(worker_id < _n_workers, "invariant");
1543 
1544     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1545                                             _actual_region_bm, _actual_card_bm,
1546                                             _expected_region_bm,
1547                                             _expected_card_bm);
1548 
1549     _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
1550 
1551     Atomic::add(verify_cl.failures(), &_failures);
1552   }
1553 
1554   int failures() const { return _failures; }
1555 };
1556 
1557 // Closure that finalizes the liveness counting data.
1558 // Used during the cleanup pause.
1559 // Sets the bits corresponding to the interval [NTAMS, top)
1560 // (which contains the implicitly live objects) in the
1561 // card liveness bitmap. Also sets the bit in the region
1562 // liveness bitmap for each region that contains live data.
1563 
1564 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1565  public:
1566   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1567                               BitMap* region_bm,
1568                               BitMap* card_bm) :
1569     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1570 
1571   bool doHeapRegion(HeapRegion* hr) {
1572 
1573     if (hr->is_continues_humongous()) {
1574       // We will ignore these here and process them when their
1575       // associated "starts humongous" region is processed (see
1576       // set_bit_for_heap_region()). Note that we cannot rely on their
1577       // associated "starts humongous" region to have its bit set to
1578       // 1 since, due to the region chunking in the parallel region
1579       // iteration, a "continues humongous" region might be visited
1580       // before its associated "starts humongous".
1581       return false;
1582     }
1583 
1584     HeapWord* ntams = hr->next_top_at_mark_start();
1585     HeapWord* top   = hr->top();
1586 
1587     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1588 
1589     // Mark the allocated-since-marking portion...
1590     if (ntams < top) {
1591       // This definitely means the region has live objects.
1592       set_bit_for_region(hr);
1593 
1594       // Now set the bits in the card bitmap for [ntams, top)
1595       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1596       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1597 
1598       // Note: if we're looking at the last region in the heap - top
1599       // could actually be just beyond the end of the heap; end_idx
1600       // will then correspond to a (non-existent) card that is also
1601       // just beyond the heap.
1602       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1603         // end of object is not card aligned - increment to cover
1604         // all the cards spanned by the object
1605         end_idx += 1;
1606       }
1607 
1608       assert(end_idx <= _card_bm->size(),
1609              "oob: end_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1610              end_idx, _card_bm->size());
1611       assert(start_idx < _card_bm->size(),
1612              "oob: start_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1613              start_idx, _card_bm->size());
1614 
1615       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1616     }
1617 
1618     // Set the bit for the region if it contains live data
1619     if (hr->next_marked_bytes() > 0) {
1620       set_bit_for_region(hr);
1621     }
1622 
1623     return false;
1624   }
1625 };
1626 
1627 class G1ParFinalCountTask: public AbstractGangTask {
1628 protected:
1629   G1CollectedHeap* _g1h;
1630   ConcurrentMark* _cm;
1631   BitMap* _actual_region_bm;
1632   BitMap* _actual_card_bm;
1633 
1634   uint    _n_workers;
1635   HeapRegionClaimer _hrclaimer;
1636 
1637 public:
1638   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1639     : AbstractGangTask("G1 final counting"),
1640       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1641       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1642       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1643   }
1644 
1645   void work(uint worker_id) {
1646     assert(worker_id < _n_workers, "invariant");
1647 
1648     FinalCountDataUpdateClosure final_update_cl(_g1h,
1649                                                 _actual_region_bm,
1650                                                 _actual_card_bm);
1651 
1652     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1653   }
1654 };
1655 
1656 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1657   G1CollectedHeap* _g1;
1658   size_t _freed_bytes;
1659   FreeRegionList* _local_cleanup_list;
1660   HeapRegionSetCount _old_regions_removed;
1661   HeapRegionSetCount _humongous_regions_removed;
1662   HRRSCleanupTask* _hrrs_cleanup_task;
1663 
1664 public:
1665   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1666                              FreeRegionList* local_cleanup_list,
1667                              HRRSCleanupTask* hrrs_cleanup_task) :
1668     _g1(g1),
1669     _freed_bytes(0),
1670     _local_cleanup_list(local_cleanup_list),
1671     _old_regions_removed(),
1672     _humongous_regions_removed(),
1673     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1674 
1675   size_t freed_bytes() { return _freed_bytes; }
1676   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1677   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1678 
1679   bool doHeapRegion(HeapRegion *hr) {
1680     if (hr->is_continues_humongous() || hr->is_archive()) {
1681       return false;
1682     }
1683     // We use a claim value of zero here because all regions
1684     // were claimed with value 1 in the FinalCount task.
1685     _g1->reset_gc_time_stamps(hr);
1686     hr->note_end_of_marking();
1687 
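         // A non-young region that is in use but has no live bytes left
         // after marking consists entirely of dead objects, so free it
         // right away; otherwise only do remembered set cleanup work on it.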
1688     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1689       _freed_bytes += hr->used();
1690       hr->set_containing_set(NULL);
1691       if (hr->is_humongous()) {
1692         assert(hr->is_starts_humongous(), "we should only see starts humongous");
1693         _humongous_regions_removed.increment(1u, hr->capacity());
1694         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1695       } else {
1696         _old_regions_removed.increment(1u, hr->capacity());
1697         _g1->free_region(hr, _local_cleanup_list, true);
1698       }
1699     } else {
1700       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1701     }
1702 
1703     return false;
1704   }
1705 };
1706 
1707 class G1ParNoteEndTask: public AbstractGangTask {
1708   friend class G1NoteEndOfConcMarkClosure;
1709 
1710 protected:
1711   G1CollectedHeap* _g1h;
1712   FreeRegionList* _cleanup_list;
1713   HeapRegionClaimer _hrclaimer;
1714 
1715 public:
1716   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1717       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1718   }
1719 
1720   void work(uint worker_id) {
1721     FreeRegionList local_cleanup_list("Local Cleanup List");
1722     HRRSCleanupTask hrrs_cleanup_task;
1723     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1724                                            &hrrs_cleanup_task);
1725     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1726     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1727 
1728     // Now update the lists
1729     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1730     {
1731       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1732       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1733 
1734       // If we iterate over the global cleanup list at the end of
1735       // cleanup to do this printing, we cannot guarantee that we only
1736       // generate output for the newly-reclaimed regions (the list
1737       // might not be empty at the beginning of cleanup; we might
1738       // still be working on its previous contents). So we do the
1739       // printing here, before we append the new regions to the global
1740       // cleanup list.
1741 
1742       G1HRPrinter* hr_printer = _g1h->hr_printer();
1743       if (hr_printer->is_active()) {
1744         FreeRegionListIterator iter(&local_cleanup_list);
1745         while (iter.more_available()) {
1746           HeapRegion* hr = iter.get_next();
1747           hr_printer->cleanup(hr);
1748         }
1749       }
1750 
1751       _cleanup_list->add_ordered(&local_cleanup_list);
1752       assert(local_cleanup_list.is_empty(), "post-condition");
1753 
1754       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1755     }
1756   }
1757 };
1758 
1759 class G1ParScrubRemSetTask: public AbstractGangTask {
1760 protected:
1761   G1RemSet* _g1rs;
1762   BitMap* _region_bm;
1763   BitMap* _card_bm;
1764   HeapRegionClaimer _hrclaimer;
1765 
1766 public:
1767   G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
1768       AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) {
1769   }
1770 
1771   void work(uint worker_id) {
1772     _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
1773   }
1774 
1775 };
1776 
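     // STW cleanup pause: finalize the liveness counting data (and
     // optionally verify it), swap the marking bitmaps, note the end of
     // marking in every region (immediately freeing regions that turned
     // out to be completely garbage), optionally scrub the remembered
     // sets, and update the policy and statistics bookkeeping.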
1777 void ConcurrentMark::cleanup() {
1778   // world is stopped at this checkpoint
1779   assert(SafepointSynchronize::is_at_safepoint(),
1780          "world should be stopped");
1781   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1782 
1783   // If a full collection has happened, we shouldn't do this.
1784   if (has_aborted()) {
1785     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1786     return;
1787   }
1788 
1789   g1h->verify_region_sets_optional();
1790 
1791   if (VerifyDuringGC) {
1792     HandleMark hm;  // handle scope
1793     g1h->prepare_for_verify();
1794     Universe::verify(VerifyOption_G1UsePrevMarking,
1795                      " VerifyDuringGC:(before)");
1796   }
1797   g1h->check_bitmaps("Cleanup Start");
1798 
1799   G1CollectorPolicy* g1p = g1h->g1_policy();
1800   g1p->record_concurrent_mark_cleanup_start();
1801 
1802   double start = os::elapsedTime();
1803 
1804   HeapRegionRemSet::reset_for_cleanup_tasks();
1805 
1806   // Do counting once more with the world stopped for good measure.
1807   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1808 
1809   g1h->workers()->run_task(&g1_par_count_task);
1810 
1811   if (VerifyDuringGC) {
1812     // Verify that the counting data accumulated during marking matches
1813     // that calculated by walking the marking bitmap.
1814 
1815     // Bitmaps to hold expected values
1816     BitMap expected_region_bm(_region_bm.size(), true);
1817     BitMap expected_card_bm(_card_bm.size(), true);
1818 
1819     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1820                                                  &_region_bm,
1821                                                  &_card_bm,
1822                                                  &expected_region_bm,
1823                                                  &expected_card_bm);
1824 
1825     g1h->workers()->run_task(&g1_par_verify_task);
1826 
1827     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1828   }
1829 
1830   size_t start_used_bytes = g1h->used();
1831   g1h->collector_state()->set_mark_in_progress(false);
1832 
1833   double count_end = os::elapsedTime();
1834   double this_final_counting_time = (count_end - start);
1835   _total_counting_time += this_final_counting_time;
1836 
1837   if (G1PrintRegionLivenessInfo) {
1838     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1839     _g1h->heap_region_iterate(&cl);
1840   }
1841 
1842   // Install the newly created mark bitmap as "prev".
1843   swapMarkBitMaps();
1844 
1845   g1h->reset_gc_time_stamp();
1846 
1847   uint n_workers = _g1h->workers()->active_workers();
1848 
1849   // Note end of marking in all heap regions.
1850   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1851   g1h->workers()->run_task(&g1_par_note_end_task);
1852   g1h->check_gc_time_stamps();
1853 
1854   if (!cleanup_list_is_empty()) {
1855     // The cleanup list is not empty, so we'll have to process it
1856     // concurrently. Notify anyone else that might be wanting free
1857     // regions that there will be more free regions coming soon.
1858     g1h->set_free_regions_coming();
1859   }
1860 
1861   // Do the remembered set scrubbing before the record_concurrent_mark_cleanup_end()
1862   // call below, since it affects the metric by which we sort the heap regions.
1863   if (G1ScrubRemSets) {
1864     double rs_scrub_start = os::elapsedTime();
1865     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
1866     g1h->workers()->run_task(&g1_par_scrub_rs_task);
1867 
1868     double rs_scrub_end = os::elapsedTime();
1869     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1870     _total_rs_scrub_time += this_rs_scrub_time;
1871   }
1872 
1873   // this will also free any regions totally full of garbage objects,
1874   // and sort the regions.
1875   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1876 
1877   // Statistics.
1878   double end = os::elapsedTime();
1879   _cleanup_times.add((end - start) * 1000.0);
1880 
1881   if (G1Log::fine()) {
1882     g1h->g1_policy()->print_heap_transition(start_used_bytes);
1883   }
1884 
1885   // Clean up will have freed any regions completely full of garbage.
1886   // Update the soft reference policy with the new heap occupancy.
1887   Universe::update_heap_info_at_gc();
1888 
1889   if (VerifyDuringGC) {
1890     HandleMark hm;  // handle scope
1891     g1h->prepare_for_verify();
1892     Universe::verify(VerifyOption_G1UsePrevMarking,
1893                      " VerifyDuringGC:(after)");
1894   }
1895 
1896   g1h->check_bitmaps("Cleanup End");
1897 
1898   g1h->verify_region_sets_optional();
1899 
1900   // We need to make this a "collection" so that any collection pause that
1901   // races with it goes around and waits for completeCleanup to finish.
1902   g1h->increment_total_collections();
1903 
1904   // Clean out dead classes and update Metaspace sizes.
1905   if (ClassUnloadingWithConcurrentMark) {
1906     ClassLoaderDataGraph::purge();
1907   }
1908   MetaspaceGC::compute_new_size();
1909 
1910   // We reclaimed old regions so we should calculate the sizes to make
1911   // sure we update the old gen/space data.
1912   g1h->g1mm()->update_sizes();
1913   g1h->allocation_context_stats().update_after_mark();
1914 
1915   g1h->trace_heap_after_concurrent_cycle();
1916 }
1917 
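     // Runs concurrently after the cleanup pause: the regions that the
     // pause placed on _cleanup_list are cleared here and handed back to
     // the collector via the secondary free list, a few at a time.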
1918 void ConcurrentMark::completeCleanup() {
1919   if (has_aborted()) return;
1920 
1921   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1922 
1923   _cleanup_list.verify_optional();
1924   FreeRegionList tmp_free_list("Tmp Free List");
1925 
1926   if (G1ConcRegionFreeingVerbose) {
1927     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1928                            "cleanup list has %u entries",
1929                            _cleanup_list.length());
1930   }
1931 
1932   // No one else should be accessing the _cleanup_list at this point,
1933   // so it is not necessary to take any locks.
1934   while (!_cleanup_list.is_empty()) {
1935     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1936     assert(hr != NULL, "Got NULL from a non-empty list");
1937     hr->par_clear();
1938     tmp_free_list.add_ordered(hr);
1939 
1940     // Instead of adding one region at a time to the secondary_free_list,
1941     // we accumulate them in the local list and move them a few at a
1942     // time. This also cuts down on the number of notify_all() calls
1943     // we do during this process. We'll also append the local list when
1944     // _cleanup_list is empty (which means we just removed the last
1945     // region from the _cleanup_list).
1946     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1947         _cleanup_list.is_empty()) {
1948       if (G1ConcRegionFreeingVerbose) {
1949         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1950                                "appending %u entries to the secondary_free_list, "
1951                                "cleanup list still has %u entries",
1952                                tmp_free_list.length(),
1953                                _cleanup_list.length());
1954       }
1955 
1956       {
1957         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1958         g1h->secondary_free_list_add(&tmp_free_list);
1959         SecondaryFreeList_lock->notify_all();
1960       }
1961 #ifndef PRODUCT
1962       if (G1StressConcRegionFreeing) {
1963         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1964           os::sleep(Thread::current(), (jlong) 1, false);
1965         }
1966       }
1967 #endif
1968     }
1969   }
1970   assert(tmp_free_list.is_empty(), "post-condition");
1971 }
1972 
1973 // Supporting Object and Oop closures for reference discovery
1974 // and processing during marking
1975 
1976 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1977   HeapWord* addr = (HeapWord*)obj;
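       // An object is considered alive if it is non-NULL and either lies
       // outside the G1 reserved heap or is not "ill" with respect to the
       // in-progress (next) marking, i.e. it is either marked on the next
       // bitmap or was allocated since this marking cycle started.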
1978   return addr != NULL &&
1979          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1980 }
1981 
1982 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1983 // Uses the CMTask associated with a worker thread (for serial reference
1984 // processing the CMTask for worker 0 is used) to preserve (mark) and
1985 // trace referent objects.
1986 //
1987 // Using the CMTask and embedded local queues avoids having the worker
1988 // threads operating on the global mark stack. This reduces the risk
1989 // of overflowing the stack - which we would rather avoid at this late
1990 // stage. Also using the tasks' local queues removes the potential
1991 // of the workers interfering with each other that could occur if
1992 // operating on the global stack.
1993 
1994 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1995   ConcurrentMark* _cm;
1996   CMTask*         _task;
1997   int             _ref_counter_limit;
1998   int             _ref_counter;
1999   bool            _is_serial;
2000  public:
2001   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2002     _cm(cm), _task(task), _is_serial(is_serial),
2003     _ref_counter_limit(G1RefProcDrainInterval) {
2004     assert(_ref_counter_limit > 0, "sanity");
2005     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2006     _ref_counter = _ref_counter_limit;
2007   }
2008 
2009   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2010   virtual void do_oop(      oop* p) { do_oop_work(p); }
2011 
2012   template <class T> void do_oop_work(T* p) {
2013     if (!_cm->has_overflown()) {
2014       oop obj = oopDesc::load_decode_heap_oop(p);
2015       _task->deal_with_reference(obj);
2016       _ref_counter--;
2017 
2018       if (_ref_counter == 0) {
2019         // We have dealt with _ref_counter_limit references, pushing them
2020         // and objects reachable from them on to the local stack (and
2021         // possibly the global stack). Call CMTask::do_marking_step() to
2022         // process these entries.
2023         //
2024         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2025         // there's nothing more to do (i.e. we're done with the entries that
2026         // were pushed as a result of the CMTask::deal_with_reference() calls
2027         // above) or we overflow.
2028         //
2029         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2030         // flag while there may still be some work to do. (See the comment at
2031         // the beginning of CMTask::do_marking_step() for those conditions -
2032         // one of which is reaching the specified time target.) It is only
2033         // when CMTask::do_marking_step() returns without setting the
2034         // has_aborted() flag that the marking step has completed.
2035         do {
2036           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2037           _task->do_marking_step(mark_step_duration_ms,
2038                                  false      /* do_termination */,
2039                                  _is_serial);
2040         } while (_task->has_aborted() && !_cm->has_overflown());
2041         _ref_counter = _ref_counter_limit;
2042       }
2043     }
2044   }
2045 };
2046 
2047 // 'Drain' oop closure used by both serial and parallel reference processing.
2048 // Uses the CMTask associated with a given worker thread (for serial
2049 // reference processing the CMTask for worker 0 is used). Calls the
2050 // do_marking_step routine, with an unbelievably large timeout value,
2051 // to drain the marking data structures of the remaining entries
2052 // added by the 'keep alive' oop closure above.
2053 
2054 class G1CMDrainMarkingStackClosure: public VoidClosure {
2055   ConcurrentMark* _cm;
2056   CMTask*         _task;
2057   bool            _is_serial;
2058  public:
2059   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2060     _cm(cm), _task(task), _is_serial(is_serial) {
2061     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2062   }
2063 
2064   void do_void() {
2065     do {
2066       // We call CMTask::do_marking_step() to completely drain the local
2067       // and global marking stacks of entries pushed by the 'keep alive'
2068       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2069       //
2070       // CMTask::do_marking_step() is called in a loop, which we'll exit
2071       // if there's nothing more to do (i.e. we've completely drained the
2072       // entries that were pushed as a result of applying the 'keep alive'
2073       // closure to the entries on the discovered ref lists) or we overflow
2074       // the global marking stack.
2075       //
2076       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2077       // flag while there may still be some work to do. (See the comment at
2078       // the beginning of CMTask::do_marking_step() for those conditions -
2079       // one of which is reaching the specified time target.) It is only
2080       // when CMTask::do_marking_step() returns without setting the
2081       // has_aborted() flag that the marking step has completed.
2082 
2083       _task->do_marking_step(1000000000.0 /* something very large */,
2084                              true         /* do_termination */,
2085                              _is_serial);
2086     } while (_task->has_aborted() && !_cm->has_overflown());
2087   }
2088 };
2089 
2090 // Implementation of AbstractRefProcTaskExecutor for parallel
2091 // reference processing at the end of G1 concurrent marking
2092 
2093 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2094 private:
2095   G1CollectedHeap* _g1h;
2096   ConcurrentMark*  _cm;
2097   WorkGang*        _workers;
2098   uint             _active_workers;
2099 
2100 public:
2101   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2102                           ConcurrentMark* cm,
2103                           WorkGang* workers,
2104                           uint n_workers) :
2105     _g1h(g1h), _cm(cm),
2106     _workers(workers), _active_workers(n_workers) { }
2107 
2108   // Executes the given task using concurrent marking worker threads.
2109   virtual void execute(ProcessTask& task);
2110   virtual void execute(EnqueueTask& task);
2111 };
2112 
2113 class G1CMRefProcTaskProxy: public AbstractGangTask {
2114   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2115   ProcessTask&     _proc_task;
2116   G1CollectedHeap* _g1h;
2117   ConcurrentMark*  _cm;
2118 
2119 public:
2120   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2121                      G1CollectedHeap* g1h,
2122                      ConcurrentMark* cm) :
2123     AbstractGangTask("Process reference objects in parallel"),
2124     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2125     ReferenceProcessor* rp = _g1h->ref_processor_cm();
2126     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2127   }
2128 
2129   virtual void work(uint worker_id) {
2130     ResourceMark rm;
2131     HandleMark hm;
2132     CMTask* task = _cm->task(worker_id);
2133     G1CMIsAliveClosure g1_is_alive(_g1h);
2134     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2135     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2136 
2137     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2138   }
2139 };
2140 
2141 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2142   assert(_workers != NULL, "Need parallel worker threads.");
2143   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2144 
2145   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2146 
2147   // We need to reset the concurrency level before each
2148   // proxy task execution, so that the termination protocol
2149   // and overflow handling in CMTask::do_marking_step() know
2150   // how many workers to wait for.
2151   _cm->set_concurrency(_active_workers);
2152   _workers->run_task(&proc_task_proxy);
2153 }
2154 
2155 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2156   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2157   EnqueueTask& _enq_task;
2158 
2159 public:
2160   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2161     AbstractGangTask("Enqueue reference objects in parallel"),
2162     _enq_task(enq_task) { }
2163 
2164   virtual void work(uint worker_id) {
2165     _enq_task.work(worker_id);
2166   }
2167 };
2168 
2169 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2170   assert(_workers != NULL, "Need parallel worker threads.");
2171   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2172 
2173   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2174 
2175   // Not strictly necessary but...
2176   //
2177   // We need to reset the concurrency level before each
2178   // proxy task execution, so that the termination protocol
2179   // and overflow handling in CMTask::do_marking_step() know
2180   // how many workers to wait for.
2181   _cm->set_concurrency(_active_workers);
2182   _workers->run_task(&enq_task_proxy);
2183 }
2184 
2185 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2186   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2187 }
2188 
2189 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2190   if (has_overflown()) {
2191     // Skip processing the discovered references if we have
2192     // overflown the global marking stack. Reference objects
2193     // only get discovered once so it is OK to not
2194     // de-populate the discovered reference lists. We could have,
2195     // but the only benefit would be that, when marking restarts,
2196     // fewer reference objects are discovered.
2197     return;
2198   }
2199 
2200   ResourceMark rm;
2201   HandleMark   hm;
2202 
2203   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2204 
2205   // Is alive closure.
2206   G1CMIsAliveClosure g1_is_alive(g1h);
2207 
2208   // Inner scope to exclude the cleaning of the string and symbol
2209   // tables from the displayed time.
2210   {
2211     G1CMTraceTime t("GC ref-proc", G1Log::finer());
2212 
2213     ReferenceProcessor* rp = g1h->ref_processor_cm();
2214 
2215     // See the comment in G1CollectedHeap::ref_processing_init()
2216     // about how reference processing currently works in G1.
2217 
2218     // Set the soft reference policy
2219     rp->setup_policy(clear_all_soft_refs);
2220     assert(_markStack.isEmpty(), "mark stack should be empty");
2221 
2222     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2223     // in serial reference processing. Note these closures are also
2224     // used for serially processing (by the current thread) the
2225     // JNI references during parallel reference processing.
2226     //
2227     // These closures do not need to synchronize with the worker
2228     // threads involved in parallel reference processing as these
2229     // instances are executed serially by the current thread (e.g.
2230     // reference processing is not multi-threaded and is thus
2231     // performed by the current thread instead of a gang worker).
2232     //
2233     // The gang tasks involved in parallel reference processing create
2234     // their own instances of these closures, which do their own
2235     // synchronization among themselves.
2236     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2237     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2238 
2239     // We need at least one active thread. If reference processing
2240     // is not multi-threaded we use the current (VMThread) thread,
2241     // otherwise we use the work gang from the G1CollectedHeap and
2242     // we utilize all the worker threads we can.
2243     bool processing_is_mt = rp->processing_is_mt();
2244     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2245     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
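         // That is, clamp active_workers to the range [1, _max_worker_id].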
2246 
2247     // Parallel processing task executor.
2248     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2249                                               g1h->workers(), active_workers);
2250     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2251 
2252     // Set the concurrency level. The phase was already set prior to
2253     // executing the remark task.
2254     set_concurrency(active_workers);
2255 
2256     // Set the degree of MT processing here.  If the discovery was done MT,
2257     // the number of threads involved during discovery could differ from
2258     // the number of active workers.  This is OK as long as the discovered
2259     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2260     rp->set_active_mt_degree(active_workers);
2261 
2262     // Process the weak references.
2263     const ReferenceProcessorStats& stats =
2264         rp->process_discovered_references(&g1_is_alive,
2265                                           &g1_keep_alive,
2266                                           &g1_drain_mark_stack,
2267                                           executor,
2268                                           g1h->gc_timer_cm());
2269     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2270 
2271     // The do_oop work routines of the keep_alive and drain_marking_stack
2272     // oop closures will set the has_overflown flag if we overflow the
2273     // global marking stack.
2274 
2275     assert(_markStack.overflow() || _markStack.isEmpty(),
2276             "mark stack should be empty (unless it overflowed)");
2277 
2278     if (_markStack.overflow()) {
2279       // This should have been done already when we tried to push an
2280       // entry on to the global mark stack. But let's do it again.
2281       set_has_overflown();
2282     }
2283 
2284     assert(rp->num_q() == active_workers, "why not");
2285 
2286     rp->enqueue_discovered_references(executor);
2287 
2288     rp->verify_no_references_recorded();
2289     assert(!rp->discovery_enabled(), "Post condition");
2290   }
2291 
2292   if (has_overflown()) {
2293     // We cannot trust g1_is_alive if the marking stack overflowed.
2294     return;
2295   }
2296 
2297   assert(_markStack.isEmpty(), "Marking should have completed");
2298 
2299   // Unload Klasses, String, Symbols, Code Cache, etc.
2300   {
2301     G1CMTraceTime trace("Unloading", G1Log::finer());
2302 
2303     if (ClassUnloadingWithConcurrentMark) {
2304       bool purged_classes;
2305 
2306       {
2307         G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest());
2308         purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2309       }
2310 
2311       {
2312         G1CMTraceTime trace("Parallel Unloading", G1Log::finest());
2313         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2314       }
2315     }
2316 
2317     if (G1StringDedup::is_enabled()) {
2318       G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest());
2319       G1StringDedup::unlink(&g1_is_alive);
2320     }
2321   }
2322 }
2323 
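     // Exchange the roles of the two marking bitmaps: the bitmap built up
     // during the marking cycle that just completed ("next") becomes the
     // authoritative "prev" bitmap, and the old "prev" bitmap will be
     // cleared and reused as "next" by the following cycle.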
2324 void ConcurrentMark::swapMarkBitMaps() {
2325   CMBitMapRO* temp = _prevMarkBitMap;
2326   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2327   _nextMarkBitMap  = (CMBitMap*)  temp;
2328 }
2329 
2330 // Closure for marking entries in SATB buffers.
2331 class CMSATBBufferClosure : public SATBBufferClosure {
2332 private:
2333   CMTask* _task;
2334   G1CollectedHeap* _g1h;
2335 
2336   // This is very similar to CMTask::deal_with_reference, but with
2337   // more relaxed requirements for the argument, so this must be more
2338   // circumspect about treating the argument as an object.
2339   void do_entry(void* entry) const {
2340     _task->increment_refs_reached();
2341     HeapRegion* hr = _g1h->heap_region_containing_raw(entry);
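         // Entries at or above NTAMS either refer to objects allocated since
         // this marking cycle started (which are implicitly live and need no
         // explicit marking) or are stale; either way they can be skipped,
         // so only entries below NTAMS are processed.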
2342     if (entry < hr->next_top_at_mark_start()) {
2343       // Until we get here, we don't know whether entry refers to a valid
2344       // object; it could instead have been a stale reference.
2345       oop obj = static_cast<oop>(entry);
2346       assert(obj->is_oop(true /* ignore mark word */),
2347              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2348       _task->make_reference_grey(obj, hr);
2349     }
2350   }
2351 
2352 public:
2353   CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2354     : _task(task), _g1h(g1h) { }
2355 
2356   virtual void do_buffer(void** buffer, size_t size) {
2357     for (size_t i = 0; i < size; ++i) {
2358       do_entry(buffer[i]);
2359     }
2360   }
2361 };
2362 
2363 class G1RemarkThreadsClosure : public ThreadClosure {
2364   CMSATBBufferClosure _cm_satb_cl;
2365   G1CMOopClosure _cm_cl;
2366   MarkingCodeBlobClosure _code_cl;
2367   int _thread_parity;
2368 
2369  public:
2370   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2371     _cm_satb_cl(task, g1h),
2372     _cm_cl(g1h, g1h->concurrent_mark(), task),
2373     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2374     _thread_parity(Threads::thread_claim_parity()) {}
2375 
2376   void do_thread(Thread* thread) {
2377     if (thread->is_Java_thread()) {
2378       if (thread->claim_oops_do(true, _thread_parity)) {
2379         JavaThread* jt = (JavaThread*)thread;
2380 
2381         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2382         // however, oops reachable from nmethods have very complex lifecycles:
2383         // * Alive if on the stack of an executing method
2384         // * Weakly reachable otherwise
2385         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2386         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2387         jt->nmethods_do(&_code_cl);
2388 
2389         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2390       }
2391     } else if (thread->is_VM_thread()) {
2392       if (thread->claim_oops_do(true, _thread_parity)) {
2393         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2394       }
2395     }
2396   }
2397 };
2398 
2399 class CMRemarkTask: public AbstractGangTask {
2400 private:
2401   ConcurrentMark* _cm;
2402 public:
2403   void work(uint worker_id) {
2404     // Since all available tasks are actually started, we should
2405     // only proceed if we're supposed to be active.
2406     if (worker_id < _cm->active_tasks()) {
2407       CMTask* task = _cm->task(worker_id);
2408       task->record_start_time();
2409       {
2410         ResourceMark rm;
2411         HandleMark hm;
2412 
2413         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2414         Threads::threads_do(&threads_f);
2415       }
2416 
2417       do {
2418         task->do_marking_step(1000000000.0 /* something very large */,
2419                               true         /* do_termination       */,
2420                               false        /* is_serial            */);
2421       } while (task->has_aborted() && !_cm->has_overflown());
2422       // If we overflow, then we do not want to restart. We instead
2423       // want to abort remark and do concurrent marking again.
2424       task->record_end_time();
2425     }
2426   }
2427 
2428   CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2429     AbstractGangTask("Par Remark"), _cm(cm) {
2430     _cm->terminator()->reset_for_reuse(active_workers);
2431   }
2432 };
2433 
2434 void ConcurrentMark::checkpointRootsFinalWork() {
2435   ResourceMark rm;
2436   HandleMark   hm;
2437   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2438 
2439   G1CMTraceTime trace("Finalize Marking", G1Log::finer());
2440 
2441   g1h->ensure_parsability(false);
2442 
2443   // this is remark, so we'll use up all active threads
2444   uint active_workers = g1h->workers()->active_workers();
2445   set_concurrency_and_phase(active_workers, false /* concurrent */);
2446   // Leave _parallel_marking_threads at its
2447   // value originally calculated in the ConcurrentMark
2448   // constructor and pass values of the active workers
2449   // through the gang in the task.
2450 
2451   {
2452     StrongRootsScope srs(active_workers);
2453 
2454     CMRemarkTask remarkTask(this, active_workers);
2455     // We will start all available threads, even if we decide that the
2456     // active_workers will be fewer. The extra ones will just bail out
2457     // immediately.
2458     g1h->workers()->run_task(&remarkTask);
2459   }
2460 
2461   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2462   guarantee(has_overflown() ||
2463             satb_mq_set.completed_buffers_num() == 0,
2464             "Invariant: has_overflown = %s, num buffers = %d",
2465             BOOL_TO_STR(has_overflown()),
2466             satb_mq_set.completed_buffers_num());
2467 
2468   print_stats();
2469 }
2470 
2471 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2472   // Note we are overriding the read-only view of the prev map here, via
2473   // the cast.
2474   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2475 }
2476 
2477 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2478   _nextMarkBitMap->clearRange(mr);
2479 }
2480 
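     // Claims the next region to scan for the given worker by advancing the
     // global finger with a CAS. Typical use (sketch): a marking task keeps
     // calling claim_region() until it either receives a non-NULL region or
     // the finger has reached the end of the heap; a NULL result while the
     // finger is still below the heap end just means the claimed region had
     // nothing to scan and the caller should call claim_region() again.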
2481 HeapRegion*
2482 ConcurrentMark::claim_region(uint worker_id) {
2483   // "checkpoint" the finger
2484   HeapWord* finger = _finger;
2485 
2486   // _heap_end will not change underneath our feet; it only changes at
2487   // yield points.
2488   while (finger < _heap_end) {
2489     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2490 
2491     // Note on how this code handles humongous regions. In the
2492     // normal case the finger will reach the start of a "starts
2493     // humongous" (SH) region. Its end will either be the end of the
2494     // last "continues humongous" (CH) region in the sequence, or the
2495     // standard end of the SH region (if the SH is the only region in
2496     // the sequence). That way claim_region() will skip over the CH
2497     // regions. However, there is a subtle race between a CM thread
2498     // executing this method and a mutator thread doing a humongous
2499     // object allocation. The two are not mutually exclusive as the CM
2500     // thread does not need to hold the Heap_lock when it gets
2501     // here. So there is a chance that claim_region() will come across
2502     // a free region that's in the process of becoming a SH or a CH
2503     // region. In the former case, it will either
2504     //   a) Miss the update to the region's end, in which case it will
2505     //      visit every subsequent CH region, will find their bitmaps
2506     //      empty, and do nothing, or
2507     //   b) Will observe the update of the region's end (in which case
2508     //      it will skip the subsequent CH regions).
2509     // If it comes across a region that suddenly becomes CH, the
2510     // scenario will be similar to b). So, the race between
2511     // claim_region() and a humongous object allocation might force us
2512     // to do a bit of unnecessary work (due to some unnecessary bitmap
2513     // iterations) but it should not introduce any correctness issues.
2514     HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2515 
2516     // heap_region_containing_raw() above may return NULL, as we always scan and
2517     // claim up to the end of the heap. In this case, just jump to the next region.
2518     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2519 
2520     // Is the gap between reading the finger and doing the CAS too long?
2521     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2522     if (res == finger && curr_region != NULL) {
2523       // we succeeded
2524       HeapWord*   bottom        = curr_region->bottom();
2525       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2526 
2527       // Notice that _finger == end cannot be guaranteed here since
2528       // someone else might have moved the finger even further.
2529       assert(_finger >= end, "the finger should have moved forward");
2530 
2531       if (limit > bottom) {
2532         return curr_region;
2533       } else {
2534         assert(limit == bottom,
2535                "the region limit should be at bottom");
2536         // we return NULL and the caller should try calling
2537         // claim_region() again.
2538         return NULL;
2539       }
2540     } else {
2541       assert(_finger > finger, "the finger should have moved forward");
2542       // read it again
2543       finger = _finger;
2544     }
2545   }
2546 
2547   return NULL;
2548 }
2549 
2550 #ifndef PRODUCT
2551 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
2552 private:
2553   G1CollectedHeap* _g1h;
2554   const char* _phase;
2555   int _info;
2556 
2557 public:
2558   VerifyNoCSetOops(const char* phase, int info = -1) :
2559     _g1h(G1CollectedHeap::heap()),
2560     _phase(phase),
2561     _info(info)
2562   { }
2563 
2564   void operator()(oop obj) const {
2565     guarantee(obj->is_oop(),
2566               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2567               p2i(obj), _phase, _info);
2568     guarantee(!_g1h->obj_in_cs(obj),
2569               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2570               p2i(obj), _phase, _info);
2571   }
2572 };
2573 
2574 void ConcurrentMark::verify_no_cset_oops() {
2575   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2576   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2577     return;
2578   }
2579 
2580   // Verify entries on the global mark stack
2581   _markStack.iterate(VerifyNoCSetOops("Stack"));
2582 
2583   // Verify entries on the task queues
2584   for (uint i = 0; i < _max_worker_id; ++i) {
2585     CMTaskQueue* queue = _task_queues->queue(i);
2586     queue->iterate(VerifyNoCSetOops("Queue", i));
2587   }
2588 
2589   // Verify the global finger
2590   HeapWord* global_finger = finger();
2591   if (global_finger != NULL && global_finger < _heap_end) {
2592     // The global finger always points to a heap region boundary. We
2593     // use heap_region_containing_raw() to get the containing region
2594     // given that the global finger could be pointing to a free region
2595     // which subsequently becomes continues humongous. If that
2596     // happens, heap_region_containing() will return the bottom of the
2597     // corresponding starts humongous region and the check below will
2598     // not hold any more.
2599     // Since we always iterate over all regions, we might get a NULL HeapRegion
2600     // here.
2601     HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2602     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2603               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
2604               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
2605   }
2606 
2607   // Verify the task fingers
2608   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2609   for (uint i = 0; i < parallel_marking_threads(); ++i) {
2610     CMTask* task = _tasks[i];
2611     HeapWord* task_finger = task->finger();
2612     if (task_finger != NULL && task_finger < _heap_end) {
2613       // See above note on the global finger verification.
2614       HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2615       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2616                 !task_hr->in_collection_set(),
2617                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2618                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
2619     }
2620   }
2621 }
2622 #endif // PRODUCT
2623 
2624 // Aggregate the counting data that was constructed concurrently
2625 // with marking.
2626 class AggregateCountDataHRClosure: public HeapRegionClosure {
2627   G1CollectedHeap* _g1h;
2628   ConcurrentMark* _cm;
2629   CardTableModRefBS* _ct_bs;
2630   BitMap* _cm_card_bm;
2631   uint _max_worker_id;
2632 
2633  public:
2634   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2635                               BitMap* cm_card_bm,
2636                               uint max_worker_id) :
2637     _g1h(g1h), _cm(g1h->concurrent_mark()),
2638     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2639     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2640 
2641   bool doHeapRegion(HeapRegion* hr) {
2642     if (hr->is_continues_humongous()) {
2643       // We will ignore these here and process them when their
2644       // associated "starts humongous" region is processed.
2645       // Note that we cannot rely on their associated
2646       // "starts humongous" region to have its bit set to 1
2647       // since, due to the region chunking in the parallel region
2648       // iteration, a "continues humongous" region might be visited
2649       // before its associated "starts humongous".
2650       return false;
2651     }
2652 
2653     HeapWord* start = hr->bottom();
2654     HeapWord* limit = hr->next_top_at_mark_start();
2655     HeapWord* end = hr->end();
2656 
2657     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2658            "Preconditions not met - "
2659            "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2660            "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2661            p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2662 
2663     assert(hr->next_marked_bytes() == 0, "Precondition");
2664 
2665     if (start == limit) {
2666       // NTAMS of this region has not been set so nothing to do.
2667       return false;
2668     }
2669 
2670     // 'start' should be in the heap.
2671     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2672     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2673     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2674 
2675     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2676     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2677     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2678 
2679     // If ntams is not card aligned then we bump the card bitmap index
2680     // for limit so that we get all the cards spanned by
2681     // the object ending at ntams.
2682     // Note: if this is the last region in the heap then ntams
2683     // could actually be just beyond the end of the heap;
2684     // limit_idx will then correspond to a (non-existent) card
2685     // that is also outside the heap.
2686     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2687       limit_idx += 1;
2688     }
2689 
2690     assert(limit_idx <= end_idx, "or else use atomics");
2691 
2692     // Aggregate the "stripe" in the count data associated with hr.
2693     uint hrm_index = hr->hrm_index();
2694     size_t marked_bytes = 0;
2695 
2696     for (uint i = 0; i < _max_worker_id; i += 1) {
2697       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2698       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2699 
2700       // Fetch the marked_bytes in this region for task i and
2701       // add it to the running total for this region.
2702       marked_bytes += marked_bytes_array[hrm_index];
2703 
2704       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2705       // into the global card bitmap.
2706       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2707 
2708       while (scan_idx < limit_idx) {
2709         assert(task_card_bm->at(scan_idx) == true, "should be");
2710         _cm_card_bm->set_bit(scan_idx);
2711         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2712 
2713         // BitMap::get_next_one_offset() can handle the case when
2714         // its left_offset parameter is greater than its right_offset
2715         // parameter. It does, however, have an early exit if
2716         // left_offset == right_offset. So let's limit the value
2717         // passed in for left offset here.
2718         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2719         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2720       }
2721     }
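         // In effect the loop above ORs each worker's card bitmap over
         // [start_idx, limit_idx) into the global card bitmap (visiting only
         // the bits that are actually set) and sums the per-worker marked
         // bytes recorded for this region.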
2722 
2723     // Update the marked bytes for this region.
2724     hr->add_to_marked_bytes(marked_bytes);
2725 
2726     // Next heap region
2727     return false;
2728   }
2729 };
2730 
2731 class G1AggregateCountDataTask: public AbstractGangTask {
2732 protected:
2733   G1CollectedHeap* _g1h;
2734   ConcurrentMark* _cm;
2735   BitMap* _cm_card_bm;
2736   uint _max_worker_id;
2737   uint _active_workers;
2738   HeapRegionClaimer _hrclaimer;
2739 
2740 public:
2741   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2742                            ConcurrentMark* cm,
2743                            BitMap* cm_card_bm,
2744                            uint max_worker_id,
2745                            uint n_workers) :
2746       AbstractGangTask("Count Aggregation"),
2747       _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2748       _max_worker_id(max_worker_id),
2749       _active_workers(n_workers),
2750       _hrclaimer(_active_workers) {
2751   }
2752 
2753   void work(uint worker_id) {
2754     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
2755 
2756     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
2757   }
2758 };
2759 
2760 
2761 void ConcurrentMark::aggregate_count_data() {
2762   uint n_workers = _g1h->workers()->active_workers();
2763 
2764   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
2765                                            _max_worker_id, n_workers);
2766 
2767   _g1h->workers()->run_task(&g1_par_agg_task);
2768 }
2769 
2770 // Clear the per-worker arrays used to store the per-region counting data
2771 void ConcurrentMark::clear_all_count_data() {
2772   // Clear the global card bitmap - it will be filled during
2773   // liveness count aggregation (during remark) and the
2774   // final counting task.
2775   _card_bm.clear();
2776 
2777   // Clear the global region bitmap - it will be filled as part
2778   // of the final counting task.
2779   _region_bm.clear();
2780 
2781   uint max_regions = _g1h->max_regions();
2782   assert(_max_worker_id > 0, "uninitialized");
2783 
2784   for (uint i = 0; i < _max_worker_id; i += 1) {
2785     BitMap* task_card_bm = count_card_bitmap_for(i);
2786     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
2787 
2788     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
2789     assert(marked_bytes_array != NULL, "uninitialized");
2790 
2791     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
2792     task_card_bm->clear();
2793   }
2794 }
2795 
2796 void ConcurrentMark::print_stats() {
2797   if (G1MarkingVerboseLevel > 0) {
2798     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2799     for (size_t i = 0; i < _active_tasks; ++i) {
2800       _tasks[i]->print_stats();
2801       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2802     }
2803   }
2804 }
2805 
2806 // abandon current marking iteration due to a Full GC
2807 void ConcurrentMark::abort() {
2808   if (!cmThread()->during_cycle() || _has_aborted) {
2809     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2810     return;
2811   }
2812 
2813   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2814   // concurrent bitmap clearing.
2815   _nextMarkBitMap->clearAll();
2816 
2817   // Note we cannot clear the previous marking bitmap here
2818   // since VerifyDuringGC verifies the objects marked during
2819   // a full GC against the previous bitmap.
2820 
2821   // Clear the liveness counting data
2822   clear_all_count_data();
2823   // Empty mark stack
2824   reset_marking_state();
2825   for (uint i = 0; i < _max_worker_id; ++i) {
2826     _tasks[i]->clear_region_fields();
2827   }
2828   _first_overflow_barrier_sync.abort();
2829   _second_overflow_barrier_sync.abort();
2830   _has_aborted = true;
2831 
2832   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2833   satb_mq_set.abandon_partial_marking();
2834   // This can be called either during or outside marking; we'll read
2835   // the expected_active value from the SATB queue set.
2836   satb_mq_set.set_active_all_threads(
2837                                  false, /* new active value */
2838                                  satb_mq_set.is_active() /* expected_active */);
2839 
2840   _g1h->trace_heap_after_concurrent_cycle();
2841   _g1h->register_concurrent_cycle_end();
2842 }
2843 
2844 static void print_ms_time_info(const char* prefix, const char* name,
2845                                NumberSeq& ns) {
2846   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2847                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2848   if (ns.num() > 0) {
2849     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2850                            prefix, ns.sd(), ns.maximum());
2851   }
2852 }
2853 
2854 void ConcurrentMark::print_summary_info() {
2855   gclog_or_tty->print_cr(" Concurrent marking:");
2856   print_ms_time_info("  ", "init marks", _init_times);
2857   print_ms_time_info("  ", "remarks", _remark_times);
2858   {
2859     print_ms_time_info("     ", "final marks", _remark_mark_times);
2860     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2861 
2862   }
2863   print_ms_time_info("  ", "cleanups", _cleanup_times);
2864   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
2865                          _total_counting_time,
2866                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2867                           (double)_cleanup_times.num()
2868                          : 0.0));
2869   if (G1ScrubRemSets) {
2870     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2871                            _total_rs_scrub_time,
2872                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2873                             (double)_cleanup_times.num()
2874                            : 0.0));
2875   }
2876   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
2877                          (_init_times.sum() + _remark_times.sum() +
2878                           _cleanup_times.sum())/1000.0);
2879   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
2880                 "(%8.2f s marking).",
2881                 cmThread()->vtime_accum(),
2882                 cmThread()->vtime_mark_accum());
2883 }
2884 
2885 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2886   _parallel_workers->print_worker_threads_on(st);
2887 }
2888 
2889 void ConcurrentMark::print_on_error(outputStream* st) const {
2890   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2891       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
2892   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
2893   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
2894 }
2895 
2896 // We take a break if someone is trying to stop the world.
2897 bool ConcurrentMark::do_yield_check(uint worker_id) {
2898   if (SuspendibleThreadSet::should_yield()) {
2899     if (worker_id == 0) {
2900       _g1h->g1_policy()->record_concurrent_pause();
2901     }
2902     SuspendibleThreadSet::yield();
2903     return true;
2904   } else {
2905     return false;
2906   }
2907 }
2908 
2909 // Closure for iteration over bitmaps
2910 class CMBitMapClosure : public BitMapClosure {
2911 private:
2912   // the bitmap that is being iterated over
2913   CMBitMap*                   _nextMarkBitMap;
2914   ConcurrentMark*             _cm;
2915   CMTask*                     _task;
2916 
2917 public:
2918   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
2919     _nextMarkBitMap(nextMarkBitMap), _cm(cm), _task(task) { }
2920 
2921   bool do_bit(size_t offset) {
2922     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2923     assert(_nextMarkBitMap->isMarked(addr), "invariant");
2924     assert(addr < _cm->finger(), "invariant");
2925     assert(addr >= _task->finger(), "invariant");
2926 
2927     // We move that task's local finger along.
2928     _task->move_finger_to(addr);
2929 
2930     _task->scan_object(oop(addr));
2931     // we only partially drain the local queue and global stack
2932     _task->drain_local_queue(true);
2933     _task->drain_global_stack(true);
2934 
2935     // if the has_aborted flag has been raised, we need to bail out of
2936     // the iteration
2937     return !_task->has_aborted();
2938   }
2939 };
2940 
2941 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2942   ReferenceProcessor* result = NULL;
2943   if (G1UseConcMarkReferenceProcessing) {
2944     result = g1h->ref_processor_cm();
2945     assert(result != NULL, "should not be NULL");
2946   }
2947   return result;
2948 }
2949 
2950 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2951                                ConcurrentMark* cm,
2952                                CMTask* task)
2953   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2954     _g1h(g1h), _cm(cm), _task(task)
2955 { }
2956 
2957 void CMTask::setup_for_region(HeapRegion* hr) {
2958   assert(hr != NULL,
2959         "claim_region() should have filtered out NULL regions");
2960   assert(!hr->is_continues_humongous(),
2961         "claim_region() should have filtered out continues humongous regions");
2962   _curr_region  = hr;
2963   _finger       = hr->bottom();
2964   update_region_limit();
2965 }
2966 
2967 void CMTask::update_region_limit() {
2968   HeapRegion* hr            = _curr_region;
2969   HeapWord* bottom          = hr->bottom();
2970   HeapWord* limit           = hr->next_top_at_mark_start();
2971 
2972   if (limit == bottom) {
2973     // The region was collected underneath our feet.
2974     // We set the finger to bottom to ensure that the bitmap
2975     // iteration that will follow this will not do anything.
2976     // (this is not a condition that holds when we set the region up,
2977     // as the region is not supposed to be empty in the first place)
2978     _finger = bottom;
2979   } else if (limit >= _region_limit) {
2980     assert(limit >= _finger, "peace of mind");
2981   } else {
2982     assert(limit < _region_limit, "only way to get here");
2983     // This can happen under some pretty unusual circumstances.  An
2984     // evacuation pause empties the region underneath our feet (NTAMS
2985     // at bottom). We then do some allocation in the region (NTAMS
2986     // stays at bottom), followed by the region being used as a GC
2987     // alloc region (NTAMS will move to top() and the objects
2988     // originally below it will be grayed). All objects now marked in
2989     // the region are explicitly grayed, if below the global finger,
2990     // and in fact we do not need to scan anything else. So, we simply
2991     // set _finger to be limit to ensure that the bitmap iteration
2992     // doesn't do anything.
2993     _finger = limit;
2994   }
2995 
2996   _region_limit = limit;
2997 }
2998 
2999 void CMTask::giveup_current_region() {
3000   assert(_curr_region != NULL, "invariant");
3001   clear_region_fields();
3002 }
3003 
3004 void CMTask::clear_region_fields() {
3005   // Values for these three fields that indicate that we're not
3006   // holding on to a region.
3007   _curr_region   = NULL;
3008   _finger        = NULL;
3009   _region_limit  = NULL;
3010 }
3011 
3012 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3013   if (cm_oop_closure == NULL) {
3014     assert(_cm_oop_closure != NULL, "invariant");
3015   } else {
3016     assert(_cm_oop_closure == NULL, "invariant");
3017   }
3018   _cm_oop_closure = cm_oop_closure;
3019 }
3020 
3021 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3022   guarantee(nextMarkBitMap != NULL, "invariant");
3023   _nextMarkBitMap                = nextMarkBitMap;
3024   clear_region_fields();
3025 
3026   _calls                         = 0;
3027   _elapsed_time_ms               = 0.0;
3028   _termination_time_ms           = 0.0;
3029   _termination_start_time_ms     = 0.0;
3030 }
3031 
3032 bool CMTask::should_exit_termination() {
3033   regular_clock_call();
3034   // This is called when we are in the termination protocol. We should
3035   // quit if, for some reason, this task wants to abort or the global
3036   // stack is not empty (this means that we can get work from it).
3037   return !_cm->mark_stack_empty() || has_aborted();
3038 }
3039 
3040 void CMTask::reached_limit() {
3041   assert(_words_scanned >= _words_scanned_limit ||
3042          _refs_reached >= _refs_reached_limit,
3043          "shouldn't have been called otherwise");
3044   regular_clock_call();
3045 }
3046 
3047 void CMTask::regular_clock_call() {
3048   if (has_aborted()) return;
3049 
3050   // First, we need to recalculate the words scanned and refs reached
3051   // limits for the next clock call.
3052   recalculate_limits();
3053 
3054   // During the regular clock call we do the following
3055 
3056   // (1) If an overflow has been flagged, then we abort.
3057   if (_cm->has_overflown()) {
3058     set_has_aborted();
3059     return;
3060   }
3061 
3062   // If we are not concurrent (i.e. we're doing remark) we don't need
3063   // to check anything else. The other steps are only needed during
3064   // the concurrent marking phase.
3065   if (!concurrent()) return;
3066 
3067   // (2) If marking has been aborted for Full GC, then we also abort.
3068   if (_cm->has_aborted()) {
3069     set_has_aborted();
3070     return;
3071   }
3072 
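       // Record the current (virtual) elapsed time; it is used by the time
       // quota check below.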
3073   double curr_time_ms = os::elapsedVTime() * 1000.0;
3074 
3075   // (3) We check whether we should yield. If we have to, then we abort.
3076   if (SuspendibleThreadSet::should_yield()) {
3077     // We should yield. To do this we abort the task. The caller is
3078     // responsible for yielding.
3079     set_has_aborted();
3080     return;
3081   }
3082 
3083   // (4) We check whether we've reached our time quota. If we have,
3084   // then we abort.
3085   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3086   if (elapsed_time_ms > _time_target_ms) {
3087     set_has_aborted();
3088     _has_timed_out = true;
3089     return;
3090   }
3091 
3092   // (5) Finally, we check whether there are enough completed SATB
3093   // buffers available for processing. If there are, we abort.
3094   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3095   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3096     // we do need to process SATB buffers, we'll abort and restart
3097     // the marking task to do so
3098     set_has_aborted();
3099     return;
3100   }
3101 }
3102 
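     // Reset the work-based limits that trigger the next regular_clock_call():
     // a full words_scanned_period of scanned words and a full
     // refs_reached_period of reached references beyond the current counts.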
3103 void CMTask::recalculate_limits() {
3104   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3105   _words_scanned_limit      = _real_words_scanned_limit;
3106 
3107   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3108   _refs_reached_limit       = _real_refs_reached_limit;
3109 }
3110 
3111 void CMTask::decrease_limits() {
3112   // This is called when we believe that we're going to do an infrequent
3113   // operation which will increase the per byte scanned cost (i.e. move
3114   // entries to/from the global stack). It basically tries to decrease the
3115   // scanning limit so that the clock is called earlier.
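       // Concretely: _real_words_scanned_limit is the count at the last
       // recalculate_limits() plus a full words_scanned_period, so the new
       // limit is that count plus only a quarter of a period (and similarly
       // for refs), which makes the next clock call come much sooner.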
3116 
3117   _words_scanned_limit = _real_words_scanned_limit -
3118     3 * words_scanned_period / 4;
3119   _refs_reached_limit  = _real_refs_reached_limit -
3120     3 * refs_reached_period / 4;
3121 }
3122 
3123 void CMTask::move_entries_to_global_stack() {
3124   // local array where we'll store the entries that will be popped
3125   // from the local queue
3126   oop buffer[global_stack_transfer_size];
3127 
3128   int n = 0;
3129   oop obj;
3130   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3131     buffer[n] = obj;
3132     ++n;
3133   }
3134 
3135   if (n > 0) {
3136     // we popped at least one entry from the local queue
3137 
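         // Try to push the buffered entries onto the global mark stack; a
         // failed push means the global mark stack has overflowed, so we
         // abort this marking step.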
3138     if (!_cm->mark_stack_push(buffer, n)) {
3139       set_has_aborted();
3140     }
3141   }
3142 
3143   // this operation was quite expensive, so decrease the limits
3144   decrease_limits();
3145 }
3146 
3147 void CMTask::get_entries_from_global_stack() {
3148   // local array where we'll store the entries that will be popped
3149   // from the global stack.
3150   oop buffer[global_stack_transfer_size];
3151   int n;
3152   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3153   assert(n <= global_stack_transfer_size,
3154          "we should not pop more than the given limit");
3155   if (n > 0) {
3156     // yes, we did actually pop at least one entry
3157     for (int i = 0; i < n; ++i) {
3158       bool success = _task_queue->push(buffer[i]);
3159       // We only call this when the local queue is empty or under a
3160       // given target limit. So, we do not expect this push to fail.
3161       assert(success, "invariant");
3162     }
3163   }
3164 
3165   // this operation was quite expensive, so decrease the limits
3166   decrease_limits();
3167 }
3168 
3169 void CMTask::drain_local_queue(bool partially) {
3170   if (has_aborted()) return;
3171 
3172   // Decide what the target size is, depending on whether we're going to
3173   // drain it partially (so that other tasks can steal if they run out
3174   // of things to do) or totally (at the very end).
3175   size_t target_size;
3176   if (partially) {
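         // Drain down to at most a third of the queue's capacity, capped at
         // GCDrainStackTargetSize, so that some entries remain available
         // for other tasks to steal.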
3177     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3178   } else {
3179     target_size = 0;
3180   }
3181 
3182   if (_task_queue->size() > target_size) {
3183     oop obj;
3184     bool ret = _task_queue->pop_local(obj);
3185     while (ret) {
3186       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3187       assert(!_g1h->is_on_master_free_list(
3188                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3189 
3190       scan_object(obj);
3191 
3192       if (_task_queue->size() <= target_size || has_aborted()) {
3193         ret = false;
3194       } else {
3195         ret = _task_queue->pop_local(obj);
3196       }
3197     }
3198   }
3199 }
3200 
3201 void CMTask::drain_global_stack(bool partially) {
3202   if (has_aborted()) return;
3203 
3204   // We have a policy to drain the local queue before we attempt to
3205   // drain the global stack.
3206   assert(partially || _task_queue->size() == 0, "invariant");
3207 
3208   // Decide what the target size is, depending on whether we're going to
3209   // drain it partially (so that other tasks can steal if they run out
3210   // of things to do) or totally (at the very end).  Notice that,
3211   // because we move entries from the global stack in chunks or
3212   // because another task might be doing the same, we might in fact
3213   // drop below the target. But, this is not a problem.
3214   size_t target_size;
3215   if (partially) {
3216     target_size = _cm->partial_mark_stack_size_target();
3217   } else {
3218     target_size = 0;
3219   }
3220 
3221   if (_cm->mark_stack_size() > target_size) {
3222     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3223       get_entries_from_global_stack();
3224       drain_local_queue(partially);
3225     }
3226   }
3227 }
3228 
3229 // The SATB queue code makes several assumptions about whether to call
3230 // the par or non-par versions of the methods. This is why some of the
3231 // code is replicated. We should really get rid of the single-threaded
3232 // version of the code to simplify things.
3233 void CMTask::drain_satb_buffers() {
3234   if (has_aborted()) return;
3235 
3236   // We set this so that the regular clock knows that we're in the
3237   // middle of draining buffers and doesn't set the abort flag when it
3238   // notices that SATB buffers are available for draining. It'd be
3239   // very counterproductive if it did that. :-)
3240   _draining_satb_buffers = true;
3241 
3242   CMSATBBufferClosure satb_cl(this, _g1h);
3243   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3244 
3245   // This keeps claiming and applying the closure to completed buffers
3246   // until we run out of buffers or we need to abort.
3247   while (!has_aborted() &&
3248          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3249     regular_clock_call();
3250   }
3251 
3252   _draining_satb_buffers = false;
3253 
3254   assert(has_aborted() ||
3255          concurrent() ||
3256          satb_mq_set.completed_buffers_num() == 0, "invariant");
3257 
3258   // again, this was a potentially expensive operation, decrease the
3259   // limits to get the regular clock call early
3260   decrease_limits();
3261 }
3262 
3263 void CMTask::print_stats() {
3264   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3265                          _worker_id, _calls);
3266   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3267                          _elapsed_time_ms, _termination_time_ms);
3268   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3269                          _step_times_ms.num(), _step_times_ms.avg(),
3270                          _step_times_ms.sd());
3271   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3272                          _step_times_ms.maximum(), _step_times_ms.sum());
3273 }
3274 
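     // Attempts to steal an entry from another task's queue; returns true
     // and sets obj if the steal succeeded.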
3275 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3276   return _task_queues->steal(worker_id, hash_seed, obj);
3277 }
3278 
3279 /*****************************************************************************
3280 
3281     The do_marking_step(time_target_ms, ...) method is the building
3282     block of the parallel marking framework. It can be called in parallel
3283     with other invocations of do_marking_step() on different tasks
3284     (but only one per task, obviously) and concurrently with the
3285     mutator threads, or during remark, hence it eliminates the need
3286     for two versions of the code. When called during remark, it will
3287     pick up from where the task left off during the concurrent marking
3288     phase. Interestingly, tasks are also claimable during evacuation
3289     pauses, since do_marking_step() ensures that it aborts before
3290     it needs to yield.
3291 
3292     The data structures that it uses to do marking work are the
3293     following:
3294 
3295       (1) Marking Bitmap. If there are gray objects that appear only
3296       on the bitmap (this happens either when dealing with an overflow
3297       or when the initial marking phase has simply marked the roots
3298       and didn't push them on the stack), then tasks claim heap
3299       regions whose bitmap they then scan to find gray objects. A
3300       global finger indicates where the end of the last claimed region
3301       is. A local finger indicates how far into the region a task has
3302       scanned. The two fingers are used to determine how to gray an
3303       object (i.e. whether simply marking it is OK, as it will be
3304       visited by a task in the future, or whether it also needs to be
3305       pushed on a stack).
3306 
3307       (2) Local Queue. The local queue of the task which is accessed
3308       reasonably efficiently by the task. Other tasks can steal from
3309       it when they run out of work. Throughout the marking phase, a
3310       task attempts to keep its local queue short but not totally
3311       empty, so that entries are available for stealing by other
3312       tasks. Only when there is no more work will a task totally
3313       drain its local queue.
3314 
3315       (3) Global Mark Stack. This handles local queue overflow. During
3316       marking only sets of entries are moved between it and the local
3317       queues, as access to it requires a mutex and more fine-grained
3318       interaction with it might cause contention. If it
3319       overflows, then the marking phase should restart and iterate
3320       over the bitmap to identify gray objects. Throughout the marking
3321       phase, tasks attempt to keep the global mark stack at a small
3322       length but not totally empty, so that entries are available for
3323       popping by other tasks. Only when there is no more work will
3324       tasks totally drain the global mark stack.
3325 
3326       (4) SATB Buffer Queue. This is where completed SATB buffers are
3327       made available. Buffers are regularly removed from this queue
3328       and scanned for roots, so that the queue doesn't get too
3329       long. During remark, all completed buffers are processed, as
3330       well as the filled in parts of any uncompleted buffers.
3331 
3332     The do_marking_step() method tries to abort when the time target
3333     has been reached. There are a few other cases when the
3334     do_marking_step() method also aborts:
3335 
3336       (1) When the marking phase has been aborted (after a Full GC).
3337 
3338       (2) When a global overflow (on the global stack) has been
3339       triggered. Before the task aborts, it will actually sync up with
3340       the other tasks to ensure that all the marking data structures
3341       (local queues, stacks, fingers etc.)  are re-initialized so that
3342       when do_marking_step() completes, the marking phase can
3343       immediately restart.
3344 
3345       (3) When enough completed SATB buffers are available. The
3346       do_marking_step() method only tries to drain SATB buffers right
3347       at the beginning. So, if enough buffers are available, the
3348       marking step aborts and the SATB buffers are processed at
3349       the beginning of the next invocation.
3350 
3351       (4) To yield. When we have to yield, we abort and do the yield
3352       right at the end of do_marking_step(). This saves us a lot
3353       of hassle as, by yielding, we might allow a Full GC. If this
3354       happens then objects will be compacted underneath our feet, the
3355       heap might shrink, etc. We avoid checking for this throughout
3356       the code by just aborting and doing the yield right at the end.
3357 
3358     From the above it follows that the do_marking_step() method should
3359     be called in a loop (or, otherwise, regularly) until it completes.
3360 
3361     If a marking step completes without its has_aborted() flag being
3362     true, it means it has completed the current marking phase (and
3363     also all other marking tasks have done so and have all synced up).
3364 
3365     A method called regular_clock_call() is invoked "regularly" (at
3366     sub-ms intervals) throughout marking. It is this clock method that
3367     checks all the abort conditions which were mentioned above and
3368     decides when the task should abort. A work-based scheme is used to
3369     trigger this clock method: when the number of object words the
3370     marking phase has scanned or the number of references the marking
3371     phase has visited reach a given limit. Additional invocations of
3372     the clock method have been planted in a few other strategic places
3373     too. The initial reason for the clock method was to avoid calling
3374     vtime too regularly, as it is quite expensive. So, once it was in
3375     place, it was natural to piggy-back all the other conditions on it
3376     too and not constantly check them throughout the code.
3377 
3378     If do_termination is true then do_marking_step will enter its
3379     termination protocol.
3380 
3381     The value of is_serial must be true when do_marking_step is being
3382     called serially (i.e. by the VMThread) and do_marking_step should
3383     skip any synchronization in the termination and overflow code.
3384     Examples include the serial remark code and the serial reference
3385     processing closures.
3386 
3387     The value of is_serial must be false when do_marking_step is
3388     being called by any of the worker threads in a work gang.
3389     Examples include the concurrent marking code (CMMarkingTask),
3390     the MT remark code, and the MT reference processing closures.
3391 
3392  *****************************************************************************/
3393 
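     // A rough sketch (illustrative only, not actual caller code) of how a
     // parallel worker is expected to drive do_marking_step(); here "task"
     // stands for a CMTask* and "cm" for its ConcurrentMark*:
     //
     //   do {
     //     task->do_marking_step(target_ms,
     //                           true  /* do_termination */,
     //                           false /* is_serial */);
     //     // yield and/or handle a mark stack overflow between attempts
     //   } while (task->has_aborted() && !cm->has_aborted());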
3394 void CMTask::do_marking_step(double time_target_ms,
3395                              bool do_termination,
3396                              bool is_serial) {
3397   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3398   assert(concurrent() == _cm->concurrent(), "they should be the same");
3399 
3400   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3401   assert(_task_queues != NULL, "invariant");
3402   assert(_task_queue != NULL, "invariant");
3403   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3404 
3405   assert(!_claimed,
3406          "only one thread should claim this task at any one time");
3407 
3408   // OK, this doesn't safeguard against all possible scenarios, as it is
3409   // possible for two threads to set the _claimed flag at the same
3410   // time. But it is only for debugging purposes anyway and it will
3411   // catch most problems.
3412   _claimed = true;
3413 
3414   _start_time_ms = os::elapsedVTime() * 1000.0;
3415 
3416   // If do_stealing is true then do_marking_step will attempt to
3417   // steal work from the other CMTasks. It only makes sense to
3418   // enable stealing when the termination protocol is enabled
3419   // and do_marking_step() is not being called serially.
3420   bool do_stealing = do_termination && !is_serial;
3421 
3422   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
3423   _time_target_ms = time_target_ms - diff_prediction_ms;
3424 
3425   // set up the variables that are used in the work-based scheme to
3426   // call the regular clock method
3427   _words_scanned = 0;
3428   _refs_reached  = 0;
3429   recalculate_limits();
3430 
3431   // clear all flags
3432   clear_has_aborted();
3433   _has_timed_out = false;
3434   _draining_satb_buffers = false;
3435 
3436   ++_calls;
3437 
3438   // Set up the bitmap and oop closures. Anything that uses them is
3439   // eventually called from this method, so it is OK to allocate these
3440   // locally on the stack.
3441   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3442   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3443   set_cm_oop_closure(&cm_oop_closure);
3444 
3445   if (_cm->has_overflown()) {
3446     // This can happen if the mark stack overflows during a GC pause
3447     // and this task, after a yield point, restarts. We have to abort
3448     // as we need to get into the overflow protocol which happens
3449     // right at the end of this task.
3450     set_has_aborted();
3451   }
3452 
3453   // First drain any available SATB buffers. After this, we will not
3454   // look at SATB buffers before the next invocation of this method.
3455   // If enough completed SATB buffers are queued up, the regular clock
3456   // will abort this task so that it restarts.
3457   drain_satb_buffers();
3458   // ...then partially drain the local queue and the global stack
3459   drain_local_queue(true);
3460   drain_global_stack(true);
3461 
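       // Main loop: finish scanning the bitmap of the region we are currently
       // holding (if any), then keep claiming and scanning new regions until
       // we either run out of regions or abort.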
3462   do {
3463     if (!has_aborted() && _curr_region != NULL) {
3464       // This means that we're already holding on to a region.
3465       assert(_finger != NULL, "if region is not NULL, then the finger "
3466              "should not be NULL either");
3467 
3468       // We might have restarted this task after an evacuation pause
3469       // which might have evacuated the region we're holding on to
3470       // underneath our feet. Let's read its limit again to make sure
3471       // that we do not iterate over a region of the heap that
3472       // contains garbage (update_region_limit() will also move
3473       // _finger to the start of the region if it is found empty).
3474       update_region_limit();
3475       // We will start from _finger not from the start of the region,
3476       // as we might be restarting this task after aborting half-way
3477       // through scanning this region. In this case, _finger points to
3478       // the address where we last found a marked object. If this is a
3479       // fresh region, _finger points to start().
3480       MemRegion mr = MemRegion(_finger, _region_limit);
3481 
3482       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
3483              "humongous regions should go around loop once only");
3484 
3485       // Some special cases:
3486       // If the memory region is empty, we can just give up the region.
3487       // If the current region is humongous then we only need to check
3488       // the bitmap for the bit associated with the start of the object,
3489       // scan the object if it's live, and give up the region.
3490       // Otherwise, let's iterate over the bitmap of the part of the region
3491       // that is left.
3492       // If the iteration is successful, give up the region.
3493       if (mr.is_empty()) {
3494         giveup_current_region();
3495         regular_clock_call();
3496       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
3497         if (_nextMarkBitMap->isMarked(mr.start())) {
3498           // The object is marked - apply the closure
3499           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
3500           bitmap_closure.do_bit(offset);
3501         }
3502         // Even if this task aborted while scanning the humongous object
3503         // we can (and should) give up the current region.
3504         giveup_current_region();
3505         regular_clock_call();
3506       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3507         giveup_current_region();
3508         regular_clock_call();
3509       } else {
3510         assert(has_aborted(), "currently the only way to do so");
3511         // The only way to abort the bitmap iteration is to return
3512         // false from the do_bit() method. However, inside the
3513         // do_bit() method we move the _finger to point to the
3514         // object currently being looked at. So, if we bail out, we
3515         // have definitely set _finger to something non-null.
3516         assert(_finger != NULL, "invariant");
3517 
3518         // Region iteration was actually aborted. So now _finger
3519         // points to the address of the object we last scanned. If we
3520         // leave it there, when we restart this task, we will rescan
3521         // the object. It is easy to avoid this. We move the finger by
3522         // enough to point to the next possible object header (the
3523         // bitmap knows by how much we need to move it as it knows its
3524         // granularity).
3525         assert(_finger < _region_limit, "invariant");
3526         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
3527         // Check if bitmap iteration was aborted while scanning the last object
3528         if (new_finger >= _region_limit) {
3529           giveup_current_region();
3530         } else {
3531           move_finger_to(new_finger);
3532         }
3533       }
3534     }
3535     // At this point we have either completed iterating over the
3536     // region we were holding on to, or we have aborted.
3537 
3538     // We then partially drain the local queue and the global stack.
3539     // (Do we really need this?)
3540     drain_local_queue(true);
3541     drain_global_stack(true);
3542 
3543     // Read the note on the claim_region() method about why it might
3544     // return NULL with potentially more regions available for
3545     // claiming and why we have to check out_of_regions() to determine
3546     // whether we're done or not.
3547     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3548       // We are going to try to claim a new region. We should have
3549       // given up on the previous one.
3550       // Separated the asserts so that we know which one fires.
3551       assert(_curr_region  == NULL, "invariant");
3552       assert(_finger       == NULL, "invariant");
3553       assert(_region_limit == NULL, "invariant");
3554       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3555       if (claimed_region != NULL) {
3556         // Yes, we managed to claim one
3557         setup_for_region(claimed_region);
3558         assert(_curr_region == claimed_region, "invariant");
3559       }
3560       // It is important to call the regular clock here. It might take
3561       // a while to claim a region if, for example, we hit a large
3562       // block of empty regions. So we need to call the regular clock
3563       // method once round the loop to make sure it's called
3564       // frequently enough.
3565       regular_clock_call();
3566     }
3567 
3568     if (!has_aborted() && _curr_region == NULL) {
3569       assert(_cm->out_of_regions(),
3570              "at this point we should be out of regions");
3571     }
3572   } while ( _curr_region != NULL && !has_aborted());
3573 
3574   if (!has_aborted()) {
3575     // We cannot check whether the global stack is empty, since other
3576     // tasks might be pushing objects to it concurrently.
3577     assert(_cm->out_of_regions(),
3578            "at this point we should be out of regions");
3579     // Try to reduce the number of available SATB buffers so that
3580     // remark has less work to do.
3581     drain_satb_buffers();
3582   }
3583 
3584   // Since we've done everything else, we can now totally drain the
3585   // local queue and global stack.
3586   drain_local_queue(false);
3587   drain_global_stack(false);
3588 
3589   // Attempt at work stealing from other task's queues.
3590   if (do_stealing && !has_aborted()) {
3591     // We have not aborted. This means that we have finished all that
3592     // we could. Let's try to do some stealing...
3593 
3594     // We cannot check whether the global stack is empty, since other
3595     // tasks might be pushing objects to it concurrently.
3596     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3597            "only way to reach here");
3598     while (!has_aborted()) {
3599       oop obj;
3600       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
3601         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3602                "any stolen object should be marked");
3603         scan_object(obj);
3604 
3605         // And since we're towards the end, let's totally drain the
3606         // local queue and global stack.
3607         drain_local_queue(false);
3608         drain_global_stack(false);
3609       } else {
3610         break;
3611       }
3612     }
3613   }
3614 
3615   // If we are about to wrap up and go into termination, check if we
3616   // should raise the overflow flag.
3617   if (do_termination && !has_aborted()) {
3618     if (_cm->force_overflow()->should_force()) {
3619       _cm->set_has_overflown();
3620       regular_clock_call();
3621     }
3622   }
3623 
3624   // We still haven't aborted. Now, let's try to get into the
3625   // termination protocol.
3626   if (do_termination && !has_aborted()) {
3627     // We cannot check whether the global stack is empty, since other
3628     // tasks might be concurrently pushing objects on it.
3629     // Separated the asserts so that we know which one fires.
3630     assert(_cm->out_of_regions(), "only way to reach here");
3631     assert(_task_queue->size() == 0, "only way to reach here");
3632     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3633 
3634     // The CMTask class also extends the TerminatorTerminator class,
3635     // hence its should_exit_termination() method will also decide
3636     // whether to exit the termination protocol or not.
3637     bool finished = (is_serial ||
3638                      _cm->terminator()->offer_termination(this));
3639     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3640     _termination_time_ms +=
3641       termination_end_time_ms - _termination_start_time_ms;
3642 
3643     if (finished) {
3644       // We're all done.
3645 
3646       if (_worker_id == 0) {
3647         // let's allow task 0 to do this
3648         if (concurrent()) {
3649           assert(_cm->concurrent_marking_in_progress(), "invariant");
3650           // we need to set this to false before the next
3651           // safepoint. This way we ensure that the marking phase
3652           // doesn't observe any more heap expansions.
3653           _cm->clear_concurrent_marking_in_progress();
3654         }
3655       }
3656 
3657       // We can now guarantee that the global stack is empty, since
3658       // all other tasks have finished. We separated the guarantees so
3659       // that, if a condition is false, we can immediately find out
3660       // which one.
3661       guarantee(_cm->out_of_regions(), "only way to reach here");
3662       guarantee(_cm->mark_stack_empty(), "only way to reach here");
3663       guarantee(_task_queue->size() == 0, "only way to reach here");
3664       guarantee(!_cm->has_overflown(), "only way to reach here");
3665       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3666     } else {
3667       // Apparently there's more work to do. Let's abort this task. It
3668       // will restart it and we can hopefully find more things to do.
3669       set_has_aborted();
3670     }
3671   }
3672 
3673   // Mainly for debugging purposes to make sure that a pointer to the
3674   // closure which was stack-allocated in this frame doesn't
3675   // escape it by accident.
3676   set_cm_oop_closure(NULL);
3677   double end_time_ms = os::elapsedVTime() * 1000.0;
3678   double elapsed_time_ms = end_time_ms - _start_time_ms;
3679   // Update the step history.
3680   _step_times_ms.add(elapsed_time_ms);
3681 
3682   if (has_aborted()) {
3683     // The task was aborted for some reason.
3684     if (_has_timed_out) {
3685       double diff_ms = elapsed_time_ms - _time_target_ms;
3686       // Keep statistics of how well we did with respect to hitting
3687       // our target only if we actually timed out (if we aborted for
3688       // other reasons, then the results might get skewed).
3689       _marking_step_diffs_ms.add(diff_ms);
3690     }
3691 
3692     if (_cm->has_overflown()) {
3693       // This is the interesting one. We aborted because a global
3694       // overflow was raised. This means we have to restart the
3695       // marking phase and start iterating over regions. However, in
3696       // order to do this we have to make sure that all tasks stop
3697       // what they are doing and re-initialize in a safe manner. We
3698       // will achieve this with the use of two barrier sync points.
3699 
3700       if (!is_serial) {
3701         // We only need to enter the sync barrier if being called
3702         // from a parallel context
3703         _cm->enter_first_sync_barrier(_worker_id);
3704 
3705         // When we exit this sync barrier we know that all tasks have
3706         // stopped doing marking work. So, it's now safe to
3707         // re-initialize our data structures. At the end of this method,
3708         // task 0 will clear the global data structures.
3709       }
3710 
3711       // We clear the local state of this task...
3712       clear_region_fields();
3713 
3714       if (!is_serial) {
3715         // ...and enter the second barrier.
3716         _cm->enter_second_sync_barrier(_worker_id);
3717       }
3718       // At this point, if we're during the concurrent phase of
3719       // marking, everything has been re-initialized and we're
3720       // ready to restart.
3721     }
3722   }
3723 
3724   _claimed = false;
3725 }
3726 
3727 CMTask::CMTask(uint worker_id,
3728                ConcurrentMark* cm,
3729                size_t* marked_bytes,
3730                BitMap* card_bm,
3731                CMTaskQueue* task_queue,
3732                CMTaskQueueSet* task_queues)
3733   : _g1h(G1CollectedHeap::heap()),
3734     _worker_id(worker_id), _cm(cm),
3735     _claimed(false),
3736     _nextMarkBitMap(NULL), _hash_seed(17),
3737     _task_queue(task_queue),
3738     _task_queues(task_queues),
3739     _cm_oop_closure(NULL),
3740     _marked_bytes_array(marked_bytes),
3741     _card_bm(card_bm) {
3742   guarantee(task_queue != NULL, "invariant");
3743   guarantee(task_queues != NULL, "invariant");
3744 
3745   _marking_step_diffs_ms.add(0.5);
3746 }
3747 
3748 // These are formatting macros that are used below to ensure
3749 // consistent formatting. The *_H_* versions are used to format the
3750 // header for a particular value and they should be kept consistent
3751 // with the corresponding macro. Also note that most of the macros add
3752 // the necessary white space (as a prefix) which makes them a bit
3753 // easier to compose.
3754 
3755 // All the output lines are prefixed with this string to be able to
3756 // identify them easily in a large log file.
3757 #define G1PPRL_LINE_PREFIX            "###"
3758 
3759 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
3760 #ifdef _LP64
3761 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
3762 #else // _LP64
3763 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
3764 #endif // _LP64
3765 
3766 // For per-region info
3767 #define G1PPRL_TYPE_FORMAT            "   %-4s"
3768 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
3769 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
3770 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
3771 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
3772 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
3773 
3774 // For summary info
3775 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
3776 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
3777 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
3778 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
3779 
3780 G1PrintRegionLivenessInfoClosure::
3781 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
3782   : _out(out),
3783     _total_used_bytes(0), _total_capacity_bytes(0),
3784     _total_prev_live_bytes(0), _total_next_live_bytes(0),
3785     _hum_used_bytes(0), _hum_capacity_bytes(0),
3786     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
3787     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
3788   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3789   MemRegion g1_reserved = g1h->g1_reserved();
3790   double now = os::elapsedTime();
3791 
3792   // Print the header of the output.
3793   _out->cr();
3794   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3795   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
3796                  G1PPRL_SUM_ADDR_FORMAT("reserved")
3797                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
3798                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
3799                  HeapRegion::GrainBytes);
3800   _out->print_cr(G1PPRL_LINE_PREFIX);
3801   _out->print_cr(G1PPRL_LINE_PREFIX
3802                 G1PPRL_TYPE_H_FORMAT
3803                 G1PPRL_ADDR_BASE_H_FORMAT
3804                 G1PPRL_BYTE_H_FORMAT
3805                 G1PPRL_BYTE_H_FORMAT
3806                 G1PPRL_BYTE_H_FORMAT
3807                 G1PPRL_DOUBLE_H_FORMAT
3808                 G1PPRL_BYTE_H_FORMAT
3809                 G1PPRL_BYTE_H_FORMAT,
3810                 "type", "address-range",
3811                 "used", "prev-live", "next-live", "gc-eff",
3812                 "remset", "code-roots");
3813   _out->print_cr(G1PPRL_LINE_PREFIX
3814                 G1PPRL_TYPE_H_FORMAT
3815                 G1PPRL_ADDR_BASE_H_FORMAT
3816                 G1PPRL_BYTE_H_FORMAT
3817                 G1PPRL_BYTE_H_FORMAT
3818                 G1PPRL_BYTE_H_FORMAT
3819                 G1PPRL_DOUBLE_H_FORMAT
3820                 G1PPRL_BYTE_H_FORMAT
3821                 G1PPRL_BYTE_H_FORMAT,
3822                 "", "",
3823                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
3824                 "(bytes)", "(bytes)");
3825 }
3826 
3827 // It takes a pointer to one of the _hum_* fields, deduces the
3828 // corresponding value for a region in a humongous region
3829 // series (either the region size, or what's left if the _hum_* field
3830 // is < the region size), and updates the _hum_* field accordingly.
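     // For example: if *hum_bytes holds two and a half regions' worth of
     // bytes, three successive calls return GrainBytes, GrainBytes and then
     // the remaining half region, leaving *hum_bytes at zero.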
3831 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
3832   size_t bytes = 0;
3833   // The > 0 check is to deal with the prev and next live bytes which
3834   // could be 0.
3835   if (*hum_bytes > 0) {
3836     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
3837     *hum_bytes -= bytes;
3838   }
3839   return bytes;
3840 }
3841 
3842 // It deduces the values for a region in a humongous region series
3843 // from the _hum_* fields and updates those accordingly. It assumes
3844 // that the _hum_* fields have already been set up from the "starts
3845 // humongous" region and we visit the regions in address order.
3846 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
3847                                                      size_t* capacity_bytes,
3848                                                      size_t* prev_live_bytes,
3849                                                      size_t* next_live_bytes) {
3850   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
3851   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
3852   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
3853   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
3854   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
3855 }
3856 
3857 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
3858   const char* type       = r->get_type_str();
3859   HeapWord* bottom       = r->bottom();
3860   HeapWord* end          = r->end();
3861   size_t capacity_bytes  = r->capacity();
3862   size_t used_bytes      = r->used();
3863   size_t prev_live_bytes = r->live_bytes();
3864   size_t next_live_bytes = r->next_live_bytes();
3865   double gc_eff          = r->gc_efficiency();
3866   size_t remset_bytes    = r->rem_set()->mem_size();
3867   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
3868 
3869   if (r->is_starts_humongous()) {
3870     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
3871            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
3872            "they should have been zeroed after the last time we used them");
3873     // Set up the _hum_* fields.
3874     _hum_capacity_bytes  = capacity_bytes;
3875     _hum_used_bytes      = used_bytes;
3876     _hum_prev_live_bytes = prev_live_bytes;
3877     _hum_next_live_bytes = next_live_bytes;
3878     get_hum_bytes(&used_bytes, &capacity_bytes,
3879                   &prev_live_bytes, &next_live_bytes);
3880     end = bottom + HeapRegion::GrainWords;
3881   } else if (r->is_continues_humongous()) {
3882     get_hum_bytes(&used_bytes, &capacity_bytes,
3883                   &prev_live_bytes, &next_live_bytes);
3884     assert(end == bottom + HeapRegion::GrainWords, "invariant");
3885   }
3886 
3887   _total_used_bytes      += used_bytes;
3888   _total_capacity_bytes  += capacity_bytes;
3889   _total_prev_live_bytes += prev_live_bytes;
3890   _total_next_live_bytes += next_live_bytes;
3891   _total_remset_bytes    += remset_bytes;
3892   _total_strong_code_roots_bytes += strong_code_roots_bytes;
3893 
3894   // Print a line for this particular region.
3895   _out->print_cr(G1PPRL_LINE_PREFIX
3896                  G1PPRL_TYPE_FORMAT
3897                  G1PPRL_ADDR_BASE_FORMAT
3898                  G1PPRL_BYTE_FORMAT
3899                  G1PPRL_BYTE_FORMAT
3900                  G1PPRL_BYTE_FORMAT
3901                  G1PPRL_DOUBLE_FORMAT
3902                  G1PPRL_BYTE_FORMAT
3903                  G1PPRL_BYTE_FORMAT,
3904                  type, p2i(bottom), p2i(end),
3905                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
3906                  remset_bytes, strong_code_roots_bytes);
3907 
3908   return false;
3909 }
3910 
3911 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3912   // add static memory usages to remembered set sizes
3913   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
3914   // Print the footer of the output.
3915   _out->print_cr(G1PPRL_LINE_PREFIX);
3916   _out->print_cr(G1PPRL_LINE_PREFIX
3917                  " SUMMARY"
3918                  G1PPRL_SUM_MB_FORMAT("capacity")
3919                  G1PPRL_SUM_MB_PERC_FORMAT("used")
3920                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
3921                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
3922                  G1PPRL_SUM_MB_FORMAT("remset")
3923                  G1PPRL_SUM_MB_FORMAT("code-roots"),
3924                  bytes_to_mb(_total_capacity_bytes),
3925                  bytes_to_mb(_total_used_bytes),
3926                  perc(_total_used_bytes, _total_capacity_bytes),
3927                  bytes_to_mb(_total_prev_live_bytes),
3928                  perc(_total_prev_live_bytes, _total_capacity_bytes),
3929                  bytes_to_mb(_total_next_live_bytes),
3930                  perc(_total_next_live_bytes, _total_capacity_bytes),
3931                  bytes_to_mb(_total_remset_bytes),
3932                  bytes_to_mb(_total_strong_code_roots_bytes));
3933   _out->cr();
3934 }