1 /*
   2  * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMark.inline.hpp"
  30 #include "gc/g1/concurrentMarkThread.inline.hpp"
  31 #include "gc/g1/g1CollectedHeap.inline.hpp"
  32 #include "gc/g1/g1CollectorPolicy.hpp"
  33 #include "gc/g1/g1CollectorState.hpp"
  34 #include "gc/g1/g1OopClosures.inline.hpp"
  35 #include "gc/g1/g1RemSet.hpp"
  36 #include "gc/g1/g1StringDedup.hpp"
  37 #include "gc/g1/heapRegion.inline.hpp"
  38 #include "gc/g1/heapRegionManager.inline.hpp"
  39 #include "gc/g1/heapRegionRemSet.hpp"
  40 #include "gc/g1/heapRegionSet.inline.hpp"
  41 #include "gc/g1/suspendibleThreadSet.hpp"
  42 #include "gc/shared/gcId.hpp"
  43 #include "gc/shared/gcTimer.hpp"
  44 #include "gc/shared/gcTrace.hpp"
  45 #include "gc/shared/gcTraceTime.hpp"
  46 #include "gc/shared/genOopClosures.inline.hpp"
  47 #include "gc/shared/referencePolicy.hpp"
  48 #include "gc/shared/strongRootsScope.hpp"
  49 #include "gc/shared/taskqueue.inline.hpp"
  50 #include "gc/shared/vmGCOperations.hpp"
  51 #include "logging/log.hpp"
  52 #include "memory/allocation.hpp"
  53 #include "memory/resourceArea.hpp"
  54 #include "oops/oop.inline.hpp"
  55 #include "runtime/atomic.inline.hpp"
  56 #include "runtime/handles.inline.hpp"
  57 #include "runtime/java.hpp"
  58 #include "runtime/prefetch.inline.hpp"
  59 #include "services/memTracker.hpp"
  60 
  61 // Concurrent marking bit map wrapper
  62 
  63 CMBitMapRO::CMBitMapRO(int shifter) :
  64   _bm(),
  65   _shifter(shifter) {
  66   _bmStartWord = 0;
  67   _bmWordSize = 0;
  68 }
  69 
  70 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  71                                                const HeapWord* limit) const {
  72   // First we must round addr *up* to a possible object boundary.
  73   addr = (HeapWord*)align_size_up((intptr_t)addr,
  74                                   HeapWordSize << _shifter);
  75   size_t addrOffset = heapWordToOffset(addr);
  76   if (limit == NULL) {
  77     limit = _bmStartWord + _bmWordSize;
  78   }
  79   size_t limitOffset = heapWordToOffset(limit);
  80   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  81   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  82   assert(nextAddr >= addr, "get_next_one postcondition");
  83   assert(nextAddr == limit || isMarked(nextAddr),
  84          "get_next_one postcondition");
  85   return nextAddr;
  86 }
  87 
  88 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
  89                                                  const HeapWord* limit) const {
  90   size_t addrOffset = heapWordToOffset(addr);
  91   if (limit == NULL) {
  92     limit = _bmStartWord + _bmWordSize;
  93   }
  94   size_t limitOffset = heapWordToOffset(limit);
  95   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  96   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  97   assert(nextAddr >= addr, "get_next_zero postcondition");
  98   assert(nextAddr == limit || !isMarked(nextAddr),
  99          "get_next_zero postcondition");
 100   return nextAddr;
 101 }
 102 
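     // Converts a heap-word distance into the corresponding bit-offset distance
     // by scaling it down with the shifter; the distance must be a multiple of
     // the bitmap granularity (asserted below).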
 103 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 104   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 105   return (int) (diff >> _shifter);
 106 }
 107 
 108 #ifndef PRODUCT
 109 bool CMBitMapRO::covers(MemRegion heap_rs) const {
 110   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 111   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
 112          "size inconsistency");
 113   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
 114          _bmWordSize  == heap_rs.word_size();
 115 }
 116 #endif
 117 
 118 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
 119   _bm.print_on_error(st, prefix);
 120 }
 121 
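     // Each bit of the marking bitmap covers MinObjAlignmentInBytes bytes of
     // heap, so one bitmap byte covers mark_distance() heap bytes and the bitmap
     // itself needs heap_size / mark_distance() bytes of storage, rounded up to
     // the reservation alignment.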
 122 size_t CMBitMap::compute_size(size_t heap_size) {
 123   return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
 124 }
 125 
 126 size_t CMBitMap::mark_distance() {
 127   return MinObjAlignmentInBytes * BitsPerByte;
 128 }
 129 
 130 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 131   _bmStartWord = heap.start();
 132   _bmWordSize = heap.word_size();
 133 
 134   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
 135   _bm.set_size(_bmWordSize >> _shifter);
 136 
 137   storage->set_mapping_changed_listener(&_listener);
 138 }
 139 
 140 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 141   if (zero_filled) {
 142     return;
 143   }
 144   // We need to clear the bitmap on commit, removing any existing information.
 145   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 146   _bm->clearRange(mr);
 147 }
 148 
 149 // Closure used for clearing the given mark bitmap.
 150 class ClearBitmapHRClosure : public HeapRegionClosure {
 151  private:
 152   ConcurrentMark* _cm;
 153   CMBitMap* _bitmap;
 154   bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 155  public:
 156   ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
 157     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
 158   }
 159 
 160   virtual bool doHeapRegion(HeapRegion* r) {
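         // Clear the bitmap for this region in chunks covering 1M of heap each,
         // so that a yielding closure can react to safepoint requests between
         // chunks instead of clearing the whole region at once.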
 161     size_t const chunk_size_in_words = M / HeapWordSize;
 162 
 163     HeapWord* cur = r->bottom();
 164     HeapWord* const end = r->end();
 165 
 166     while (cur < end) {
 167       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 168       _bitmap->clearRange(mr);
 169 
 170       cur += chunk_size_in_words;
 171 
 172       // Abort iteration if after yielding the marking has been aborted.
 173       if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
 174         return true;
 175       }
 176       // Repeat the asserts from before the start of the closure. We will do them
 177       // as asserts here to minimize their overhead on the product. However, we
 178       // will have them as guarantees at the beginning / end of the bitmap
 179       // clearing to get some checking in the product.
 180       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
 181       assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
 182     }
 183 
 184     return false;
 185   }
 186 };
 187 
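     // Gang task that clears a marking bitmap in parallel; regions are handed
     // out to workers via the HeapRegionClaimer. If the task is suspendible the
     // workers join the suspendible thread set so they can yield to safepoints.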
 188 class ParClearNextMarkBitmapTask : public AbstractGangTask {
 189   ClearBitmapHRClosure* _cl;
 190   HeapRegionClaimer     _hrclaimer;
 191   bool                  _suspendible; // If the task is suspendible, workers must join the STS.
 192 
 193 public:
 194   ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
 195       AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}
 196 
 197   void work(uint worker_id) {
 198     SuspendibleThreadSetJoiner sts_join(_suspendible);
 199     G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
 200   }
 201 };
 202 
 203 void CMBitMap::clearAll() {
 204   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 205   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
 206   uint n_workers = g1h->workers()->active_workers();
 207   ParClearNextMarkBitmapTask task(&cl, n_workers, false);
 208   g1h->workers()->run_task(&task);
 209   guarantee(cl.complete(), "Must have completed iteration.");
 210   return;
 211 }
 212 
 213 void CMBitMap::markRange(MemRegion mr) {
 214   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 215   assert(!mr.is_empty(), "unexpected empty region");
 216   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 217           ((HeapWord *) mr.end())),
 218          "markRange memory region end is not card aligned");
 219   // convert address range into offset range
 220   _bm.at_put_range(heapWordToOffset(mr.start()),
 221                    heapWordToOffset(mr.end()), true);
 222 }
 223 
 224 void CMBitMap::clearRange(MemRegion mr) {
 225   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 226   assert(!mr.is_empty(), "unexpected empty region");
 227   // convert address range into offset range
 228   _bm.at_put_range(heapWordToOffset(mr.start()),
 229                    heapWordToOffset(mr.end()), false);
 230 }
 231 
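     // Returns the maximal region of consecutive marked words that starts at or
     // after addr and lies below end_addr, clearing the corresponding bits; the
     // returned region is empty if no marked word is found in that range.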
 232 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 233                                             HeapWord* end_addr) {
 234   HeapWord* start = getNextMarkedWordAddress(addr);
 235   start = MIN2(start, end_addr);
 236   HeapWord* end   = getNextUnmarkedWordAddress(start);
 237   end = MIN2(end, end_addr);
 238   assert(start <= end, "Consistency check");
 239   MemRegion mr(start, end);
 240   if (!mr.is_empty()) {
 241     clearRange(mr);
 242   }
 243   return mr;
 244 }
 245 
 246 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 247   _base(NULL), _cm(cm)
 248 {}
 249 
 250 bool CMMarkStack::allocate(size_t capacity) {
 251   // allocate a stack of the requisite depth
 252   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 253   if (!rs.is_reserved()) {
 254     warning("ConcurrentMark MarkStack allocation failure");
 255     return false;
 256   }
 257   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 258   if (!_virtual_space.initialize(rs, rs.size())) {
 259     warning("ConcurrentMark MarkStack backing store failure");
 260     // Release the virtual memory reserved for the marking stack
 261     rs.release();
 262     return false;
 263   }
 264   assert(_virtual_space.committed_size() == rs.size(),
 265          "Didn't reserve backing store for all of ConcurrentMark stack?");
 266   _base = (oop*) _virtual_space.low();
 267   setEmpty();
 268   _capacity = (jint) capacity;
 269   _saved_index = -1;
 270   _should_expand = false;
 271   return true;
 272 }
 273 
 274 void CMMarkStack::expand() {
 275   // Called during remark if we've overflown the marking stack during marking.
 276   assert(isEmpty(), "stack should have been emptied while handling overflow");
 277   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 278   // Clear expansion flag
 279   _should_expand = false;
 280   if (_capacity == (jint) MarkStackSizeMax) {
 281     log_trace(gc)("(benign) Can't expand marking stack capacity, at max size limit");
 282     return;
 283   }
 284   // Double capacity if possible
 285   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
 286   // Do not give up existing stack until we have managed to
 287   // get the double capacity that we desired.
 288   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 289                                                            sizeof(oop)));
 290   if (rs.is_reserved()) {
 291     // Release the backing store associated with old stack
 292     _virtual_space.release();
 293     // Reinitialize virtual space for new stack
 294     if (!_virtual_space.initialize(rs, rs.size())) {
 295       fatal("Not enough swap for expanded marking stack capacity");
 296     }
 297     _base = (oop*)(_virtual_space.low());
 298     _index = 0;
 299     _capacity = new_capacity;
 300   } else {
 301     // Failed to double the capacity; continue with the current capacity.
 302     log_trace(gc)("(benign) Failed to expand marking stack capacity from " SIZE_FORMAT "K to " SIZE_FORMAT "K",
 303                   _capacity / K, new_capacity / K);
 304   }
 305 }
 306 
 307 void CMMarkStack::set_should_expand() {
 308   // If we're resetting the marking state because of a
 309   // marking stack overflow, record that we should, if
 310   // possible, expand the stack.
 311   _should_expand = _cm->has_overflown();
 312 }
 313 
 314 CMMarkStack::~CMMarkStack() {
 315   if (_base != NULL) {
 316     _base = NULL;
 317     _virtual_space.release();
 318   }
 319 }
 320 
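     // Pushes the n entries of ptr_arr onto the global mark stack while holding
     // the ParGCRareEvent_lock. If there is not enough room, nothing is pushed
     // and the overflow flag is set instead.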
 321 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 322   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 323   jint start = _index;
 324   jint next_index = start + n;
 325   if (next_index > _capacity) {
 326     _overflow = true;
 327     return;
 328   }
 329   // Otherwise.
 330   _index = next_index;
 331   for (int i = 0; i < n; i++) {
 332     int ind = start + i;
 333     assert(ind < _capacity, "By overflow test above.");
 334     _base[ind] = ptr_arr[i];
 335   }
 336 }
 337 
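     // Pops up to max entries from the global mark stack into ptr_arr while
     // holding the ParGCRareEvent_lock. Returns false with *n == 0 if the stack
     // is empty, otherwise true with *n set to the number of entries popped.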
 338 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 339   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 340   jint index = _index;
 341   if (index == 0) {
 342     *n = 0;
 343     return false;
 344   } else {
 345     int k = MIN2(max, index);
 346     jint  new_ind = index - k;
 347     for (int j = 0; j < k; j++) {
 348       ptr_arr[j] = _base[new_ind + j];
 349     }
 350     _index = new_ind;
 351     *n = k;
 352     return true;
 353   }
 354 }
 355 
 356 void CMMarkStack::note_start_of_gc() {
 357   assert(_saved_index == -1,
 358          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 359   _saved_index = _index;
 360 }
 361 
 362 void CMMarkStack::note_end_of_gc() {
 363   // This is intentionally a guarantee, instead of an assert. If we
 364   // accidentally add something to the mark stack during GC, it
 365   // will be a correctness issue so it's better if we crash. We'll
 366   // only check this once per GC anyway, so it won't be a performance
 367   // issue in any way.
 368   guarantee(_saved_index == _index,
 369             "saved index: %d index: %d", _saved_index, _index);
 370   _saved_index = -1;
 371 }
 372 
 373 CMRootRegions::CMRootRegions() :
 374   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 375   _should_abort(false),  _next_survivor(NULL) { }
 376 
 377 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 378   _young_list = g1h->young_list();
 379   _cm = cm;
 380 }
 381 
 382 void CMRootRegions::prepare_for_scan() {
 383   assert(!scan_in_progress(), "pre-condition");
 384 
 385   // Currently, only survivors can be root regions.
 386   assert(_next_survivor == NULL, "pre-condition");
 387   _next_survivor = _young_list->first_survivor_region();
 388   _scan_in_progress = (_next_survivor != NULL);
 389   _should_abort = false;
 390 }
 391 
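     // Claims the next survivor region to be scanned as a root region, or
     // returns NULL if scanning should be aborted or all survivors have already
     // been claimed.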
 392 HeapRegion* CMRootRegions::claim_next() {
 393   if (_should_abort) {
 394     // If someone has set the should_abort flag, we return NULL to
 395     // force the caller to bail out of their loop.
 396     return NULL;
 397   }
 398 
 399   // Currently, only survivors can be root regions.
 400   HeapRegion* res = _next_survivor;
 401   if (res != NULL) {
 402     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 403     // Read it again in case it changed while we were waiting for the lock.
 404     res = _next_survivor;
 405     if (res != NULL) {
 406       if (res == _young_list->last_survivor_region()) {
 407         // We just claimed the last survivor so store NULL to indicate
 408         // that we're done.
 409         _next_survivor = NULL;
 410       } else {
 411         _next_survivor = res->get_next_young_region();
 412       }
 413     } else {
 414       // Someone else claimed the last survivor while we were trying
 415       // to take the lock so nothing else to do.
 416     }
 417   }
 418   assert(res == NULL || res->is_survivor(), "post-condition");
 419 
 420   return res;
 421 }
 422 
 423 void CMRootRegions::scan_finished() {
 424   assert(scan_in_progress(), "pre-condition");
 425 
 426   // Currently, only survivors can be root regions.
 427   if (!_should_abort) {
 428     assert(_next_survivor == NULL, "we should have claimed all survivors");
 429   }
 430   _next_survivor = NULL;
 431 
 432   {
 433     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 434     _scan_in_progress = false;
 435     RootRegionScan_lock->notify_all();
 436   }
 437 }
 438 
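     // Waits until root region scanning has finished. Returns false immediately
     // if no scan is in progress, and true once the in-progress scan completes.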
 439 bool CMRootRegions::wait_until_scan_finished() {
 440   if (!scan_in_progress()) return false;
 441 
 442   {
 443     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 444     while (scan_in_progress()) {
 445       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 446     }
 447   }
 448   return true;
 449 }
 450 
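     // Heuristic: use roughly a quarter of the parallel GC threads for
     // concurrent marking, but always at least one.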
 451 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 452   return MAX2((n_par_threads + 2) / 4, 1U);
 453 }
 454 
 455 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 456   _g1h(g1h),
 457   _markBitMap1(),
 458   _markBitMap2(),
 459   _parallel_marking_threads(0),
 460   _max_parallel_marking_threads(0),
 461   _sleep_factor(0.0),
 462   _marking_task_overhead(1.0),
 463   _cleanup_sleep_factor(0.0),
 464   _cleanup_task_overhead(1.0),
 465   _cleanup_list("Cleanup List"),
 466   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 467   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
 468             CardTableModRefBS::card_shift,
 469             false /* in_resource_area*/),
 470 
 471   _prevMarkBitMap(&_markBitMap1),
 472   _nextMarkBitMap(&_markBitMap2),
 473 
 474   _markStack(this),
 475   // _finger set in set_non_marking_state
 476 
 477   _max_worker_id(ParallelGCThreads),
 478   // _active_tasks set in set_non_marking_state
 479   // _tasks set inside the constructor
 480   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 481   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 482 
 483   _has_overflown(false),
 484   _concurrent(false),
 485   _has_aborted(false),
 486   _restart_for_overflow(false),
 487   _concurrent_marking_in_progress(false),
 488 
 489   // _verbose_level set below
 490 
 491   _init_times(),
 492   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 493   _cleanup_times(),
 494   _total_counting_time(0.0),
 495   _total_rs_scrub_time(0.0),
 496 
 497   _parallel_workers(NULL),
 498 
 499   _count_card_bitmaps(NULL),
 500   _count_marked_bytes(NULL),
 501   _completed_initialization(false) {
 502 
 503   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 504   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 505 
 506   // Create & start a ConcurrentMark thread.
 507   _cmThread = new ConcurrentMarkThread(this);
 508   assert(cmThread() != NULL, "CM Thread should have been created");
 509   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 510   if (_cmThread->osthread() == NULL) {
 511       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 512   }
 513 
 514   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 515   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 516   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 517 
 518   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 519   satb_qs.set_buffer_size(G1SATBBufferSize);
 520 
 521   _root_regions.init(_g1h, this);
 522 
 523   if (ConcGCThreads > ParallelGCThreads) {
 524     warning("Can't have more ConcGCThreads (%u) "
 525             "than ParallelGCThreads (%u).",
 526             ConcGCThreads, ParallelGCThreads);
 527     return;
 528   }
 529   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 530     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 531     // if both are set
 532     _sleep_factor             = 0.0;
 533     _marking_task_overhead    = 1.0;
 534   } else if (G1MarkingOverheadPercent > 0) {
 535     // We will calculate the number of parallel marking threads based
 536     // on a target overhead with respect to the soft real-time goal
 537     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 538     double overall_cm_overhead =
 539       (double) MaxGCPauseMillis * marking_overhead /
 540       (double) GCPauseIntervalMillis;
 541     double cpu_ratio = 1.0 / (double) os::processor_count();
 542     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 543     double marking_task_overhead =
 544       overall_cm_overhead / marking_thread_num *
 545       (double) os::processor_count();
 546     double sleep_factor =
 547       (1.0 - marking_task_overhead) / marking_task_overhead;
 548 
 549     FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
 550     _sleep_factor             = sleep_factor;
 551     _marking_task_overhead    = marking_task_overhead;
 552   } else {
 553     // Calculate the number of parallel marking threads by scaling
 554     // the number of parallel GC threads.
 555     uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
 556     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 557     _sleep_factor             = 0.0;
 558     _marking_task_overhead    = 1.0;
 559   }
 560 
 561   assert(ConcGCThreads > 0, "Should have been set");
 562   _parallel_marking_threads = ConcGCThreads;
 563   _max_parallel_marking_threads = _parallel_marking_threads;
 564 
 565   if (parallel_marking_threads() > 1) {
 566     _cleanup_task_overhead = 1.0;
 567   } else {
 568     _cleanup_task_overhead = marking_task_overhead();
 569   }
 570   _cleanup_sleep_factor =
 571     (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 572 
 573   _parallel_workers = new WorkGang("G1 Marker",
 574        _max_parallel_marking_threads, false, true);
 575   if (_parallel_workers == NULL) {
 576     vm_exit_during_initialization("Failed necessary allocation.");
 577   } else {
 578     _parallel_workers->initialize_workers();
 579   }
 580 
 581   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 582     size_t mark_stack_size =
 583       MIN2(MarkStackSizeMax,
 584           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 585     // Verify that the calculated value for MarkStackSize is in range.
 586     // It would be nice to use the private utility routine from Arguments.
 587     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 588       warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 589               "must be between 1 and " SIZE_FORMAT,
 590               mark_stack_size, MarkStackSizeMax);
 591       return;
 592     }
 593     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 594   } else {
 595     // Verify MarkStackSize is in range.
 596     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 597       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 598         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 599           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 600                   "must be between 1 and " SIZE_FORMAT,
 601                   MarkStackSize, MarkStackSizeMax);
 602           return;
 603         }
 604       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 605         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 606           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 607                   " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 608                   MarkStackSize, MarkStackSizeMax);
 609           return;
 610         }
 611       }
 612     }
 613   }
 614 
 615   if (!_markStack.allocate(MarkStackSize)) {
 616     warning("Failed to allocate CM marking stack");
 617     return;
 618   }
 619 
 620   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 621   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 622 
 623   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 624   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 625 
 626   BitMap::idx_t card_bm_size = _card_bm.size();
 627 
 628   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 629   _active_tasks = _max_worker_id;
 630 
 631   uint max_regions = _g1h->max_regions();
 632   for (uint i = 0; i < _max_worker_id; ++i) {
 633     CMTaskQueue* task_queue = new CMTaskQueue();
 634     task_queue->initialize();
 635     _task_queues->register_queue(i, task_queue);
 636 
 637     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 638     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 639 
 640     _tasks[i] = new CMTask(i, this,
 641                            _count_marked_bytes[i],
 642                            &_count_card_bitmaps[i],
 643                            task_queue, _task_queues);
 644 
 645     _accum_task_vtime[i] = 0.0;
 646   }
 647 
 648   // Calculate the card number for the bottom of the heap. Used
 649   // in biasing indexes into the accounting card bitmaps.
 650   _heap_bottom_card_num =
 651     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 652                                 CardTableModRefBS::card_shift);
 653 
 654   // Clear all the liveness counting data
 655   clear_all_count_data();
 656 
 657   // so that the call below can read a sensible value
 658   _heap_start = g1h->reserved_region().start();
 659   set_non_marking_state();
 660   _completed_initialization = true;
 661 }
 662 
 663 void ConcurrentMark::reset() {
 664   // Starting values for these two. This should be called in a STW
 665   // phase.
 666   MemRegion reserved = _g1h->g1_reserved();
 667   _heap_start = reserved.start();
 668   _heap_end   = reserved.end();
 669 
 670   // Separated the asserts so that we know which one fires.
 671   assert(_heap_start != NULL, "heap bounds should look ok");
 672   assert(_heap_end != NULL, "heap bounds should look ok");
 673   assert(_heap_start < _heap_end, "heap bounds should look ok");
 674 
 675   // Reset all the marking data structures and any necessary flags
 676   reset_marking_state();
 677 
 678   // We do reset all of them, since different phases will use
 679   // different number of active threads. So, it's easiest to have all
 680   // of them ready.
 681   for (uint i = 0; i < _max_worker_id; ++i) {
 682     _tasks[i]->reset(_nextMarkBitMap);
 683   }
 684 
 685   // we need this to make sure that the flag is on during the evac
 686   // pause with initial mark piggy-backed
 687   set_concurrent_marking_in_progress();
 688 }
 689 
 690 
 691 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 692   _markStack.set_should_expand();
 693   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 694   if (clear_overflow) {
 695     clear_has_overflown();
 696   } else {
 697     assert(has_overflown(), "pre-condition");
 698   }
 699   _finger = _heap_start;
 700 
 701   for (uint i = 0; i < _max_worker_id; ++i) {
 702     CMTaskQueue* queue = _task_queues->queue(i);
 703     queue->set_empty();
 704   }
 705 }
 706 
 707 void ConcurrentMark::set_concurrency(uint active_tasks) {
 708   assert(active_tasks <= _max_worker_id, "we should not have more");
 709 
 710   _active_tasks = active_tasks;
 711   // Need to update the three data structures below according to the
 712   // number of active threads for this phase.
 713   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 714   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 715   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 716 }
 717 
 718 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 719   set_concurrency(active_tasks);
 720 
 721   _concurrent = concurrent;
 722   // We propagate this to all tasks, not just the active ones.
 723   for (uint i = 0; i < _max_worker_id; ++i)
 724     _tasks[i]->set_concurrent(concurrent);
 725 
 726   if (concurrent) {
 727     set_concurrent_marking_in_progress();
 728   } else {
 729     // We currently assume that the concurrent flag has been set to
 730     // false before we start remark. At this point we should also be
 731     // in a STW phase.
 732     assert(!concurrent_marking_in_progress(), "invariant");
 733     assert(out_of_regions(),
 734            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 735            p2i(_finger), p2i(_heap_end));
 736   }
 737 }
 738 
 739 void ConcurrentMark::set_non_marking_state() {
 740   // We set the global marking state to some default values when we're
 741   // not doing marking.
 742   reset_marking_state();
 743   _active_tasks = 0;
 744   clear_concurrent_marking_in_progress();
 745 }
 746 
 747 ConcurrentMark::~ConcurrentMark() {
 748   // The ConcurrentMark instance is never freed.
 749   ShouldNotReachHere();
 750 }
 751 
 752 void ConcurrentMark::clearNextBitmap() {
 753   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 754 
 755   // Make sure that the concurrent mark thread looks to still be in
 756   // the current cycle.
 757   guarantee(cmThread()->during_cycle(), "invariant");
 758 
 759   // We are finishing up the current cycle by clearing the next
 760   // marking bitmap and getting it ready for the next cycle. During
 761   // this time no other cycle can start. So, let's make sure that this
 762   // is the case.
 763   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 764 
 765   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
 766   ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
 767   _parallel_workers->run_task(&task);
 768 
 769   // Clear the liveness counting data. If the marking has been aborted, the abort()
 770   // call already did that.
 771   if (cl.complete()) {
 772     clear_all_count_data();
 773   }
 774 
 775   // Repeat the asserts from above.
 776   guarantee(cmThread()->during_cycle(), "invariant");
 777   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 778 }
 779 
 780 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 781   CMBitMap* _bitmap;
 782   bool _error;
 783  public:
 784   CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
 785   }
 786 
 787   virtual bool doHeapRegion(HeapRegion* r) {
 788     // This closure can be called concurrently with the mutator, so we must make sure
 789     // that the result of the getNextMarkedWordAddress() call is compared to the
 790     // limit value passed to it in order to detect any marked bits.
 791     // end never changes in G1.
 792     HeapWord* end = r->end();
 793     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 794   }
 795 };
 796 
 797 bool ConcurrentMark::nextMarkBitmapIsClear() {
 798   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 799   _g1h->heap_region_iterate(&cl);
 800   return cl.complete();
 801 }
 802 
 803 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 804 public:
 805   bool doHeapRegion(HeapRegion* r) {
 806     r->note_start_of_marking();
 807     return false;
 808   }
 809 };
 810 
 811 void ConcurrentMark::checkpointRootsInitialPre() {
 812   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 813   G1CollectorPolicy* g1p = g1h->g1_policy();
 814 
 815   _has_aborted = false;
 816 
 817   // Initialize marking structures. This has to be done in a STW phase.
 818   reset();
 819 
 820   // For each region note start of marking.
 821   NoteStartOfMarkHRClosure startcl;
 822   g1h->heap_region_iterate(&startcl);
 823 }
 824 
 825 
 826 void ConcurrentMark::checkpointRootsInitialPost() {
 827   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 828 
 829   // If we force an overflow during remark, the remark operation will
 830   // actually abort and we'll restart concurrent marking. If we always
 831   // force an overflow during remark we'll never actually complete the
 832   // marking phase. So, we initialize this here, at the start of the
 833   // cycle, so that the remaining overflow count will decrease at
 834   // every remark and we'll eventually not need to cause one.
 835   force_overflow_stw()->init();
 836 
 837   // Start Concurrent Marking weak-reference discovery.
 838   ReferenceProcessor* rp = g1h->ref_processor_cm();
 839   // enable ("weak") refs discovery
 840   rp->enable_discovery();
 841   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 842 
 843   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 844   // This is the start of the marking cycle; we expect all
 845   // threads to have SATB queues with active set to false.
 846   satb_mq_set.set_active_all_threads(true, /* new active value */
 847                                      false /* expected_active */);
 848 
 849   _root_regions.prepare_for_scan();
 850 
 851   // update_g1_committed() will be called at the end of an evac pause
 852   // when marking is on. So, it's also called at the end of the
 853   // initial-mark pause to update the heap end, if the heap expands
 854   // during it. No need to call it here.
 855 }
 856 
 857 /*
 858  * Notice that in the next two methods, we actually leave the STS
 859  * during the barrier sync and join it immediately afterwards. If we
 860  * do not do this, the following deadlock can occur: one thread could
 861  * be in the barrier sync code, waiting for the other thread to also
 862  * sync up, whereas another one could be trying to yield, while also
 863  * waiting for the other threads to sync up too.
 864  *
 865  * Note, however, that this code is also used during remark and in
 866  * this case we should not attempt to leave / enter the STS, otherwise
 867  * we'll either hit an assert (debug / fastdebug) or deadlock
 868  * (product). So we should only leave / enter the STS if we are
 869  * operating concurrently.
 870  *
 871  * Because the thread that does the sync barrier has left the STS, it
 872  * is possible that it gets suspended for a Full GC or that an
 873  * evacuation pause occurs. This is actually safe, since entering the
 874  * sync barrier is one of the last things do_marking_step() does, and
 875  * it doesn't manipulate any data structures afterwards.
 876  */
 877 
 878 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 879   bool barrier_aborted;
 880   {
 881     SuspendibleThreadSetLeaver sts_leave(concurrent());
 882     barrier_aborted = !_first_overflow_barrier_sync.enter();
 883   }
 884 
 885   // at this point everyone should have synced up and not be doing any
 886   // more work
 887 
 888   if (barrier_aborted) {
 889     // If the barrier aborted we ignore the overflow condition and
 890     // just abort the whole marking phase as quickly as possible.
 891     return;
 892   }
 893 
 894   // If we're executing the concurrent phase of marking, reset the marking
 895   // state; otherwise the marking state is reset after reference processing,
 896   // during the remark pause.
 897   // If we reset here as a result of an overflow during the remark we will
 898   // see assertion failures from any subsequent set_concurrency_and_phase()
 899   // calls.
 900   if (concurrent()) {
 901     // let the task associated with worker 0 do this
 902     if (worker_id == 0) {
 903       // task 0 is responsible for clearing the global data structures
 904       // We should be here because of an overflow. During STW we should
 905       // not clear the overflow flag since we rely on it being true when
 906       // we exit this method to abort the pause and restart concurrent
 907       // marking.
 908       reset_marking_state(true /* clear_overflow */);
 909       force_overflow()->update();
 910 
 911       log_info(gc)("Concurrent Mark reset for overflow");
 912     }
 913   }
 914 
 915   // after this, each task should reset its own data structures and
 916   // then go into the second barrier
 917 }
 918 
 919 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 920   SuspendibleThreadSetLeaver sts_leave(concurrent());
 921   _second_overflow_barrier_sync.enter();
 922 
 923   // at this point everything should be re-initialized and ready to go
 924 }
 925 
 926 #ifndef PRODUCT
 927 void ForceOverflowSettings::init() {
 928   _num_remaining = G1ConcMarkForceOverflow;
 929   _force = false;
 930   update();
 931 }
 932 
 933 void ForceOverflowSettings::update() {
 934   if (_num_remaining > 0) {
 935     _num_remaining -= 1;
 936     _force = true;
 937   } else {
 938     _force = false;
 939   }
 940 }
 941 
 942 bool ForceOverflowSettings::should_force() {
 943   if (_force) {
 944     _force = false;
 945     return true;
 946   } else {
 947     return false;
 948   }
 949 }
 950 #endif // !PRODUCT
 951 
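     // Gang task run by the concurrent marking threads. Each worker repeatedly
     // invokes do_marking_step() on its CMTask, yielding to safepoints and
     // optionally sleeping between steps to honor the configured marking
     // overhead, until marking either completes or is aborted.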
 952 class CMConcurrentMarkingTask: public AbstractGangTask {
 953 private:
 954   ConcurrentMark*       _cm;
 955   ConcurrentMarkThread* _cmt;
 956 
 957 public:
 958   void work(uint worker_id) {
 959     assert(Thread::current()->is_ConcurrentGC_thread(),
 960            "this should only be done by a conc GC thread");
 961     ResourceMark rm;
 962 
 963     double start_vtime = os::elapsedVTime();
 964 
 965     {
 966       SuspendibleThreadSetJoiner sts_join;
 967 
 968       assert(worker_id < _cm->active_tasks(), "invariant");
 969       CMTask* the_task = _cm->task(worker_id);
 970       the_task->record_start_time();
 971       if (!_cm->has_aborted()) {
 972         do {
 973           double start_vtime_sec = os::elapsedVTime();
 974           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 975 
 976           the_task->do_marking_step(mark_step_duration_ms,
 977                                     true  /* do_termination */,
 978                                     false /* is_serial*/);
 979 
 980           double end_vtime_sec = os::elapsedVTime();
 981           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
 982           _cm->clear_has_overflown();
 983 
 984           _cm->do_yield_check(worker_id);
 985 
 986           jlong sleep_time_ms;
 987           if (!_cm->has_aborted() && the_task->has_aborted()) {
 988             sleep_time_ms =
 989               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
 990             {
 991               SuspendibleThreadSetLeaver sts_leave;
 992               os::sleep(Thread::current(), sleep_time_ms, false);
 993             }
 994           }
 995         } while (!_cm->has_aborted() && the_task->has_aborted());
 996       }
 997       the_task->record_end_time();
 998       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
 999     }
1000 
1001     double end_vtime = os::elapsedVTime();
1002     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1003   }
1004 
1005   CMConcurrentMarkingTask(ConcurrentMark* cm,
1006                           ConcurrentMarkThread* cmt) :
1007       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1008 
1009   ~CMConcurrentMarkingTask() { }
1010 };
1011 
1012 // Calculates the number of active workers for a concurrent
1013 // phase.
1014 uint ConcurrentMark::calc_parallel_marking_threads() {
1015   uint n_conc_workers = 0;
1016   if (!UseDynamicNumberOfGCThreads ||
1017       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1018        !ForceDynamicNumberOfGCThreads)) {
1019     n_conc_workers = max_parallel_marking_threads();
1020   } else {
1021     n_conc_workers =
1022       AdaptiveSizePolicy::calc_default_active_workers(
1023                                    max_parallel_marking_threads(),
1024                                    1, /* Minimum workers */
1025                                    parallel_marking_threads(),
1026                                    Threads::number_of_non_daemon_threads());
1027     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1028     // that scaling has already gone into "_max_parallel_marking_threads".
1029   }
1030   assert(n_conc_workers > 0, "Always need at least 1");
1031   return n_conc_workers;
1032 }
1033 
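     // Scans a single root region by applying the root region scan closure to
     // every object between bottom() and top(), so that the objects these root
     // objects reference become marked.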
1034 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1035   // Currently, only survivors can be root regions.
1036   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1037   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1038 
1039   const uintx interval = PrefetchScanIntervalInBytes;
1040   HeapWord* curr = hr->bottom();
1041   const HeapWord* end = hr->top();
1042   while (curr < end) {
1043     Prefetch::read(curr, interval);
1044     oop obj = oop(curr);
1045     int size = obj->oop_iterate_size(&cl);
1046     assert(size == obj->size(), "sanity");
1047     curr += size;
1048   }
1049 }
1050 
1051 class CMRootRegionScanTask : public AbstractGangTask {
1052 private:
1053   ConcurrentMark* _cm;
1054 
1055 public:
1056   CMRootRegionScanTask(ConcurrentMark* cm) :
1057     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1058 
1059   void work(uint worker_id) {
1060     assert(Thread::current()->is_ConcurrentGC_thread(),
1061            "this should only be done by a conc GC thread");
1062 
1063     CMRootRegions* root_regions = _cm->root_regions();
1064     HeapRegion* hr = root_regions->claim_next();
1065     while (hr != NULL) {
1066       _cm->scanRootRegion(hr, worker_id);
1067       hr = root_regions->claim_next();
1068     }
1069   }
1070 };
1071 
1072 void ConcurrentMark::scanRootRegions() {
1073   // Start of concurrent marking.
1074   ClassLoaderDataGraph::clear_claimed_marks();
1075 
1076   // scan_in_progress() will have been set to true only if there was
1077   // at least one root region to scan. So, if it's false, we
1078   // should not attempt to do any further work.
1079   if (root_regions()->scan_in_progress()) {
1080     GCTraceConcTime(Info, gc) tt("Concurrent Root Region Scan");
1081 
1082     _parallel_marking_threads = calc_parallel_marking_threads();
1083     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1084            "Maximum number of marking threads exceeded");
1085     uint active_workers = MAX2(1U, parallel_marking_threads());
1086 
1087     CMRootRegionScanTask task(this);
1088     _parallel_workers->set_active_workers(active_workers);
1089     _parallel_workers->run_task(&task);
1090 
1091     // It's possible that has_aborted() is true here without actually
1092     // aborting the survivor scan earlier. This is OK as it's
1093     // mainly used for sanity checking.
1094     root_regions()->scan_finished();
1095   }
1096 }
1097 
1098 void ConcurrentMark::markFromRoots() {
1099   // we might be tempted to assert that:
1100   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1101   //        "inconsistent argument?");
1102   // However that wouldn't be right, because it's possible that
1103   // a safepoint is indeed in progress as a younger generation
1104   // stop-the-world GC happens even as we mark in this generation.
1105 
1106   _restart_for_overflow = false;
1107   force_overflow_conc()->init();
1108 
1109   // _g1h has _n_par_threads
1110   _parallel_marking_threads = calc_parallel_marking_threads();
1111   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1112     "Maximum number of marking threads exceeded");
1113 
1114   uint active_workers = MAX2(1U, parallel_marking_threads());
1115   assert(active_workers > 0, "Should have been set");
1116 
1117   // Parallel task terminator is set in "set_concurrency_and_phase()"
1118   set_concurrency_and_phase(active_workers, true /* concurrent */);
1119 
1120   CMConcurrentMarkingTask markingTask(this, cmThread());
1121   _parallel_workers->set_active_workers(active_workers);
1122   _parallel_workers->run_task(&markingTask);
1123   print_stats();
1124 }
1125 
1126 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1127   // world is stopped at this checkpoint
1128   assert(SafepointSynchronize::is_at_safepoint(),
1129          "world should be stopped");
1130 
1131   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1132 
1133   // If a full collection has happened, we shouldn't do this.
1134   if (has_aborted()) {
1135     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1136     return;
1137   }
1138 
1139   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1140 
1141   if (VerifyDuringGC) {
1142     HandleMark hm;  // handle scope
1143     g1h->prepare_for_verify();
1144     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1145   }
1146   g1h->check_bitmaps("Remark Start");
1147 
1148   G1CollectorPolicy* g1p = g1h->g1_policy();
1149   g1p->record_concurrent_mark_remark_start();
1150 
1151   double start = os::elapsedTime();
1152 
1153   checkpointRootsFinalWork();
1154 
1155   double mark_work_end = os::elapsedTime();
1156 
1157   weakRefsWork(clear_all_soft_refs);
1158 
1159   if (has_overflown()) {
1160     // Oops.  We overflowed.  Restart concurrent marking.
1161     _restart_for_overflow = true;
1162     log_develop_trace(gc)("Remark led to restart for overflow.");
1163 
1164     // Verify the heap w.r.t. the previous marking bitmap.
1165     if (VerifyDuringGC) {
1166       HandleMark hm;  // handle scope
1167       g1h->prepare_for_verify();
1168       Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
1169     }
1170 
1171     // Clear the marking state because we will be restarting
1172     // marking due to overflowing the global mark stack.
1173     reset_marking_state();
1174   } else {
1175     {
1176       GCTraceTime(Debug, gc) trace("GC Aggregate Data", g1h->gc_timer_cm());
1177 
1178       // Aggregate the per-task counting data that we have accumulated
1179       // while marking.
1180       aggregate_count_data();
1181     }
1182 
1183     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1184     // We're done with marking.
1185     // This is the end of the marking cycle; we expect all
1186     // threads to have SATB queues with active set to true.
1187     satb_mq_set.set_active_all_threads(false, /* new active value */
1188                                        true /* expected_active */);
1189 
1190     if (VerifyDuringGC) {
1191       HandleMark hm;  // handle scope
1192       g1h->prepare_for_verify();
1193       Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
1194     }
1195     g1h->check_bitmaps("Remark End");
1196     assert(!restart_for_overflow(), "sanity");
1197     // Completely reset the marking state since marking completed
1198     set_non_marking_state();
1199   }
1200 
1201   // Expand the marking stack, if we have to and if we can.
1202   if (_markStack.should_expand()) {
1203     _markStack.expand();
1204   }
1205 
1206   // Statistics
1207   double now = os::elapsedTime();
1208   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1209   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1210   _remark_times.add((now - start) * 1000.0);
1211 
1212   g1p->record_concurrent_mark_remark_end();
1213 
1214   G1CMIsAliveClosure is_alive(g1h);
1215   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1216 }
1217 
1218 // Base class of the closures that finalize and verify the
1219 // liveness counting data.
1220 class CMCountDataClosureBase: public HeapRegionClosure {
1221 protected:
1222   G1CollectedHeap* _g1h;
1223   ConcurrentMark* _cm;
1224   CardTableModRefBS* _ct_bs;
1225 
1226   BitMap* _region_bm;
1227   BitMap* _card_bm;
1228 
1229   // Takes a region that's not empty (i.e., it has at least one
1230   // live object in it) and sets its corresponding bit on the region
1231   // bitmap to 1.
1232   void set_bit_for_region(HeapRegion* hr) {
1233     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1234     _region_bm->par_at_put(index, true);
1235   }
1236 
1237 public:
1238   CMCountDataClosureBase(G1CollectedHeap* g1h,
1239                          BitMap* region_bm, BitMap* card_bm):
1240     _g1h(g1h), _cm(g1h->concurrent_mark()),
1241     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
1242     _region_bm(region_bm), _card_bm(card_bm) { }
1243 };
1244 
1245 // Closure that calculates the # live objects per region. Used
1246 // for verification purposes during the cleanup pause.
1247 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1248   CMBitMapRO* _bm;
1249   size_t _region_marked_bytes;
1250 
1251 public:
1252   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1253                          BitMap* region_bm, BitMap* card_bm) :
1254     CMCountDataClosureBase(g1h, region_bm, card_bm),
1255     _bm(bm), _region_marked_bytes(0) { }
1256 
1257   bool doHeapRegion(HeapRegion* hr) {
1258     HeapWord* ntams = hr->next_top_at_mark_start();
1259     HeapWord* start = hr->bottom();
1260 
1261     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1262            "Preconditions not met - "
1263            "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
1264            p2i(start), p2i(ntams), p2i(hr->end()));
1265 
1266     // Find the first marked object at or after "start".
1267     start = _bm->getNextMarkedWordAddress(start, ntams);
1268 
1269     size_t marked_bytes = 0;
1270 
1271     while (start < ntams) {
1272       oop obj = oop(start);
1273       int obj_sz = obj->size();
1274       HeapWord* obj_end = start + obj_sz;
1275 
1276       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1277       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1278 
1279       // Note: if we're looking at the last region in heap - obj_end
1280       // could be actually just beyond the end of the heap; end_idx
1281       // will then correspond to a (non-existent) card that is also
1282       // just beyond the heap.
1283       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1284         // end of object is not card aligned - increment to cover
1285         // all the cards spanned by the object
1286         end_idx += 1;
1287       }
1288 
1289       // Set the bits in the card BM for the cards spanned by this object.
1290       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1291 
1292       // Add the size of this object to the number of marked bytes.
1293       marked_bytes += (size_t)obj_sz * HeapWordSize;
1294 
1295       // This will happen if we are handling a humongous object that spans
1296       // several heap regions.
1297       if (obj_end > hr->end()) {
1298         break;
1299       }
1300       // Find the next marked object after this one.
1301       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1302     }
1303 
1304     // Mark the allocated-since-marking portion...
1305     HeapWord* top = hr->top();
1306     if (ntams < top) {
1307       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1308       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1309 
1310       // Note: if we're looking at the last region in heap - top
1311       // could be actually just beyond the end of the heap; end_idx
1312       // will then correspond to a (non-existent) card that is also
1313       // just beyond the heap.
1314       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1315         // end of object is not card aligned - increment to cover
1316         // all the cards spanned by the object
1317         end_idx += 1;
1318       }
1319       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1320 
1321       // This definitely means the region has live objects.
1322       set_bit_for_region(hr);
1323     }
1324 
1325     // Update the live region bitmap.
1326     if (marked_bytes > 0) {
1327       set_bit_for_region(hr);
1328     }
1329 
1330     // Set the marked bytes for the current region so that
1331     // it can be queried by a calling verification routine
1332     _region_marked_bytes = marked_bytes;
1333 
1334     return false;
1335   }
1336 
1337   size_t region_marked_bytes() const { return _region_marked_bytes; }
1338 };
1339 
1340 // Heap region closure used for verifying the counting data
1341 // that was accumulated concurrently and aggregated during
1342 // the remark pause. This closure is applied to the heap
1343 // regions during the STW cleanup pause.
1344 
1345 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1346   G1CollectedHeap* _g1h;
1347   ConcurrentMark* _cm;
1348   CalcLiveObjectsClosure _calc_cl;
1349   BitMap* _region_bm;   // Region BM to be verified
1350   BitMap* _card_bm;     // Card BM to be verified
1351 
1352   BitMap* _exp_region_bm; // Expected Region BM values
1353   BitMap* _exp_card_bm;   // Expected card BM values
1354 
1355   int _failures;
1356 
1357 public:
1358   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1359                                 BitMap* region_bm,
1360                                 BitMap* card_bm,
1361                                 BitMap* exp_region_bm,
1362                                 BitMap* exp_card_bm) :
1363     _g1h(g1h), _cm(g1h->concurrent_mark()),
1364     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1365     _region_bm(region_bm), _card_bm(card_bm),
1366     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1367     _failures(0) { }
1368 
1369   int failures() const { return _failures; }
1370 
1371   bool doHeapRegion(HeapRegion* hr) {
1372     int failures = 0;
1373 
1374     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1375     // this region and set the corresponding bits in the expected region
1376     // and card bitmaps.
1377     bool res = _calc_cl.doHeapRegion(hr);
1378     assert(res == false, "should be continuing");
1379 
1380     // Verify the marked bytes for this region.
1381     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1382     size_t act_marked_bytes = hr->next_marked_bytes();
1383 
1384     if (exp_marked_bytes > act_marked_bytes) {
1385       if (hr->is_starts_humongous()) {
1386         // For start_humongous regions, the size of the whole object will be
1387         // in exp_marked_bytes.
1388         HeapRegion* region = hr;
1389         int num_regions;
1390         for (num_regions = 0; region != NULL; num_regions++) {
1391           region = _g1h->next_region_in_humongous(region);
1392         }
1393         if ((num_regions-1) * HeapRegion::GrainBytes >= exp_marked_bytes) {
1394           failures += 1;
1395         } else if (num_regions * HeapRegion::GrainBytes < exp_marked_bytes) {
1396           failures += 1;
1397         }
1398       } else {
1399         // We're not OK if expected marked bytes > actual marked bytes. It means
1400         // we have missed accounting for some objects during the actual marking.
1401         failures += 1;
1402       }
1403     }
1404 
1405     // Verify the bit, for this region, in the actual and expected
1406     // (which was just calculated) region bit maps.
1407     // We're not OK if the bit in the calculated expected region
1408     // bitmap is set and the bit in the actual region bitmap is not.
1409     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1410 
1411     bool expected = _exp_region_bm->at(index);
1412     bool actual = _region_bm->at(index);
1413     if (expected && !actual) {
1414       failures += 1;
1415     }
1416 
1417     // Verify that the card bit maps for the cards spanned by the current
1418     // region match. We have an error if we have a set bit in the expected
1419     // bit map and the corresponding bit in the actual bitmap is not set.
1420 
1421     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1422     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1423 
1424     for (BitMap::idx_t i = start_idx; i < end_idx; i += 1) {
1425       expected = _exp_card_bm->at(i);
1426       actual = _card_bm->at(i);
1427 
1428       if (expected && !actual) {
1429         failures += 1;
1430       }
1431     }
1432 
1433     _failures += failures;
1434 
1435     // We could stop iteration over the heap when we
1436     // find the first violating region by returning true.
1437     return false;
1438   }
1439 };
1440 
1441 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1442 protected:
1443   G1CollectedHeap* _g1h;
1444   ConcurrentMark* _cm;
1445   BitMap* _actual_region_bm;
1446   BitMap* _actual_card_bm;
1447 
1448   uint    _n_workers;
1449 
1450   BitMap* _expected_region_bm;
1451   BitMap* _expected_card_bm;
1452 
1453   int  _failures;
1454 
1455   HeapRegionClaimer _hrclaimer;
1456 
1457 public:
1458   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1459                             BitMap* region_bm, BitMap* card_bm,
1460                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1461     : AbstractGangTask("G1 verify final counting"),
1462       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1463       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1464       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1465       _failures(0),
1466       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1467     assert(VerifyDuringGC, "don't call this otherwise");
1468     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1469     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1470   }
1471 
1472   void work(uint worker_id) {
1473     assert(worker_id < _n_workers, "invariant");
1474 
1475     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1476                                             _actual_region_bm, _actual_card_bm,
1477                                             _expected_region_bm,
1478                                             _expected_card_bm);
1479 
1480     _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
1481 
1482     Atomic::add(verify_cl.failures(), &_failures);
1483   }
1484 
1485   int failures() const { return _failures; }
1486 };
1487 
1488 // Closure that finalizes the liveness counting data.
1489 // Used during the cleanup pause.
1490 // Sets the bits corresponding to the interval [NTAMS, top)
1491 // (which contains the implicitly live objects) in the
1492 // card liveness bitmap. Also sets the bit, in the region
1493 // liveness bitmap, for each region containing live data.
1494 
1495 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1496  public:
1497   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1498                               BitMap* region_bm,
1499                               BitMap* card_bm) :
1500     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1501 
1502   bool doHeapRegion(HeapRegion* hr) {
1503     HeapWord* ntams = hr->next_top_at_mark_start();
1504     HeapWord* top   = hr->top();
1505 
1506     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1507 
1508     // Mark the allocated-since-marking portion...
1509     if (ntams < top) {
1510       // This definitely means the region has live objects.
1511       set_bit_for_region(hr);
1512 
1513       // Now set the bits in the card bitmap for [ntams, top)
1514       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1515       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1516 
1517       // Note: if we're looking at the last region in the heap, top
1518       // could actually be just beyond the end of the heap; end_idx
1519       // will then correspond to a (non-existent) card that is also
1520       // just beyond the heap.
1521       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1522         // end of object is not card aligned - increment to cover
1523         // all the cards spanned by the object
1524         end_idx += 1;
1525       }
1526 
1527       assert(end_idx <= _card_bm->size(),
1528              "oob: end_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1529              end_idx, _card_bm->size());
1530       assert(start_idx < _card_bm->size(),
1531              "oob: start_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1532              start_idx, _card_bm->size());
1533 
1534       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1535     }
1536 
1537     // Set the bit for the region if it contains live data
1538     if (hr->next_marked_bytes() > 0) {
1539       set_bit_for_region(hr);
1540     }
1541 
1542     return false;
1543   }
1544 };
1545 
1546 class G1ParFinalCountTask: public AbstractGangTask {
1547 protected:
1548   G1CollectedHeap* _g1h;
1549   ConcurrentMark* _cm;
1550   BitMap* _actual_region_bm;
1551   BitMap* _actual_card_bm;
1552 
1553   uint    _n_workers;
1554   HeapRegionClaimer _hrclaimer;
1555 
1556 public:
1557   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1558     : AbstractGangTask("G1 final counting"),
1559       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1560       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1561       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1562   }
1563 
1564   void work(uint worker_id) {
1565     assert(worker_id < _n_workers, "invariant");
1566 
1567     FinalCountDataUpdateClosure final_update_cl(_g1h,
1568                                                 _actual_region_bm,
1569                                                 _actual_card_bm);
1570 
1571     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1572   }
1573 };
1574 
1575 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1576   G1CollectedHeap* _g1;
1577   size_t _freed_bytes;
1578   FreeRegionList* _local_cleanup_list;
1579   HeapRegionSetCount _old_regions_removed;
1580   HeapRegionSetCount _humongous_regions_removed;
1581   HRRSCleanupTask* _hrrs_cleanup_task;
1582 
1583 public:
1584   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1585                              FreeRegionList* local_cleanup_list,
1586                              HRRSCleanupTask* hrrs_cleanup_task) :
1587     _g1(g1),
1588     _freed_bytes(0),
1589     _local_cleanup_list(local_cleanup_list),
1590     _old_regions_removed(),
1591     _humongous_regions_removed(),
1592     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1593 
1594   size_t freed_bytes() { return _freed_bytes; }
1595   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1596   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1597 
1598   bool doHeapRegion(HeapRegion *hr) {
1599     if (hr->is_archive()) {
1600       return false;
1601     }
1602     // Note: all regions were already iterated over in the FinalCount task,
1603     // which used a separate HeapRegionClaimer, so re-claiming them here is fine.
1604     _g1->reset_gc_time_stamps(hr);
1605     hr->note_end_of_marking();
1606 
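         // A region with used space but a maximum live estimate of zero (and
         // that is not young) contains only garbage and can be freed right
         // away; otherwise we just clean up its remembered set.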
1607     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1608       _freed_bytes += hr->used();
1609       hr->set_containing_set(NULL);
1610       if (hr->is_humongous()) {
1611         _humongous_regions_removed.increment(1u, hr->capacity());
1612         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1613       } else {
1614         _old_regions_removed.increment(1u, hr->capacity());
1615         _g1->free_region(hr, _local_cleanup_list, true);
1616       }
1617     } else {
1618       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1619     }
1620 
1621     return false;
1622   }
1623 };
1624 
1625 class G1ParNoteEndTask: public AbstractGangTask {
1626   friend class G1NoteEndOfConcMarkClosure;
1627 
1628 protected:
1629   G1CollectedHeap* _g1h;
1630   FreeRegionList* _cleanup_list;
1631   HeapRegionClaimer _hrclaimer;
1632 
1633 public:
1634   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1635       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1636   }
1637 
1638   void work(uint worker_id) {
1639     FreeRegionList local_cleanup_list("Local Cleanup List");
1640     HRRSCleanupTask hrrs_cleanup_task;
1641     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1642                                            &hrrs_cleanup_task);
1643     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1644     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1645 
1646     // Now update the lists
1647     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1648     {
1649       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1650       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1651 
1652       // If we iterate over the global cleanup list at the end of
1653       // cleanup to do this printing we will not guarantee to only
1654       // generate output for the newly-reclaimed regions (the list
1655       // might not be empty at the beginning of cleanup; we might
1656       // still be working on its previous contents). So we do the
1657       // printing here, before we append the new regions to the global
1658       // cleanup list.
1659 
1660       G1HRPrinter* hr_printer = _g1h->hr_printer();
1661       if (hr_printer->is_active()) {
1662         FreeRegionListIterator iter(&local_cleanup_list);
1663         while (iter.more_available()) {
1664           HeapRegion* hr = iter.get_next();
1665           hr_printer->cleanup(hr);
1666         }
1667       }
1668 
1669       _cleanup_list->add_ordered(&local_cleanup_list);
1670       assert(local_cleanup_list.is_empty(), "post-condition");
1671 
1672       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1673     }
1674   }
1675 };
1676 
1677 class G1ParScrubRemSetTask: public AbstractGangTask {
1678 protected:
1679   G1RemSet* _g1rs;
1680   BitMap* _region_bm;
1681   BitMap* _card_bm;
1682   HeapRegionClaimer _hrclaimer;
1683 
1684 public:
1685   G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
1686       AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) {
1687   }
1688 
1689   void work(uint worker_id) {
1690     _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
1691   }
1692 
1693 };
1694 
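     // STW cleanup pause. Finalizes the liveness counting data, optionally
     // verifies it, swaps the marking bitmaps, notes the end of marking in
     // every region (freeing regions that turn out to be completely garbage)
     // and, if G1ScrubRemSets is set, scrubs the remembered sets.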
1695 void ConcurrentMark::cleanup() {
1696   // world is stopped at this checkpoint
1697   assert(SafepointSynchronize::is_at_safepoint(),
1698          "world should be stopped");
1699   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1700 
1701   // If a full collection has happened, we shouldn't do this.
1702   if (has_aborted()) {
1703     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1704     return;
1705   }
1706 
1707   g1h->verify_region_sets_optional();
1708 
1709   if (VerifyDuringGC) {
1710     HandleMark hm;  // handle scope
1711     g1h->prepare_for_verify();
1712     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1713   }
1714   g1h->check_bitmaps("Cleanup Start");
1715 
1716   G1CollectorPolicy* g1p = g1h->g1_policy();
1717   g1p->record_concurrent_mark_cleanup_start();
1718 
1719   double start = os::elapsedTime();
1720 
1721   HeapRegionRemSet::reset_for_cleanup_tasks();
1722 
1723   // Do counting once more with the world stopped for good measure.
1724   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1725 
1726   g1h->workers()->run_task(&g1_par_count_task);
1727 
1728   if (VerifyDuringGC) {
1729     // Verify that the counting data accumulated during marking matches
1730     // that calculated by walking the marking bitmap.
1731 
1732     // Bitmaps to hold expected values
1733     BitMap expected_region_bm(_region_bm.size(), true);
1734     BitMap expected_card_bm(_card_bm.size(), true);
1735 
1736     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1737                                                  &_region_bm,
1738                                                  &_card_bm,
1739                                                  &expected_region_bm,
1740                                                  &expected_card_bm);
1741 
1742     g1h->workers()->run_task(&g1_par_verify_task);
1743 
1744     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1745   }
1746 
1747   size_t start_used_bytes = g1h->used();
1748   g1h->collector_state()->set_mark_in_progress(false);
1749 
1750   double count_end = os::elapsedTime();
1751   double this_final_counting_time = (count_end - start);
1752   _total_counting_time += this_final_counting_time;
1753 
1754   if (Log<LOG_TAGS(gc, liveness)>::is_trace()) {
1755     G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1756     _g1h->heap_region_iterate(&cl);
1757   }
1758 
1759   // Install newly created mark bitMap as "prev".
1760   swapMarkBitMaps();
1761 
1762   g1h->reset_gc_time_stamp();
1763 
1764   uint n_workers = _g1h->workers()->active_workers();
1765 
1766   // Note end of marking in all heap regions.
1767   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1768   g1h->workers()->run_task(&g1_par_note_end_task);
1769   g1h->check_gc_time_stamps();
1770 
1771   if (!cleanup_list_is_empty()) {
1772     // The cleanup list is not empty, so we'll have to process it
1773     // concurrently. Notify anyone else that might be wanting free
1774     // regions that there will be more free regions coming soon.
1775     g1h->set_free_regions_coming();
1776   }
1777 
1778   // Scrub the remembered sets before the record_concurrent_mark_cleanup_end() call
1779   // below, since scrubbing affects the metric by which we sort the heap regions.
1780   if (G1ScrubRemSets) {
1781     double rs_scrub_start = os::elapsedTime();
1782     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
1783     g1h->workers()->run_task(&g1_par_scrub_rs_task);
1784 
1785     double rs_scrub_end = os::elapsedTime();
1786     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1787     _total_rs_scrub_time += this_rs_scrub_time;
1788   }
1789 
1790   // this will also free any regions totally full of garbage objects,
1791   // and sort the regions.
1792   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1793 
1794   // Statistics.
1795   double end = os::elapsedTime();
1796   _cleanup_times.add((end - start) * 1000.0);
1797 
1798   // Clean up will have freed any regions completely full of garbage.
1799   // Update the soft reference policy with the new heap occupancy.
1800   Universe::update_heap_info_at_gc();
1801 
1802   if (VerifyDuringGC) {
1803     HandleMark hm;  // handle scope
1804     g1h->prepare_for_verify();
1805     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
1806   }
1807 
1808   g1h->check_bitmaps("Cleanup End");
1809 
1810   g1h->verify_region_sets_optional();
1811 
1812   // We need to make this a "collection" so that any collection pause that
1813   // races with it goes around and waits for completeCleanup to finish.
1814   g1h->increment_total_collections();
1815 
1816   // Clean out dead classes and update Metaspace sizes.
1817   if (ClassUnloadingWithConcurrentMark) {
1818     ClassLoaderDataGraph::purge();
1819   }
1820   MetaspaceGC::compute_new_size();
1821 
1822   // We reclaimed old regions so we should calculate the sizes to make
1823   // sure we update the old gen/space data.
1824   g1h->g1mm()->update_sizes();
1825   g1h->allocation_context_stats().update_after_mark();
1826 
1827   g1h->trace_heap_after_concurrent_cycle();
1828 }
1829 
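     // Runs concurrently after the cleanup pause: clears the regions on the
     // cleanup list and hands them over to the secondary free list in batches
     // of G1SecondaryFreeListAppendLength regions.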
1830 void ConcurrentMark::completeCleanup() {
1831   if (has_aborted()) return;
1832 
1833   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1834 
1835   _cleanup_list.verify_optional();
1836   FreeRegionList tmp_free_list("Tmp Free List");
1837 
1838   log_develop(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1839                             "cleanup list has %u entries",
1840                             _cleanup_list.length());
1841 
1842   // No one else should be accessing the _cleanup_list at this point,
1843   // so it is not necessary to take any locks
1844   while (!_cleanup_list.is_empty()) {
1845     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1846     assert(hr != NULL, "Got NULL from a non-empty list");
1847     hr->par_clear();
1848     tmp_free_list.add_ordered(hr);
1849 
1850     // Instead of adding one region at a time to the secondary_free_list,
1851     // we accumulate them in the local list and move them a few at a
1852     // time. This also cuts down on the number of notify_all() calls
1853     // we do during this process. We'll also append the local list when
1854     // _cleanup_list is empty (which means we just removed the last
1855     // region from the _cleanup_list).
1856     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1857         _cleanup_list.is_empty()) {
1858       log_develop(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1859                                 "appending %u entries to the secondary_free_list, "
1860                                 "cleanup list still has %u entries",
1861                                 tmp_free_list.length(),
1862                                 _cleanup_list.length());
1863 
1864       {
1865         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1866         g1h->secondary_free_list_add(&tmp_free_list);
1867         SecondaryFreeList_lock->notify_all();
1868       }
1869 #ifndef PRODUCT
1870       if (G1StressConcRegionFreeing) {
1871         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1872           os::sleep(Thread::current(), (jlong) 1, false);
1873         }
1874       }
1875 #endif
1876     }
1877   }
1878   assert(tmp_free_list.is_empty(), "post-condition");
1879 }
1880 
1881 // Supporting Object and Oop closures for reference discovery
1882 // and processing during marking.
1883 
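     // An entry is considered alive if it is non-NULL and either lies outside
     // the G1 reserved heap or is not "ill", i.e. the marking currently in
     // progress regards it as live.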
1884 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1885   HeapWord* addr = (HeapWord*)obj;
1886   return addr != NULL &&
1887          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1888 }
1889 
1890 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1891 // Uses the CMTask associated with a worker thread (for serial reference
1892 // processing the CMTask for worker 0 is used) to preserve (mark) and
1893 // trace referent objects.
1894 //
1895 // Using the CMTask and embedded local queues avoids having the worker
1896 // threads operating on the global mark stack. This reduces the risk
1897 // of overflowing the stack - which we would rather avoid at this late
1898 // stage. Using the tasks' local queues also removes the potential for
1899 // the workers to interfere with each other, which could occur if they
1900 // operated on the global stack.
1901 
1902 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1903   ConcurrentMark* _cm;
1904   CMTask*         _task;
1905   int             _ref_counter_limit;
1906   int             _ref_counter;
1907   bool            _is_serial;
1908  public:
1909   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
1910     _cm(cm), _task(task), _is_serial(is_serial),
1911     _ref_counter_limit(G1RefProcDrainInterval) {
1912     assert(_ref_counter_limit > 0, "sanity");
1913     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1914     _ref_counter = _ref_counter_limit;
1915   }
1916 
1917   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1918   virtual void do_oop(      oop* p) { do_oop_work(p); }
1919 
1920   template <class T> void do_oop_work(T* p) {
1921     if (!_cm->has_overflown()) {
1922       oop obj = oopDesc::load_decode_heap_oop(p);
1923       _task->deal_with_reference(obj);
1924       _ref_counter--;
1925 
1926       if (_ref_counter == 0) {
1927         // We have dealt with _ref_counter_limit references, pushing them
1928         // and objects reachable from them on to the local stack (and
1929         // possibly the global stack). Call CMTask::do_marking_step() to
1930         // process these entries.
1931         //
1932         // We call CMTask::do_marking_step() in a loop, which we'll exit if
1933         // there's nothing more to do (i.e. we're done with the entries that
1934         // were pushed as a result of the CMTask::deal_with_reference() calls
1935         // above) or we overflow.
1936         //
1937         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
1938         // flag while there may still be some work to do. (See the comment at
1939         // the beginning of CMTask::do_marking_step() for those conditions -
1940         // one of which is reaching the specified time target.) It is only
1941         // when CMTask::do_marking_step() returns without setting the
1942         // has_aborted() flag that the marking step has completed.
1943         do {
1944           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1945           _task->do_marking_step(mark_step_duration_ms,
1946                                  false      /* do_termination */,
1947                                  _is_serial);
1948         } while (_task->has_aborted() && !_cm->has_overflown());
1949         _ref_counter = _ref_counter_limit;
1950       }
1951     }
1952   }
1953 };
1954 
1955 // 'Drain' oop closure used by both serial and parallel reference processing.
1956 // Uses the CMTask associated with a given worker thread (for serial
1957 // reference processing the CMTask for worker 0 is used). Calls the
1958 // do_marking_step routine, with an unbelievably large timeout value,
1959 // to drain the marking data structures of the remaining entries
1960 // added by the 'keep alive' oop closure above.
1961 
1962 class G1CMDrainMarkingStackClosure: public VoidClosure {
1963   ConcurrentMark* _cm;
1964   CMTask*         _task;
1965   bool            _is_serial;
1966  public:
1967   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
1968     _cm(cm), _task(task), _is_serial(is_serial) {
1969     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1970   }
1971 
1972   void do_void() {
1973     do {
1974       // We call CMTask::do_marking_step() to completely drain the local
1975       // and global marking stacks of entries pushed by the 'keep alive'
1976       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1977       //
1978       // CMTask::do_marking_step() is called in a loop, which we'll exit
1979       // if there's nothing more to do (i.e. we've completely drained the
1980       // entries that were pushed as a result of applying the 'keep alive'
1981       // closure to the entries on the discovered ref lists) or we overflow
1982       // the global marking stack.
1983       //
1984       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
1985       // flag while there may still be some work to do. (See the comment at
1986       // the beginning of CMTask::do_marking_step() for those conditions -
1987       // one of which is reaching the specified time target.) It is only
1988       // when CMTask::do_marking_step() returns without setting the
1989       // has_aborted() flag that the marking step has completed.
1990 
1991       _task->do_marking_step(1000000000.0 /* something very large */,
1992                              true         /* do_termination */,
1993                              _is_serial);
1994     } while (_task->has_aborted() && !_cm->has_overflown());
1995   }
1996 };
1997 
1998 // Implementation of AbstractRefProcTaskExecutor for parallel
1999 // reference processing at the end of G1 concurrent marking
2000 
2001 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2002 private:
2003   G1CollectedHeap* _g1h;
2004   ConcurrentMark*  _cm;
2005   WorkGang*        _workers;
2006   uint             _active_workers;
2007 
2008 public:
2009   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2010                           ConcurrentMark* cm,
2011                           WorkGang* workers,
2012                           uint n_workers) :
2013     _g1h(g1h), _cm(cm),
2014     _workers(workers), _active_workers(n_workers) { }
2015 
2016   // Executes the given task using concurrent marking worker threads.
2017   virtual void execute(ProcessTask& task);
2018   virtual void execute(EnqueueTask& task);
2019 };
2020 
2021 class G1CMRefProcTaskProxy: public AbstractGangTask {
2022   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2023   ProcessTask&     _proc_task;
2024   G1CollectedHeap* _g1h;
2025   ConcurrentMark*  _cm;
2026 
2027 public:
2028   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2029                      G1CollectedHeap* g1h,
2030                      ConcurrentMark* cm) :
2031     AbstractGangTask("Process reference objects in parallel"),
2032     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2033     ReferenceProcessor* rp = _g1h->ref_processor_cm();
2034     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2035   }
2036 
2037   virtual void work(uint worker_id) {
2038     ResourceMark rm;
2039     HandleMark hm;
2040     CMTask* task = _cm->task(worker_id);
2041     G1CMIsAliveClosure g1_is_alive(_g1h);
2042     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2043     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2044 
2045     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2046   }
2047 };
2048 
2049 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2050   assert(_workers != NULL, "Need parallel worker threads.");
2051   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2052 
2053   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2054 
2055   // We need to reset the concurrency level before each
2056   // proxy task execution, so that the termination protocol
2057   // and overflow handling in CMTask::do_marking_step() know
2058   // how many workers to wait for.
2059   _cm->set_concurrency(_active_workers);
2060   _workers->run_task(&proc_task_proxy);
2061 }
2062 
2063 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2064   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2065   EnqueueTask& _enq_task;
2066 
2067 public:
2068   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2069     AbstractGangTask("Enqueue reference objects in parallel"),
2070     _enq_task(enq_task) { }
2071 
2072   virtual void work(uint worker_id) {
2073     _enq_task.work(worker_id);
2074   }
2075 };
2076 
2077 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2078   assert(_workers != NULL, "Need parallel worker threads.");
2079   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2080 
2081   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2082 
2083   // Not strictly necessary but...
2084   //
2085   // We need to reset the concurrency level before each
2086   // proxy task execution, so that the termination protocol
2087   // and overflow handling in CMTask::do_marking_step() know
2088   // how many workers to wait for.
2089   _cm->set_concurrency(_active_workers);
2090   _workers->run_task(&enq_task_proxy);
2091 }
2092 
2093 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2094   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2095 }
2096 
2097 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2098   if (has_overflown()) {
2099     // Skip processing the discovered references if we have
2100     // overflown the global marking stack. Reference objects
2101     // only get discovered once so it is OK to not
2102     // de-populate the discovered reference lists. We could have,
2103     // but the only benefit would be that, when marking restarts,
2104     // fewer reference objects are discovered.
2105     return;
2106   }
2107 
2108   ResourceMark rm;
2109   HandleMark   hm;
2110 
2111   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2112 
2113   // Is alive closure.
2114   G1CMIsAliveClosure g1_is_alive(g1h);
2115 
2116   // Inner scope to exclude the cleaning of the string and symbol
2117   // tables from the displayed time.
2118   {
2119     GCTraceTime(Debug, gc) trace("GC Ref Proc", g1h->gc_timer_cm());
2120 
2121     ReferenceProcessor* rp = g1h->ref_processor_cm();
2122 
2123     // See the comment in G1CollectedHeap::ref_processing_init()
2124     // about how reference processing currently works in G1.
2125 
2126     // Set the soft reference policy
2127     rp->setup_policy(clear_all_soft_refs);
2128     assert(_markStack.isEmpty(), "mark stack should be empty");
2129 
2130     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2131     // in serial reference processing. Note these closures are also
2132     // used for serially processing (by the current thread) the
2133     // JNI references during parallel reference processing.
2134     //
2135     // These closures do not need to synchronize with the worker
2136     // threads involved in parallel reference processing as these
2137     // instances are executed serially by the current thread (e.g.
2138     // reference processing is not multi-threaded and is thus
2139     // performed by the current thread instead of a gang worker).
2140     //
2141     // The gang tasks involved in parallel reference processing create
2142     // their own instances of these closures, which do their own
2143     // synchronization among themselves.
2144     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2145     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2146 
2147     // We need at least one active thread. If reference processing
2148     // is not multi-threaded we use the current (VMThread) thread,
2149     // otherwise we use the work gang from the G1CollectedHeap and
2150     // we utilize all the worker threads we can.
2151     bool processing_is_mt = rp->processing_is_mt();
2152     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2153     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2154 
2155     // Parallel processing task executor.
2156     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2157                                               g1h->workers(), active_workers);
2158     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2159 
2160     // Set the concurrency level. The phase was already set prior to
2161     // executing the remark task.
2162     set_concurrency(active_workers);
2163 
2164     // Set the degree of MT processing here.  If the discovery was done MT,
2165     // the number of threads involved during discovery could differ from
2166     // the number of active workers.  This is OK as long as the discovered
2167     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2168     rp->set_active_mt_degree(active_workers);
2169 
2170     // Process the weak references.
2171     const ReferenceProcessorStats& stats =
2172         rp->process_discovered_references(&g1_is_alive,
2173                                           &g1_keep_alive,
2174                                           &g1_drain_mark_stack,
2175                                           executor,
2176                                           g1h->gc_timer_cm());
2177     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2178 
2179     // The do_oop work routines of the keep_alive and drain_marking_stack
2180     // oop closures will set the has_overflown flag if we overflow the
2181     // global marking stack.
2182 
2183     assert(_markStack.overflow() || _markStack.isEmpty(),
2184             "mark stack should be empty (unless it overflowed)");
2185 
2186     if (_markStack.overflow()) {
2187       // This should have been done already when we tried to push an
2188       // entry on to the global mark stack. But let's do it again.
2189       set_has_overflown();
2190     }
2191 
2192     assert(rp->num_q() == active_workers, "why not");
2193 
2194     rp->enqueue_discovered_references(executor);
2195 
2196     rp->verify_no_references_recorded();
2197     assert(!rp->discovery_enabled(), "Post condition");
2198   }
2199 
2200   if (has_overflown()) {
2201     // We can not trust g1_is_alive if the marking stack overflowed
2202     return;
2203   }
2204 
2205   assert(_markStack.isEmpty(), "Marking should have completed");
2206 
2207   // Unload Klasses, String, Symbols, Code Cache, etc.
2208   {
2209     GCTraceTime(Debug, gc) trace("Unloading", g1h->gc_timer_cm());
2210 
2211     if (ClassUnloadingWithConcurrentMark) {
2212       bool purged_classes;
2213 
2214       {
2215         GCTraceTime(Trace, gc) trace("System Dictionary Unloading", g1h->gc_timer_cm());
2216         purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2217       }
2218 
2219       {
2220         GCTraceTime(Trace, gc) trace("Parallel Unloading", g1h->gc_timer_cm());
2221         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2222       }
2223     }
2224 
2225     if (G1StringDedup::is_enabled()) {
2226       GCTraceTime(Trace, gc) trace("String Deduplication Unlink", g1h->gc_timer_cm());
2227       G1StringDedup::unlink(&g1_is_alive);
2228     }
2229   }
2230 }
2231 
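     // At the end of cleanup the "next" bitmap holds the result of the marking
     // that has just completed, so it becomes the new "prev" bitmap; the old
     // "prev" bitmap will be reused for the next marking cycle.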
2232 void ConcurrentMark::swapMarkBitMaps() {
2233   CMBitMapRO* temp = _prevMarkBitMap;
2234   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2235   _nextMarkBitMap  = (CMBitMap*)  temp;
2236 }
2237 
2238 // Closure for marking entries in SATB buffers.
2239 class CMSATBBufferClosure : public SATBBufferClosure {
2240 private:
2241   CMTask* _task;
2242   G1CollectedHeap* _g1h;
2243 
2244   // This is very similar to CMTask::deal_with_reference, but with
2245   // more relaxed requirements for the argument, so this must be more
2246   // circumspect about treating the argument as an object.
2247   void do_entry(void* entry) const {
2248     _task->increment_refs_reached();
2249     HeapRegion* hr = _g1h->heap_region_containing(entry);
2250     if (entry < hr->next_top_at_mark_start()) {
2251       // Until we get here, we don't know whether entry refers to a valid
2252       // object; it could instead have been a stale reference.
2253       oop obj = static_cast<oop>(entry);
2254       assert(obj->is_oop(true /* ignore mark word */),
2255              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2256       _task->make_reference_grey(obj, hr);
2257     }
2258   }
2259 
2260 public:
2261   CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2262     : _task(task), _g1h(g1h) { }
2263 
2264   virtual void do_buffer(void** buffer, size_t size) {
2265     for (size_t i = 0; i < size; ++i) {
2266       do_entry(buffer[i]);
2267     }
2268   }
2269 };
2270 
2271 class G1RemarkThreadsClosure : public ThreadClosure {
2272   CMSATBBufferClosure _cm_satb_cl;
2273   G1CMOopClosure _cm_cl;
2274   MarkingCodeBlobClosure _code_cl;
2275   int _thread_parity;
2276 
2277  public:
2278   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2279     _cm_satb_cl(task, g1h),
2280     _cm_cl(g1h, g1h->concurrent_mark(), task),
2281     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2282     _thread_parity(Threads::thread_claim_parity()) {}
2283 
2284   void do_thread(Thread* thread) {
2285     if (thread->is_Java_thread()) {
2286       if (thread->claim_oops_do(true, _thread_parity)) {
2287         JavaThread* jt = (JavaThread*)thread;
2288 
2289         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking,
2290         // however the oops reachable from nmethods have very complex lifecycles:
2291         // * Alive if on the stack of an executing method
2292         // * Weakly reachable otherwise
2293         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2294         // live by the SATB invariant but other oops recorded in nmethods may behave differently.
2295         jt->nmethods_do(&_code_cl);
2296 
2297         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2298       }
2299     } else if (thread->is_VM_thread()) {
2300       if (thread->claim_oops_do(true, _thread_parity)) {
2301         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2302       }
2303     }
2304   }
2305 };
2306 
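     // Gang task run during the remark pause. Each worker scans the thread
     // roots (nmethods and SATB buffers) via G1RemarkThreadsClosure and then
     // completes the remaining marking work with CMTask::do_marking_step().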
2307 class CMRemarkTask: public AbstractGangTask {
2308 private:
2309   ConcurrentMark* _cm;
2310 public:
2311   void work(uint worker_id) {
2312     // Since all available tasks are actually started, we should
2313     // only proceed if we're supposed to be active.
2314     if (worker_id < _cm->active_tasks()) {
2315       CMTask* task = _cm->task(worker_id);
2316       task->record_start_time();
2317       {
2318         ResourceMark rm;
2319         HandleMark hm;
2320 
2321         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2322         Threads::threads_do(&threads_f);
2323       }
2324 
2325       do {
2326         task->do_marking_step(1000000000.0 /* something very large */,
2327                               true         /* do_termination       */,
2328                               false        /* is_serial            */);
2329       } while (task->has_aborted() && !_cm->has_overflown());
2330       // If we overflow, then we do not want to restart. We instead
2331       // want to abort remark and do concurrent marking again.
2332       task->record_end_time();
2333     }
2334   }
2335 
2336   CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2337     AbstractGangTask("Par Remark"), _cm(cm) {
2338     _cm->terminator()->reset_for_reuse(active_workers);
2339   }
2340 };
2341 
2342 void ConcurrentMark::checkpointRootsFinalWork() {
2343   ResourceMark rm;
2344   HandleMark   hm;
2345   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2346 
2347   GCTraceTime(Debug, gc) trace("Finalize Marking", g1h->gc_timer_cm());
2348 
2349   g1h->ensure_parsability(false);
2350 
2351   // this is remark, so we'll use up all active threads
2352   uint active_workers = g1h->workers()->active_workers();
2353   set_concurrency_and_phase(active_workers, false /* concurrent */);
2354   // Leave _parallel_marking_threads at its
2355   // value originally calculated in the ConcurrentMark
2356   // constructor and pass values of the active workers
2357   // through the gang in the task.
2358 
2359   {
2360     StrongRootsScope srs(active_workers);
2361 
2362     CMRemarkTask remarkTask(this, active_workers);
2363     // We will start all available threads, even if we decide that the
2364     // active_workers will be fewer. The extra ones will just bail out
2365     // immediately.
2366     g1h->workers()->run_task(&remarkTask);
2367   }
2368 
2369   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2370   guarantee(has_overflown() ||
2371             satb_mq_set.completed_buffers_num() == 0,
2372             "Invariant: has_overflown = %s, num buffers = %d",
2373             BOOL_TO_STR(has_overflown()),
2374             satb_mq_set.completed_buffers_num());
2375 
2376   print_stats();
2377 }
2378 
2379 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2380   // Note we are overriding the read-only view of the prev map here, via
2381   // the cast.
2382   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2383 }
2384 
2385 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2386   _nextMarkBitMap->clearRange(mr);
2387 }
2388 
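     // Claims the next region to be scanned by atomically moving the global
     // finger to the end of that region. Several workers may race on the CAS;
     // only the winner gets the region, the others re-read the finger and retry.
     // May also return NULL (e.g. if the claimed region turns out to be empty).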
2389 HeapRegion*
2390 ConcurrentMark::claim_region(uint worker_id) {
2391   // "checkpoint" the finger
2392   HeapWord* finger = _finger;
2393 
2394   // _heap_end will not change underneath our feet; it only changes at
2395   // yield points.
2396   while (finger < _heap_end) {
2397     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2398 
2399     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2400 
2401     // heap_region_containing() above may return NULL as we always claim
2402     // up to the end of the heap. In this case, just jump to the next region.
2403     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2404 
2405     // Is the gap between reading the finger and doing the CAS too long?
2406     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2407     if (res == finger && curr_region != NULL) {
2408       // we succeeded
2409       HeapWord*   bottom        = curr_region->bottom();
2410       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2411 
2412       // notice that _finger == end cannot be guaranteed here since
2413       // someone else might have moved the finger even further
2414       assert(_finger >= end, "the finger should have moved forward");
2415 
2416       if (limit > bottom) {
2417         return curr_region;
2418       } else {
2419         assert(limit == bottom,
2420                "the region limit should be at bottom");
2421         // we return NULL and the caller should try calling
2422         // claim_region() again.
2423         return NULL;
2424       }
2425     } else {
2426       assert(_finger > finger, "the finger should have moved forward");
2427       // read it again
2428       finger = _finger;
2429     }
2430   }
2431 
2432   return NULL;
2433 }
2434 
2435 #ifndef PRODUCT
2436 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
2437 private:
2438   G1CollectedHeap* _g1h;
2439   const char* _phase;
2440   int _info;
2441 
2442 public:
2443   VerifyNoCSetOops(const char* phase, int info = -1) :
2444     _g1h(G1CollectedHeap::heap()),
2445     _phase(phase),
2446     _info(info)
2447   { }
2448 
2449   void operator()(oop obj) const {
2450     guarantee(obj->is_oop(),
2451               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2452               p2i(obj), _phase, _info);
2453     guarantee(!_g1h->obj_in_cs(obj),
2454               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2455               p2i(obj), _phase, _info);
2456   }
2457 };
2458 
2459 void ConcurrentMark::verify_no_cset_oops() {
2460   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2461   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2462     return;
2463   }
2464 
2465   // Verify entries on the global mark stack
2466   _markStack.iterate(VerifyNoCSetOops("Stack"));
2467 
2468   // Verify entries on the task queues
2469   for (uint i = 0; i < _max_worker_id; ++i) {
2470     CMTaskQueue* queue = _task_queues->queue(i);
2471     queue->iterate(VerifyNoCSetOops("Queue", i));
2472   }
2473 
2474   // Verify the global finger
2475   HeapWord* global_finger = finger();
2476   if (global_finger != NULL && global_finger < _heap_end) {
2477     // Since we always iterate over all regions, we might get a NULL HeapRegion
2478     // here.
2479     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
2480     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2481               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
2482               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
2483   }
2484 
2485   // Verify the task fingers
2486   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2487   for (uint i = 0; i < parallel_marking_threads(); ++i) {
2488     CMTask* task = _tasks[i];
2489     HeapWord* task_finger = task->finger();
2490     if (task_finger != NULL && task_finger < _heap_end) {
2491       // See above note on the global finger verification.
2492       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
2493       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2494                 !task_hr->in_collection_set(),
2495                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2496                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
2497     }
2498   }
2499 }
2500 #endif // PRODUCT
2501 
2502 // Aggregate the counting data that was constructed concurrently
2503 // with marking.
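     // Each worker accumulated its own per-region marked-byte counts and its
     // own card bitmap during marking; this closure sums the byte counts into
     // the region and ORs the per-worker card bits into the global card bitmap.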
2504 class AggregateCountDataHRClosure: public HeapRegionClosure {
2505   G1CollectedHeap* _g1h;
2506   ConcurrentMark* _cm;
2507   CardTableModRefBS* _ct_bs;
2508   BitMap* _cm_card_bm;
2509   uint _max_worker_id;
2510 
2511  public:
2512   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2513                               BitMap* cm_card_bm,
2514                               uint max_worker_id) :
2515     _g1h(g1h), _cm(g1h->concurrent_mark()),
2516     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2517     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2518 
2519   bool doHeapRegion(HeapRegion* hr) {
2520     HeapWord* start = hr->bottom();
2521     HeapWord* limit = hr->next_top_at_mark_start();
2522     HeapWord* end = hr->end();
2523 
2524     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2525            "Preconditions not met - "
2526            "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2527            "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2528            p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2529 
2530     assert(hr->next_marked_bytes() == 0, "Precondition");
2531 
2532     if (start == limit) {
2533       // NTAMS of this region has not been set so nothing to do.
2534       return false;
2535     }
2536 
2537     // 'start' should be in the heap.
2538     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2539     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2540     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2541 
2542     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2543     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2544     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2545 
2546     // If ntams is not card aligned then we bump the card bitmap index
2547     // for limit so that we get all the cards spanned by
2548     // the object ending at ntams.
2549     // Note: if this is the last region in the heap then ntams
2550     // could actually be just beyond the end of the heap;
2551     // limit_idx will then correspond to a (non-existent) card
2552     // that is also outside the heap.
2553     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2554       limit_idx += 1;
2555     }
2556 
2557     assert(limit_idx <= end_idx, "or else use atomics");
2558 
2559     // Aggregate the "stripe" in the count data associated with hr.
2560     uint hrm_index = hr->hrm_index();
2561     size_t marked_bytes = 0;
2562 
2563     for (uint i = 0; i < _max_worker_id; i += 1) {
2564       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2565       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2566 
2567       // Fetch the marked_bytes in this region for task i and
2568       // add it to the running total for this region.
2569       marked_bytes += marked_bytes_array[hrm_index];
2570 
2571       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2572       // into the global card bitmap.
2573       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2574 
2575       while (scan_idx < limit_idx) {
2576         assert(task_card_bm->at(scan_idx) == true, "should be");
2577         _cm_card_bm->set_bit(scan_idx);
2578         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2579 
2580         // BitMap::get_next_one_offset() can handle the case when
2581         // its left_offset parameter is greater than its right_offset
2582         // parameter. It does, however, have an early exit if
2583         // left_offset == right_offset. So let's limit the value
2584         // passed in for left offset here.
2585         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2586         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2587       }
2588     }
2589 
2590     // Update the marked bytes for this region.
2591     hr->add_to_marked_bytes(marked_bytes);
2592 
2593     // Next heap region
2594     return false;
2595   }
2596 };
2597 
2598 class G1AggregateCountDataTask: public AbstractGangTask {
2599 protected:
2600   G1CollectedHeap* _g1h;
2601   ConcurrentMark* _cm;
2602   BitMap* _cm_card_bm;
2603   uint _max_worker_id;
2604   uint _active_workers;
2605   HeapRegionClaimer _hrclaimer;
2606 
2607 public:
2608   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2609                            ConcurrentMark* cm,
2610                            BitMap* cm_card_bm,
2611                            uint max_worker_id,
2612                            uint n_workers) :
2613       AbstractGangTask("Count Aggregation"),
2614       _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2615       _max_worker_id(max_worker_id),
2616       _active_workers(n_workers),
2617       _hrclaimer(_active_workers) {
2618   }
2619 
2620   void work(uint worker_id) {
2621     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
2622 
2623     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
2624   }
2625 };
2626 
2627 
2628 void ConcurrentMark::aggregate_count_data() {
2629   uint n_workers = _g1h->workers()->active_workers();
2630 
2631   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
2632                                            _max_worker_id, n_workers);
2633 
2634   _g1h->workers()->run_task(&g1_par_agg_task);
2635 }
2636 
2637 // Clear the per-worker arrays used to store the per-region counting data
2638 void ConcurrentMark::clear_all_count_data() {
2639   // Clear the global card bitmap - it will be filled during
2640   // liveness count aggregation (during remark) and the
2641   // final counting task.
2642   _card_bm.clear();
2643 
2644   // Clear the global region bitmap - it will be filled as part
2645   // of the final counting task.
2646   _region_bm.clear();
2647 
2648   uint max_regions = _g1h->max_regions();
2649   assert(_max_worker_id > 0, "uninitialized");
2650 
2651   for (uint i = 0; i < _max_worker_id; i += 1) {
2652     BitMap* task_card_bm = count_card_bitmap_for(i);
2653     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
2654 
2655     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
2656     assert(marked_bytes_array != NULL, "uninitialized");
2657 
2658     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
2659     task_card_bm->clear();
2660   }
2661 }
2662 
2663 void ConcurrentMark::print_stats() {
2664   if (!log_is_enabled(Debug, gc, stats)) {
2665     return;
2666   }
2667   log_debug(gc, stats)("---------------------------------------------------------------------");
2668   for (size_t i = 0; i < _active_tasks; ++i) {
2669     _tasks[i]->print_stats();
2670     log_debug(gc, stats)("---------------------------------------------------------------------");
2671   }
2672 }
2673 
2674 // abandon current marking iteration due to a Full GC
2675 void ConcurrentMark::abort() {
2676   if (!cmThread()->during_cycle() || _has_aborted) {
2677     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2678     return;
2679   }
2680 
2681   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2682   // concurrent bitmap clearing.
2683   _nextMarkBitMap->clearAll();
2684 
2685   // Note we cannot clear the previous marking bitmap here
2686   // since VerifyDuringGC verifies the objects marked during
2687   // a full GC against the previous bitmap.
2688 
2689   // Clear the liveness counting data
2690   clear_all_count_data();
2691   // Empty mark stack
2692   reset_marking_state();
2693   for (uint i = 0; i < _max_worker_id; ++i) {
2694     _tasks[i]->clear_region_fields();
2695   }
2696   _first_overflow_barrier_sync.abort();
2697   _second_overflow_barrier_sync.abort();
2698   _has_aborted = true;
2699 
2700   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2701   satb_mq_set.abandon_partial_marking();
2702   // This can be called either during or outside marking; we'll read
2703   // the expected_active value from the SATB queue set.
2704   satb_mq_set.set_active_all_threads(
2705                                  false, /* new active value */
2706                                  satb_mq_set.is_active() /* expected_active */);
2707 
2708   _g1h->trace_heap_after_concurrent_cycle();
2709   _g1h->register_concurrent_cycle_end();
2710 }
2711 
2712 static void print_ms_time_info(const char* prefix, const char* name,
2713                                NumberSeq& ns) {
2714   log_trace(gc, marking, stats, exit)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2715                                       prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2716   if (ns.num() > 0) {
2717     log_trace(gc, marking, stats, exit)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2718                                         prefix, ns.sd(), ns.maximum());
2719   }
2720 }
2721 
2722 void ConcurrentMark::print_summary_info() {
2723   LogHandle(gc, marking, stats, exit) log;
2724   if (!log.is_trace()) {
2725     return;
2726   }
2727 
2728   log.trace(" Concurrent marking:");
2729   print_ms_time_info("  ", "init marks", _init_times);
2730   print_ms_time_info("  ", "remarks", _remark_times);
2731   {
2732     print_ms_time_info("     ", "final marks", _remark_mark_times);
2733     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2734 
2735   }
2736   print_ms_time_info("  ", "cleanups", _cleanup_times);
2737   log.trace("    Final counting total time = %8.2f s (avg = %8.2f ms).",
2738             _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2739   if (G1ScrubRemSets) {
2740     log.trace("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2741               _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2742   }
2743   log.trace("  Total stop_world time = %8.2f s.",
2744             (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
2745   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2746             cmThread()->vtime_accum(), cmThread()->vtime_mark_accum());
2747 }
2748 
2749 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2750   _parallel_workers->print_worker_threads_on(st);
2751 }
2752 
2753 void ConcurrentMark::print_on_error(outputStream* st) const {
2754   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2755       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
2756   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
2757   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
2758 }
2759 
2760 // We take a break if someone is trying to stop the world.
2761 bool ConcurrentMark::do_yield_check(uint worker_id) {
2762   if (SuspendibleThreadSet::should_yield()) {
2763     if (worker_id == 0) {
2764       _g1h->g1_policy()->record_concurrent_pause();
2765     }
2766     SuspendibleThreadSet::yield();
2767     return true;
2768   } else {
2769     return false;
2770   }
2771 }
2772 
2773 // Closure for iteration over bitmaps
2774 class CMBitMapClosure : public BitMapClosure {
2775 private:
2776   // the bitmap that is being iterated over
2777   CMBitMap*                   _nextMarkBitMap;
2778   ConcurrentMark*             _cm;
2779   CMTask*                     _task;
2780 
2781 public:
2782   CMBitMapClosure(CMTask* task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
2783     _nextMarkBitMap(nextMarkBitMap), _cm(cm), _task(task) { }
2784 
2785   bool do_bit(size_t offset) {
2786     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2787     assert(_nextMarkBitMap->isMarked(addr), "invariant");
2788     assert(addr < _cm->finger(), "invariant");
2789     assert(addr >= _task->finger(), "invariant");
2790 
2791     // We move the task's local finger along.
2792     _task->move_finger_to(addr);
2793 
2794     _task->scan_object(oop(addr));
2795     // we only partially drain the local queue and global stack
2796     _task->drain_local_queue(true);
2797     _task->drain_global_stack(true);
2798 
2799     // if the has_aborted flag has been raised, we need to bail out of
2800     // the iteration
2801     return !_task->has_aborted();
2802   }
2803 };
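
// --- Illustrative sketch (not part of the VM sources) ----------------------
// The abortable bitmap-iteration shape that CMBitMapClosure plugs into: visit
// each set bit in ascending order and stop as soon as the visitor returns
// false (do_bit() above returns !has_aborted() for exactly this reason).
// sketch_bitmap_iterate and its visitor signature are hypothetical,
// standard-C++-only stand-ins for BitMap::iterate() and BitMapClosure.
#include <cstddef>
#include <functional>
#include <vector>

static bool sketch_bitmap_iterate(const std::vector<bool>& bits,
                                  const std::function<bool(size_t)>& do_bit) {
  for (size_t offset = 0; offset < bits.size(); ++offset) {
    if (bits[offset] && !do_bit(offset)) {
      return false;  // visitor asked to abort the iteration
    }
  }
  return true;       // the whole range was iterated over
}
// ----------------------------------------------------------------------------
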
2804 
2805 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2806   ReferenceProcessor* result = NULL;
2807   if (G1UseConcMarkReferenceProcessing) {
2808     result = g1h->ref_processor_cm();
2809     assert(result != NULL, "should not be NULL");
2810   }
2811   return result;
2812 }
2813 
2814 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2815                                ConcurrentMark* cm,
2816                                CMTask* task)
2817   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2818     _g1h(g1h), _cm(cm), _task(task)
2819 { }
2820 
2821 void CMTask::setup_for_region(HeapRegion* hr) {
2822   assert(hr != NULL,
2823         "claim_region() should have filtered out NULL regions");
2824   _curr_region  = hr;
2825   _finger       = hr->bottom();
2826   update_region_limit();
2827 }
2828 
2829 void CMTask::update_region_limit() {
2830   HeapRegion* hr            = _curr_region;
2831   HeapWord* bottom          = hr->bottom();
2832   HeapWord* limit           = hr->next_top_at_mark_start();
2833 
2834   if (limit == bottom) {
2835     // The region was collected underneath our feet.
2836     // We set the finger to bottom to ensure that the bitmap
2837     // iteration that will follow this will not do anything.
2838     // (this is not a condition that holds when we set the region up,
2839     // as the region is not supposed to be empty in the first place)
2840     _finger = bottom;
2841   } else if (limit >= _region_limit) {
2842     assert(limit >= _finger, "peace of mind");
2843   } else {
2844     assert(limit < _region_limit, "only way to get here");
2845     // This can happen under some pretty unusual circumstances.  An
2846     // evacuation pause empties the region underneath our feet (NTAMS
2847     // at bottom). We then do some allocation in the region (NTAMS
2848     // stays at bottom), followed by the region being used as a GC
2849     // alloc region (NTAMS will move to top() and the objects
2850     // originally below it will be grayed). All objects now marked in
2851     // the region are explicitly grayed (if below the global finger),
2852     // so in fact we do not need to scan anything else. We simply
2853     // set _finger to limit to ensure that the bitmap iteration
2854     // doesn't do anything.
2855     _finger = limit;
2856   }
2857 
2858   _region_limit = limit;
2859 }
2860 
2861 void CMTask::giveup_current_region() {
2862   assert(_curr_region != NULL, "invariant");
2863   clear_region_fields();
2864 }
2865 
2866 void CMTask::clear_region_fields() {
2867   // Reset these three fields to values that indicate we're not
2868   // holding on to a region.
2869   _curr_region   = NULL;
2870   _finger        = NULL;
2871   _region_limit  = NULL;
2872 }
2873 
2874 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2875   if (cm_oop_closure == NULL) {
2876     assert(_cm_oop_closure != NULL, "invariant");
2877   } else {
2878     assert(_cm_oop_closure == NULL, "invariant");
2879   }
2880   _cm_oop_closure = cm_oop_closure;
2881 }
2882 
2883 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2884   guarantee(nextMarkBitMap != NULL, "invariant");
2885   _nextMarkBitMap                = nextMarkBitMap;
2886   clear_region_fields();
2887 
2888   _calls                         = 0;
2889   _elapsed_time_ms               = 0.0;
2890   _termination_time_ms           = 0.0;
2891   _termination_start_time_ms     = 0.0;
2892 }
2893 
2894 bool CMTask::should_exit_termination() {
2895   regular_clock_call();
2896   // This is called when we are in the termination protocol. We should
2897   // quit if, for some reason, this task wants to abort or the global
2898   // stack is not empty (this means that we can get work from it).
2899   return !_cm->mark_stack_empty() || has_aborted();
2900 }
2901 
2902 void CMTask::reached_limit() {
2903   assert(_words_scanned >= _words_scanned_limit ||
2904          _refs_reached >= _refs_reached_limit,
2905          "shouldn't have been called otherwise");
2906   regular_clock_call();
2907 }
2908 
2909 void CMTask::regular_clock_call() {
2910   if (has_aborted()) return;
2911 
2912   // First, we need to recalculate the words scanned and refs reached
2913   // limits for the next clock call.
2914   recalculate_limits();
2915 
2916   // During the regular clock call we do the following
2917 
2918   // (1) If an overflow has been flagged, then we abort.
2919   if (_cm->has_overflown()) {
2920     set_has_aborted();
2921     return;
2922   }
2923 
2924   // If we are not concurrent (i.e. we're doing remark) we don't need
2925   // to check anything else. The other steps are only needed during
2926   // the concurrent marking phase.
2927   if (!concurrent()) return;
2928 
2929   // (2) If marking has been aborted for Full GC, then we also abort.
2930   if (_cm->has_aborted()) {
2931     set_has_aborted();
2932     return;
2933   }
2934 
2935   double curr_time_ms = os::elapsedVTime() * 1000.0;
2936 
2937   // (3) We check whether we should yield. If we have to, then we abort.
2938   if (SuspendibleThreadSet::should_yield()) {
2939     // We should yield. To do this we abort the task. The caller is
2940     // responsible for yielding.
2941     set_has_aborted();
2942     return;
2943   }
2944 
2945   // (4) We check whether we've reached our time quota. If we have,
2946   // then we abort.
2947   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2948   if (elapsed_time_ms > _time_target_ms) {
2949     set_has_aborted();
2950     _has_timed_out = true;
2951     return;
2952   }
2953 
2954   // (5) Finally, we check whether there are enough completed SATB
2955   // buffers available for processing. If there are, we abort.
2956   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2957   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2958     // We do need to process SATB buffers, so we'll abort and restart
2959     // the marking task to do so.
2960     set_has_aborted();
2961     return;
2962   }
2963 }
2964 
2965 void CMTask::recalculate_limits() {
2966   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2967   _words_scanned_limit      = _real_words_scanned_limit;
2968 
2969   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2970   _refs_reached_limit       = _real_refs_reached_limit;
2971 }
2972 
2973 void CMTask::decrease_limits() {
2974   // This is called when we believe that we're about to do an infrequent
2975   // operation which will increase the per-byte scanning cost (e.g. moving
2976   // entries to/from the global stack). It tries to decrease the scanning
2977   // limits so that the clock is called earlier; a short arithmetic sketch follows this method.
2978 
2979   _words_scanned_limit = _real_words_scanned_limit -
2980     3 * words_scanned_period / 4;
2981   _refs_reached_limit  = _real_refs_reached_limit -
2982     3 * refs_reached_period / 4;
2983 }
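
// --- Illustrative sketch (not part of the VM sources) ----------------------
// The arithmetic behind recalculate_limits()/decrease_limits(), with a
// hypothetical period value (the real words_scanned_period and
// refs_reached_period are defined elsewhere). After recalculation the limit
// sits one full period ahead of the counter; decreasing it pulls it back by
// 3/4 of a period, so the clock fires after only a quarter period of work.
#include <cstddef>

static size_t sketch_limit_arithmetic() {
  const size_t period    = 1024;                         // hypothetical scanning period
  size_t words_scanned   = 10000;                        // work done so far
  size_t real_limit      = words_scanned + period;       // 11024: clock one period away
  size_t effective_limit = real_limit - 3 * period / 4;  // 10256: clock period/4 away
  return effective_limit - words_scanned;                // 256 == period / 4
}
// ----------------------------------------------------------------------------
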
2984 
2985 void CMTask::move_entries_to_global_stack() {
2986   // local array where we'll store the entries that will be popped
2987   // from the local queue
2988   oop buffer[global_stack_transfer_size];
2989 
2990   int n = 0;
2991   oop obj;
2992   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
2993     buffer[n] = obj;
2994     ++n;
2995   }
2996 
2997   if (n > 0) {
2998     // we popped at least one entry from the local queue
2999 
3000     if (!_cm->mark_stack_push(buffer, n)) {
3001       set_has_aborted();
3002     }
3003   }
3004 
3005   // this operation was quite expensive, so decrease the limits
3006   decrease_limits();
3007 }
3008 
3009 void CMTask::get_entries_from_global_stack() {
3010   // local array where we'll store the entries that will be popped
3011   // from the global stack.
3012   oop buffer[global_stack_transfer_size];
3013   int n;
3014   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3015   assert(n <= global_stack_transfer_size,
3016          "we should not pop more than the given limit");
3017   if (n > 0) {
3018     // yes, we did actually pop at least one entry
3019     for (int i = 0; i < n; ++i) {
3020       bool success = _task_queue->push(buffer[i]);
3021       // We only call this when the local queue is empty or under a
3022       // given target limit. So, we do not expect this push to fail.
3023       assert(success, "invariant");
3024     }
3025   }
3026 
3027   // this operation was quite expensive, so decrease the limits
3028   decrease_limits();
3029 }
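
// --- Illustrative sketch (not part of the VM sources) ----------------------
// The chunked local<->global transfer pattern used by the two methods above,
// reduced to standard C++: entries move between a per-worker queue and a
// mutex-protected shared stack in fixed-size batches, so the lock is taken
// once per batch rather than once per entry. All names and the batch size
// below are hypothetical.
#include <cstddef>
#include <deque>
#include <mutex>
#include <vector>

struct SketchGlobalStack {
  std::mutex lock;
  std::vector<void*> entries;

  // Push a whole batch under a single lock acquisition.
  void push_batch(void* const* buf, size_t n) {
    std::lock_guard<std::mutex> guard(lock);
    entries.insert(entries.end(), buf, buf + n);
  }
  // Pop up to 'max' entries under a single lock acquisition; returns how many.
  size_t pop_batch(void** buf, size_t max) {
    std::lock_guard<std::mutex> guard(lock);
    size_t n = entries.size() < max ? entries.size() : max;
    for (size_t i = 0; i < n; ++i) {
      buf[i] = entries.back();
      entries.pop_back();
    }
    return n;
  }
};

static void sketch_move_to_global(std::deque<void*>& local, SketchGlobalStack& global) {
  const size_t batch = 16;               // hypothetical transfer size
  void* buffer[batch];
  size_t n = 0;
  while (n < batch && !local.empty()) {  // fill a batch from the local queue
    buffer[n++] = local.front();
    local.pop_front();
  }
  if (n > 0) {
    global.push_batch(buffer, n);
  }
}
// ----------------------------------------------------------------------------
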
3030 
3031 void CMTask::drain_local_queue(bool partially) {
3032   if (has_aborted()) return;
3033 
3034   // Decide what the target size is, depending on whether we're going to
3035   // drain it partially (so that other tasks can steal if they run out
3036   // of things to do) or totally (at the very end).
3037   size_t target_size;
3038   if (partially) {
3039     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3040   } else {
3041     target_size = 0;
3042   }
3043 
3044   if (_task_queue->size() > target_size) {
3045     oop obj;
3046     bool ret = _task_queue->pop_local(obj);
3047     while (ret) {
3048       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
3049       assert(!_g1h->is_on_master_free_list(
3050                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3051 
3052       scan_object(obj);
3053 
3054       if (_task_queue->size() <= target_size || has_aborted()) {
3055         ret = false;
3056       } else {
3057         ret = _task_queue->pop_local(obj);
3058       }
3059     }
3060   }
3061 }
3062 
3063 void CMTask::drain_global_stack(bool partially) {
3064   if (has_aborted()) return;
3065 
3066   // We have a policy to drain the local queue before we attempt to
3067   // drain the global stack.
3068   assert(partially || _task_queue->size() == 0, "invariant");
3069 
3070   // Decide what the target size is, depending on whether we're going to
3071   // drain it partially (so that other tasks can steal if they run out
3072   // of things to do) or totally (at the very end). Notice that,
3073   // because we move entries from the global stack in chunks, or
3074   // because another task might be doing the same, we might in fact
3075   // drop below the target. But this is not a problem.
3076   size_t target_size;
3077   if (partially) {
3078     target_size = _cm->partial_mark_stack_size_target();
3079   } else {
3080     target_size = 0;
3081   }
3082 
3083   if (_cm->mark_stack_size() > target_size) {
3084     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3085       get_entries_from_global_stack();
3086       drain_local_queue(partially);
3087     }
3088   }
3089 }
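
// --- Illustrative sketch (not part of the VM sources) ----------------------
// The partial-vs-total drain policy used by drain_local_queue() and
// drain_global_stack(), in isolation: a partial drain keeps a few entries
// around so that other workers can steal or pop work, while a total drain
// (target 0) is only done at the very end. Names and the target value are
// hypothetical; only standard C++ is used.
#include <cstddef>
#include <deque>

static void sketch_drain_queue(std::deque<int>& queue, bool partially) {
  const size_t steal_friendly_target = 8;  // hypothetical partial-drain target
  size_t target = partially ? steal_friendly_target : 0;
  while (queue.size() > target) {
    int entry = queue.front();
    queue.pop_front();
    (void)entry;  // ... scanning of the entry would happen here ...
  }
}
// ----------------------------------------------------------------------------
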
3090 
3091 // The SATB queue code makes several assumptions about whether to call the
3092 // par or non-par versions of the methods, which is why some of the code is
3093 // replicated. We should really get rid of the single-threaded version
3094 // of the code to simplify things.
3095 void CMTask::drain_satb_buffers() {
3096   if (has_aborted()) return;
3097 
3098   // We set this so that the regular clock knows that we're in the
3099   // middle of draining buffers and doesn't set the abort flag when it
3100   // notices that SATB buffers are available for draining. It'd be
3101   // very counterproductive if it did that. :-)
3102   _draining_satb_buffers = true;
3103 
3104   CMSATBBufferClosure satb_cl(this, _g1h);
3105   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3106 
3107   // This keeps claiming and applying the closure to completed buffers
3108   // until we run out of buffers or we need to abort.
3109   while (!has_aborted() &&
3110          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3111     regular_clock_call();
3112   }
3113 
3114   _draining_satb_buffers = false;
3115 
3116   assert(has_aborted() ||
3117          concurrent() ||
3118          satb_mq_set.completed_buffers_num() == 0, "invariant");
3119 
3120   // Again, this was a potentially expensive operation, so decrease the
3121   // limits to get the regular clock call earlier.
3122   decrease_limits();
3123 }
3124 
3125 void CMTask::print_stats() {
3126   log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
3127                        _worker_id, _calls);
3128   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3129                        _elapsed_time_ms, _termination_time_ms);
3130   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3131                        _step_times_ms.num(), _step_times_ms.avg(),
3132                        _step_times_ms.sd());
3133   log_debug(gc, stats)("                    max = %1.2lfms, total = %1.2lfms",
3134                        _step_times_ms.maximum(), _step_times_ms.sum());
3135 }
3136 
3137 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3138   return _task_queues->steal(worker_id, hash_seed, obj);
3139 }
3140 
3141 /*****************************************************************************
3142 
3143     The do_marking_step(time_target_ms, ...) method is the building
3144     block of the parallel marking framework. It can be called in parallel
3145     with other invocations of do_marking_step() on different tasks
3146     (but only one per task, obviously) and concurrently with the
3147     mutator threads, or during remark, hence it eliminates the need
3148     for two versions of the code. When called during remark, it will
3149     pick up from where the task left off during the concurrent marking
3150     phase. Interestingly, tasks are also claimable during evacuation
3151     pauses, since do_marking_step() ensures that it aborts before
3152     it needs to yield.
3153 
3154     The data structures that it uses to do marking work are the
3155     following:
3156 
3157       (1) Marking Bitmap. If there are gray objects that appear only
3158       on the bitmap (this happens either when dealing with an overflow
3159       or when the initial marking phase has simply marked the roots
3160       and didn't push them on the stack), then tasks claim heap
3161       regions whose bitmap they then scan to find gray objects. A
3162       global finger indicates where the end of the last claimed region
3163       is. A local finger indicates how far into the region a task has
3164       scanned. The two fingers are used to determine how to gray an
3165       object (i.e. whether simply marking it is OK, as it will be
3166       visited by a task in the future, or whether it needs to be also
3167       pushed on a stack).
3168 
3169       (2) Local Queue. The local queue of the task which is accessed
3170       reasonably efficiently by the task. Other tasks can steal from
3171       it when they run out of work. Throughout the marking phase, a
3172       task attempts to keep its local queue short but not totally
3173       empty, so that entries are available for stealing by other
3174       tasks. Only when there is no more work will a task totally
3175       drain its local queue.
3176 
3177       (3) Global Mark Stack. This handles local queue overflow. During
3178       marking, only batches of entries are moved between it and the local
3179       queues, as access to it requires a mutex and finer-grained
3180       interaction with it might cause contention. If it
3181       overflows, then the marking phase should restart and iterate
3182       over the bitmap to identify gray objects. Throughout the marking
3183       phase, tasks attempt to keep the global mark stack at a small
3184       length but not totally empty, so that entries are available for
3185       popping by other tasks. Only when there is no more work will tasks
3186       totally drain the global mark stack.
3187 
3188       (4) SATB Buffer Queue. This is where completed SATB buffers are
3189       made available. Buffers are regularly removed from this queue
3190       and scanned for roots, so that the queue doesn't get too
3191       long. During remark, all completed buffers are processed, as
3192       well as the filled in parts of any uncompleted buffers.
3193 
3194     The do_marking_step() method tries to abort when the time target
3195     has been reached. There are a few other cases when the
3196     do_marking_step() method also aborts:
3197 
3198       (1) When the marking phase has been aborted (after a Full GC).
3199 
3200       (2) When a global overflow (on the global stack) has been
3201       triggered. Before the task aborts, it will actually sync up with
3202       the other tasks to ensure that all the marking data structures
3203       (local queues, stacks, fingers etc.)  are re-initialized so that
3204       when do_marking_step() completes, the marking phase can
3205       immediately restart.
3206 
3207       (3) When enough completed SATB buffers are available. The
3208       do_marking_step() method only tries to drain SATB buffers right
3209       at the beginning. So, if enough buffers are available, the
3210       marking step aborts and the SATB buffers are processed at
3211       the beginning of the next invocation.
3212 
3213       (4) To yield. When we have to yield, we abort and do the yield
3214       right at the end of do_marking_step(). This saves us a lot
3215       of hassle because, by yielding, we might allow a Full GC. If this
3216       happens then objects will be compacted underneath our feet, the
3217       heap might shrink, etc. We avoid checking for all this by just
3218       aborting and doing the yield right at the end.
3219 
3220     From the above it follows that the do_marking_step() method should
3221     be called in a loop (or, otherwise, regularly) until it completes;
    a schematic driver loop is sketched right after this comment block.
3222 
3223     If a marking step completes without its has_aborted() flag being
3224     true, it means it has completed the current marking phase (and
3225     also all other marking tasks have done so and have all synced up).
3226 
3227     A method called regular_clock_call() is invoked "regularly" (in
3228     sub-ms intervals) throughout marking. It is this clock method that
3229     checks all the abort conditions which were mentioned above and
3230     decides when the task should abort. A work-based scheme is used to
3231     trigger this clock method: when the number of object words the
3232     marking phase has scanned or the number of references the marking
3233     phase has visited reaches a given limit. Additional invocations of
3234     the clock method have been planted in a few other strategic places
3235     too. The initial reason for the clock method was to avoid calling
3236     vtime too frequently, as it is quite expensive. So, once it was in
3237     place, it was natural to piggy-back all the other conditions on it
3238     too and not constantly check them throughout the code.
3239 
3240     If do_termination is true then do_marking_step will enter its
3241     termination protocol.
3242 
3243     The value of is_serial must be true when do_marking_step is being
3244     called serially (i.e. by the VMThread), in which case do_marking_step
3245     should skip any synchronization in the termination and overflow code.
3246     Examples include the serial remark code and the serial reference
3247     processing closures.
3248 
3249     The value of is_serial must be false when do_marking_step is
3250     being called by any of the worker threads in a work gang.
3251     Examples include the concurrent marking code (CMMarkingTask),
3252     the MT remark code, and the MT reference processing closures.
3253 
3254  *****************************************************************************/
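
// --- Illustrative sketch (not part of the VM sources) ----------------------
// The calling convention described in the comment above, as a schematic
// driver loop: keep invoking the step with a time budget until it finishes
// without having aborted. sketch_marking_step, its result type and the time
// budget are hypothetical.
struct SketchStepResult {
  bool aborted;  // true if the step bailed out (overflow, yield, timeout, ...)
};

static SketchStepResult sketch_marking_step(double /* time_target_ms */) {
  // ... do a bounded slice of marking work; abort on overflow/yield/timeout ...
  return SketchStepResult{ false };
}

static void sketch_marking_driver() {
  const double time_target_ms = 10.0;  // hypothetical per-step budget
  SketchStepResult result;
  do {
    result = sketch_marking_step(time_target_ms);
    // ... between steps: yield to safepoints, handle overflow restarts, ...
  } while (result.aborted);
  // A step that returns without having aborted means this task has completed
  // the current marking phase.
}
// ----------------------------------------------------------------------------
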
3255 
3256 void CMTask::do_marking_step(double time_target_ms,
3257                              bool do_termination,
3258                              bool is_serial) {
3259   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3260   assert(concurrent() == _cm->concurrent(), "they should be the same");
3261 
3262   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3263   assert(_task_queues != NULL, "invariant");
3264   assert(_task_queue != NULL, "invariant");
3265   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3266 
3267   assert(!_claimed,
3268          "only one thread should claim this task at any one time");
3269 
3270   // OK, this doesn't safeguard against all possible scenarios, as it is
3271   // possible for two threads to set the _claimed flag at the same
3272   // time. But it is only for debugging purposes anyway and it will
3273   // catch most problems.
3274   _claimed = true;
3275 
3276   _start_time_ms = os::elapsedVTime() * 1000.0;
3277 
3278   // If do_stealing is true then do_marking_step will attempt to
3279   // steal work from the other CMTasks. It only makes sense to
3280   // enable stealing when the termination protocol is enabled
3281   // and do_marking_step() is not being called serially.
3282   bool do_stealing = do_termination && !is_serial;
3283 
3284   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
3285   _time_target_ms = time_target_ms - diff_prediction_ms;
3286 
3287   // set up the variables that are used in the work-based scheme to
3288   // call the regular clock method
3289   _words_scanned = 0;
3290   _refs_reached  = 0;
3291   recalculate_limits();
3292 
3293   // clear all flags
3294   clear_has_aborted();
3295   _has_timed_out = false;
3296   _draining_satb_buffers = false;
3297 
3298   ++_calls;
3299 
3300   // Set up the bitmap and oop closures. Anything that uses them is
3301   // eventually called from this method, so it is OK to allocate these
3302   // on this method's stack frame.
3303   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3304   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3305   set_cm_oop_closure(&cm_oop_closure);
3306 
3307   if (_cm->has_overflown()) {
3308     // This can happen if the mark stack overflows during a GC pause
3309     // and this task, after a yield point, restarts. We have to abort
3310     // as we need to get into the overflow protocol which happens
3311     // right at the end of this task.
3312     set_has_aborted();
3313   }
3314 
3315   // First drain any available SATB buffers. After this, we will not
3316   // look at SATB buffers before the next invocation of this method.
3317   // If enough completed SATB buffers are queued up, the regular clock
3318   // will abort this task so that it restarts.
3319   drain_satb_buffers();
3320   // ...then partially drain the local queue and the global stack
3321   drain_local_queue(true);
3322   drain_global_stack(true);
3323 
3324   do {
3325     if (!has_aborted() && _curr_region != NULL) {
3326       // This means that we're already holding on to a region.
3327       assert(_finger != NULL, "if region is not NULL, then the finger "
3328              "should not be NULL either");
3329 
3330       // We might have restarted this task after an evacuation pause
3331       // which might have evacuated the region we're holding on to
3332       // underneath our feet. Let's read its limit again to make sure
3333       // that we do not iterate over a region of the heap that
3334       // contains garbage (update_region_limit() will also move
3335       // _finger to the start of the region if it is found empty).
3336       update_region_limit();
3337       // We will start from _finger not from the start of the region,
3338       // as we might be restarting this task after aborting half-way
3339       // through scanning this region. In this case, _finger points to
3340       // the address where we last found a marked object. If this is a
3341       // fresh region, _finger points to start().
3342       MemRegion mr = MemRegion(_finger, _region_limit);
3343 
3344       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
3345              "humongous regions should go around loop once only");
3346 
3347       // Some special cases:
3348       // If the memory region is empty, we can just give up the region.
3349       // If the current region is humongous then we only need to check
3350       // the bitmap for the bit associated with the start of the object,
3351       // scan the object if it's live, and give up the region.
3352       // Otherwise, let's iterate over the bitmap of the part of the region
3353       // that is left.
3354       // If the iteration is successful, give up the region.
3355       if (mr.is_empty()) {
3356         giveup_current_region();
3357         regular_clock_call();
3358       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
3359         if (_nextMarkBitMap->isMarked(mr.start())) {
3360           // The object is marked - apply the closure
3361           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
3362           bitmap_closure.do_bit(offset);
3363         }
3364         // Even if this task aborted while scanning the humongous object
3365         // we can (and should) give up the current region.
3366         giveup_current_region();
3367         regular_clock_call();
3368       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3369         giveup_current_region();
3370         regular_clock_call();
3371       } else {
3372         assert(has_aborted(), "currently the only way to do so");
3373         // The only way to abort the bitmap iteration is to return
3374         // false from the do_bit() method. However, inside the
3375         // do_bit() method we move the _finger to point to the
3376         // object currently being looked at. So, if we bail out, we
3377         // have definitely set _finger to something non-null.
3378         assert(_finger != NULL, "invariant");
3379 
3380         // Region iteration was actually aborted. So now _finger
3381         // points to the address of the object we last scanned. If we
3382         // leave it there, when we restart this task, we will rescan
3383         // the object. It is easy to avoid this. We move the finger by
3384         // enough to point to the next possible object header (the
3385         // bitmap knows by how much we need to move it as it knows its
3386         // granularity).
3387         assert(_finger < _region_limit, "invariant");
3388         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
3389         // Check if bitmap iteration was aborted while scanning the last object
3390         if (new_finger >= _region_limit) {
3391           giveup_current_region();
3392         } else {
3393           move_finger_to(new_finger);
3394         }
3395       }
3396     }
3397     // At this point we have either completed iterating over the
3398     // region we were holding on to, or we have aborted.
3399 
3400     // We then partially drain the local queue and the global stack.
3401     // (Do we really need this?)
3402     drain_local_queue(true);
3403     drain_global_stack(true);
3404 
3405     // Read the note on the claim_region() method on why it might
3406     // return NULL with potentially more regions available for
3407     // claiming and why we have to check out_of_regions() to determine
3408     // whether we're done or not.
3409     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3410       // We are going to try to claim a new region. We should have
3411       // given up on the previous one.
3412       // Separated the asserts so that we know which one fires.
3413       assert(_curr_region  == NULL, "invariant");
3414       assert(_finger       == NULL, "invariant");
3415       assert(_region_limit == NULL, "invariant");
3416       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3417       if (claimed_region != NULL) {
3418         // Yes, we managed to claim one
3419         setup_for_region(claimed_region);
3420         assert(_curr_region == claimed_region, "invariant");
3421       }
3422       // It is important to call the regular clock here. It might take
3423       // a while to claim a region if, for example, we hit a large
3424       // block of empty regions. So we need to call the regular clock
3425       // method once per iteration of this loop to make sure it's called
3426       // frequently enough.
3427       regular_clock_call();
3428     }
3429 
3430     if (!has_aborted() && _curr_region == NULL) {
3431       assert(_cm->out_of_regions(),
3432              "at this point we should be out of regions");
3433     }
3434   } while (_curr_region != NULL && !has_aborted());
3435 
3436   if (!has_aborted()) {
3437     // We cannot check whether the global stack is empty, since other
3438     // tasks might be pushing objects to it concurrently.
3439     assert(_cm->out_of_regions(),
3440            "at this point we should be out of regions");
3441     // Try to reduce the number of available SATB buffers so that
3442     // remark has less work to do.
3443     drain_satb_buffers();
3444   }
3445 
3446   // Since we've done everything else, we can now totally drain the
3447   // local queue and global stack.
3448   drain_local_queue(false);
3449   drain_global_stack(false);
3450 
3451   // Attempt work stealing from other tasks' queues.
3452   if (do_stealing && !has_aborted()) {
3453     // We have not aborted. This means that we have finished all that
3454     // we could. Let's try to do some stealing...
3455 
3456     // We cannot check whether the global stack is empty, since other
3457     // tasks might be pushing objects to it concurrently.
3458     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3459            "only way to reach here");
3460     while (!has_aborted()) {
3461       oop obj;
3462       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
3463         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3464                "any stolen object should be marked");
3465         scan_object(obj);
3466 
3467         // And since we're towards the end, let's totally drain the
3468         // local queue and global stack.
3469         drain_local_queue(false);
3470         drain_global_stack(false);
3471       } else {
3472         break;
3473       }
3474     }
3475   }
3476 
3477   // If we are about to wrap up and go into termination, check if we
3478   // should raise the overflow flag.
3479   if (do_termination && !has_aborted()) {
3480     if (_cm->force_overflow()->should_force()) {
3481       _cm->set_has_overflown();
3482       regular_clock_call();
3483     }
3484   }
3485 
3486   // We still haven't aborted. Now, let's try to get into the
3487   // termination protocol.
3488   if (do_termination && !has_aborted()) {
3489     // We cannot check whether the global stack is empty, since other
3490     // tasks might be concurrently pushing objects on it.
3491     // Separated the asserts so that we know which one fires.
3492     assert(_cm->out_of_regions(), "only way to reach here");
3493     assert(_task_queue->size() == 0, "only way to reach here");
3494     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3495 
3496     // The CMTask class also extends the TerminatorTerminator class,
3497     // hence its should_exit_termination() method will also decide
3498     // whether to exit the termination protocol or not.
3499     bool finished = (is_serial ||
3500                      _cm->terminator()->offer_termination(this));
3501     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3502     _termination_time_ms +=
3503       termination_end_time_ms - _termination_start_time_ms;
3504 
3505     if (finished) {
3506       // We're all done.
3507 
3508       if (_worker_id == 0) {
3509         // let's allow task 0 to do this
3510         if (concurrent()) {
3511           assert(_cm->concurrent_marking_in_progress(), "invariant");
3512           // we need to set this to false before the next
3513           // safepoint. This way we ensure that the marking phase
3514           // doesn't observe any more heap expansions.
3515           _cm->clear_concurrent_marking_in_progress();
3516         }
3517       }
3518 
3519       // We can now guarantee that the global stack is empty, since
3520       // all other tasks have finished. We separated the guarantees so
3521       // that, if a condition is false, we can immediately find out
3522       // which one.
3523       guarantee(_cm->out_of_regions(), "only way to reach here");
3524       guarantee(_cm->mark_stack_empty(), "only way to reach here");
3525       guarantee(_task_queue->size() == 0, "only way to reach here");
3526       guarantee(!_cm->has_overflown(), "only way to reach here");
3527       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3528     } else {
3529       // Apparently there's more work to do. Let's abort this task. The
3530       // caller will restart it and we can hopefully find more things to do.
3531       set_has_aborted();
3532     }
3533   }
3534 
3535   // Mainly for debugging purposes to make sure that a pointer to the
3536   // closure which was allocated in this frame doesn't
3537   // escape it by accident.
3538   set_cm_oop_closure(NULL);
3539   double end_time_ms = os::elapsedVTime() * 1000.0;
3540   double elapsed_time_ms = end_time_ms - _start_time_ms;
3541   // Update the step history.
3542   _step_times_ms.add(elapsed_time_ms);
3543 
3544   if (has_aborted()) {
3545     // The task was aborted for some reason.
3546     if (_has_timed_out) {
3547       double diff_ms = elapsed_time_ms - _time_target_ms;
3548       // Keep statistics of how well we did with respect to hitting
3549       // our target only if we actually timed out (if we aborted for
3550       // other reasons, then the results might get skewed).
3551       _marking_step_diffs_ms.add(diff_ms);
3552     }
3553 
3554     if (_cm->has_overflown()) {
3555       // This is the interesting one. We aborted because a global
3556       // overflow was raised. This means we have to restart the
3557       // marking phase and start iterating over regions. However, in
3558       // order to do this we have to make sure that all tasks stop
3559       // what they are doing and re-initialize in a safe manner. We
3560       // will achieve this with the use of two barrier sync points.
3561 
3562       if (!is_serial) {
3563         // We only need to enter the sync barrier if being called
3564         // from a parallel context
3565         _cm->enter_first_sync_barrier(_worker_id);
3566 
3567         // When we exit this sync barrier we know that all tasks have
3568         // stopped doing marking work. So, it's now safe to
3569         // re-initialize our data structures. At the end of this method,
3570         // task 0 will clear the global data structures.
3571       }
3572 
3573       // We clear the local state of this task...
3574       clear_region_fields();
3575 
3576       if (!is_serial) {
3577         // ...and enter the second barrier.
3578         _cm->enter_second_sync_barrier(_worker_id);
3579       }
3580       // At this point, if we're in the concurrent phase of
3581       // marking, everything has been re-initialized and we're
3582       // ready to restart.
3583     }
3584   }
3585 
3586   _claimed = false;
3587 }
3588 
3589 CMTask::CMTask(uint worker_id,
3590                ConcurrentMark* cm,
3591                size_t* marked_bytes,
3592                BitMap* card_bm,
3593                CMTaskQueue* task_queue,
3594                CMTaskQueueSet* task_queues)
3595   : _g1h(G1CollectedHeap::heap()),
3596     _worker_id(worker_id), _cm(cm),
3597     _claimed(false),
3598     _nextMarkBitMap(NULL), _hash_seed(17),
3599     _task_queue(task_queue),
3600     _task_queues(task_queues),
3601     _cm_oop_closure(NULL),
3602     _marked_bytes_array(marked_bytes),
3603     _card_bm(card_bm) {
3604   guarantee(task_queue != NULL, "invariant");
3605   guarantee(task_queues != NULL, "invariant");
3606 
3607   _marking_step_diffs_ms.add(0.5);
3608 }
3609 
3610 // These are formatting macros that are used below to ensure
3611 // consistent formatting. The *_H_* versions are used to format the
3612 // header for a particular value and they should be kept consistent
3613 // with the corresponding macro. Also note that most of the macros add
3614 // the necessary whitespace (as a prefix), which makes them a bit
3615 // easier to compose.
3616 
3617 // All the output lines are prefixed with this string to be able to
3618 // identify them easily in a large log file.
3619 #define G1PPRL_LINE_PREFIX            "###"
3620 
3621 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
3622 #ifdef _LP64
3623 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
3624 #else // _LP64
3625 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
3626 #endif // _LP64
3627 
3628 // For per-region info
3629 #define G1PPRL_TYPE_FORMAT            "   %-4s"
3630 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
3631 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
3632 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
3633 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
3634 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
3635 
3636 // For summary info
3637 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
3638 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
3639 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
3640 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
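
// --- Illustrative sketch (not part of the VM sources) ----------------------
// How macros of this kind compose: adjacent string literals are concatenated,
// so a whole row format can be built by juxtaposing the per-column format
// macros. The SKETCH_* macros and the function below are a hypothetical,
// self-contained illustration and are #undef'd immediately afterwards.
#include <cstddef>
#include <cstdio>

#define SKETCH_LINE_PREFIX  "###"
#define SKETCH_TYPE_FORMAT  "   %-4s"
#define SKETCH_BYTE_FORMAT  "  %9zu"

static void sketch_print_region_row(const char* type, std::size_t used_bytes) {
  // The format argument expands to the single literal "###   %-4s  %9zu\n".
  std::printf(SKETCH_LINE_PREFIX SKETCH_TYPE_FORMAT SKETCH_BYTE_FORMAT "\n",
              type, used_bytes);
}

#undef SKETCH_LINE_PREFIX
#undef SKETCH_TYPE_FORMAT
#undef SKETCH_BYTE_FORMAT
// ----------------------------------------------------------------------------
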
3641 
3642 G1PrintRegionLivenessInfoClosure::
3643 G1PrintRegionLivenessInfoClosure(const char* phase_name)
3644   : _total_used_bytes(0), _total_capacity_bytes(0),
3645     _total_prev_live_bytes(0), _total_next_live_bytes(0),
3646     _hum_used_bytes(0), _hum_capacity_bytes(0),
3647     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
3648     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
3649   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3650   MemRegion g1_reserved = g1h->g1_reserved();
3651   double now = os::elapsedTime();
3652 
3653   // Print the header of the output.
3654   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3655   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
3656                           G1PPRL_SUM_ADDR_FORMAT("reserved")
3657                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
3658                           p2i(g1_reserved.start()), p2i(g1_reserved.end()),
3659                           HeapRegion::GrainBytes);
3660   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3661   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3662                           G1PPRL_TYPE_H_FORMAT
3663                           G1PPRL_ADDR_BASE_H_FORMAT
3664                           G1PPRL_BYTE_H_FORMAT
3665                           G1PPRL_BYTE_H_FORMAT
3666                           G1PPRL_BYTE_H_FORMAT
3667                           G1PPRL_DOUBLE_H_FORMAT
3668                           G1PPRL_BYTE_H_FORMAT
3669                           G1PPRL_BYTE_H_FORMAT,
3670                           "type", "address-range",
3671                           "used", "prev-live", "next-live", "gc-eff",
3672                           "remset", "code-roots");
3673   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3674                           G1PPRL_TYPE_H_FORMAT
3675                           G1PPRL_ADDR_BASE_H_FORMAT
3676                           G1PPRL_BYTE_H_FORMAT
3677                           G1PPRL_BYTE_H_FORMAT
3678                           G1PPRL_BYTE_H_FORMAT
3679                           G1PPRL_DOUBLE_H_FORMAT
3680                           G1PPRL_BYTE_H_FORMAT
3681                           G1PPRL_BYTE_H_FORMAT,
3682                           "", "",
3683                           "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
3684                           "(bytes)", "(bytes)");
3685 }
3686 
3687 // It takes as a parameter a pointer to one of the _hum_* fields,
3688 // deduces the corresponding value for a region in a humongous region
3689 // series (either the region size, or what's left if the _hum_* field
3690 // is < the region size), and updates the _hum_* field accordingly.
3691 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
3692   size_t bytes = 0;
3693   // The > 0 check is to deal with the prev and next live bytes which
3694   // could be 0.
3695   if (*hum_bytes > 0) {
3696     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
3697     *hum_bytes -= bytes;
3698   }
3699   return bytes;
3700 }
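
// --- Illustrative sketch (not part of the VM sources) ----------------------
// The splitting that get_hum_bytes() performs, as a worked example with a
// hypothetical 1 MB region size: a 2.5 MB humongous total is reported as
// 1 MB + 1 MB + 0.5 MB across the three regions of its series, after which
// the running remainder is zero. Names and sizes are hypothetical.
#include <cstddef>

static size_t sketch_take_hum_bytes(size_t* remaining, size_t region_size) {
  size_t bytes = 0;
  if (*remaining > 0) {
    bytes = *remaining < region_size ? *remaining : region_size;
    *remaining -= bytes;
  }
  return bytes;
}

static bool sketch_hum_split_example() {
  const size_t region_size = 1024 * 1024;                          // hypothetical 1 MB regions
  size_t remaining = 2 * region_size + region_size / 2;            // 2.5 MB humongous total
  size_t first  = sketch_take_hum_bytes(&remaining, region_size);  // 1 MB
  size_t second = sketch_take_hum_bytes(&remaining, region_size);  // 1 MB
  size_t third  = sketch_take_hum_bytes(&remaining, region_size);  // 0.5 MB
  return first == region_size && second == region_size &&
         third == region_size / 2 && remaining == 0;               // all hold
}
// ----------------------------------------------------------------------------
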
3701 
3702 // It deduces the values for a region in a humongous region series
3703 // from the _hum_* fields and updates those accordingly. It assumes
3704 // that the _hum_* fields have already been set up from the "starts
3705 // humongous" region and we visit the regions in address order.
3706 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
3707                                                      size_t* capacity_bytes,
3708                                                      size_t* prev_live_bytes,
3709                                                      size_t* next_live_bytes) {
3710   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
3711   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
3712   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
3713   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
3714   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
3715 }
3716 
3717 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
3718   const char* type       = r->get_type_str();
3719   HeapWord* bottom       = r->bottom();
3720   HeapWord* end          = r->end();
3721   size_t capacity_bytes  = r->capacity();
3722   size_t used_bytes      = r->used();
3723   size_t prev_live_bytes = r->live_bytes();
3724   size_t next_live_bytes = r->next_live_bytes();
3725   double gc_eff          = r->gc_efficiency();
3726   size_t remset_bytes    = r->rem_set()->mem_size();
3727   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
3728 
3729   if (r->is_starts_humongous()) {
3730     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
3731            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
3732            "they should have been zeroed after the last time we used them");
3733     // Set up the _hum_* fields.
3734     _hum_capacity_bytes  = capacity_bytes;
3735     _hum_used_bytes      = used_bytes;
3736     _hum_prev_live_bytes = prev_live_bytes;
3737     _hum_next_live_bytes = next_live_bytes;
3738     get_hum_bytes(&used_bytes, &capacity_bytes,
3739                   &prev_live_bytes, &next_live_bytes);
3740     end = bottom + HeapRegion::GrainWords;
3741   } else if (r->is_continues_humongous()) {
3742     get_hum_bytes(&used_bytes, &capacity_bytes,
3743                   &prev_live_bytes, &next_live_bytes);
3744     assert(end == bottom + HeapRegion::GrainWords, "invariant");
3745   }
3746 
3747   _total_used_bytes      += used_bytes;
3748   _total_capacity_bytes  += capacity_bytes;
3749   _total_prev_live_bytes += prev_live_bytes;
3750   _total_next_live_bytes += next_live_bytes;
3751   _total_remset_bytes    += remset_bytes;
3752   _total_strong_code_roots_bytes += strong_code_roots_bytes;
3753 
3754   // Print a line for this particular region.
3755   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3756                           G1PPRL_TYPE_FORMAT
3757                           G1PPRL_ADDR_BASE_FORMAT
3758                           G1PPRL_BYTE_FORMAT
3759                           G1PPRL_BYTE_FORMAT
3760                           G1PPRL_BYTE_FORMAT
3761                           G1PPRL_DOUBLE_FORMAT
3762                           G1PPRL_BYTE_FORMAT
3763                           G1PPRL_BYTE_FORMAT,
3764                           type, p2i(bottom), p2i(end),
3765                           used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
3766                           remset_bytes, strong_code_roots_bytes);
3767 
3768   return false;
3769 }
3770 
3771 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3772   // add static memory usages to remembered set sizes
3773   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
3774   // Print the footer of the output.
3775   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3776   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3777                          " SUMMARY"
3778                          G1PPRL_SUM_MB_FORMAT("capacity")
3779                          G1PPRL_SUM_MB_PERC_FORMAT("used")
3780                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
3781                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
3782                          G1PPRL_SUM_MB_FORMAT("remset")
3783                          G1PPRL_SUM_MB_FORMAT("code-roots"),
3784                          bytes_to_mb(_total_capacity_bytes),
3785                          bytes_to_mb(_total_used_bytes),
3786                          perc(_total_used_bytes, _total_capacity_bytes),
3787                          bytes_to_mb(_total_prev_live_bytes),
3788                          perc(_total_prev_live_bytes, _total_capacity_bytes),
3789                          bytes_to_mb(_total_next_live_bytes),
3790                          perc(_total_next_live_bytes, _total_capacity_bytes),
3791                          bytes_to_mb(_total_remset_bytes),
3792                          bytes_to_mb(_total_strong_code_roots_bytes));
3793 }