1 /*
   2  * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMark.inline.hpp"
  30 #include "gc/g1/concurrentMarkThread.inline.hpp"
  31 #include "gc/g1/g1CollectedHeap.inline.hpp"
  32 #include "gc/g1/g1CollectorPolicy.hpp"
  33 #include "gc/g1/g1CollectorState.hpp"
  34 #include "gc/g1/g1OopClosures.inline.hpp"
  35 #include "gc/g1/g1RemSet.hpp"
  36 #include "gc/g1/g1StringDedup.hpp"
  37 #include "gc/g1/heapRegion.inline.hpp"
  38 #include "gc/g1/heapRegionManager.inline.hpp"
  39 #include "gc/g1/heapRegionRemSet.hpp"
  40 #include "gc/g1/heapRegionSet.inline.hpp"
  41 #include "gc/g1/suspendibleThreadSet.hpp"
  42 #include "gc/shared/gcId.hpp"
  43 #include "gc/shared/gcTimer.hpp"
  44 #include "gc/shared/gcTrace.hpp"
  45 #include "gc/shared/gcTraceTime.inline.hpp"
  46 #include "gc/shared/genOopClosures.inline.hpp"
  47 #include "gc/shared/referencePolicy.hpp"
  48 #include "gc/shared/strongRootsScope.hpp"
  49 #include "gc/shared/taskqueue.inline.hpp"
  50 #include "gc/shared/vmGCOperations.hpp"
  51 #include "logging/log.hpp"
  52 #include "memory/allocation.hpp"
  53 #include "memory/resourceArea.hpp"
  54 #include "oops/oop.inline.hpp"
  55 #include "runtime/atomic.inline.hpp"
  56 #include "runtime/handles.inline.hpp"
  57 #include "runtime/java.hpp"
  58 #include "runtime/prefetch.inline.hpp"
  59 #include "services/memTracker.hpp"
  60 
  61 // Concurrent marking bit map wrapper
  62 
  63 CMBitMapRO::CMBitMapRO(int shifter) :
  64   _bm(),
  65   _shifter(shifter) {
  66   _bmStartWord = 0;
  67   _bmWordSize = 0;
  68 }
  69 
  70 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  71                                                const HeapWord* limit) const {
  72   // First we must round addr *up* to a possible object boundary.
  73   addr = (HeapWord*)align_size_up((intptr_t)addr,
  74                                   HeapWordSize << _shifter);
  75   size_t addrOffset = heapWordToOffset(addr);
  76   assert(limit != NULL, "limit must not be NULL");
  77   size_t limitOffset = heapWordToOffset(limit);
  78   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  79   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  80   assert(nextAddr >= addr, "get_next_one postcondition");
  81   assert(nextAddr == limit || isMarked(nextAddr),
  82          "get_next_one postcondition");
  83   return nextAddr;
  84 }
  85 
  86 #ifndef PRODUCT
  87 bool CMBitMapRO::covers(MemRegion heap_rs) const {
  88   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  89   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
  90          "size inconsistency");
  91   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
  92          _bmWordSize  == heap_rs.word_size();
  93 }
  94 #endif
  95 
  96 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  97   _bm.print_on_error(st, prefix);
  98 }
  99 
 100 size_t CMBitMap::compute_size(size_t heap_size) {
 101   return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
 102 }
 103 
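     // One bitmap bit covers MinObjAlignment heap words, so one bitmap byte
     // covers MinObjAlignmentInBytes * BitsPerByte heap bytes; compute_size()
     // above relies on this ratio to size the bitmap's backing storage.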
 104 size_t CMBitMap::mark_distance() {
 105   return MinObjAlignmentInBytes * BitsPerByte;
 106 }
 107 
 108 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 109   _bmStartWord = heap.start();
 110   _bmWordSize = heap.word_size();
 111 
 112   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
 113   _bm.set_size(_bmWordSize >> _shifter);
 114 
 115   storage->set_mapping_changed_listener(&_listener);
 116 }
 117 
 118 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 119   if (zero_filled) {
 120     return;
 121   }
 122   // We need to clear the bitmap on commit, removing any existing information.
 123   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 124   _bm->clearRange(mr);
 125 }
 126 
 127 // Closure used for clearing the given mark bitmap.
 128 class ClearBitmapHRClosure : public HeapRegionClosure {
 129  private:
 130   ConcurrentMark* _cm;
 131   CMBitMap* _bitmap;
 132   bool _may_yield;      // The closure may yield during iteration. If it yields and marking has been aborted, the iteration is aborted.
 133  public:
 134   ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
 135     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
 136   }
 137 
 138   virtual bool doHeapRegion(HeapRegion* r) {
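         // Clear the bitmap in chunks that each cover 1M of the heap, so that
         // we can yield to the suspendible thread set between chunks when
         // _may_yield is set.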
 139     size_t const chunk_size_in_words = M / HeapWordSize;
 140 
 141     HeapWord* cur = r->bottom();
 142     HeapWord* const end = r->end();
 143 
 144     while (cur < end) {
 145       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 146       _bitmap->clearRange(mr);
 147 
 148       cur += chunk_size_in_words;
 149 
 150       // Abort iteration if after yielding the marking has been aborted.
 151       if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
 152         return true;
 153       }
 154       // Repeat the asserts from before the start of the closure. We will do them
 155       // as asserts here to minimize their overhead on the product. However, we
 156       // will have them as guarantees at the beginning / end of the bitmap
 157       // clearing to get some checking in the product.
 158       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
 159       assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
 160     }
 161 
 162     return false;
 163   }
 164 };
 165 
 166 class ParClearNextMarkBitmapTask : public AbstractGangTask {
 167   ClearBitmapHRClosure* _cl;
 168   HeapRegionClaimer     _hrclaimer;
 169   bool                  _suspendible; // If the task is suspendible, workers must join the STS.
 170 
 171 public:
 172   ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
 173       AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}
 174 
 175   void work(uint worker_id) {
 176     SuspendibleThreadSetJoiner sts_join(_suspendible);
 177     G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
 178   }
 179 };
 180 
 181 void CMBitMap::clearAll() {
 182   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 183   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
 184   uint n_workers = g1h->workers()->active_workers();
 185   ParClearNextMarkBitmapTask task(&cl, n_workers, false);
 186   g1h->workers()->run_task(&task);
 187   guarantee(cl.complete(), "Must have completed iteration.");
 188   return;
 189 }
 190 
 191 void CMBitMap::clearRange(MemRegion mr) {
 192   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 193   assert(!mr.is_empty(), "unexpected empty region");
 194   // convert address range into offset range
 195   _bm.at_put_range(heapWordToOffset(mr.start()),
 196                    heapWordToOffset(mr.end()), false);
 197 }
 198 
 199 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 200   _base(NULL), _cm(cm)
 201 {}
 202 
 203 bool CMMarkStack::allocate(size_t capacity) {
 204   // allocate a stack of the requisite depth
 205   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 206   if (!rs.is_reserved()) {
 207     warning("ConcurrentMark MarkStack allocation failure");
 208     return false;
 209   }
 210   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 211   if (!_virtual_space.initialize(rs, rs.size())) {
 212     warning("ConcurrentMark MarkStack backing store failure");
 213     // Release the virtual memory reserved for the marking stack
 214     rs.release();
 215     return false;
 216   }
 217   assert(_virtual_space.committed_size() == rs.size(),
 218          "Didn't reserve backing store for all of ConcurrentMark stack?");
 219   _base = (oop*) _virtual_space.low();
 220   setEmpty();
 221   _capacity = (jint) capacity;
 222   _saved_index = -1;
 223   _should_expand = false;
 224   return true;
 225 }
 226 
 227 void CMMarkStack::expand() {
 228   // Called during remark if we've overflown the marking stack during marking.
 229   assert(isEmpty(), "stack should have been emptied while handling overflow");
 230   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 231   // Clear expansion flag
 232   _should_expand = false;
 233   if (_capacity == (jint) MarkStackSizeMax) {
 234     log_trace(gc)("(benign) Can't expand marking stack capacity, at max size limit");
 235     return;
 236   }
 237   // Double capacity if possible
 238   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
 239   // Do not give up the existing stack until we have managed to
 240   // obtain the doubled capacity that we desire.
 241   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 242                                                            sizeof(oop)));
 243   if (rs.is_reserved()) {
 244     // Release the backing store associated with old stack
 245     _virtual_space.release();
 246     // Reinitialize virtual space for new stack
 247     if (!_virtual_space.initialize(rs, rs.size())) {
 248       fatal("Not enough swap for expanded marking stack capacity");
 249     }
 250     _base = (oop*)(_virtual_space.low());
 251     _index = 0;
 252     _capacity = new_capacity;
 253   } else {
 254     // Failed to double the capacity; continue with the current stack.
 255     log_trace(gc)("(benign) Failed to expand marking stack capacity from " SIZE_FORMAT "K to " SIZE_FORMAT "K",
 256                   _capacity / K, new_capacity / K);
 257   }
 258 }
 259 
 260 void CMMarkStack::set_should_expand() {
 261   // If we're resetting the marking state because of a
 262   // marking stack overflow, record that we should, if
 263   // possible, expand the stack.
 264   _should_expand = _cm->has_overflown();
 265 }
 266 
 267 CMMarkStack::~CMMarkStack() {
 268   if (_base != NULL) {
 269     _base = NULL;
 270     _virtual_space.release();
 271   }
 272 }
 273 
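     // Push n entries from ptr_arr onto the global mark stack, taking
     // ParGCRareEvent_lock. If there is not enough room for all n entries,
     // nothing is pushed and the _overflow flag is set instead.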
 274 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 275   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 276   jint start = _index;
 277   jint next_index = start + n;
 278   if (next_index > _capacity) {
 279     _overflow = true;
 280     return;
 281   }
 282   // Otherwise.
 283   _index = next_index;
 284   for (int i = 0; i < n; i++) {
 285     int ind = start + i;
 286     assert(ind < _capacity, "By overflow test above.");
 287     _base[ind] = ptr_arr[i];
 288   }
 289 }
 290 
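     // Pop up to max entries from the global mark stack into ptr_arr, taking
     // ParGCRareEvent_lock. Returns true and sets *n to the number of entries
     // transferred; returns false with *n == 0 if the stack is empty.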
 291 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 292   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 293   jint index = _index;
 294   if (index == 0) {
 295     *n = 0;
 296     return false;
 297   } else {
 298     int k = MIN2(max, index);
 299     jint  new_ind = index - k;
 300     for (int j = 0; j < k; j++) {
 301       ptr_arr[j] = _base[new_ind + j];
 302     }
 303     _index = new_ind;
 304     *n = k;
 305     return true;
 306   }
 307 }
 308 
 309 void CMMarkStack::note_start_of_gc() {
 310   assert(_saved_index == -1,
 311          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 312   _saved_index = _index;
 313 }
 314 
 315 void CMMarkStack::note_end_of_gc() {
 316   // This is intentionally a guarantee, instead of an assert. If we
 317   // accidentally add something to the mark stack during GC, it
 318   // will be a correctness issue, so it's better if we crash. We'll
 319   // only check this once per GC anyway, so it won't be a performance
 320   // issue in any way.
 321   guarantee(_saved_index == _index,
 322             "saved index: %d index: %d", _saved_index, _index);
 323   _saved_index = -1;
 324 }
 325 
 326 CMRootRegions::CMRootRegions() :
 327   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 328   _should_abort(false),  _next_survivor(NULL) { }
 329 
 330 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 331   _young_list = g1h->young_list();
 332   _cm = cm;
 333 }
 334 
 335 void CMRootRegions::prepare_for_scan() {
 336   assert(!scan_in_progress(), "pre-condition");
 337 
 338   // Currently, only survivors can be root regions.
 339   assert(_next_survivor == NULL, "pre-condition");
 340   _next_survivor = _young_list->first_survivor_region();
 341   _scan_in_progress = (_next_survivor != NULL);
 342   _should_abort = false;
 343 }
 344 
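     // Claim the next unclaimed survivor region, or return NULL if there are
     // none left or the scan should abort. Multiple threads may call this
     // concurrently; it synchronizes on RootRegionScan_lock.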
 345 HeapRegion* CMRootRegions::claim_next() {
 346   if (_should_abort) {
 347     // If someone has set the should_abort flag, we return NULL to
 348     // force the caller to bail out of their loop.
 349     return NULL;
 350   }
 351 
 352   // Currently, only survivors can be root regions.
 353   HeapRegion* res = _next_survivor;
 354   if (res != NULL) {
 355     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 356     // Read it again in case it changed while we were waiting for the lock.
 357     res = _next_survivor;
 358     if (res != NULL) {
 359       if (res == _young_list->last_survivor_region()) {
 360         // We just claimed the last survivor so store NULL to indicate
 361         // that we're done.
 362         _next_survivor = NULL;
 363       } else {
 364         _next_survivor = res->get_next_young_region();
 365       }
 366     } else {
 367       // Someone else claimed the last survivor while we were trying
 368       // to take the lock, so there is nothing else to do.
 369     }
 370   }
 371   assert(res == NULL || res->is_survivor(), "post-condition");
 372 
 373   return res;
 374 }
 375 
 376 void CMRootRegions::scan_finished() {
 377   assert(scan_in_progress(), "pre-condition");
 378 
 379   // Currently, only survivors can be root regions.
 380   if (!_should_abort) {
 381     assert(_next_survivor == NULL, "we should have claimed all survivors");
 382   }
 383   _next_survivor = NULL;
 384 
 385   {
 386     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 387     _scan_in_progress = false;
 388     RootRegionScan_lock->notify_all();
 389   }
 390 }
 391 
 392 bool CMRootRegions::wait_until_scan_finished() {
 393   if (!scan_in_progress()) return false;
 394 
 395   {
 396     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 397     while (scan_in_progress()) {
 398       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 399     }
 400   }
 401   return true;
 402 }
 403 
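     // Scale the number of concurrent marking threads to roughly a quarter of
     // the given number of parallel GC threads, but always use at least one.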
 404 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 405   return MAX2((n_par_threads + 2) / 4, 1U);
 406 }
 407 
 408 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 409   _g1h(g1h),
 410   _markBitMap1(),
 411   _markBitMap2(),
 412   _parallel_marking_threads(0),
 413   _max_parallel_marking_threads(0),
 414   _sleep_factor(0.0),
 415   _marking_task_overhead(1.0),
 416   _cleanup_list("Cleanup List"),
 417   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 418   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
 419             CardTableModRefBS::card_shift,
 420             false /* in_resource_area*/),
 421 
 422   _prevMarkBitMap(&_markBitMap1),
 423   _nextMarkBitMap(&_markBitMap2),
 424 
 425   _markStack(this),
 426   // _finger set in set_non_marking_state
 427 
 428   _max_worker_id(ParallelGCThreads),
 429   // _active_tasks set in set_non_marking_state
 430   // _tasks set inside the constructor
 431   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 432   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 433 
 434   _has_overflown(false),
 435   _concurrent(false),
 436   _has_aborted(false),
 437   _restart_for_overflow(false),
 438   _concurrent_marking_in_progress(false),
 439   _concurrent_phase_started(false),
 440 
 441   // _verbose_level set below
 442 
 443   _init_times(),
 444   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 445   _cleanup_times(),
 446   _total_counting_time(0.0),
 447   _total_rs_scrub_time(0.0),
 448 
 449   _parallel_workers(NULL),
 450 
 451   _count_card_bitmaps(NULL),
 452   _count_marked_bytes(NULL),
 453   _completed_initialization(false) {
 454 
 455   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 456   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 457 
 458   // Create & start a ConcurrentMark thread.
 459   _cmThread = new ConcurrentMarkThread(this);
 460   assert(cmThread() != NULL, "CM Thread should have been created");
 461   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 462   if (_cmThread->osthread() == NULL) {
 463     vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 464   }
 465 
 466   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 467   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 468   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 469 
 470   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 471   satb_qs.set_buffer_size(G1SATBBufferSize);
 472 
 473   _root_regions.init(_g1h, this);
 474 
 475   if (ConcGCThreads > ParallelGCThreads) {
 476     warning("Can't have more ConcGCThreads (%u) "
 477             "than ParallelGCThreads (%u).",
 478             ConcGCThreads, ParallelGCThreads);
 479     return;
 480   }
 481   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 482     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 483     // if both are set
 484     _sleep_factor             = 0.0;
 485     _marking_task_overhead    = 1.0;
 486   } else if (G1MarkingOverheadPercent > 0) {
 487     // We will calculate the number of parallel marking threads based
 488     // on a target overhead with respect to the soft real-time goal
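         // overall_cm_overhead is the target fraction of total CPU time to
         // spend on concurrent marking; a single thread accounts for cpu_ratio
         // of total CPU, so we need ceil(overall_cm_overhead / cpu_ratio)
         // marking threads. Each thread then sleeps for sleep_factor times the
         // time it has just spent marking, so that its utilization matches
         // marking_task_overhead.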
 489     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 490     double overall_cm_overhead =
 491       (double) MaxGCPauseMillis * marking_overhead /
 492       (double) GCPauseIntervalMillis;
 493     double cpu_ratio = 1.0 / (double) os::processor_count();
 494     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 495     double marking_task_overhead =
 496       overall_cm_overhead / marking_thread_num *
 497                                               (double) os::processor_count();
 498     double sleep_factor =
 499                        (1.0 - marking_task_overhead) / marking_task_overhead;
 500 
 501     FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
 502     _sleep_factor             = sleep_factor;
 503     _marking_task_overhead    = marking_task_overhead;
 504   } else {
 505     // Calculate the number of parallel marking threads by scaling
 506     // the number of parallel GC threads.
 507     uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
 508     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 509     _sleep_factor             = 0.0;
 510     _marking_task_overhead    = 1.0;
 511   }
 512 
 513   assert(ConcGCThreads > 0, "Should have been set");
 514   _parallel_marking_threads = ConcGCThreads;
 515   _max_parallel_marking_threads = _parallel_marking_threads;
 516 
 517   _parallel_workers = new WorkGang("G1 Marker",
 518        _max_parallel_marking_threads, false, true);
 519   if (_parallel_workers == NULL) {
 520     vm_exit_during_initialization("Failed necessary allocation.");
 521   } else {
 522     _parallel_workers->initialize_workers();
 523   }
 524 
 525   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 526     size_t mark_stack_size =
 527       MIN2(MarkStackSizeMax,
 528           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 529     // Verify that the calculated value for MarkStackSize is in range.
 530     // It would be nice to use the private utility routine from Arguments.
 531     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 532       warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 533               "must be between 1 and " SIZE_FORMAT,
 534               mark_stack_size, MarkStackSizeMax);
 535       return;
 536     }
 537     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 538   } else {
 539     // Verify MarkStackSize is in range.
 540     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 541       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 542         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 543           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 544                   "must be between 1 and " SIZE_FORMAT,
 545                   MarkStackSize, MarkStackSizeMax);
 546           return;
 547         }
 548       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 549         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 550           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 551                   " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 552                   MarkStackSize, MarkStackSizeMax);
 553           return;
 554         }
 555       }
 556     }
 557   }
 558 
 559   if (!_markStack.allocate(MarkStackSize)) {
 560     warning("Failed to allocate CM marking stack");
 561     return;
 562   }
 563 
 564   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 565   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 566 
 567   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 568   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 569 
 570   BitMap::idx_t card_bm_size = _card_bm.size();
 571 
 572   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 573   _active_tasks = _max_worker_id;
 574 
 575   uint max_regions = _g1h->max_regions();
 576   for (uint i = 0; i < _max_worker_id; ++i) {
 577     CMTaskQueue* task_queue = new CMTaskQueue();
 578     task_queue->initialize();
 579     _task_queues->register_queue(i, task_queue);
 580 
 581     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 582     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 583 
 584     _tasks[i] = new CMTask(i, this,
 585                            _count_marked_bytes[i],
 586                            &_count_card_bitmaps[i],
 587                            task_queue, _task_queues);
 588 
 589     _accum_task_vtime[i] = 0.0;
 590   }
 591 
 592   // Calculate the card number for the bottom of the heap. Used
 593   // in biasing indexes into the accounting card bitmaps.
 594   _heap_bottom_card_num =
 595     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 596                                 CardTableModRefBS::card_shift);
 597 
 598   // Clear all the liveness counting data
 599   clear_all_count_data();
 600 
 601   // so that the call below can read a sensible value
 602   _heap_start = g1h->reserved_region().start();
 603   set_non_marking_state();
 604   _completed_initialization = true;
 605 }
 606 
 607 void ConcurrentMark::reset() {
 608   // Starting values for these two. This should be called in a STW
 609   // phase.
 610   MemRegion reserved = _g1h->g1_reserved();
 611   _heap_start = reserved.start();
 612   _heap_end   = reserved.end();
 613 
 614   // Separated the asserts so that we know which one fires.
 615   assert(_heap_start != NULL, "heap bounds should look ok");
 616   assert(_heap_end != NULL, "heap bounds should look ok");
 617   assert(_heap_start < _heap_end, "heap bounds should look ok");
 618 
 619   // Reset all the marking data structures and any necessary flags
 620   reset_marking_state();
 621 
 622   // We do reset all of them, since different phases will use
 623   // different numbers of active threads. So, it's easiest to have all
 624   // of them ready.
 625   for (uint i = 0; i < _max_worker_id; ++i) {
 626     _tasks[i]->reset(_nextMarkBitMap);
 627   }
 628 
 629   // We need this to make sure that the flag is on during the evacuation
 630   // pause that has the initial mark piggy-backed on it.
 631   set_concurrent_marking_in_progress();
 632 }
 633 
 634 
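     // Reset the global mark stack and all per-worker task queues, and move
     // the global finger back to the bottom of the heap. The overflow flag is
     // only cleared if clear_overflow is true.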
 635 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 636   _markStack.set_should_expand();
 637   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 638   if (clear_overflow) {
 639     clear_has_overflown();
 640   } else {
 641     assert(has_overflown(), "pre-condition");
 642   }
 643   _finger = _heap_start;
 644 
 645   for (uint i = 0; i < _max_worker_id; ++i) {
 646     CMTaskQueue* queue = _task_queues->queue(i);
 647     queue->set_empty();
 648   }
 649 }
 650 
 651 void ConcurrentMark::set_concurrency(uint active_tasks) {
 652   assert(active_tasks <= _max_worker_id, "we should not have more");
 653 
 654   _active_tasks = active_tasks;
 655   // Need to update the three data structures below according to the
 656   // number of active threads for this phase.
 657   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 658   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 659   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 660 }
 661 
 662 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 663   set_concurrency(active_tasks);
 664 
 665   _concurrent = concurrent;
 666   // We propagate this to all tasks, not just the active ones.
 667   for (uint i = 0; i < _max_worker_id; ++i)
 668     _tasks[i]->set_concurrent(concurrent);
 669 
 670   if (concurrent) {
 671     set_concurrent_marking_in_progress();
 672   } else {
 673     // We currently assume that the concurrent flag has been set to
 674     // false before we start remark. At this point we should also be
 675     // in a STW phase.
 676     assert(!concurrent_marking_in_progress(), "invariant");
 677     assert(out_of_regions(),
 678            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 679            p2i(_finger), p2i(_heap_end));
 680   }
 681 }
 682 
 683 void ConcurrentMark::set_non_marking_state() {
 684   // We set the global marking state to some default values when we're
 685   // not doing marking.
 686   reset_marking_state();
 687   _active_tasks = 0;
 688   clear_concurrent_marking_in_progress();
 689 }
 690 
 691 ConcurrentMark::~ConcurrentMark() {
 692   // The ConcurrentMark instance is never freed.
 693   ShouldNotReachHere();
 694 }
 695 
 696 void ConcurrentMark::clearNextBitmap() {
 697   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 698 
 699   // Make sure that the concurrent mark thread still appears to be in
 700   // the current cycle.
 701   guarantee(cmThread()->during_cycle(), "invariant");
 702 
 703   // We are finishing up the current cycle by clearing the next
 704   // marking bitmap and getting it ready for the next cycle. During
 705   // this time no other cycle can start. So, let's make sure that this
 706   // is the case.
 707   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 708 
 709   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
 710   ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
 711   _parallel_workers->run_task(&task);
 712 
 713   // Clear the liveness counting data. If the marking has been aborted, the abort()
 714   // call already did that.
 715   if (cl.complete()) {
 716     clear_all_count_data();
 717   }
 718 
 719   // Repeat the asserts from above.
 720   guarantee(cmThread()->during_cycle(), "invariant");
 721   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 722 }
 723 
 724 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 725   CMBitMap* _bitmap;
 726   bool _error;
 727  public:
 728   CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
 729   }
 730 
 731   virtual bool doHeapRegion(HeapRegion* r) {
 732     // This closure can be called concurrently with the mutator, so we must make sure
 733     // that the result of the getNextMarkedWordAddress() call is compared to the
 734     // value passed to it as limit to detect any found bits.
 735     // end never changes in G1.
 736     HeapWord* end = r->end();
 737     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 738   }
 739 };
 740 
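     // Returns true iff no bits are set in the next marking bitmap for any
     // heap region, i.e. the closure above did not find a marked word anywhere.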
 741 bool ConcurrentMark::nextMarkBitmapIsClear() {
 742   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 743   _g1h->heap_region_iterate(&cl);
 744   return cl.complete();
 745 }
 746 
 747 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 748 public:
 749   bool doHeapRegion(HeapRegion* r) {
 750     r->note_start_of_marking();
 751     return false;
 752   }
 753 };
 754 
 755 void ConcurrentMark::checkpointRootsInitialPre() {
 756   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 757   G1CollectorPolicy* g1p = g1h->g1_policy();
 758 
 759   _has_aborted = false;
 760 
 761   // Initialize marking structures. This has to be done in a STW phase.
 762   reset();
 763 
 764   // For each region note start of marking.
 765   NoteStartOfMarkHRClosure startcl;
 766   g1h->heap_region_iterate(&startcl);
 767 }
 768 
 769 
 770 void ConcurrentMark::checkpointRootsInitialPost() {
 771   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 772 
 773   // Start Concurrent Marking weak-reference discovery.
 774   ReferenceProcessor* rp = g1h->ref_processor_cm();
 775   // enable ("weak") refs discovery
 776   rp->enable_discovery();
 777   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 778 
 779   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 780   // This is the start of the marking cycle; we expect all
 781   // threads to have SATB queues with active set to false.
 782   satb_mq_set.set_active_all_threads(true, /* new active value */
 783                                      false /* expected_active */);
 784 
 785   _root_regions.prepare_for_scan();
 786 
 787   // update_g1_committed() will be called at the end of an evac pause
 788   // when marking is on. So, it's also called at the end of the
 789   // initial-mark pause to update the heap end, if the heap expands
 790   // during it. No need to call it here.
 791 }
 792 
 793 /*
 794  * Notice that in the next two methods, we actually leave the STS
 795  * during the barrier sync and join it immediately afterwards. If we
 796  * do not do this, the following deadlock can occur: one thread could
 797  * be in the barrier sync code, waiting for the other thread to also
 798  * sync up, whereas another one could be trying to yield, while also
 799  * waiting for the other threads to sync up too.
 800  *
 801  * Note, however, that this code is also used during remark and in
 802  * this case we should not attempt to leave / enter the STS, otherwise
 803  * we'll either hit an assert (debug / fastdebug) or deadlock
 804  * (product). So we should only leave / enter the STS if we are
 805  * operating concurrently.
 806  *
 807  * Because the thread that does the sync barrier has left the STS, a
 808  * Full GC or an evacuation pause could occur while it is blocked in
 809  * the barrier. This is actually safe, since entering the sync
 810  * barrier is one of the last things do_marking_step() does, and it
 811  * doesn't manipulate any data structures afterwards.
 812  */
 813 
 814 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 815   bool barrier_aborted;
 816   {
 817     SuspendibleThreadSetLeaver sts_leave(concurrent());
 818     barrier_aborted = !_first_overflow_barrier_sync.enter();
 819   }
 820 
 821   // at this point everyone should have synced up and not be doing any
 822   // more work
 823 
 824   if (barrier_aborted) {
 825     // If the barrier aborted we ignore the overflow condition and
 826     // just abort the whole marking phase as quickly as possible.
 827     return;
 828   }
 829 
 830   // If we're executing the concurrent phase of marking, reset the marking
 831   // state; otherwise the marking state is reset after reference processing,
 832   // during the remark pause.
 833   // If we reset here as a result of an overflow during the remark we will
 834   // see assertion failures from any subsequent set_concurrency_and_phase()
 835   // calls.
 836   if (concurrent()) {
 837     // let the task associated with worker 0 do this
 838     if (worker_id == 0) {
 839       // task 0 is responsible for clearing the global data structures
 840       // We should be here because of an overflow. During STW we should
 841       // not clear the overflow flag since we rely on it being true when
 842       // we exit this method to abort the pause and restart concurrent
 843       // marking.
 844       reset_marking_state(true /* clear_overflow */);
 845 
 846       log_info(gc)("Concurrent Mark reset for overflow");
 847     }
 848   }
 849 
 850   // after this, each task should reset its own data structures and
 851   // then go into the second barrier
 852 }
 853 
 854 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 855   SuspendibleThreadSetLeaver sts_leave(concurrent());
 856   _second_overflow_barrier_sync.enter();
 857 
 858   // at this point everything should be re-initialized and ready to go
 859 }
 860 
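     // Gang task that drives concurrent marking: each worker repeatedly calls
     // do_marking_step() on its CMTask for a bounded time slice, yielding to
     // the suspendible thread set and, if a sleep factor is configured,
     // sleeping between steps, until marking completes or is aborted.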
 861 class CMConcurrentMarkingTask: public AbstractGangTask {
 862 private:
 863   ConcurrentMark*       _cm;
 864   ConcurrentMarkThread* _cmt;
 865 
 866 public:
 867   void work(uint worker_id) {
 868     assert(Thread::current()->is_ConcurrentGC_thread(),
 869            "this should only be done by a conc GC thread");
 870     ResourceMark rm;
 871 
 872     double start_vtime = os::elapsedVTime();
 873 
 874     {
 875       SuspendibleThreadSetJoiner sts_join;
 876 
 877       assert(worker_id < _cm->active_tasks(), "invariant");
 878       CMTask* the_task = _cm->task(worker_id);
 879       the_task->record_start_time();
 880       if (!_cm->has_aborted()) {
 881         do {
 882           double start_vtime_sec = os::elapsedVTime();
 883           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 884 
 885           the_task->do_marking_step(mark_step_duration_ms,
 886                                     true  /* do_termination */,
 887                                     false /* is_serial*/);
 888 
 889           double end_vtime_sec = os::elapsedVTime();
 890           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
 891           _cm->clear_has_overflown();
 892 
 893           _cm->do_yield_check(worker_id);
 894 
 895           jlong sleep_time_ms;
 896           if (!_cm->has_aborted() && the_task->has_aborted()) {
 897             sleep_time_ms =
 898               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
 899             {
 900               SuspendibleThreadSetLeaver sts_leave;
 901               os::sleep(Thread::current(), sleep_time_ms, false);
 902             }
 903           }
 904         } while (!_cm->has_aborted() && the_task->has_aborted());
 905       }
 906       the_task->record_end_time();
 907       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
 908     }
 909 
 910     double end_vtime = os::elapsedVTime();
 911     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 912   }
 913 
 914   CMConcurrentMarkingTask(ConcurrentMark* cm,
 915                           ConcurrentMarkThread* cmt) :
 916       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
 917 
 918   ~CMConcurrentMarkingTask() { }
 919 };
 920 
 921 // Calculates the number of active workers for a concurrent
 922 // phase.
 923 uint ConcurrentMark::calc_parallel_marking_threads() {
 924   uint n_conc_workers = 0;
 925   if (!UseDynamicNumberOfGCThreads ||
 926       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
 927        !ForceDynamicNumberOfGCThreads)) {
 928     n_conc_workers = max_parallel_marking_threads();
 929   } else {
 930     n_conc_workers =
 931       AdaptiveSizePolicy::calc_default_active_workers(
 932                                    max_parallel_marking_threads(),
 933                                    1, /* Minimum workers */
 934                                    parallel_marking_threads(),
 935                                    Threads::number_of_non_daemon_threads());
 936     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
 937     // that scaling has already gone into "_max_parallel_marking_threads".
 938   }
 939   assert(n_conc_workers > 0, "Always need at least 1");
 940   return n_conc_workers;
 941 }
 942 
 943 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
 944   // Currently, only survivors can be root regions.
 945   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
 946   G1RootRegionScanClosure cl(_g1h, this, worker_id);
 947 
 948   const uintx interval = PrefetchScanIntervalInBytes;
 949   HeapWord* curr = hr->bottom();
 950   const HeapWord* end = hr->top();
 951   while (curr < end) {
 952     Prefetch::read(curr, interval);
 953     oop obj = oop(curr);
 954     int size = obj->oop_iterate_size(&cl);
 955     assert(size == obj->size(), "sanity");
 956     curr += size;
 957   }
 958 }
 959 
 960 class CMRootRegionScanTask : public AbstractGangTask {
 961 private:
 962   ConcurrentMark* _cm;
 963 
 964 public:
 965   CMRootRegionScanTask(ConcurrentMark* cm) :
 966     AbstractGangTask("Root Region Scan"), _cm(cm) { }
 967 
 968   void work(uint worker_id) {
 969     assert(Thread::current()->is_ConcurrentGC_thread(),
 970            "this should only be done by a conc GC thread");
 971 
 972     CMRootRegions* root_regions = _cm->root_regions();
 973     HeapRegion* hr = root_regions->claim_next();
 974     while (hr != NULL) {
 975       _cm->scanRootRegion(hr, worker_id);
 976       hr = root_regions->claim_next();
 977     }
 978   }
 979 };
 980 
 981 void ConcurrentMark::scanRootRegions() {
 982   // Start of concurrent marking.
 983   ClassLoaderDataGraph::clear_claimed_marks();
 984 
 985   // scan_in_progress() will have been set to true only if there was
 986   // at least one root region to scan. So, if it's false, we
 987   // should not attempt to do any further work.
 988   if (root_regions()->scan_in_progress()) {
 989     GCTraceConcTime(Info, gc) tt("Concurrent Root Region Scan");
 990 
 991     _parallel_marking_threads = calc_parallel_marking_threads();
 992     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
 993            "Maximum number of marking threads exceeded");
 994     uint active_workers = MAX2(1U, parallel_marking_threads());
 995 
 996     CMRootRegionScanTask task(this);
 997     _parallel_workers->set_active_workers(active_workers);
 998     _parallel_workers->run_task(&task);
 999 
1000     // It's possible that has_aborted() is true here without actually
1001     // aborting the survivor scan earlier. This is OK as it's
1002     // mainly used for sanity checking.
1003     root_regions()->scan_finished();
1004   }
1005 }
1006 
1007 void ConcurrentMark::register_concurrent_phase_start(const char* title) {
1008   assert(!_concurrent_phase_started, "Sanity");
1009   _concurrent_phase_started = true;
1010   _g1h->gc_timer_cm()->register_gc_concurrent_start(title);
1011 }
1012 
1013 void ConcurrentMark::register_concurrent_phase_end() {
1014   if (_concurrent_phase_started) {
1015     _concurrent_phase_started = false;
1016     _g1h->gc_timer_cm()->register_gc_concurrent_end();
1017   }
1018 }
1019 
1020 void ConcurrentMark::markFromRoots() {
1021   // we might be tempted to assert that:
1022   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1023   //        "inconsistent argument?");
1024   // However that wouldn't be right, because it's possible that
1025   // a safepoint is indeed in progress as a younger generation
1026   // stop-the-world GC happens even as we mark in this generation.
1027 
1028   _restart_for_overflow = false;
1029 
1030   // _g1h has _n_par_threads
1031   _parallel_marking_threads = calc_parallel_marking_threads();
1032   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1033     "Maximum number of marking threads exceeded");
1034 
1035   uint active_workers = MAX2(1U, parallel_marking_threads());
1036   assert(active_workers > 0, "Should have been set");
1037 
1038   // Parallel task terminator is set in "set_concurrency_and_phase()"
1039   set_concurrency_and_phase(active_workers, true /* concurrent */);
1040 
1041   CMConcurrentMarkingTask markingTask(this, cmThread());
1042   _parallel_workers->set_active_workers(active_workers);
1043   _parallel_workers->run_task(&markingTask);
1044 
1045   print_stats();
1046 }
1047 
1048 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1049   // world is stopped at this checkpoint
1050   assert(SafepointSynchronize::is_at_safepoint(),
1051          "world should be stopped");
1052 
1053   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1054 
1055   // If a full collection has happened, we shouldn't do this.
1056   if (has_aborted()) {
1057     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1058     return;
1059   }
1060 
1061   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1062 
1063   if (VerifyDuringGC) {
1064     HandleMark hm;  // handle scope
1065     g1h->prepare_for_verify();
1066     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1067   }
1068   g1h->check_bitmaps("Remark Start");
1069 
1070   G1CollectorPolicy* g1p = g1h->g1_policy();
1071   g1p->record_concurrent_mark_remark_start();
1072 
1073   double start = os::elapsedTime();
1074 
1075   checkpointRootsFinalWork();
1076 
1077   double mark_work_end = os::elapsedTime();
1078 
1079   weakRefsWork(clear_all_soft_refs);
1080 
1081   if (has_overflown()) {
1082     // Oops.  We overflowed.  Restart concurrent marking.
1083     _restart_for_overflow = true;
1084     log_develop_trace(gc)("Remark led to restart for overflow.");
1085 
1086     // Verify the heap w.r.t. the previous marking bitmap.
1087     if (VerifyDuringGC) {
1088       HandleMark hm;  // handle scope
1089       g1h->prepare_for_verify();
1090       Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
1091     }
1092 
1093     // Clear the marking state because we will be restarting
1094     // marking due to overflowing the global mark stack.
1095     reset_marking_state();
1096   } else {
1097     {
1098       GCTraceTime(Debug, gc) trace("GC Aggregate Data", g1h->gc_timer_cm());
1099 
1100       // Aggregate the per-task counting data that we have accumulated
1101       // while marking.
1102       aggregate_count_data();
1103     }
1104 
1105     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1106     // We're done with marking.
1107     // This is the end of the marking cycle; we expect all
1108     // threads to have SATB queues with active set to true.
1109     satb_mq_set.set_active_all_threads(false, /* new active value */
1110                                        true /* expected_active */);
1111 
1112     if (VerifyDuringGC) {
1113       HandleMark hm;  // handle scope
1114       g1h->prepare_for_verify();
1115       Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
1116     }
1117     g1h->check_bitmaps("Remark End");
1118     assert(!restart_for_overflow(), "sanity");
1119     // Completely reset the marking state since marking completed
1120     set_non_marking_state();
1121   }
1122 
1123   // Expand the marking stack, if we have to and if we can.
1124   if (_markStack.should_expand()) {
1125     _markStack.expand();
1126   }
1127 
1128   // Statistics
1129   double now = os::elapsedTime();
1130   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1131   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1132   _remark_times.add((now - start) * 1000.0);
1133 
1134   g1p->record_concurrent_mark_remark_end();
1135 
1136   G1CMIsAliveClosure is_alive(g1h);
1137   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1138 }
1139 
1140 // Base class of the closures that finalize and verify the
1141 // liveness counting data.
1142 class CMCountDataClosureBase: public HeapRegionClosure {
1143 protected:
1144   G1CollectedHeap* _g1h;
1145   ConcurrentMark* _cm;
1146   CardTableModRefBS* _ct_bs;
1147 
1148   BitMap* _region_bm;
1149   BitMap* _card_bm;
1150 
1151   // Takes a region that's not empty (i.e., it has at least one
1152   // live object in it) and sets its corresponding bit on the region
1153   // bitmap to 1.
1154   void set_bit_for_region(HeapRegion* hr) {
1155     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1156     _region_bm->par_at_put(index, true);
1157   }
1158 
1159 public:
1160   CMCountDataClosureBase(G1CollectedHeap* g1h,
1161                          BitMap* region_bm, BitMap* card_bm):
1162     _g1h(g1h), _cm(g1h->concurrent_mark()),
1163     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
1164     _region_bm(region_bm), _card_bm(card_bm) { }
1165 };
1166 
1167 // Closure that calculates the amount of live (marked) data per region.
1168 // Used for verification purposes during the cleanup pause.
1169 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1170   CMBitMapRO* _bm;
1171   size_t _region_marked_bytes;
1172 
1173 public:
1174   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1175                          BitMap* region_bm, BitMap* card_bm) :
1176     CMCountDataClosureBase(g1h, region_bm, card_bm),
1177     _bm(bm), _region_marked_bytes(0) { }
1178 
1179   bool doHeapRegion(HeapRegion* hr) {
1180     HeapWord* ntams = hr->next_top_at_mark_start();
1181     HeapWord* start = hr->bottom();
1182 
1183     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1184            "Preconditions not met - "
1185            "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
1186            p2i(start), p2i(ntams), p2i(hr->end()));
1187 
1188     // Find the first marked object at or after "start".
1189     start = _bm->getNextMarkedWordAddress(start, ntams);
1190 
1191     size_t marked_bytes = 0;
1192 
1193     while (start < ntams) {
1194       oop obj = oop(start);
1195       int obj_sz = obj->size();
1196       HeapWord* obj_end = start + obj_sz;
1197 
1198       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1199       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1200 
1201       // Note: if we're looking at the last region in the heap, obj_end
1202       // could actually be just beyond the end of the heap; end_idx
1203       // will then correspond to a (non-existent) card that is also
1204       // just beyond the heap.
1205       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1206         // end of object is not card aligned - increment to cover
1207         // all the cards spanned by the object
1208         end_idx += 1;
1209       }
1210 
1211       // Set the bits in the card BM for the cards spanned by this object.
1212       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1213 
1214       // Add the size of this object to the number of marked bytes.
1215       marked_bytes += (size_t)obj_sz * HeapWordSize;
1216 
1217       // This will happen if we are handling a humongous object that spans
1218       // several heap regions.
1219       if (obj_end > hr->end()) {
1220         break;
1221       }
1222       // Find the next marked object after this one.
1223       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1224     }
1225 
1226     // Mark the allocated-since-marking portion...
1227     HeapWord* top = hr->top();
1228     if (ntams < top) {
1229       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1230       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1231 
1232       // Note: if we're looking at the last region in the heap, top
1233       // could actually be just beyond the end of the heap; end_idx
1234       // will then correspond to a (non-existent) card that is also
1235       // just beyond the heap.
1236       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1237         // end of object is not card aligned - increment to cover
1238         // all the cards spanned by the object
1239         end_idx += 1;
1240       }
1241       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1242 
1243       // This definitely means the region has live objects.
1244       set_bit_for_region(hr);
1245     }
1246 
1247     // Update the live region bitmap.
1248     if (marked_bytes > 0) {
1249       set_bit_for_region(hr);
1250     }
1251 
1252     // Set the marked bytes for the current region so that
1253     // it can be queried by a calling verification routine
1254     _region_marked_bytes = marked_bytes;
1255 
1256     return false;
1257   }
1258 
1259   size_t region_marked_bytes() const { return _region_marked_bytes; }
1260 };
1261 
1262 // Heap region closure used for verifying the counting data
1263 // that was accumulated concurrently and aggregated during
1264 // the remark pause. This closure is applied to the heap
1265 // regions during the STW cleanup pause.
1266 
1267 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1268   G1CollectedHeap* _g1h;
1269   ConcurrentMark* _cm;
1270   CalcLiveObjectsClosure _calc_cl;
1271   BitMap* _region_bm;   // Region BM to be verified
1272   BitMap* _card_bm;     // Card BM to be verified
1273 
1274   BitMap* _exp_region_bm; // Expected Region BM values
1275   BitMap* _exp_card_bm;   // Expected card BM values
1276 
1277   int _failures;
1278 
1279 public:
1280   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1281                                 BitMap* region_bm,
1282                                 BitMap* card_bm,
1283                                 BitMap* exp_region_bm,
1284                                 BitMap* exp_card_bm) :
1285     _g1h(g1h), _cm(g1h->concurrent_mark()),
1286     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1287     _region_bm(region_bm), _card_bm(card_bm),
1288     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1289     _failures(0) { }
1290 
1291   int failures() const { return _failures; }
1292 
1293   bool doHeapRegion(HeapRegion* hr) {
1294     int failures = 0;
1295 
1296     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1297     // this region and set the corresponding bits in the expected region
1298     // and card bitmaps.
1299     bool res = _calc_cl.doHeapRegion(hr);
1300     assert(res == false, "should be continuing");
1301 
1302     // Verify the marked bytes for this region.
1303     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1304     size_t act_marked_bytes = hr->next_marked_bytes();
1305 
1306     if (exp_marked_bytes > act_marked_bytes) {
1307       if (hr->is_starts_humongous()) {
1308         // For start_humongous regions, the size of the whole object will be
1309         // in exp_marked_bytes.
1310         HeapRegion* region = hr;
1311         int num_regions;
1312         for (num_regions = 0; region != NULL; num_regions++) {
1313           region = _g1h->next_region_in_humongous(region);
1314         }
1315         if ((num_regions-1) * HeapRegion::GrainBytes >= exp_marked_bytes) {
1316           failures += 1;
1317         } else if (num_regions * HeapRegion::GrainBytes < exp_marked_bytes) {
1318           failures += 1;
1319         }
1320       } else {
1321         // We're not OK if expected marked bytes > actual marked bytes. It means
1322         // we have missed accounting for some objects during the actual marking.
1323         failures += 1;
1324       }
1325     }
1326 
1327     // Verify the bit, for this region, in the actual and expected
1328     // (which was just calculated) region bit maps.
1329     // We're not OK if the bit in the calculated expected region
1330     // bitmap is set and the bit in the actual region bitmap is not.
1331     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1332 
1333     bool expected = _exp_region_bm->at(index);
1334     bool actual = _region_bm->at(index);
1335     if (expected && !actual) {
1336       failures += 1;
1337     }
1338 
1339     // Verify that the card bit maps for the cards spanned by the current
1340     // region match. We have an error if we have a set bit in the expected
1341     // bit map and the corresponding bit in the actual bitmap is not set.
1342 
1343     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1344     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1345 
1346     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1347       expected = _exp_card_bm->at(i);
1348       actual = _card_bm->at(i);
1349 
1350       if (expected && !actual) {
1351         failures += 1;
1352       }
1353     }
1354 
1355     _failures += failures;
1356 
1357     // We could stop iteration over the heap when we
1358     // find the first violating region by returning true.
1359     return false;
1360   }
1361 };
1362 
1363 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1364 protected:
1365   G1CollectedHeap* _g1h;
1366   ConcurrentMark* _cm;
1367   BitMap* _actual_region_bm;
1368   BitMap* _actual_card_bm;
1369 
1370   uint    _n_workers;
1371 
1372   BitMap* _expected_region_bm;
1373   BitMap* _expected_card_bm;
1374 
1375   int  _failures;
1376 
1377   HeapRegionClaimer _hrclaimer;
1378 
1379 public:
1380   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1381                             BitMap* region_bm, BitMap* card_bm,
1382                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1383     : AbstractGangTask("G1 verify final counting"),
1384       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1385       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1386       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1387       _failures(0),
1388       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1389     assert(VerifyDuringGC, "don't call this otherwise");
1390     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1391     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1392   }
1393 
1394   void work(uint worker_id) {
1395     assert(worker_id < _n_workers, "invariant");
1396 
1397     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1398                                             _actual_region_bm, _actual_card_bm,
1399                                             _expected_region_bm,
1400                                             _expected_card_bm);
1401 
1402     _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
1403 
1404     Atomic::add(verify_cl.failures(), &_failures);
1405   }
1406 
1407   int failures() const { return _failures; }
1408 };
1409 
1410 // Closure that finalizes the liveness counting data.
1411 // Used during the cleanup pause.
1412 // Sets the bits corresponding to the interval [NTAMS, top)
1413 // (which contains the implicitly live objects) in the
1414 // card liveness bitmap. Also sets, in the region liveness
1415 // bitmap, the bit for each region that contains live data.
1416 
1417 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1418  public:
1419   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1420                               BitMap* region_bm,
1421                               BitMap* card_bm) :
1422     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1423 
1424   bool doHeapRegion(HeapRegion* hr) {
1425     HeapWord* ntams = hr->next_top_at_mark_start();
1426     HeapWord* top   = hr->top();
1427 
1428     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1429 
1430     // Mark the allocated-since-marking portion...
1431     if (ntams < top) {
1432       // This definitely means the region has live objects.
1433       set_bit_for_region(hr);
1434 
1435       // Now set the bits in the card bitmap for [ntams, top)
1436       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1437       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1438 
1439       // Note: if we're looking at the last region in the heap, top
1440       // could be actually just beyond the end of the heap; end_idx
1441       // will then correspond to a (non-existent) card that is also
1442       // just beyond the heap.
1443       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1444         // end of object is not card aligned - increment to cover
1445         // all the cards spanned by the object
1446         end_idx += 1;
1447       }
1448 
1449       assert(end_idx <= _card_bm->size(),
1450              "oob: end_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1451              end_idx, _card_bm->size());
1452       assert(start_idx < _card_bm->size(),
1453              "oob: start_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1454              start_idx, _card_bm->size());
1455 
1456       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1457     }
1458 
1459     // Set the bit for the region if it contains live data
1460     if (hr->next_marked_bytes() > 0) {
1461       set_bit_for_region(hr);
1462     }
1463 
1464     return false;
1465   }
1466 };
1467 
1468 class G1ParFinalCountTask: public AbstractGangTask {
1469 protected:
1470   G1CollectedHeap* _g1h;
1471   ConcurrentMark* _cm;
1472   BitMap* _actual_region_bm;
1473   BitMap* _actual_card_bm;
1474 
1475   uint    _n_workers;
1476   HeapRegionClaimer _hrclaimer;
1477 
1478 public:
1479   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1480     : AbstractGangTask("G1 final counting"),
1481       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1482       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1483       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1484   }
1485 
1486   void work(uint worker_id) {
1487     assert(worker_id < _n_workers, "invariant");
1488 
1489     FinalCountDataUpdateClosure final_update_cl(_g1h,
1490                                                 _actual_region_bm,
1491                                                 _actual_card_bm);
1492 
1493     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1494   }
1495 };
1496 
1497 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1498   G1CollectedHeap* _g1;
1499   size_t _freed_bytes;
1500   FreeRegionList* _local_cleanup_list;
1501   uint _old_regions_removed;
1502   uint _humongous_regions_removed;
1503   HRRSCleanupTask* _hrrs_cleanup_task;
1504 
1505 public:
1506   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1507                              FreeRegionList* local_cleanup_list,
1508                              HRRSCleanupTask* hrrs_cleanup_task) :
1509     _g1(g1),
1510     _freed_bytes(0),
1511     _local_cleanup_list(local_cleanup_list),
1512     _old_regions_removed(0),
1513     _humongous_regions_removed(0),
1514     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1515 
1516   size_t freed_bytes() { return _freed_bytes; }
1517   uint old_regions_removed() const { return _old_regions_removed; }
1518   uint humongous_regions_removed() const { return _humongous_regions_removed; }
1519 
1520   bool doHeapRegion(HeapRegion *hr) {
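         // Archive regions are never reclaimed, so skip them here.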
1521     if (hr->is_archive()) {
1522       return false;
1523     }
1524     // We use a claim value of zero here because all regions
1525     // were claimed with value 1 in the FinalCount task.
1526     _g1->reset_gc_time_stamps(hr);
1527     hr->note_end_of_marking();
1528 
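         // A non-young region with no live data left after marking can be
         // reclaimed immediately.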
1529     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1530       _freed_bytes += hr->used();
1531       hr->set_containing_set(NULL);
1532       if (hr->is_humongous()) {
1533         _humongous_regions_removed++;
1534         _g1->free_humongous_region(hr, _local_cleanup_list, true /* par */);
1535       } else {
1536         _old_regions_removed++;
1537         _g1->free_region(hr, _local_cleanup_list, true /* par */);
1538       }
1539     } else {
1540       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1541     }
1542 
1543     return false;
1544   }
1545 };
1546 
1547 class G1ParNoteEndTask: public AbstractGangTask {
1548   friend class G1NoteEndOfConcMarkClosure;
1549 
1550 protected:
1551   G1CollectedHeap* _g1h;
1552   FreeRegionList* _cleanup_list;
1553   HeapRegionClaimer _hrclaimer;
1554 
1555 public:
1556   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1557       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1558   }
1559 
1560   void work(uint worker_id) {
1561     FreeRegionList local_cleanup_list("Local Cleanup List");
1562     HRRSCleanupTask hrrs_cleanup_task;
1563     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1564                                            &hrrs_cleanup_task);
1565     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1566     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1567 
1568     // Now update the lists
1569     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1570     {
1571       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1572       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1573 
1574       // If we iterate over the global cleanup list at the end of
1575       // cleanup to do this printing we will not guarantee to only
1576       // generate output for the newly-reclaimed regions (the list
1577       // might not be empty at the beginning of cleanup; we might
1578       // still be working on its previous contents). So we do the
1579       // printing here, before we append the new regions to the global
1580       // cleanup list.
1581 
1582       G1HRPrinter* hr_printer = _g1h->hr_printer();
1583       if (hr_printer->is_active()) {
1584         FreeRegionListIterator iter(&local_cleanup_list);
1585         while (iter.more_available()) {
1586           HeapRegion* hr = iter.get_next();
1587           hr_printer->cleanup(hr);
1588         }
1589       }
1590 
1591       _cleanup_list->add_ordered(&local_cleanup_list);
1592       assert(local_cleanup_list.is_empty(), "post-condition");
1593 
1594       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1595     }
1596   }
1597 };
1598 
1599 class G1ParScrubRemSetTask: public AbstractGangTask {
1600 protected:
1601   G1RemSet* _g1rs;
1602   BitMap* _region_bm;
1603   BitMap* _card_bm;
1604   HeapRegionClaimer _hrclaimer;
1605 
1606 public:
1607   G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
1608       AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) {
1609   }
1610 
1611   void work(uint worker_id) {
1612     _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
1613   }
1614 
1615 };
1616 
1617 void ConcurrentMark::cleanup() {
1618   // world is stopped at this checkpoint
1619   assert(SafepointSynchronize::is_at_safepoint(),
1620          "world should be stopped");
1621   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1622 
1623   // If a full collection has happened, we shouldn't do this.
1624   if (has_aborted()) {
1625     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1626     return;
1627   }
1628 
1629   g1h->verify_region_sets_optional();
1630 
1631   if (VerifyDuringGC) {
1632     HandleMark hm;  // handle scope
1633     g1h->prepare_for_verify();
1634     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1635   }
1636   g1h->check_bitmaps("Cleanup Start");
1637 
1638   G1CollectorPolicy* g1p = g1h->g1_policy();
1639   g1p->record_concurrent_mark_cleanup_start();
1640 
1641   double start = os::elapsedTime();
1642 
1643   HeapRegionRemSet::reset_for_cleanup_tasks();
1644 
1645   // Do counting once more with the world stopped for good measure.
1646   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1647 
1648   g1h->workers()->run_task(&g1_par_count_task);
1649 
1650   if (VerifyDuringGC) {
1651     // Verify that the counting data accumulated during marking matches
1652     // that calculated by walking the marking bitmap.
1653 
1654     // Bitmaps to hold expected values
1655     BitMap expected_region_bm(_region_bm.size(), true);
1656     BitMap expected_card_bm(_card_bm.size(), true);
1657 
1658     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1659                                                  &_region_bm,
1660                                                  &_card_bm,
1661                                                  &expected_region_bm,
1662                                                  &expected_card_bm);
1663 
1664     g1h->workers()->run_task(&g1_par_verify_task);
1665 
1666     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1667   }
1668 
1669   size_t start_used_bytes = g1h->used();
1670   g1h->collector_state()->set_mark_in_progress(false);
1671 
1672   double count_end = os::elapsedTime();
1673   double this_final_counting_time = (count_end - start);
1674   _total_counting_time += this_final_counting_time;
1675 
1676   if (log_is_enabled(Trace, gc, liveness)) {
1677     G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1678     _g1h->heap_region_iterate(&cl);
1679   }
1680 
1681   // Install newly created mark bitMap as "prev".
1682   swapMarkBitMaps();
1683 
1684   g1h->reset_gc_time_stamp();
1685 
1686   uint n_workers = _g1h->workers()->active_workers();
1687 
1688   // Note end of marking in all heap regions.
1689   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1690   g1h->workers()->run_task(&g1_par_note_end_task);
1691   g1h->check_gc_time_stamps();
1692 
1693   if (!cleanup_list_is_empty()) {
1694     // The cleanup list is not empty, so we'll have to process it
1695     // concurrently. Notify anyone else that might be wanting free
1696     // regions that there will be more free regions coming soon.
1697     g1h->set_free_regions_coming();
1698   }
1699 
1700   // Scrubbing must happen before the record_concurrent_mark_cleanup_end()
1701   // call below, since it affects the metric by which we sort the heap regions.
1702   if (G1ScrubRemSets) {
1703     double rs_scrub_start = os::elapsedTime();
1704     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
1705     g1h->workers()->run_task(&g1_par_scrub_rs_task);
1706 
1707     double rs_scrub_end = os::elapsedTime();
1708     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1709     _total_rs_scrub_time += this_rs_scrub_time;
1710   }
1711 
1712   // this will also free any regions totally full of garbage objects,
1713   // and sort the regions.
1714   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1715 
1716   // Statistics.
1717   double end = os::elapsedTime();
1718   _cleanup_times.add((end - start) * 1000.0);
1719 
1720   // Clean up will have freed any regions completely full of garbage.
1721   // Update the soft reference policy with the new heap occupancy.
1722   Universe::update_heap_info_at_gc();
1723 
1724   if (VerifyDuringGC) {
1725     HandleMark hm;  // handle scope
1726     g1h->prepare_for_verify();
1727     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
1728   }
1729 
1730   g1h->check_bitmaps("Cleanup End");
1731 
1732   g1h->verify_region_sets_optional();
1733 
1734   // We need to make this be a "collection" so any collection pause that
1735   // races with it goes around and waits for completeCleanup to finish.
1736   g1h->increment_total_collections();
1737 
1738   // Clean out dead classes and update Metaspace sizes.
1739   if (ClassUnloadingWithConcurrentMark) {
1740     ClassLoaderDataGraph::purge();
1741   }
1742   MetaspaceGC::compute_new_size();
1743 
1744   // We reclaimed old regions so we should calculate the sizes to make
1745   // sure we update the old gen/space data.
1746   g1h->g1mm()->update_sizes();
1747   g1h->allocation_context_stats().update_after_mark();
1748 
1749   g1h->trace_heap_after_concurrent_cycle();
1750 }
1751 
1752 void ConcurrentMark::completeCleanup() {
1753   if (has_aborted()) return;
1754 
1755   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1756 
1757   _cleanup_list.verify_optional();
1758   FreeRegionList tmp_free_list("Tmp Free List");
1759 
1760   log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1761                                   "cleanup list has %u entries",
1762                                   _cleanup_list.length());
1763 
1764   // No one else should be accessing the _cleanup_list at this point,
1765   // so it is not necessary to take any locks
1766   while (!_cleanup_list.is_empty()) {
1767     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1768     assert(hr != NULL, "Got NULL from a non-empty list");
1769     hr->par_clear();
1770     tmp_free_list.add_ordered(hr);
1771 
1772     // Instead of adding one region at a time to the secondary_free_list,
1773     // we accumulate them in the local list and move them a few at a
1774     // time. This also cuts down on the number of notify_all() calls
1775     // we do during this process. We'll also append the local list when
1776     // _cleanup_list is empty (which means we just removed the last
1777     // region from the _cleanup_list).
1778     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1779         _cleanup_list.is_empty()) {
1780       log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1781                                       "appending %u entries to the secondary_free_list, "
1782                                       "cleanup list still has %u entries",
1783                                       tmp_free_list.length(),
1784                                       _cleanup_list.length());
1785 
1786       {
1787         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1788         g1h->secondary_free_list_add(&tmp_free_list);
1789         SecondaryFreeList_lock->notify_all();
1790       }
1791 #ifndef PRODUCT
1792       if (G1StressConcRegionFreeing) {
1793         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1794           os::sleep(Thread::current(), (jlong) 1, false);
1795         }
1796       }
1797 #endif
1798     }
1799   }
1800   assert(tmp_free_list.is_empty(), "post-condition");
1801 }
1802 
1803 // Supporting Object and Oop closures for reference discovery
1804 // and processing during marking.
1805 
1806 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1807   HeapWord* addr = (HeapWord*)obj;
1808   return addr != NULL &&
1809          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1810 }
1811 
1812 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1813 // Uses the CMTask associated with a worker thread (for serial reference
1814 // processing the CMTask for worker 0 is used) to preserve (mark) and
1815 // trace referent objects.
1816 //
1817 // Using the CMTask and embedded local queues avoids having the worker
1818 // threads operating on the global mark stack. This reduces the risk
1819 // of overflowing the stack - which we would rather avoid at this late
1820 // stage. Also, using the tasks' local queues removes the potential
1821 // for the workers to interfere with each other, which could occur if
1822 // they operated on the global stack directly.
1823 
1824 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1825   ConcurrentMark* _cm;
1826   CMTask*         _task;
1827   int             _ref_counter_limit;
1828   int             _ref_counter;
1829   bool            _is_serial;
1830  public:
1831   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
1832     _cm(cm), _task(task), _is_serial(is_serial),
1833     _ref_counter_limit(G1RefProcDrainInterval) {
1834     assert(_ref_counter_limit > 0, "sanity");
1835     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1836     _ref_counter = _ref_counter_limit;
1837   }
1838 
1839   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1840   virtual void do_oop(      oop* p) { do_oop_work(p); }
1841 
1842   template <class T> void do_oop_work(T* p) {
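         // If the global marking stack has overflown, marking will be
         // restarted anyway, so there is no point in dealing with more
         // references here.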
1843     if (!_cm->has_overflown()) {
1844       oop obj = oopDesc::load_decode_heap_oop(p);
1845       _task->deal_with_reference(obj);
1846       _ref_counter--;
1847 
1848       if (_ref_counter == 0) {
1849         // We have dealt with _ref_counter_limit references, pushing them
1850         // and objects reachable from them on to the local stack (and
1851         // possibly the global stack). Call CMTask::do_marking_step() to
1852         // process these entries.
1853         //
1854         // We call CMTask::do_marking_step() in a loop, which we'll exit if
1855         // there's nothing more to do (i.e. we're done with the entries that
1856         // were pushed as a result of the CMTask::deal_with_reference() calls
1857         // above) or we overflow.
1858         //
1859         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
1860         // flag while there may still be some work to do. (See the comment at
1861         // the beginning of CMTask::do_marking_step() for those conditions -
1862         // one of which is reaching the specified time target.) It is only
1863         // when CMTask::do_marking_step() returns without setting the
1864         // has_aborted() flag that the marking step has completed.
1865         do {
1866           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1867           _task->do_marking_step(mark_step_duration_ms,
1868                                  false      /* do_termination */,
1869                                  _is_serial);
1870         } while (_task->has_aborted() && !_cm->has_overflown());
1871         _ref_counter = _ref_counter_limit;
1872       }
1873     }
1874   }
1875 };
1876 
1877 // 'Drain' oop closure used by both serial and parallel reference processing.
1878 // Uses the CMTask associated with a given worker thread (for serial
1879 // reference processing the CMTask for worker 0 is used). Calls the
1880 // do_marking_step routine, with an unbelievably large timeout value,
1881 // to drain the marking data structures of the remaining entries
1882 // added by the 'keep alive' oop closure above.
1883 
1884 class G1CMDrainMarkingStackClosure: public VoidClosure {
1885   ConcurrentMark* _cm;
1886   CMTask*         _task;
1887   bool            _is_serial;
1888  public:
1889   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
1890     _cm(cm), _task(task), _is_serial(is_serial) {
1891     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1892   }
1893 
1894   void do_void() {
1895     do {
1896       // We call CMTask::do_marking_step() to completely drain the local
1897       // and global marking stacks of entries pushed by the 'keep alive'
1898       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1899       //
1900       // CMTask::do_marking_step() is called in a loop, which we'll exit
1901       // if there's nothing more to do (i.e. we've completely drained the
1902       // entries that were pushed as a result of applying the 'keep alive'
1903       // closure to the entries on the discovered ref lists) or we overflow
1904       // the global marking stack.
1905       //
1906       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
1907       // flag while there may still be some work to do. (See the comment at
1908       // the beginning of CMTask::do_marking_step() for those conditions -
1909       // one of which is reaching the specified time target.) It is only
1910       // when CMTask::do_marking_step() returns without setting the
1911       // has_aborted() flag that the marking step has completed.
1912 
1913       _task->do_marking_step(1000000000.0 /* something very large */,
1914                              true         /* do_termination */,
1915                              _is_serial);
1916     } while (_task->has_aborted() && !_cm->has_overflown());
1917   }
1918 };
1919 
1920 // Implementation of AbstractRefProcTaskExecutor for parallel
1921 // reference processing at the end of G1 concurrent marking
1922 
1923 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
1924 private:
1925   G1CollectedHeap* _g1h;
1926   ConcurrentMark*  _cm;
1927   WorkGang*        _workers;
1928   uint             _active_workers;
1929 
1930 public:
1931   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1932                           ConcurrentMark* cm,
1933                           WorkGang* workers,
1934                           uint n_workers) :
1935     _g1h(g1h), _cm(cm),
1936     _workers(workers), _active_workers(n_workers) { }
1937 
1938   // Executes the given task using concurrent marking worker threads.
1939   virtual void execute(ProcessTask& task);
1940   virtual void execute(EnqueueTask& task);
1941 };
1942 
1943 class G1CMRefProcTaskProxy: public AbstractGangTask {
1944   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1945   ProcessTask&     _proc_task;
1946   G1CollectedHeap* _g1h;
1947   ConcurrentMark*  _cm;
1948 
1949 public:
1950   G1CMRefProcTaskProxy(ProcessTask& proc_task,
1951                      G1CollectedHeap* g1h,
1952                      ConcurrentMark* cm) :
1953     AbstractGangTask("Process reference objects in parallel"),
1954     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1955     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1956     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1957   }
1958 
1959   virtual void work(uint worker_id) {
1960     ResourceMark rm;
1961     HandleMark hm;
1962     CMTask* task = _cm->task(worker_id);
1963     G1CMIsAliveClosure g1_is_alive(_g1h);
1964     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1965     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1966 
1967     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1968   }
1969 };
1970 
1971 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
1972   assert(_workers != NULL, "Need parallel worker threads.");
1973   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1974 
1975   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1976 
1977   // We need to reset the concurrency level before each
1978   // proxy task execution, so that the termination protocol
1979   // and overflow handling in CMTask::do_marking_step() knows
1980   // how many workers to wait for.
1981   _cm->set_concurrency(_active_workers);
1982   _workers->run_task(&proc_task_proxy);
1983 }
1984 
1985 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
1986   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
1987   EnqueueTask& _enq_task;
1988 
1989 public:
1990   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
1991     AbstractGangTask("Enqueue reference objects in parallel"),
1992     _enq_task(enq_task) { }
1993 
1994   virtual void work(uint worker_id) {
1995     _enq_task.work(worker_id);
1996   }
1997 };
1998 
1999 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2000   assert(_workers != NULL, "Need parallel worker threads.");
2001   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2002 
2003   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2004 
2005   // Not strictly necessary but...
2006   //
2007   // We need to reset the concurrency level before each
2008   // proxy task execution, so that the termination protocol
2009   // and overflow handling in CMTask::do_marking_step() knows
2010   // how many workers to wait for.
2011   _cm->set_concurrency(_active_workers);
2012   _workers->run_task(&enq_task_proxy);
2013 }
2014 
2015 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2016   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2017 }
2018 
2019 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2020   if (has_overflown()) {
2021     // Skip processing the discovered references if we have
2022     // overflown the global marking stack. Reference objects
2023     // only get discovered once so it is OK to not
2024     // de-populate the discovered reference lists. We could have,
2025     // but the only benefit would be that, when marking restarts,
2026     // fewer reference objects are discovered.
2027     return;
2028   }
2029 
2030   ResourceMark rm;
2031   HandleMark   hm;
2032 
2033   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2034 
2035   // Is alive closure.
2036   G1CMIsAliveClosure g1_is_alive(g1h);
2037 
2038   // Inner scope to exclude the cleaning of the string and symbol
2039   // tables from the displayed time.
2040   {
2041     GCTraceTime(Debug, gc) trace("GC Ref Proc", g1h->gc_timer_cm());
2042 
2043     ReferenceProcessor* rp = g1h->ref_processor_cm();
2044 
2045     // See the comment in G1CollectedHeap::ref_processing_init()
2046     // about how reference processing currently works in G1.
2047 
2048     // Set the soft reference policy
2049     rp->setup_policy(clear_all_soft_refs);
2050     assert(_markStack.isEmpty(), "mark stack should be empty");
2051 
2052     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2053     // in serial reference processing. Note these closures are also
2054     // used for serially processing (by the current thread) the
2055     // JNI references during parallel reference processing.
2056     //
2057     // These closures do not need to synchronize with the worker
2058     // threads involved in parallel reference processing as these
2059     // instances are executed serially by the current thread (e.g.
2060     // reference processing is not multi-threaded and is thus
2061     // performed by the current thread instead of a gang worker).
2062     //
2063     // The gang tasks involved in parallel reference processing create
2064     // their own instances of these closures, which do their own
2065     // synchronization among themselves.
2066     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2067     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2068 
2069     // We need at least one active thread. If reference processing
2070     // is not multi-threaded we use the current (VMThread) thread,
2071     // otherwise we use the work gang from the G1CollectedHeap and
2072     // we utilize all the worker threads we can.
2073     bool processing_is_mt = rp->processing_is_mt();
2074     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2075     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2076 
2077     // Parallel processing task executor.
2078     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2079                                               g1h->workers(), active_workers);
2080     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2081 
2082     // Set the concurrency level. The phase was already set prior to
2083     // executing the remark task.
2084     set_concurrency(active_workers);
2085 
2086     // Set the degree of MT processing here.  If the discovery was done MT,
2087     // the number of threads involved during discovery could differ from
2088     // the number of active workers.  This is OK as long as the discovered
2089     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2090     rp->set_active_mt_degree(active_workers);
2091 
2092     // Process the weak references.
2093     const ReferenceProcessorStats& stats =
2094         rp->process_discovered_references(&g1_is_alive,
2095                                           &g1_keep_alive,
2096                                           &g1_drain_mark_stack,
2097                                           executor,
2098                                           g1h->gc_timer_cm());
2099     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2100 
2101     // The do_oop work routines of the keep_alive and drain_marking_stack
2102     // oop closures will set the has_overflown flag if we overflow the
2103     // global marking stack.
2104 
2105     assert(_markStack.overflow() || _markStack.isEmpty(),
2106             "mark stack should be empty (unless it overflowed)");
2107 
2108     if (_markStack.overflow()) {
2109       // This should have been done already when we tried to push an
2110       // entry on to the global mark stack. But let's do it again.
2111       set_has_overflown();
2112     }
2113 
2114     assert(rp->num_q() == active_workers, "Expected the number of reference queues to match the number of active workers");
2115 
2116     rp->enqueue_discovered_references(executor);
2117 
2118     rp->verify_no_references_recorded();
2119     assert(!rp->discovery_enabled(), "Post condition");
2120   }
2121 
2122   if (has_overflown()) {
2123     // We can not trust g1_is_alive if the marking stack overflowed
2124     return;
2125   }
2126 
2127   assert(_markStack.isEmpty(), "Marking should have completed");
2128 
2129   // Unload Klasses, String, Symbols, Code Cache, etc.
2130   {
2131     GCTraceTime(Debug, gc) trace("Unloading", g1h->gc_timer_cm());
2132 
2133     if (ClassUnloadingWithConcurrentMark) {
2134       bool purged_classes;
2135 
2136       {
2137         GCTraceTime(Trace, gc) trace("System Dictionary Unloading", g1h->gc_timer_cm());
2138         purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2139       }
2140 
2141       {
2142         GCTraceTime(Trace, gc) trace("Parallel Unloading", g1h->gc_timer_cm());
2143         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2144       }
2145     }
2146 
2147     if (G1StringDedup::is_enabled()) {
2148       GCTraceTime(Trace, gc) trace("String Deduplication Unlink", g1h->gc_timer_cm());
2149       G1StringDedup::unlink(&g1_is_alive);
2150     }
2151   }
2152 }
2153 
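     // Exchange the roles of the prev and next mark bitmaps: the bitmap we have
     // just finished marking into becomes the "prev" bitmap for the next cycle.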
2154 void ConcurrentMark::swapMarkBitMaps() {
2155   CMBitMapRO* temp = _prevMarkBitMap;
2156   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2157   _nextMarkBitMap  = (CMBitMap*)  temp;
2158 }
2159 
2160 // Closure for marking entries in SATB buffers.
2161 class CMSATBBufferClosure : public SATBBufferClosure {
2162 private:
2163   CMTask* _task;
2164   G1CollectedHeap* _g1h;
2165 
2166   // This is very similar to CMTask::deal_with_reference, but with
2167   // more relaxed requirements for the argument, so this must be more
2168   // circumspect about treating the argument as an object.
2169   void do_entry(void* entry) const {
2170     _task->increment_refs_reached();
2171     HeapRegion* hr = _g1h->heap_region_containing(entry);
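         // Entries at or above NTAMS are implicitly live and do not need
         // to be marked explicitly.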
2172     if (entry < hr->next_top_at_mark_start()) {
2173       // Until we get here, we don't know whether entry refers to a valid
2174       // object; it could instead have been a stale reference.
2175       oop obj = static_cast<oop>(entry);
2176       assert(obj->is_oop(true /* ignore mark word */),
2177              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2178       _task->make_reference_grey(obj, hr);
2179     }
2180   }
2181 
2182 public:
2183   CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2184     : _task(task), _g1h(g1h) { }
2185 
2186   virtual void do_buffer(void** buffer, size_t size) {
2187     for (size_t i = 0; i < size; ++i) {
2188       do_entry(buffer[i]);
2189     }
2190   }
2191 };
2192 
2193 class G1RemarkThreadsClosure : public ThreadClosure {
2194   CMSATBBufferClosure _cm_satb_cl;
2195   G1CMOopClosure _cm_cl;
2196   MarkingCodeBlobClosure _code_cl;
2197   int _thread_parity;
2198 
2199  public:
2200   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2201     _cm_satb_cl(task, g1h),
2202     _cm_cl(g1h, g1h->concurrent_mark(), task),
2203     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2204     _thread_parity(Threads::thread_claim_parity()) {}
2205 
2206   void do_thread(Thread* thread) {
2207     if (thread->is_Java_thread()) {
2208       if (thread->claim_oops_do(true, _thread_parity)) {
2209         JavaThread* jt = (JavaThread*)thread;
2210 
2211         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2212         // however, oops reachable from nmethods have very complex lifecycles:
2213         // * Alive if on the stack of an executing method
2214         // * Weakly reachable otherwise
2215         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2216         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2217         jt->nmethods_do(&_code_cl);
2218 
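             // Drain and process the entries in this thread's local SATB buffer.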
2219         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2220       }
2221     } else if (thread->is_VM_thread()) {
2222       if (thread->claim_oops_do(true, _thread_parity)) {
2223         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2224       }
2225     }
2226   }
2227 };
2228 
2229 class CMRemarkTask: public AbstractGangTask {
2230 private:
2231   ConcurrentMark* _cm;
2232 public:
2233   void work(uint worker_id) {
2234     // Since all available tasks are actually started, we should
2235     // only proceed if we're supposed to be active.
2236     if (worker_id < _cm->active_tasks()) {
2237       CMTask* task = _cm->task(worker_id);
2238       task->record_start_time();
2239       {
2240         ResourceMark rm;
2241         HandleMark hm;
2242 
2243         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2244         Threads::threads_do(&threads_f);
2245       }
2246 
2247       do {
2248         task->do_marking_step(1000000000.0 /* something very large */,
2249                               true         /* do_termination       */,
2250                               false        /* is_serial            */);
2251       } while (task->has_aborted() && !_cm->has_overflown());
2252       // If we overflow, then we do not want to restart. We instead
2253       // want to abort remark and do concurrent marking again.
2254       task->record_end_time();
2255     }
2256   }
2257 
2258   CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2259     AbstractGangTask("Par Remark"), _cm(cm) {
2260     _cm->terminator()->reset_for_reuse(active_workers);
2261   }
2262 };
2263 
2264 void ConcurrentMark::checkpointRootsFinalWork() {
2265   ResourceMark rm;
2266   HandleMark   hm;
2267   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2268 
2269   GCTraceTime(Debug, gc) trace("Finalize Marking", g1h->gc_timer_cm());
2270 
2271   g1h->ensure_parsability(false);
2272 
2273   // this is remark, so we'll use up all active threads
2274   uint active_workers = g1h->workers()->active_workers();
2275   set_concurrency_and_phase(active_workers, false /* concurrent */);
2276   // Leave _parallel_marking_threads at its
2277   // value originally calculated in the ConcurrentMark
2278   // constructor and pass the values of the active workers
2279   // through the gang in the task.
2280 
2281   {
2282     StrongRootsScope srs(active_workers);
2283 
2284     CMRemarkTask remarkTask(this, active_workers);
2285     // We will start all available threads, even if we decide that the
2286     // active_workers will be fewer. The extra ones will just bail out
2287     // immediately.
2288     g1h->workers()->run_task(&remarkTask);
2289   }
2290 
2291   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2292   guarantee(has_overflown() ||
2293             satb_mq_set.completed_buffers_num() == 0,
2294             "Invariant: has_overflown = %s, num buffers = %d",
2295             BOOL_TO_STR(has_overflown()),
2296             satb_mq_set.completed_buffers_num());
2297 
2298   print_stats();
2299 }
2300 
2301 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2302   // Note we are overriding the read-only view of the prev map here, via
2303   // the cast.
2304   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2305 }
2306 
2307 HeapRegion*
2308 ConcurrentMark::claim_region(uint worker_id) {
2309   // "checkpoint" the finger
2310   HeapWord* finger = _finger;
2311 
2312   // _heap_end will not change underneath our feet; it only changes at
2313   // yield points.
2314   while (finger < _heap_end) {
2315     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2316 
2317     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2318 
2319     // Above, heap_region_containing may return NULL as we always claim
2320     // up to the end of the heap. In this case, just jump to the next region.
2321     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2322 
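         // Try to claim [finger, end) by CAS-ing the global finger forward to end.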
2323     // Is the gap between reading the finger and doing the CAS too long?
2324     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2325     if (res == finger && curr_region != NULL) {
2326       // we succeeded
2327       HeapWord*   bottom        = curr_region->bottom();
2328       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2329 
2330       // notice that _finger == end cannot be guaranteed here since
2331       // someone else might have moved the finger even further
2332       assert(_finger >= end, "the finger should have moved forward");
2333 
2334       if (limit > bottom) {
2335         return curr_region;
2336       } else {
2337         assert(limit == bottom,
2338                "the region limit should be at bottom");
2339         // we return NULL and the caller should try calling
2340         // claim_region() again.
2341         return NULL;
2342       }
2343     } else {
2344       assert(_finger > finger, "the finger should have moved forward");
2345       // read it again
2346       finger = _finger;
2347     }
2348   }
2349 
2350   return NULL;
2351 }
2352 
2353 #ifndef PRODUCT
2354 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
2355 private:
2356   G1CollectedHeap* _g1h;
2357   const char* _phase;
2358   int _info;
2359 
2360 public:
2361   VerifyNoCSetOops(const char* phase, int info = -1) :
2362     _g1h(G1CollectedHeap::heap()),
2363     _phase(phase),
2364     _info(info)
2365   { }
2366 
2367   void operator()(oop obj) const {
2368     guarantee(obj->is_oop(),
2369               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2370               p2i(obj), _phase, _info);
2371     guarantee(!_g1h->obj_in_cs(obj),
2372               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2373               p2i(obj), _phase, _info);
2374   }
2375 };
2376 
2377 void ConcurrentMark::verify_no_cset_oops() {
2378   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2379   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2380     return;
2381   }
2382 
2383   // Verify entries on the global mark stack
2384   _markStack.iterate(VerifyNoCSetOops("Stack"));
2385 
2386   // Verify entries on the task queues
2387   for (uint i = 0; i < _max_worker_id; ++i) {
2388     CMTaskQueue* queue = _task_queues->queue(i);
2389     queue->iterate(VerifyNoCSetOops("Queue", i));
2390   }
2391 
2392   // Verify the global finger
2393   HeapWord* global_finger = finger();
2394   if (global_finger != NULL && global_finger < _heap_end) {
2395     // Since we always iterate over all regions, we might get a NULL HeapRegion
2396     // here.
2397     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
2398     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2399               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
2400               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
2401   }
2402 
2403   // Verify the task fingers
2404   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2405   for (uint i = 0; i < parallel_marking_threads(); ++i) {
2406     CMTask* task = _tasks[i];
2407     HeapWord* task_finger = task->finger();
2408     if (task_finger != NULL && task_finger < _heap_end) {
2409       // See above note on the global finger verification.
2410       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
2411       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2412                 !task_hr->in_collection_set(),
2413                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2414                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
2415     }
2416   }
2417 }
2418 #endif // PRODUCT
2419 
2420 // Aggregate the counting data that was constructed concurrently
2421 // with marking.
2422 class AggregateCountDataHRClosure: public HeapRegionClosure {
2423   G1CollectedHeap* _g1h;
2424   ConcurrentMark* _cm;
2425   CardTableModRefBS* _ct_bs;
2426   BitMap* _cm_card_bm;
2427   uint _max_worker_id;
2428 
2429  public:
2430   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2431                               BitMap* cm_card_bm,
2432                               uint max_worker_id) :
2433     _g1h(g1h), _cm(g1h->concurrent_mark()),
2434     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2435     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2436 
2437   bool doHeapRegion(HeapRegion* hr) {
2438     HeapWord* start = hr->bottom();
2439     HeapWord* limit = hr->next_top_at_mark_start();
2440     HeapWord* end = hr->end();
2441 
2442     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2443            "Preconditions not met - "
2444            "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2445            "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2446            p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2447 
2448     assert(hr->next_marked_bytes() == 0, "Precondition");
2449 
2450     if (start == limit) {
2451       // NTAMS of this region has not been set so nothing to do.
2452       return false;
2453     }
2454 
2455     // 'start' should be in the heap.
2456     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2457     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2458     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2459 
2460     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2461     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2462     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2463 
2464     // If ntams is not card aligned then we bump the card bitmap index
2465     // for limit so that we get all the cards spanned by
2466     // the object ending at ntams.
2467     // Note: if this is the last region in the heap then ntams
2468     // could be actually just beyond the end of the heap;
2469     // limit_idx will then correspond to a (non-existent) card
2470     // that is also outside the heap.
2471     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2472       limit_idx += 1;
2473     }
2474 
2475     assert(limit_idx <= end_idx, "or else use atomics");
2476 
2477     // Aggregate the "stripe" in the count data associated with hr.
2478     uint hrm_index = hr->hrm_index();
2479     size_t marked_bytes = 0;
2480 
2481     for (uint i = 0; i < _max_worker_id; i += 1) {
2482       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2483       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2484 
2485       // Fetch the marked_bytes in this region for task i and
2486       // add it to the running total for this region.
2487       marked_bytes += marked_bytes_array[hrm_index];
2488 
2489       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2490       // into the global card bitmap.
2491       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2492 
2493       while (scan_idx < limit_idx) {
2494         assert(task_card_bm->at(scan_idx) == true, "should be");
2495         _cm_card_bm->set_bit(scan_idx);
2496         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2497 
2498         // BitMap::get_next_one_offset() can handle the case when
2499         // its left_offset parameter is greater than its right_offset
2500         // parameter. It does, however, have an early exit if
2501         // left_offset == right_offset. So let's limit the value
2502         // passed in for left offset here.
2503         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2504         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2505       }
2506     }
2507 
2508     // Update the marked bytes for this region.
2509     hr->add_to_marked_bytes(marked_bytes);
2510 
2511     // Next heap region
2512     return false;
2513   }
2514 };
2515 
2516 class G1AggregateCountDataTask: public AbstractGangTask {
2517 protected:
2518   G1CollectedHeap* _g1h;
2519   ConcurrentMark* _cm;
2520   BitMap* _cm_card_bm;
2521   uint _max_worker_id;
2522   uint _active_workers;
2523   HeapRegionClaimer _hrclaimer;
2524 
2525 public:
2526   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2527                            ConcurrentMark* cm,
2528                            BitMap* cm_card_bm,
2529                            uint max_worker_id,
2530                            uint n_workers) :
2531       AbstractGangTask("Count Aggregation"),
2532       _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2533       _max_worker_id(max_worker_id),
2534       _active_workers(n_workers),
2535       _hrclaimer(_active_workers) {
2536   }
2537 
2538   void work(uint worker_id) {
2539     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
2540 
2541     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
2542   }
2543 };
2544 
2545 
2546 void ConcurrentMark::aggregate_count_data() {
2547   uint n_workers = _g1h->workers()->active_workers();
2548 
2549   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
2550                                            _max_worker_id, n_workers);
2551 
2552   _g1h->workers()->run_task(&g1_par_agg_task);
2553 }
2554 
2555 // Clear the per-worker arrays used to store the per-region counting data
2556 void ConcurrentMark::clear_all_count_data() {
2557   // Clear the global card bitmap - it will be filled during
2558   // liveness count aggregation (during remark) and the
2559   // final counting task.
2560   _card_bm.clear();
2561 
2562   // Clear the global region bitmap - it will be filled as part
2563   // of the final counting task.
2564   _region_bm.clear();
2565 
2566   uint max_regions = _g1h->max_regions();
2567   assert(_max_worker_id > 0, "uninitialized");
2568 
2569   for (uint i = 0; i < _max_worker_id; i += 1) {
2570     BitMap* task_card_bm = count_card_bitmap_for(i);
2571     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
2572 
2573     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
2574     assert(marked_bytes_array != NULL, "uninitialized");
2575 
2576     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
2577     task_card_bm->clear();
2578   }
2579 }
2580 
2581 void ConcurrentMark::print_stats() {
2582   if (!log_is_enabled(Debug, gc, stats)) {
2583     return;
2584   }
2585   log_debug(gc, stats)("---------------------------------------------------------------------");
2586   for (size_t i = 0; i < _active_tasks; ++i) {
2587     _tasks[i]->print_stats();
2588     log_debug(gc, stats)("---------------------------------------------------------------------");
2589   }
2590 }
2591 
2592 // abandon current marking iteration due to a Full GC
2593 void ConcurrentMark::abort() {
2594   if (!cmThread()->during_cycle() || _has_aborted) {
2595     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2596     return;
2597   }
2598 
2599   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2600   // concurrent bitmap clearing.
2601   _nextMarkBitMap->clearAll();
2602 
2603   // Note we cannot clear the previous marking bitmap here
2604   // since VerifyDuringGC verifies the objects marked during
2605   // a full GC against the previous bitmap.
2606 
2607   // Clear the liveness counting data
2608   clear_all_count_data();
2609   // Empty mark stack
2610   reset_marking_state();
2611   for (uint i = 0; i < _max_worker_id; ++i) {
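         // Scrub the remembered sets of the regions claimed by this worker, using
         // the region and card liveness bitmaps to remove entries for cards that
         // no longer contain any live data.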
2612     _tasks[i]->clear_region_fields();
2613   }
2614   _first_overflow_barrier_sync.abort();
2615   _second_overflow_barrier_sync.abort();
2616   _has_aborted = true;
2617 
2618   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2619   satb_mq_set.abandon_partial_marking();
2620   // This can be called either during or outside marking, we'll read
2621   // the expected_active value from the SATB queue set.
2622   satb_mq_set.set_active_all_threads(
2623                                  false, /* new active value */
2624                                  satb_mq_set.is_active() /* expected_active */);
2625 
2626   _g1h->trace_heap_after_concurrent_cycle();
2627 
2628   // Close any open concurrent phase timing
2629   register_concurrent_phase_end();
2630 
2631   _g1h->register_concurrent_cycle_end();
2632 }
2633 
2634 static void print_ms_time_info(const char* prefix, const char* name,
2635                                NumberSeq& ns) {
2636   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2637                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2638   if (ns.num() > 0) {
2639     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2640                            prefix, ns.sd(), ns.maximum());
2641   }
2642 }
2643 
2644 void ConcurrentMark::print_summary_info() {
2645   LogHandle(gc, marking) log;
2646   if (!log.is_trace()) {
2647     return;
2648   }
2649 
2650   log.trace(" Concurrent marking:");
2651   print_ms_time_info("  ", "init marks", _init_times);
2652   print_ms_time_info("  ", "remarks", _remark_times);
2653   {
2654     print_ms_time_info("     ", "final marks", _remark_mark_times);
2655     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2656 
2657   }
2658   print_ms_time_info("  ", "cleanups", _cleanup_times);
2659   log.trace("    Final counting total time = %8.2f s (avg = %8.2f ms).",
2660             _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2661   if (G1ScrubRemSets) {
2662     log.trace("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2663               _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2664   }
2665   log.trace("  Total stop_world time = %8.2f s.",
2666             (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
2667   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2668             cmThread()->vtime_accum(), cmThread()->vtime_mark_accum());
2669 }
2670 
2671 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2672   _parallel_workers->print_worker_threads_on(st);
2673 }
2674 
2675 void ConcurrentMark::print_on_error(outputStream* st) const {
2676   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2677       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
2678   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
2679   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
2680 }
2681 
2682 // We take a break if someone is trying to stop the world.
2683 bool ConcurrentMark::do_yield_check(uint worker_id) {
2684   if (SuspendibleThreadSet::should_yield()) {
2685     if (worker_id == 0) {
2686       _g1h->g1_policy()->record_concurrent_pause();
2687     }
2688     SuspendibleThreadSet::yield();
2689     return true;
2690   } else {
2691     return false;
2692   }
2693 }
2694 
2695 // Closure for iteration over bitmaps
2696 class CMBitMapClosure : public BitMapClosure {
2697 private:
2698   // the bitmap that is being iterated over
2699   CMBitMap*                   _nextMarkBitMap;
2700   ConcurrentMark*             _cm;
2701   CMTask*                     _task;
2702 
2703 public:
2704   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
2705     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2706 
2707   bool do_bit(size_t offset) {
2708     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2709     assert(_nextMarkBitMap->isMarked(addr), "invariant");
2710     assert( addr < _cm->finger(), "invariant");
2711     assert(addr >= _task->finger(), "invariant");
2712 
2713     // We move that task's local finger along.
2714     _task->move_finger_to(addr);
2715 
2716     _task->scan_object(oop(addr));
2717     // we only partially drain the local queue and global stack
2718     _task->drain_local_queue(true);
2719     _task->drain_global_stack(true);
2720 
2721     // if the has_aborted flag has been raised, we need to bail out of
2722     // the iteration
2723     return !_task->has_aborted();
2724   }
2725 };
2726 
2727 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2728   ReferenceProcessor* result = NULL;
2729   if (G1UseConcMarkReferenceProcessing) {
2730     result = g1h->ref_processor_cm();
2731     assert(result != NULL, "should not be NULL");
2732   }
2733   return result;
2734 }
2735 
2736 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2737                                ConcurrentMark* cm,
2738                                CMTask* task)
2739   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2740     _g1h(g1h), _cm(cm), _task(task)
2741 { }
2742 
2743 void CMTask::setup_for_region(HeapRegion* hr) {
2744   assert(hr != NULL,
2745         "claim_region() should have filtered out NULL regions");
2746   _curr_region  = hr;
2747   _finger       = hr->bottom();
2748   update_region_limit();
2749 }
2750 
2751 void CMTask::update_region_limit() {
2752   HeapRegion* hr            = _curr_region;
2753   HeapWord* bottom          = hr->bottom();
2754   HeapWord* limit           = hr->next_top_at_mark_start();
2755 
2756   if (limit == bottom) {
2757     // The region was collected underneath our feet.
2758     // We set the finger to bottom to ensure that the bitmap
2759     // iteration that will follow this will not do anything.
2760     // (this is not a condition that holds when we set the region up,
2761     // as the region is not supposed to be empty in the first place)
2762     _finger = bottom;
2763   } else if (limit >= _region_limit) {
2764     assert(limit >= _finger, "peace of mind");
2765   } else {
2766     assert(limit < _region_limit, "only way to get here");
2767     // This can happen under some pretty unusual circumstances.  An
2768     // evacuation pause empties the region underneath our feet (NTAMS
2769     // at bottom). We then do some allocation in the region (NTAMS
2770     // stays at bottom), followed by the region being used as a GC
2771     // alloc region (NTAMS will move to top() and the objects
2772     // originally below it will be grayed). All objects now marked in
2773     // the region are explicitly grayed, if below the global finger,
2774     // and in fact we do not need to scan anything else. So, we simply
2775     // set _finger to be limit to ensure that the bitmap iteration
2776     // doesn't do anything.
2777     _finger = limit;
2778   }
2779 
2780   _region_limit = limit;
2781 }
2782 
2783 void CMTask::giveup_current_region() {
2784   assert(_curr_region != NULL, "invariant");
2785   clear_region_fields();
2786 }
2787 
2788 void CMTask::clear_region_fields() {
2789   // Values for these three fields that indicate that we're not
2790   // holding on to a region.
2791   _curr_region   = NULL;
2792   _finger        = NULL;
2793   _region_limit  = NULL;
2794 }
2795 
2796 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2797   if (cm_oop_closure == NULL) {
2798     assert(_cm_oop_closure != NULL, "invariant");
2799   } else {
2800     assert(_cm_oop_closure == NULL, "invariant");
2801   }
2802   _cm_oop_closure = cm_oop_closure;
2803 }
2804 
2805 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2806   guarantee(nextMarkBitMap != NULL, "invariant");
2807   _nextMarkBitMap                = nextMarkBitMap;
2808   clear_region_fields();
2809 
2810   _calls                         = 0;
2811   _elapsed_time_ms               = 0.0;
2812   _termination_time_ms           = 0.0;
2813   _termination_start_time_ms     = 0.0;
2814 }
2815 
2816 bool CMTask::should_exit_termination() {
2817   regular_clock_call();
2818   // This is called when we are in the termination protocol. We should
2819   // quit if, for some reason, this task wants to abort or the global
2820   // stack is not empty (this means that we can get work from it).
2821   return !_cm->mark_stack_empty() || has_aborted();
2822 }
2823 
2824 void CMTask::reached_limit() {
2825   assert(_words_scanned >= _words_scanned_limit ||
2826          _refs_reached >= _refs_reached_limit ,
2827          "shouldn't have been called otherwise");
2828   regular_clock_call();
2829 }
2830 
2831 void CMTask::regular_clock_call() {
2832   if (has_aborted()) return;
2833 
2834   // First, we need to recalculate the words scanned and refs reached
2835   // limits for the next clock call.
2836   recalculate_limits();
2837 
2838   // During the regular clock call we do the following
2839 
2840   // (1) If an overflow has been flagged, then we abort.
2841   if (_cm->has_overflown()) {
2842     set_has_aborted();
2843     return;
2844   }
2845 
2846   // If we are not concurrent (i.e. we're doing remark) we don't need
2847   // to check anything else. The other steps are only needed during
2848   // the concurrent marking phase.
2849   if (!concurrent()) return;
2850 
2851   // (2) If marking has been aborted for Full GC, then we also abort.
2852   if (_cm->has_aborted()) {
2853     set_has_aborted();
2854     return;
2855   }
2856 
2857   double curr_time_ms = os::elapsedVTime() * 1000.0;
2858 
2859   // (3) We check whether we should yield. If we have to, then we abort.
2860   if (SuspendibleThreadSet::should_yield()) {
2861     // We should yield. To do this we abort the task. The caller is
2862     // responsible for yielding.
2863     set_has_aborted();
2864     return;
2865   }
2866 
2867   // (4) We check whether we've reached our time quota. If we have,
2868   // then we abort.
2869   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2870   if (elapsed_time_ms > _time_target_ms) {
2871     set_has_aborted();
2872     _has_timed_out = true;
2873     return;
2874   }
2875 
2876   // (5) Finally, we check whether there are enough completed SATB
2877   // buffers available for processing. If there are, we abort.
2878   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2879   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2880     // We do need to process SATB buffers, so we'll abort and restart
2881     // the marking task to do so.
2882     set_has_aborted();
2883     return;
2884   }
2885 }
2886 
2887 void CMTask::recalculate_limits() {
2888   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2889   _words_scanned_limit      = _real_words_scanned_limit;
2890 
2891   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2892   _refs_reached_limit       = _real_refs_reached_limit;
2893 }
2894 
2895 void CMTask::decrease_limits() {
2896   // This is called when we believe that we're about to do an infrequent
2897   // operation which will increase the per-byte scanning cost (i.e. move
2898   // entries to/from the global stack). It decreases the scanning limits
2899   // so that the clock is called earlier.
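  // Illustrative arithmetic only (the period value below is made up, not the
  // real constant): if words_scanned_period were 4096, the last
  // recalculate_limits() call set _real_words_scanned_limit to
  // _words_scanned + 4096; subtracting 3 * 4096 / 4 = 3072 here leaves only
  // about a quarter of the usual scanning budget before regular_clock_call()
  // fires again.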
2900 
2901   _words_scanned_limit = _real_words_scanned_limit -
2902     3 * words_scanned_period / 4;
2903   _refs_reached_limit  = _real_refs_reached_limit -
2904     3 * refs_reached_period / 4;
2905 }
2906 
2907 void CMTask::move_entries_to_global_stack() {
2908   // local array where we'll store the entries that will be popped
2909   // from the local queue
2910   oop buffer[global_stack_transfer_size];
2911 
2912   int n = 0;
2913   oop obj;
2914   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
2915     buffer[n] = obj;
2916     ++n;
2917   }
2918 
2919   if (n > 0) {
2920     // we popped at least one entry from the local queue
2921 
2922     if (!_cm->mark_stack_push(buffer, n)) {
2923       set_has_aborted();
2924     }
2925   }
2926 
2927   // this operation was quite expensive, so decrease the limits
2928   decrease_limits();
2929 }
2930 
2931 void CMTask::get_entries_from_global_stack() {
2932   // local array where we'll store the entries that will be popped
2933   // from the global stack.
2934   oop buffer[global_stack_transfer_size];
2935   int n;
2936   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
2937   assert(n <= global_stack_transfer_size,
2938          "we should not pop more than the given limit");
2939   if (n > 0) {
2940     // yes, we did actually pop at least one entry
2941     for (int i = 0; i < n; ++i) {
2942       bool success = _task_queue->push(buffer[i]);
2943       // We only call this when the local queue is empty or under a
2944       // given target limit. So, we do not expect this push to fail.
2945       assert(success, "invariant");
2946     }
2947   }
2948 
2949   // this operation was quite expensive, so decrease the limits
2950   decrease_limits();
2951 }
2952 
2953 void CMTask::drain_local_queue(bool partially) {
2954   if (has_aborted()) return;
2955 
2956   // Decide what the target size is, depending on whether we're going to
2957   // drain it partially (so that other tasks can steal if they run out
2958   // of things to do) or totally (at the very end).
2959   size_t target_size;
2960   if (partially) {
2961     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2962   } else {
2963     target_size = 0;
2964   }
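  // For illustration only (both numbers are made up): with
  // _task_queue->max_elems() == 16384 and GCDrainStackTargetSize == 64, the
  // partial target would be MIN2(16384 / 3, 64) == 64, i.e. the queue is
  // drained down to roughly 64 entries but not emptied, so that other tasks
  // still find entries to steal.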
2965 
2966   if (_task_queue->size() > target_size) {
2967     oop obj;
2968     bool ret = _task_queue->pop_local(obj);
2969     while (ret) {
2970       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
2971       assert(!_g1h->is_on_master_free_list(
2972                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
2973 
2974       scan_object(obj);
2975 
2976       if (_task_queue->size() <= target_size || has_aborted()) {
2977         ret = false;
2978       } else {
2979         ret = _task_queue->pop_local(obj);
2980       }
2981     }
2982   }
2983 }
2984 
2985 void CMTask::drain_global_stack(bool partially) {
2986   if (has_aborted()) return;
2987 
2988   // We have a policy to drain the local queue before we attempt to
2989   // drain the global stack.
2990   assert(partially || _task_queue->size() == 0, "invariant");
2991 
2992   // Decide what the target size is, depending on whether we're going to
2993   // drain it partially (so that other tasks can steal if they run out
2994   // of things to do) or totally (at the very end).  Notice that,
2995   // because we move entries from the global stack in chunks or
2996   // because another task might be doing the same, we might in fact
2997   // drop below the target. But, this is not a problem.
2998   size_t target_size;
2999   if (partially) {
3000     target_size = _cm->partial_mark_stack_size_target();
3001   } else {
3002     target_size = 0;
3003   }
3004 
3005   if (_cm->mark_stack_size() > target_size) {
3006     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3007       get_entries_from_global_stack();
3008       drain_local_queue(partially);
3009     }
3010   }
3011 }
3012 
3013 // The SATB queue code makes several assumptions about whether to call the
3014 // par or non-par versions of the methods. This is why some of the code is
3015 // replicated. We should really get rid of the single-threaded version
3016 // of the code to simplify things.
3017 void CMTask::drain_satb_buffers() {
3018   if (has_aborted()) return;
3019 
3020   // We set this so that the regular clock knows that we're in the
3021   // middle of draining buffers and doesn't set the abort flag when it
3022   // notices that SATB buffers are available for draining. It'd be
3023   // very counterproductive if it did that. :-)
3024   _draining_satb_buffers = true;
3025 
3026   CMSATBBufferClosure satb_cl(this, _g1h);
3027   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3028 
3029   // This keeps claiming and applying the closure to completed buffers
3030   // until we run out of buffers or we need to abort.
3031   while (!has_aborted() &&
3032          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3033     regular_clock_call();
3034   }
3035 
3036   _draining_satb_buffers = false;
3037 
3038   assert(has_aborted() ||
3039          concurrent() ||
3040          satb_mq_set.completed_buffers_num() == 0, "invariant");
3041 
3042   // again, this was a potentially expensive operation, decrease the
3043   // limits to get the regular clock call early
3044   decrease_limits();
3045 }
3046 
3047 void CMTask::print_stats() {
3048   log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
3049                        _worker_id, _calls);
3050   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3051                        _elapsed_time_ms, _termination_time_ms);
3052   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3053                        _step_times_ms.num(), _step_times_ms.avg(),
3054                        _step_times_ms.sd());
3055   log_debug(gc, stats)("                    max = %1.2lfms, total = %1.2lfms",
3056                        _step_times_ms.maximum(), _step_times_ms.sum());
3057 }
3058 
3059 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3060   return _task_queues->steal(worker_id, hash_seed, obj);
3061 }
3062 
3063 /*****************************************************************************
3064 
3065     The do_marking_step(time_target_ms, ...) method is the building
3066     block of the parallel marking framework. It can be called in parallel
3067     with other invocations of do_marking_step() on different tasks
3068     (but only one per task, obviously) and concurrently with the
3069     mutator threads, or during remark, hence it eliminates the need
3070     for two versions of the code. When called during remark, it will
3071     pick up from where the task left off during the concurrent marking
3072     phase. Interestingly, tasks are also claimable during evacuation
3073     pauses, since do_marking_step() ensures that it aborts before
3074     it needs to yield.
3075 
3076     The data structures that it uses to do marking work are the
3077     following:
3078 
3079       (1) Marking Bitmap. If there are gray objects that appear only
3080       on the bitmap (this happens either when dealing with an overflow
3081       or when the initial marking phase has simply marked the roots
3082       and didn't push them on the stack), then tasks claim heap
3083       regions whose bitmap they then scan to find gray objects. A
3084       global finger indicates where the end of the last claimed region
3085       is. A local finger indicates how far into the region a task has
3086       scanned. The two fingers are used to determine how to gray an
3087       object (i.e. whether simply marking it is OK, as it will be
3088       visited by a task in the future, or whether it needs to be also
3089       pushed on a stack).
3090 
3091       (2) Local Queue. The local queue of the task which is accessed
3092       reasonably efficiently by the task. Other tasks can steal from
3093       it when they run out of work. Throughout the marking phase, a
3094       task attempts to keep its local queue short but not totally
3095       empty, so that entries are available for stealing by other
3096       tasks. Only when there is no more work will a task totally
3097       drain its local queue.
3098 
3099       (3) Global Mark Stack. This handles local queue overflow. During
3100       marking, only sets of entries are moved between it and the local
3101       queues, as access to it requires a mutex and more fine-grained
3102       interaction with it might cause contention. If it
3103       overflows, then the marking phase should restart and iterate
3104       over the bitmap to identify gray objects. Throughout the marking
3105       phase, tasks attempt to keep the global mark stack at a small
3106       length but not totally empty, so that entries are available for
3107       popping by other tasks. Only when there is no more work will tasks
3108       totally drain the global mark stack.
3109 
3110       (4) SATB Buffer Queue. This is where completed SATB buffers are
3111       made available. Buffers are regularly removed from this queue
3112       and scanned for roots, so that the queue doesn't get too
3113       long. During remark, all completed buffers are processed, as
3114       well as the filled in parts of any uncompleted buffers.
3115 
3116     The do_marking_step() method tries to abort when the time target
3117     has been reached. There are a few other cases when the
3118     do_marking_step() method also aborts:
3119 
3120       (1) When the marking phase has been aborted (after a Full GC).
3121 
3122       (2) When a global overflow (on the global stack) has been
3123       triggered. Before the task aborts, it will actually sync up with
3124       the other tasks to ensure that all the marking data structures
3125       (local queues, stacks, fingers etc.)  are re-initialized so that
3126       when do_marking_step() completes, the marking phase can
3127       immediately restart.
3128 
3129       (3) When enough completed SATB buffers are available. The
3130       do_marking_step() method only tries to drain SATB buffers right
3131       at the beginning. So, if enough buffers are available, the
3132       marking step aborts and the SATB buffers are processed at
3133       the beginning of the next invocation.
3134 
3135       (4) To yield. When we have to yield, we abort and yield
3136       right at the end of do_marking_step(). This saves us a lot
3137       of hassle as, by yielding, we might allow a Full GC. If this
3138       happens then objects will be compacted underneath our feet, the
3139       heap might shrink, etc. We save checking for this by just
3140       aborting and doing the yield right at the end.
3141 
3142     From the above it follows that the do_marking_step() method should
3143     be called in a loop (or, otherwise, regularly) until it completes (see sketch below).
3144 
3145     If a marking step completes without its has_aborted() flag being
3146     true, it means it has completed the current marking phase (and
3147     also all other marking tasks have done so and have all synced up).
3148 
3149     A method called regular_clock_call() is invoked "regularly" (in
3150     sub-ms intervals) throughout marking. It is this clock method that
3151     checks all the abort conditions which were mentioned above and
3152     decides when the task should abort. A work-based scheme is used to
3153     trigger this clock method: when the number of object words the
3154     marking phase has scanned or the number of references the marking
3155     phase has visited reaches a given limit. Additional invocations of
3156     the clock method have been planted in a few other strategic places
3157     too. The initial reason for the clock method was to avoid calling
3158     vtime too regularly, as it is quite expensive. So, once it was in
3159     place, it was natural to piggy-back all the other conditions on it
3160     too and not constantly check them throughout the code.
3161 
3162     If do_termination is true then do_marking_step will enter its
3163     termination protocol.
3164 
3165     The value of is_serial must be true when do_marking_step is being
3166     called serially (i.e. by the VMThread) and do_marking_step should
3167     skip any synchronization in the termination and overflow code.
3168     Examples include the serial remark code and the serial reference
3169     processing closures.
3170 
3171     The value of is_serial must be false when do_marking_step is
3172     being called by any of the worker threads in a work gang.
3173     Examples include the concurrent marking code (CMMarkingTask),
3174     the MT remark code, and the MT reference processing closures.
3175 
3176  *****************************************************************************/
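// The sketch below is purely illustrative; it is not the actual caller (the
// real callers are the concurrent marking and remark paths referred to
// above), but it shows the call-in-a-loop pattern described in this comment.
// 'task', 'cm' and 'step_ms' are assumed to be a claimed CMTask*, the
// ConcurrentMark* instance and a small time slice in milliseconds.
//
//   do {
//     task->do_marking_step(step_ms,
//                           true  /* do_termination */,
//                           false /* is_serial */);
//     // If the step aborted (time quota reached, yield requested, SATB
//     // buffers pending, or overflow), the caller yields to any pending
//     // safepoint and does its bookkeeping here before retrying.
//   } while (task->has_aborted() && !cm->has_aborted());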
3177 
3178 void CMTask::do_marking_step(double time_target_ms,
3179                              bool do_termination,
3180                              bool is_serial) {
3181   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3182   assert(concurrent() == _cm->concurrent(), "they should be the same");
3183 
3184   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3185   assert(_task_queues != NULL, "invariant");
3186   assert(_task_queue != NULL, "invariant");
3187   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3188 
3189   assert(!_claimed,
3190          "only one thread should claim this task at any one time");
3191 
3192   // OK, this doesn't safeguard against all possible scenarios, as it is
3193   // possible for two threads to set the _claimed flag at the same
3194   // time. But it is only for debugging purposes anyway and it will
3195   // catch most problems.
3196   _claimed = true;
3197 
3198   _start_time_ms = os::elapsedVTime() * 1000.0;
3199 
3200   // If do_stealing is true then do_marking_step will attempt to
3201   // steal work from the other CMTasks. It only makes sense to
3202   // enable stealing when the termination protocol is enabled
3203   // and do_marking_step() is not being called serially.
3204   bool do_stealing = do_termination && !is_serial;
3205 
3206   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
3207   _time_target_ms = time_target_ms - diff_prediction_ms;
3208 
3209   // set up the variables that are used in the work-based scheme to
3210   // call the regular clock method
3211   _words_scanned = 0;
3212   _refs_reached  = 0;
3213   recalculate_limits();
3214 
3215   // clear all flags
3216   clear_has_aborted();
3217   _has_timed_out = false;
3218   _draining_satb_buffers = false;
3219 
3220   ++_calls;
3221 
3222   // Set up the bitmap and oop closures. Anything that uses them is
3223   // eventually called from this method, so it is OK to allocate these
3224   // on the stack of this frame.
3225   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3226   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3227   set_cm_oop_closure(&cm_oop_closure);
3228 
3229   if (_cm->has_overflown()) {
3230     // This can happen if the mark stack overflows during a GC pause
3231     // and this task, after a yield point, restarts. We have to abort
3232     // as we need to get into the overflow protocol which happens
3233     // right at the end of this task.
3234     set_has_aborted();
3235   }
3236 
3237   // First drain any available SATB buffers. After this, we will not
3238   // look at SATB buffers before the next invocation of this method.
3239   // If enough completed SATB buffers are queued up, the regular clock
3240   // will abort this task so that it restarts.
3241   drain_satb_buffers();
3242   // ...then partially drain the local queue and the global stack
3243   drain_local_queue(true);
3244   drain_global_stack(true);
3245 
3246   do {
3247     if (!has_aborted() && _curr_region != NULL) {
3248       // This means that we're already holding on to a region.
3249       assert(_finger != NULL, "if region is not NULL, then the finger "
3250              "should not be NULL either");
3251 
3252       // We might have restarted this task after an evacuation pause
3253       // which might have evacuated the region we're holding on to
3254       // underneath our feet. Let's read its limit again to make sure
3255       // that we do not iterate over a region of the heap that
3256       // contains garbage (update_region_limit() will also move
3257       // _finger to the start of the region if it is found empty).
3258       update_region_limit();
3259       // We will start from _finger not from the start of the region,
3260       // as we might be restarting this task after aborting half-way
3261       // through scanning this region. In this case, _finger points to
3262       // the address where we last found a marked object. If this is a
3263       // fresh region, _finger points to start().
3264       MemRegion mr = MemRegion(_finger, _region_limit);
3265 
3266       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
3267              "humongous regions should go around loop once only");
3268 
3269       // Some special cases:
3270       // If the memory region is empty, we can just give up the region.
3271       // If the current region is humongous then we only need to check
3272       // the bitmap for the bit associated with the start of the object,
3273       // scan the object if it's live, and give up the region.
3274       // Otherwise, let's iterate over the bitmap of the part of the region
3275       // that is left.
3276       // If the iteration is successful, give up the region.
3277       if (mr.is_empty()) {
3278         giveup_current_region();
3279         regular_clock_call();
3280       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
3281         if (_nextMarkBitMap->isMarked(mr.start())) {
3282           // The object is marked - apply the closure
3283           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
3284           bitmap_closure.do_bit(offset);
3285         }
3286         // Even if this task aborted while scanning the humongous object
3287         // we can (and should) give up the current region.
3288         giveup_current_region();
3289         regular_clock_call();
3290       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3291         giveup_current_region();
3292         regular_clock_call();
3293       } else {
3294         assert(has_aborted(), "currently the only way to do so");
3295         // The only way to abort the bitmap iteration is to return
3296         // false from the do_bit() method. However, inside the
3297         // do_bit() method we move the _finger to point to the
3298         // object currently being looked at. So, if we bail out, we
3299         // have definitely set _finger to something non-null.
3300         assert(_finger != NULL, "invariant");
3301 
3302         // Region iteration was actually aborted. So now _finger
3303         // points to the address of the object we last scanned. If we
3304         // leave it there, when we restart this task, we will rescan
3305         // the object. It is easy to avoid this. We move the finger by
3306         // enough to point to the next possible object header (the
3307         // bitmap knows by how much we need to move it as it knows its
3308         // granularity).
3309         assert(_finger < _region_limit, "invariant");
3310         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
3311         // Check if bitmap iteration was aborted while scanning the last object
3312         if (new_finger >= _region_limit) {
3313           giveup_current_region();
3314         } else {
3315           move_finger_to(new_finger);
3316         }
3317       }
3318     }
3319     // At this point we have either completed iterating over the
3320     // region we were holding on to, or we have aborted.
3321 
3322     // We then partially drain the local queue and the global stack.
3323     // (Do we really need this?)
3324     drain_local_queue(true);
3325     drain_global_stack(true);
3326 
3327     // Read the note on the claim_region() method on why it might
3328     // return NULL with potentially more regions available for
3329     // claiming and why we have to check out_of_regions() to determine
3330     // whether we're done or not.
3331     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3332       // We are going to try to claim a new region. We should have
3333       // given up on the previous one.
3334       // Separated the asserts so that we know which one fires.
3335       assert(_curr_region  == NULL, "invariant");
3336       assert(_finger       == NULL, "invariant");
3337       assert(_region_limit == NULL, "invariant");
3338       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3339       if (claimed_region != NULL) {
3340         // Yes, we managed to claim one
3341         setup_for_region(claimed_region);
3342         assert(_curr_region == claimed_region, "invariant");
3343       }
3344       // It is important to call the regular clock here. It might take
3345       // a while to claim a region if, for example, we hit a large
3346       // block of empty regions. So we need to call the regular clock
3347       // method once round the loop to make sure it's called
3348       // frequently enough.
3349       regular_clock_call();
3350     }
3351 
3352     if (!has_aborted() && _curr_region == NULL) {
3353       assert(_cm->out_of_regions(),
3354              "at this point we should be out of regions");
3355     }
3356   } while (_curr_region != NULL && !has_aborted());
3357 
3358   if (!has_aborted()) {
3359     // We cannot check whether the global stack is empty, since other
3360     // tasks might be pushing objects to it concurrently.
3361     assert(_cm->out_of_regions(),
3362            "at this point we should be out of regions");
3363     // Try to reduce the number of available SATB buffers so that
3364     // remark has less work to do.
3365     drain_satb_buffers();
3366   }
3367 
3368   // Since we've done everything else, we can now totally drain the
3369   // local queue and global stack.
3370   drain_local_queue(false);
3371   drain_global_stack(false);
3372 
3373   // Attempt at work stealing from other tasks' queues.
3374   if (do_stealing && !has_aborted()) {
3375     // We have not aborted. This means that we have finished all that
3376     // we could. Let's try to do some stealing...
3377 
3378     // We cannot check whether the global stack is empty, since other
3379     // tasks might be pushing objects to it concurrently.
3380     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3381            "only way to reach here");
3382     while (!has_aborted()) {
3383       oop obj;
3384       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
3385         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3386                "any stolen object should be marked");
3387         scan_object(obj);
3388 
3389         // And since we're towards the end, let's totally drain the
3390         // local queue and global stack.
3391         drain_local_queue(false);
3392         drain_global_stack(false);
3393       } else {
3394         break;
3395       }
3396     }
3397   }
3398 
3399   // We still haven't aborted. Now, let's try to get into the
3400   // termination protocol.
3401   if (do_termination && !has_aborted()) {
3402     // We cannot check whether the global stack is empty, since other
3403     // tasks might be concurrently pushing objects on it.
3404     // Separated the asserts so that we know which one fires.
3405     assert(_cm->out_of_regions(), "only way to reach here");
3406     assert(_task_queue->size() == 0, "only way to reach here");
3407     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3408 
3409     // The CMTask class also extends the TerminatorTerminator class,
3410     // hence its should_exit_termination() method will also decide
3411     // whether to exit the termination protocol or not.
3412     bool finished = (is_serial ||
3413                      _cm->terminator()->offer_termination(this));
3414     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3415     _termination_time_ms +=
3416       termination_end_time_ms - _termination_start_time_ms;
3417 
3418     if (finished) {
3419       // We're all done.
3420 
3421       if (_worker_id == 0) {
3422         // let's allow task 0 to do this
3423         if (concurrent()) {
3424           assert(_cm->concurrent_marking_in_progress(), "invariant");
3425           // we need to set this to false before the next
3426           // safepoint. This way we ensure that the marking phase
3427           // doesn't observe any more heap expansions.
3428           _cm->clear_concurrent_marking_in_progress();
3429         }
3430       }
3431 
3432       // We can now guarantee that the global stack is empty, since
3433       // all other tasks have finished. We separated the guarantees so
3434       // that, if a condition is false, we can immediately find out
3435       // which one.
3436       guarantee(_cm->out_of_regions(), "only way to reach here");
3437       guarantee(_cm->mark_stack_empty(), "only way to reach here");
3438       guarantee(_task_queue->size() == 0, "only way to reach here");
3439       guarantee(!_cm->has_overflown(), "only way to reach here");
3440       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3441     } else {
3442       // Apparently there's more work to do. Let's abort this task. It
3443       // will restart it and we can hopefully find more things to do.
3444       set_has_aborted();
3445     }
3446   }
3447 
3448   // Mainly for debugging purposes to make sure that a pointer to the
3449   // closure which was allocated on this frame's stack doesn't
3450   // escape it by accident.
3451   set_cm_oop_closure(NULL);
3452   double end_time_ms = os::elapsedVTime() * 1000.0;
3453   double elapsed_time_ms = end_time_ms - _start_time_ms;
3454   // Update the step history.
3455   _step_times_ms.add(elapsed_time_ms);
3456 
3457   if (has_aborted()) {
3458     // The task was aborted for some reason.
3459     if (_has_timed_out) {
3460       double diff_ms = elapsed_time_ms - _time_target_ms;
3461       // Keep statistics of how well we did with respect to hitting
3462       // our target only if we actually timed out (if we aborted for
3463       // other reasons, then the results might get skewed).
3464       _marking_step_diffs_ms.add(diff_ms);
3465     }
3466 
3467     if (_cm->has_overflown()) {
3468       // This is the interesting one. We aborted because a global
3469       // overflow was raised. This means we have to restart the
3470       // marking phase and start iterating over regions. However, in
3471       // order to do this we have to make sure that all tasks stop
3472       // what they are doing and re-initialize in a safe manner. We
3473       // will achieve this with the use of two barrier sync points.
3474 
3475       if (!is_serial) {
3476         // We only need to enter the sync barrier if being called
3477         // from a parallel context
3478         _cm->enter_first_sync_barrier(_worker_id);
3479 
3480         // When we exit this sync barrier we know that all tasks have
3481         // stopped doing marking work. So, it's now safe to
3482         // re-initialize our data structures. At the end of this method,
3483         // task 0 will clear the global data structures.
3484       }
3485 
3486       // We clear the local state of this task...
3487       clear_region_fields();
3488 
3489       if (!is_serial) {
3490         // ...and enter the second barrier.
3491         _cm->enter_second_sync_barrier(_worker_id);
3492       }
3493       // At this point, if we're during the concurrent phase of
3494       // marking, everything has been re-initialized and we're
3495       // ready to restart.
3496     }
3497   }
3498 
3499   _claimed = false;
3500 }
3501 
3502 CMTask::CMTask(uint worker_id,
3503                ConcurrentMark* cm,
3504                size_t* marked_bytes,
3505                BitMap* card_bm,
3506                CMTaskQueue* task_queue,
3507                CMTaskQueueSet* task_queues)
3508   : _g1h(G1CollectedHeap::heap()),
3509     _worker_id(worker_id), _cm(cm),
3510     _claimed(false),
3511     _nextMarkBitMap(NULL), _hash_seed(17),
3512     _task_queue(task_queue),
3513     _task_queues(task_queues),
3514     _cm_oop_closure(NULL),
3515     _marked_bytes_array(marked_bytes),
3516     _card_bm(card_bm) {
3517   guarantee(task_queue != NULL, "invariant");
3518   guarantee(task_queues != NULL, "invariant");
3519 
3520   _marking_step_diffs_ms.add(0.5);
3521 }
3522 
3523 // These are formatting macros that are used below to ensure
3524 // consistent formatting. The *_H_* versions are used to format the
3525 // header for a particular value and they should be kept consistent
3526 // with the corresponding macro. Also note that most of the macros add
3527 // the necessary whitespace (as a prefix), which makes them a bit
3528 // easier to compose.
3529 
3530 // All the output lines are prefixed with this string to be able to
3531 // identify them easily in a large log file.
3532 #define G1PPRL_LINE_PREFIX            "###"
3533 
3534 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
3535 #ifdef _LP64
3536 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
3537 #else // _LP64
3538 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
3539 #endif // _LP64
3540 
3541 // For per-region info
3542 #define G1PPRL_TYPE_FORMAT            "   %-4s"
3543 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
3544 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
3545 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
3546 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
3547 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
3548 
3549 // For summary info
3550 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
3551 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
3552 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
3553 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
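// For example, G1PPRL_SUM_MB_PERC_FORMAT("used") expands (after C string
// literal concatenation) to "  used: %1.2f MB / %1.2f %%", which is why the
// logging calls below can simply concatenate one format macro per column.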
3554 
3555 G1PrintRegionLivenessInfoClosure::
3556 G1PrintRegionLivenessInfoClosure(const char* phase_name)
3557   : _total_used_bytes(0), _total_capacity_bytes(0),
3558     _total_prev_live_bytes(0), _total_next_live_bytes(0),
3559     _hum_used_bytes(0), _hum_capacity_bytes(0),
3560     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
3561     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
3562   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3563   MemRegion g1_reserved = g1h->g1_reserved();
3564   double now = os::elapsedTime();
3565 
3566   // Print the header of the output.
3567   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3568   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
3569                           G1PPRL_SUM_ADDR_FORMAT("reserved")
3570                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
3571                           p2i(g1_reserved.start()), p2i(g1_reserved.end()),
3572                           HeapRegion::GrainBytes);
3573   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3574   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3575                           G1PPRL_TYPE_H_FORMAT
3576                           G1PPRL_ADDR_BASE_H_FORMAT
3577                           G1PPRL_BYTE_H_FORMAT
3578                           G1PPRL_BYTE_H_FORMAT
3579                           G1PPRL_BYTE_H_FORMAT
3580                           G1PPRL_DOUBLE_H_FORMAT
3581                           G1PPRL_BYTE_H_FORMAT
3582                           G1PPRL_BYTE_H_FORMAT,
3583                           "type", "address-range",
3584                           "used", "prev-live", "next-live", "gc-eff",
3585                           "remset", "code-roots");
3586   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3587                           G1PPRL_TYPE_H_FORMAT
3588                           G1PPRL_ADDR_BASE_H_FORMAT
3589                           G1PPRL_BYTE_H_FORMAT
3590                           G1PPRL_BYTE_H_FORMAT
3591                           G1PPRL_BYTE_H_FORMAT
3592                           G1PPRL_DOUBLE_H_FORMAT
3593                           G1PPRL_BYTE_H_FORMAT
3594                           G1PPRL_BYTE_H_FORMAT,
3595                           "", "",
3596                           "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
3597                           "(bytes)", "(bytes)");
3598 }
3599 
3600 // It takes a pointer to one of the _hum_* fields, deduces the
3601 // corresponding value for a region in a humongous region
3602 // series (either the region size, or what's left if the _hum_* field
3603 // is < the region size), and updates the _hum_* field accordingly.
3604 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
3605   size_t bytes = 0;
3606   // The > 0 check is to deal with the prev and next live bytes which
3607   // could be 0.
3608   if (*hum_bytes > 0) {
3609     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
3610     *hum_bytes -= bytes;
3611   }
3612   return bytes;
3613 }
3614 
3615 // It deduces the values for a region in a humongous region series
3616 // from the _hum_* fields and updates those accordingly. It assumes
3617 // that the _hum_* fields have already been set up from the "starts
3618 // humongous" region and we visit the regions in address order.
3619 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
3620                                                      size_t* capacity_bytes,
3621                                                      size_t* prev_live_bytes,
3622                                                      size_t* next_live_bytes) {
3623   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
3624   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
3625   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
3626   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
3627   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
3628 }
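// Worked example for the get_hum_bytes() helpers above (the figures are for
// illustration only): if HeapRegion::GrainBytes is 1M and a humongous object
// spans 2.5 regions, the "starts humongous" region seeds _hum_used_bytes with
// about 2.5M; the three calls made while walking that series then report
// roughly 1M, 1M and 0.5M, leaving _hum_used_bytes at zero for the next
// series.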
3629 
3630 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
3631   const char* type       = r->get_type_str();
3632   HeapWord* bottom       = r->bottom();
3633   HeapWord* end          = r->end();
3634   size_t capacity_bytes  = r->capacity();
3635   size_t used_bytes      = r->used();
3636   size_t prev_live_bytes = r->live_bytes();
3637   size_t next_live_bytes = r->next_live_bytes();
3638   double gc_eff          = r->gc_efficiency();
3639   size_t remset_bytes    = r->rem_set()->mem_size();
3640   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
3641 
3642   if (r->is_starts_humongous()) {
3643     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
3644            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
3645            "they should have been zeroed after the last time we used them");
3646     // Set up the _hum_* fields.
3647     _hum_capacity_bytes  = capacity_bytes;
3648     _hum_used_bytes      = used_bytes;
3649     _hum_prev_live_bytes = prev_live_bytes;
3650     _hum_next_live_bytes = next_live_bytes;
3651     get_hum_bytes(&used_bytes, &capacity_bytes,
3652                   &prev_live_bytes, &next_live_bytes);
3653     end = bottom + HeapRegion::GrainWords;
3654   } else if (r->is_continues_humongous()) {
3655     get_hum_bytes(&used_bytes, &capacity_bytes,
3656                   &prev_live_bytes, &next_live_bytes);
3657     assert(end == bottom + HeapRegion::GrainWords, "invariant");
3658   }
3659 
3660   _total_used_bytes      += used_bytes;
3661   _total_capacity_bytes  += capacity_bytes;
3662   _total_prev_live_bytes += prev_live_bytes;
3663   _total_next_live_bytes += next_live_bytes;
3664   _total_remset_bytes    += remset_bytes;
3665   _total_strong_code_roots_bytes += strong_code_roots_bytes;
3666 
3667   // Print a line for this particular region.
3668   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3669                           G1PPRL_TYPE_FORMAT
3670                           G1PPRL_ADDR_BASE_FORMAT
3671                           G1PPRL_BYTE_FORMAT
3672                           G1PPRL_BYTE_FORMAT
3673                           G1PPRL_BYTE_FORMAT
3674                           G1PPRL_DOUBLE_FORMAT
3675                           G1PPRL_BYTE_FORMAT
3676                           G1PPRL_BYTE_FORMAT,
3677                           type, p2i(bottom), p2i(end),
3678                           used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
3679                           remset_bytes, strong_code_roots_bytes);
3680 
3681   return false;
3682 }
3683 
3684 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3685   // Add static memory usage to the remembered set sizes.
3686   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
3687   // Print the footer of the output.
3688   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3689   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3690                          " SUMMARY"
3691                          G1PPRL_SUM_MB_FORMAT("capacity")
3692                          G1PPRL_SUM_MB_PERC_FORMAT("used")
3693                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
3694                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
3695                          G1PPRL_SUM_MB_FORMAT("remset")
3696                          G1PPRL_SUM_MB_FORMAT("code-roots"),
3697                          bytes_to_mb(_total_capacity_bytes),
3698                          bytes_to_mb(_total_used_bytes),
3699                          perc(_total_used_bytes, _total_capacity_bytes),
3700                          bytes_to_mb(_total_prev_live_bytes),
3701                          perc(_total_prev_live_bytes, _total_capacity_bytes),
3702                          bytes_to_mb(_total_next_live_bytes),
3703                          perc(_total_next_live_bytes, _total_capacity_bytes),
3704                          bytes_to_mb(_total_remset_bytes),
3705                          bytes_to_mb(_total_strong_code_roots_bytes));
3706 }