1 /*
   2  * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMark.inline.hpp"
  30 #include "gc/g1/concurrentMarkThread.inline.hpp"
  31 #include "gc/g1/g1CollectedHeap.inline.hpp"
  32 #include "gc/g1/g1CollectorPolicy.hpp"
  33 #include "gc/g1/g1CollectorState.hpp"
  34 #include "gc/g1/g1ErgoVerbose.hpp"
  35 #include "gc/g1/g1Log.hpp"
  36 #include "gc/g1/g1OopClosures.inline.hpp"
  37 #include "gc/g1/g1RemSet.hpp"
  38 #include "gc/g1/g1StringDedup.hpp"
  39 #include "gc/g1/heapRegion.inline.hpp"
  40 #include "gc/g1/heapRegionManager.inline.hpp"
  41 #include "gc/g1/heapRegionRemSet.hpp"
  42 #include "gc/g1/heapRegionSet.inline.hpp"
  43 #include "gc/g1/suspendibleThreadSet.hpp"
  44 #include "gc/shared/gcId.hpp"
  45 #include "gc/shared/gcTimer.hpp"
  46 #include "gc/shared/gcTrace.hpp"
  47 #include "gc/shared/gcTraceTime.hpp"
  48 #include "gc/shared/genOopClosures.inline.hpp"
  49 #include "gc/shared/referencePolicy.hpp"
  50 #include "gc/shared/strongRootsScope.hpp"
  51 #include "gc/shared/taskqueue.inline.hpp"
  52 #include "gc/shared/vmGCOperations.hpp"
  53 #include "memory/allocation.hpp"
  54 #include "memory/resourceArea.hpp"
  55 #include "oops/oop.inline.hpp"
  56 #include "runtime/atomic.inline.hpp"
  57 #include "runtime/handles.inline.hpp"
  58 #include "runtime/java.hpp"
  59 #include "runtime/prefetch.inline.hpp"
  60 #include "services/memTracker.hpp"
  61 
  62 // Concurrent marking bit map wrapper
  63 
  64 CMBitMapRO::CMBitMapRO(int shifter) :
  65   _bm(),
  66   _shifter(shifter) {
  67   _bmStartWord = 0;
  68   _bmWordSize = 0;
  69 }
  70 
  71 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  72                                                const HeapWord* limit) const {
  73   // First we must round addr *up* to a possible object boundary.
  74   addr = (HeapWord*)align_size_up((intptr_t)addr,
  75                                   HeapWordSize << _shifter);
  76   size_t addrOffset = heapWordToOffset(addr);
  77   assert(limit != NULL, "limit must not be NULL");
  78   size_t limitOffset = heapWordToOffset(limit);
  79   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  80   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  81   assert(nextAddr >= addr, "get_next_one postcondition");
  82   assert(nextAddr == limit || isMarked(nextAddr),
  83          "get_next_one postcondition");
  84   return nextAddr;
  85 }
  86 
  87 #ifndef PRODUCT
  88 bool CMBitMapRO::covers(MemRegion heap_rs) const {
  89   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  90   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
  91          "size inconsistency");
  92   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
  93          _bmWordSize  == heap_rs.word_size();
  94 }
  95 #endif
  96 
  97 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  98   _bm.print_on_error(st, prefix);
  99 }
 100 
 101 size_t CMBitMap::compute_size(size_t heap_size) {
 102   return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
 103 }
 104 
 105 size_t CMBitMap::mark_distance() {
 106   return MinObjAlignmentInBytes * BitsPerByte;
 107 }
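
      // Illustrative sizing example (assuming the common values
      // MinObjAlignmentInBytes == 8 and BitsPerByte == 8): mark_distance()
      // returns 64, the number of heap bytes covered by each bitmap byte
      // (one bit per 8-byte-aligned heap word), so compute_size() reserves
      // roughly heap_size / 64 bytes for the bitmap.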
 108 
 109 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 110   _bmStartWord = heap.start();
 111   _bmWordSize = heap.word_size();
 112 
 113   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
 114   _bm.set_size(_bmWordSize >> _shifter);
 115 
 116   storage->set_mapping_changed_listener(&_listener);
 117 }
 118 
 119 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 120   if (zero_filled) {
 121     return;
 122   }
 123   // We need to clear the bitmap on commit, removing any existing information.
 124   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 125   _bm->clearRange(mr);
 126 }
 127 
 128 // Closure used for clearing the given mark bitmap.
 129 class ClearBitmapHRClosure : public HeapRegionClosure {
 130  private:
 131   ConcurrentMark* _cm;
 132   CMBitMap* _bitmap;
 133   bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 134  public:
 135   ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
 136     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
 137   }
 138 
 139   virtual bool doHeapRegion(HeapRegion* r) {
 140     size_t const chunk_size_in_words = M / HeapWordSize;
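          // Work in chunks covering 1M of heap (e.g. 128K words with 8-byte
          // HeapWords) so that, when permitted, we can yield between chunks
          // rather than only once per region.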
 141 
 142     HeapWord* cur = r->bottom();
 143     HeapWord* const end = r->end();
 144 
 145     while (cur < end) {
 146       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 147       _bitmap->clearRange(mr);
 148 
 149       cur += chunk_size_in_words;
 150 
  151       // Abort the iteration if, after yielding, the marking has been aborted.
 152       if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
 153         return true;
 154       }
  155       // Repeat the asserts from before the start of the closure. We will do them
  156       // as asserts here to minimize their overhead in product builds. However, we
  157       // will have them as guarantees at the beginning / end of the bitmap
  158       // clearing to get some checking in product builds.
 159       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
 160       assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
 161     }
 162 
 163     return false;
 164   }
 165 };
 166 
 167 class ParClearNextMarkBitmapTask : public AbstractGangTask {
 168   ClearBitmapHRClosure* _cl;
 169   HeapRegionClaimer     _hrclaimer;
 170   bool                  _suspendible; // If the task is suspendible, workers must join the STS.
 171 
 172 public:
 173   ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
  174       AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}
 175 
 176   void work(uint worker_id) {
 177     SuspendibleThreadSetJoiner sts_join(_suspendible);
 178     G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
 179   }
 180 };
 181 
 182 void CMBitMap::clearAll() {
 183   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 184   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
 185   uint n_workers = g1h->workers()->active_workers();
 186   ParClearNextMarkBitmapTask task(&cl, n_workers, false);
 187   g1h->workers()->run_task(&task);
 188   guarantee(cl.complete(), "Must have completed iteration.");
 189   return;
 190 }
 191 
 192 void CMBitMap::clearRange(MemRegion mr) {
  193   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 194   assert(!mr.is_empty(), "unexpected empty region");
 195   // convert address range into offset range
 196   _bm.at_put_range(heapWordToOffset(mr.start()),
 197                    heapWordToOffset(mr.end()), false);
 198 }
 199 
 200 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 201   _base(NULL), _cm(cm)
 202 {}
 203 
 204 bool CMMarkStack::allocate(size_t capacity) {
 205   // allocate a stack of the requisite depth
 206   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 207   if (!rs.is_reserved()) {
 208     warning("ConcurrentMark MarkStack allocation failure");
 209     return false;
 210   }
 211   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 212   if (!_virtual_space.initialize(rs, rs.size())) {
 213     warning("ConcurrentMark MarkStack backing store failure");
 214     // Release the virtual memory reserved for the marking stack
 215     rs.release();
 216     return false;
 217   }
 218   assert(_virtual_space.committed_size() == rs.size(),
 219          "Didn't reserve backing store for all of ConcurrentMark stack?");
 220   _base = (oop*) _virtual_space.low();
 221   setEmpty();
 222   _capacity = (jint) capacity;
 223   _saved_index = -1;
 224   _should_expand = false;
 225   return true;
 226 }
 227 
 228 void CMMarkStack::expand() {
  229   // Called during remark if we've overflown the marking stack during marking.
  230   assert(isEmpty(), "stack should have been emptied while handling overflow");
 231   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 232   // Clear expansion flag
 233   _should_expand = false;
 234   if (_capacity == (jint) MarkStackSizeMax) {
 235     if (PrintGCDetails && Verbose) {
 236       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
 237     }
 238     return;
 239   }
 240   // Double capacity if possible
 241   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  242   // Do not give up the existing stack until we have managed to
  243   // get the doubled capacity that we desire.
 244   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 245                                                            sizeof(oop)));
 246   if (rs.is_reserved()) {
 247     // Release the backing store associated with old stack
 248     _virtual_space.release();
 249     // Reinitialize virtual space for new stack
 250     if (!_virtual_space.initialize(rs, rs.size())) {
 251       fatal("Not enough swap for expanded marking stack capacity");
 252     }
 253     _base = (oop*)(_virtual_space.low());
 254     _index = 0;
 255     _capacity = new_capacity;
 256   } else {
 257     if (PrintGCDetails && Verbose) {
  258       // Failed to double the capacity; continue with the current capacity.
 259       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
 260                           SIZE_FORMAT "K to " SIZE_FORMAT "K",
 261                           _capacity / K, new_capacity / K);
 262     }
 263   }
 264 }
 265 
 266 void CMMarkStack::set_should_expand() {
  267   // If we're resetting the marking state because of a
  268   // marking stack overflow, record that we should, if
 269   // possible, expand the stack.
 270   _should_expand = _cm->has_overflown();
 271 }
 272 
 273 CMMarkStack::~CMMarkStack() {
 274   if (_base != NULL) {
 275     _base = NULL;
 276     _virtual_space.release();
 277   }
 278 }
 279 
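      // Pushes the n entries in ptr_arr onto the global mark stack under
      // ParGCRareEvent_lock. If the push would exceed _capacity, no entries
      // are pushed and the _overflow flag is set instead.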
 280 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 281   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 282   jint start = _index;
 283   jint next_index = start + n;
 284   if (next_index > _capacity) {
 285     _overflow = true;
 286     return;
 287   }
 288   // Otherwise.
 289   _index = next_index;
 290   for (int i = 0; i < n; i++) {
 291     int ind = start + i;
 292     assert(ind < _capacity, "By overflow test above.");
 293     _base[ind] = ptr_arr[i];
 294   }
 295 }
 296 
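      // Pops up to max entries from the global mark stack into ptr_arr under
      // ParGCRareEvent_lock, storing the number popped in *n. Returns false
      // (with *n set to 0) if the stack is empty.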
 297 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 298   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 299   jint index = _index;
 300   if (index == 0) {
 301     *n = 0;
 302     return false;
 303   } else {
 304     int k = MIN2(max, index);
 305     jint  new_ind = index - k;
 306     for (int j = 0; j < k; j++) {
 307       ptr_arr[j] = _base[new_ind + j];
 308     }
 309     _index = new_ind;
 310     *n = k;
 311     return true;
 312   }
 313 }
 314 
 315 void CMMarkStack::note_start_of_gc() {
 316   assert(_saved_index == -1,
 317          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 318   _saved_index = _index;
 319 }
 320 
 321 void CMMarkStack::note_end_of_gc() {
 322   // This is intentionally a guarantee, instead of an assert. If we
 323   // accidentally add something to the mark stack during GC, it
  324   // will be a correctness issue so it's better if we crash. We'll
 325   // only check this once per GC anyway, so it won't be a performance
 326   // issue in any way.
 327   guarantee(_saved_index == _index,
 328             "saved index: %d index: %d", _saved_index, _index);
 329   _saved_index = -1;
 330 }
 331 
 332 CMRootRegions::CMRootRegions() :
 333   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 334   _should_abort(false),  _next_survivor(NULL) { }
 335 
 336 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 337   _young_list = g1h->young_list();
 338   _cm = cm;
 339 }
 340 
 341 void CMRootRegions::prepare_for_scan() {
 342   assert(!scan_in_progress(), "pre-condition");
 343 
 344   // Currently, only survivors can be root regions.
 345   assert(_next_survivor == NULL, "pre-condition");
 346   _next_survivor = _young_list->first_survivor_region();
 347   _scan_in_progress = (_next_survivor != NULL);
 348   _should_abort = false;
 349 }
 350 
 351 HeapRegion* CMRootRegions::claim_next() {
 352   if (_should_abort) {
 353     // If someone has set the should_abort flag, we return NULL to
 354     // force the caller to bail out of their loop.
 355     return NULL;
 356   }
 357 
 358   // Currently, only survivors can be root regions.
 359   HeapRegion* res = _next_survivor;
 360   if (res != NULL) {
 361     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 362     // Read it again in case it changed while we were waiting for the lock.
 363     res = _next_survivor;
 364     if (res != NULL) {
 365       if (res == _young_list->last_survivor_region()) {
 366         // We just claimed the last survivor so store NULL to indicate
 367         // that we're done.
 368         _next_survivor = NULL;
 369       } else {
 370         _next_survivor = res->get_next_young_region();
 371       }
 372     } else {
 373       // Someone else claimed the last survivor while we were trying
 374       // to take the lock so nothing else to do.
 375     }
 376   }
 377   assert(res == NULL || res->is_survivor(), "post-condition");
 378 
 379   return res;
 380 }
 381 
 382 void CMRootRegions::scan_finished() {
 383   assert(scan_in_progress(), "pre-condition");
 384 
 385   // Currently, only survivors can be root regions.
 386   if (!_should_abort) {
 387     assert(_next_survivor == NULL, "we should have claimed all survivors");
 388   }
 389   _next_survivor = NULL;
 390 
 391   {
 392     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 393     _scan_in_progress = false;
 394     RootRegionScan_lock->notify_all();
 395   }
 396 }
 397 
 398 bool CMRootRegions::wait_until_scan_finished() {
 399   if (!scan_in_progress()) return false;
 400 
 401   {
 402     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 403     while (scan_in_progress()) {
 404       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 405     }
 406   }
 407   return true;
 408 }
 409 
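      // Returns roughly one concurrent marking thread per four parallel GC
      // threads, but always at least one. For example, ParallelGCThreads == 8
      // yields (8 + 2) / 4 == 2 marking threads.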
 410 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 411   return MAX2((n_par_threads + 2) / 4, 1U);
 412 }
 413 
 414 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 415   _g1h(g1h),
 416   _markBitMap1(),
 417   _markBitMap2(),
 418   _parallel_marking_threads(0),
 419   _max_parallel_marking_threads(0),
 420   _sleep_factor(0.0),
 421   _marking_task_overhead(1.0),
 422   _cleanup_list("Cleanup List"),
 423   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 424   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
 425             CardTableModRefBS::card_shift,
 426             false /* in_resource_area*/),
 427 
 428   _prevMarkBitMap(&_markBitMap1),
 429   _nextMarkBitMap(&_markBitMap2),
 430 
 431   _markStack(this),
 432   // _finger set in set_non_marking_state
 433 
 434   _max_worker_id(ParallelGCThreads),
 435   // _active_tasks set in set_non_marking_state
 436   // _tasks set inside the constructor
 437   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 438   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 439 
 440   _has_overflown(false),
 441   _concurrent(false),
 442   _has_aborted(false),
 443   _restart_for_overflow(false),
 444   _concurrent_marking_in_progress(false),
 445 
 446   // _verbose_level set below
 447 
 448   _init_times(),
 449   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 450   _cleanup_times(),
 451   _total_counting_time(0.0),
 452   _total_rs_scrub_time(0.0),
 453 
 454   _parallel_workers(NULL),
 455 
 456   _count_card_bitmaps(NULL),
 457   _count_marked_bytes(NULL),
 458   _completed_initialization(false) {
 459 
 460   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 461   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 462 
 463   // Create & start a ConcurrentMark thread.
 464   _cmThread = new ConcurrentMarkThread(this);
 465   assert(cmThread() != NULL, "CM Thread should have been created");
 466   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 467   if (_cmThread->osthread() == NULL) {
 468       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 469   }
 470 
 471   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 472   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 473   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 474 
 475   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 476   satb_qs.set_buffer_size(G1SATBBufferSize);
 477 
 478   _root_regions.init(_g1h, this);
 479 
 480   if (ConcGCThreads > ParallelGCThreads) {
 481     warning("Can't have more ConcGCThreads (%u) "
 482             "than ParallelGCThreads (%u).",
 483             ConcGCThreads, ParallelGCThreads);
 484     return;
 485   }
 486   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 487     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 488     // if both are set
 489     _sleep_factor             = 0.0;
 490     _marking_task_overhead    = 1.0;
 491   } else if (G1MarkingOverheadPercent > 0) {
 492     // We will calculate the number of parallel marking threads based
 493     // on a target overhead with respect to the soft real-time goal
 494     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 495     double overall_cm_overhead =
 496       (double) MaxGCPauseMillis * marking_overhead /
 497       (double) GCPauseIntervalMillis;
 498     double cpu_ratio = 1.0 / (double) os::processor_count();
 499     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 500     double marking_task_overhead =
 501       overall_cm_overhead / marking_thread_num *
 502                                               (double) os::processor_count();
 503     double sleep_factor =
 504                        (1.0 - marking_task_overhead) / marking_task_overhead;
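
          // Worked example with illustrative values (not any platform's
          // defaults): G1MarkingOverheadPercent = 10, MaxGCPauseMillis = 200,
          // GCPauseIntervalMillis = 1000 and 8 processors give:
          //   overall_cm_overhead   = 200 * 0.10 / 1000  = 0.02
          //   cpu_ratio             = 1.0 / 8            = 0.125
          //   marking_thread_num    = ceil(0.02 / 0.125) = 1
          //   marking_task_overhead = 0.02 / 1 * 8       = 0.16
          //   sleep_factor          = (1 - 0.16) / 0.16  = 5.25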
 505 
 506     FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
 507     _sleep_factor             = sleep_factor;
 508     _marking_task_overhead    = marking_task_overhead;
 509   } else {
 510     // Calculate the number of parallel marking threads by scaling
 511     // the number of parallel GC threads.
 512     uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
 513     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 514     _sleep_factor             = 0.0;
 515     _marking_task_overhead    = 1.0;
 516   }
 517 
 518   assert(ConcGCThreads > 0, "Should have been set");
 519   _parallel_marking_threads = ConcGCThreads;
 520   _max_parallel_marking_threads = _parallel_marking_threads;
 521 
 522   _parallel_workers = new WorkGang("G1 Marker",
 523        _max_parallel_marking_threads, false, true);
 524   if (_parallel_workers == NULL) {
 525     vm_exit_during_initialization("Failed necessary allocation.");
 526   } else {
 527     _parallel_workers->initialize_workers();
 528   }
 529 
 530   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 531     size_t mark_stack_size =
 532       MIN2(MarkStackSizeMax,
 533           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 534     // Verify that the calculated value for MarkStackSize is in range.
 535     // It would be nice to use the private utility routine from Arguments.
 536     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 537       warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 538               "must be between 1 and " SIZE_FORMAT,
 539               mark_stack_size, MarkStackSizeMax);
 540       return;
 541     }
 542     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 543   } else {
 544     // Verify MarkStackSize is in range.
 545     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 546       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 547         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 548           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 549                   "must be between 1 and " SIZE_FORMAT,
 550                   MarkStackSize, MarkStackSizeMax);
 551           return;
 552         }
 553       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 554         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 555           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 556                   " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 557                   MarkStackSize, MarkStackSizeMax);
 558           return;
 559         }
 560       }
 561     }
 562   }
 563 
 564   if (!_markStack.allocate(MarkStackSize)) {
 565     warning("Failed to allocate CM marking stack");
 566     return;
 567   }
 568 
 569   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 570   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 571 
 572   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 573   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 574 
 575   BitMap::idx_t card_bm_size = _card_bm.size();
 576 
 577   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 578   _active_tasks = _max_worker_id;
 579 
 580   uint max_regions = _g1h->max_regions();
 581   for (uint i = 0; i < _max_worker_id; ++i) {
 582     CMTaskQueue* task_queue = new CMTaskQueue();
 583     task_queue->initialize();
 584     _task_queues->register_queue(i, task_queue);
 585 
 586     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 587     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 588 
 589     _tasks[i] = new CMTask(i, this,
 590                            _count_marked_bytes[i],
 591                            &_count_card_bitmaps[i],
 592                            task_queue, _task_queues);
 593 
 594     _accum_task_vtime[i] = 0.0;
 595   }
 596 
 597   // Calculate the card number for the bottom of the heap. Used
 598   // in biasing indexes into the accounting card bitmaps.
 599   _heap_bottom_card_num =
 600     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 601                                 CardTableModRefBS::card_shift);
 602 
 603   // Clear all the liveness counting data
 604   clear_all_count_data();
 605 
 606   // so that the call below can read a sensible value
 607   _heap_start = g1h->reserved_region().start();
 608   set_non_marking_state();
 609   _completed_initialization = true;
 610 }
 611 
 612 void ConcurrentMark::reset() {
 613   // Starting values for these two. This should be called in a STW
 614   // phase.
 615   MemRegion reserved = _g1h->g1_reserved();
 616   _heap_start = reserved.start();
 617   _heap_end   = reserved.end();
 618 
 619   // Separated the asserts so that we know which one fires.
 620   assert(_heap_start != NULL, "heap bounds should look ok");
 621   assert(_heap_end != NULL, "heap bounds should look ok");
 622   assert(_heap_start < _heap_end, "heap bounds should look ok");
 623 
 624   // Reset all the marking data structures and any necessary flags
 625   reset_marking_state();
 626 
  627   // We do reset all of them, since different phases will use a
  628   // different number of active threads. So, it's easiest to have all
 629   // of them ready.
 630   for (uint i = 0; i < _max_worker_id; ++i) {
 631     _tasks[i]->reset(_nextMarkBitMap);
 632   }
 633 
  634   // We need this to make sure that the flag is on during the evacuation
  635   // pause that has the initial mark piggy-backed on it.
 636   set_concurrent_marking_in_progress();
 637 }
 638 
 639 
 640 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 641   _markStack.set_should_expand();
 642   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 643   if (clear_overflow) {
 644     clear_has_overflown();
 645   } else {
 646     assert(has_overflown(), "pre-condition");
 647   }
 648   _finger = _heap_start;
 649 
 650   for (uint i = 0; i < _max_worker_id; ++i) {
 651     CMTaskQueue* queue = _task_queues->queue(i);
 652     queue->set_empty();
 653   }
 654 }
 655 
 656 void ConcurrentMark::set_concurrency(uint active_tasks) {
 657   assert(active_tasks <= _max_worker_id, "we should not have more");
 658 
 659   _active_tasks = active_tasks;
 660   // Need to update the three data structures below according to the
 661   // number of active threads for this phase.
 662   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 663   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 664   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 665 }
 666 
 667 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 668   set_concurrency(active_tasks);
 669 
 670   _concurrent = concurrent;
 671   // We propagate this to all tasks, not just the active ones.
 672   for (uint i = 0; i < _max_worker_id; ++i)
 673     _tasks[i]->set_concurrent(concurrent);
 674 
 675   if (concurrent) {
 676     set_concurrent_marking_in_progress();
 677   } else {
 678     // We currently assume that the concurrent flag has been set to
 679     // false before we start remark. At this point we should also be
 680     // in a STW phase.
 681     assert(!concurrent_marking_in_progress(), "invariant");
 682     assert(out_of_regions(),
 683            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 684            p2i(_finger), p2i(_heap_end));
 685   }
 686 }
 687 
 688 void ConcurrentMark::set_non_marking_state() {
 689   // We set the global marking state to some default values when we're
 690   // not doing marking.
 691   reset_marking_state();
 692   _active_tasks = 0;
 693   clear_concurrent_marking_in_progress();
 694 }
 695 
 696 ConcurrentMark::~ConcurrentMark() {
 697   // The ConcurrentMark instance is never freed.
 698   ShouldNotReachHere();
 699 }
 700 
 701 void ConcurrentMark::clearNextBitmap() {
 702   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 703 
  704   // Make sure that the concurrent mark thread still appears to be in
  705   // the current cycle.
 706   guarantee(cmThread()->during_cycle(), "invariant");
 707 
 708   // We are finishing up the current cycle by clearing the next
 709   // marking bitmap and getting it ready for the next cycle. During
 710   // this time no other cycle can start. So, let's make sure that this
 711   // is the case.
 712   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 713 
 714   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
 715   ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
 716   _parallel_workers->run_task(&task);
 717 
 718   // Clear the liveness counting data. If the marking has been aborted, the abort()
 719   // call already did that.
 720   if (cl.complete()) {
 721     clear_all_count_data();
 722   }
 723 
 724   // Repeat the asserts from above.
 725   guarantee(cmThread()->during_cycle(), "invariant");
 726   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 727 }
 728 
 729 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 730   CMBitMap* _bitmap;
 731   bool _error;
 732  public:
 733   CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
 734   }
 735 
 736   virtual bool doHeapRegion(HeapRegion* r) {
  737     // This closure can run concurrently with the mutator, so we must make sure
  738     // that the result of the getNextMarkedWordAddress() call is compared to the
  739     // limit value passed to it in order to detect any marked bits.
 740     // end never changes in G1.
 741     HeapWord* end = r->end();
 742     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 743   }
 744 };
 745 
 746 bool ConcurrentMark::nextMarkBitmapIsClear() {
 747   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 748   _g1h->heap_region_iterate(&cl);
 749   return cl.complete();
 750 }
 751 
 752 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 753 public:
 754   bool doHeapRegion(HeapRegion* r) {
 755     r->note_start_of_marking();
 756     return false;
 757   }
 758 };
 759 
 760 void ConcurrentMark::checkpointRootsInitialPre() {
 761   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 762   G1CollectorPolicy* g1p = g1h->g1_policy();
 763 
 764   _has_aborted = false;
 765 
 766   // Initialize marking structures. This has to be done in a STW phase.
 767   reset();
 768 
 769   // For each region note start of marking.
 770   NoteStartOfMarkHRClosure startcl;
 771   g1h->heap_region_iterate(&startcl);
 772 }
 773 
 774 
 775 void ConcurrentMark::checkpointRootsInitialPost() {
 776   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 777 
 778   // Start Concurrent Marking weak-reference discovery.
 779   ReferenceProcessor* rp = g1h->ref_processor_cm();
 780   // enable ("weak") refs discovery
 781   rp->enable_discovery();
 782   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 783 
 784   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  785   // This is the start of the marking cycle; we expect all
  786   // threads to have SATB queues with active set to false.
 787   satb_mq_set.set_active_all_threads(true, /* new active value */
 788                                      false /* expected_active */);
 789 
 790   _root_regions.prepare_for_scan();
 791 
 792   // update_g1_committed() will be called at the end of an evac pause
 793   // when marking is on. So, it's also called at the end of the
 794   // initial-mark pause to update the heap end, if the heap expands
 795   // during it. No need to call it here.
 796 }
 797 
 798 /*
 799  * Notice that in the next two methods, we actually leave the STS
 800  * during the barrier sync and join it immediately afterwards. If we
 801  * do not do this, the following deadlock can occur: one thread could
 802  * be in the barrier sync code, waiting for the other thread to also
 803  * sync up, whereas another one could be trying to yield, while also
 804  * waiting for the other threads to sync up too.
 805  *
 806  * Note, however, that this code is also used during remark and in
 807  * this case we should not attempt to leave / enter the STS, otherwise
 808  * we'll either hit an assert (debug / fastdebug) or deadlock
 809  * (product). So we should only leave / enter the STS if we are
 810  * operating concurrently.
 811  *
 812  * Because the thread that does the sync barrier has left the STS, it
  813  * is possible for it to be suspended while a Full GC or an evacuation pause
  814  * occurs. This is actually safe, since entering the sync
 815  * barrier is one of the last things do_marking_step() does, and it
 816  * doesn't manipulate any data structures afterwards.
 817  */
 818 
 819 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 820   bool barrier_aborted;
 821   {
 822     SuspendibleThreadSetLeaver sts_leave(concurrent());
 823     barrier_aborted = !_first_overflow_barrier_sync.enter();
 824   }
 825 
 826   // at this point everyone should have synced up and not be doing any
 827   // more work
 828 
 829   if (barrier_aborted) {
 830     // If the barrier aborted we ignore the overflow condition and
 831     // just abort the whole marking phase as quickly as possible.
 832     return;
 833   }
 834 
 835   // If we're executing the concurrent phase of marking, reset the marking
 836   // state; otherwise the marking state is reset after reference processing,
 837   // during the remark pause.
 838   // If we reset here as a result of an overflow during the remark we will
 839   // see assertion failures from any subsequent set_concurrency_and_phase()
 840   // calls.
 841   if (concurrent()) {
  842     // let the task associated with worker 0 do this
 843     if (worker_id == 0) {
 844       // task 0 is responsible for clearing the global data structures
 845       // We should be here because of an overflow. During STW we should
 846       // not clear the overflow flag since we rely on it being true when
 847       // we exit this method to abort the pause and restart concurrent
 848       // marking.
 849       reset_marking_state(true /* clear_overflow */);
 850 
 851       if (G1Log::fine()) {
 852         gclog_or_tty->gclog_stamp();
 853         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 854       }
 855     }
 856   }
 857 
  858   // after this, each task should reset its own data structures and
  859   // then go into the second barrier
 860 }
 861 
 862 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 863   SuspendibleThreadSetLeaver sts_leave(concurrent());
 864   _second_overflow_barrier_sync.enter();
 865 
 866   // at this point everything should be re-initialized and ready to go
 867 }
 868 
 869 class CMConcurrentMarkingTask: public AbstractGangTask {
 870 private:
 871   ConcurrentMark*       _cm;
 872   ConcurrentMarkThread* _cmt;
 873 
 874 public:
 875   void work(uint worker_id) {
 876     assert(Thread::current()->is_ConcurrentGC_thread(),
 877            "this should only be done by a conc GC thread");
 878     ResourceMark rm;
 879 
 880     double start_vtime = os::elapsedVTime();
 881 
 882     {
 883       SuspendibleThreadSetJoiner sts_join;
 884 
 885       assert(worker_id < _cm->active_tasks(), "invariant");
 886       CMTask* the_task = _cm->task(worker_id);
 887       the_task->record_start_time();
 888       if (!_cm->has_aborted()) {
 889         do {
 890           double start_vtime_sec = os::elapsedVTime();
 891           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 892 
 893           the_task->do_marking_step(mark_step_duration_ms,
 894                                     true  /* do_termination */,
 895                                     false /* is_serial*/);
 896 
 897           double end_vtime_sec = os::elapsedVTime();
 898           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
 899           _cm->clear_has_overflown();
 900 
 901           _cm->do_yield_check(worker_id);
 902 
 903           jlong sleep_time_ms;
 904           if (!_cm->has_aborted() && the_task->has_aborted()) {
 905             sleep_time_ms =
 906               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
 907             {
 908               SuspendibleThreadSetLeaver sts_leave;
 909               os::sleep(Thread::current(), sleep_time_ms, false);
 910             }
 911           }
 912         } while (!_cm->has_aborted() && the_task->has_aborted());
 913       }
 914       the_task->record_end_time();
 915       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
 916     }
 917 
 918     double end_vtime = os::elapsedVTime();
 919     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 920   }
 921 
 922   CMConcurrentMarkingTask(ConcurrentMark* cm,
 923                           ConcurrentMarkThread* cmt) :
 924       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
 925 
 926   ~CMConcurrentMarkingTask() { }
 927 };
 928 
 929 // Calculates the number of active workers for a concurrent
 930 // phase.
 931 uint ConcurrentMark::calc_parallel_marking_threads() {
 932   uint n_conc_workers = 0;
 933   if (!UseDynamicNumberOfGCThreads ||
 934       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
 935        !ForceDynamicNumberOfGCThreads)) {
 936     n_conc_workers = max_parallel_marking_threads();
 937   } else {
 938     n_conc_workers =
 939       AdaptiveSizePolicy::calc_default_active_workers(
 940                                    max_parallel_marking_threads(),
 941                                    1, /* Minimum workers */
 942                                    parallel_marking_threads(),
 943                                    Threads::number_of_non_daemon_threads());
 944     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
 945     // that scaling has already gone into "_max_parallel_marking_threads".
 946   }
 947   assert(n_conc_workers > 0, "Always need at least 1");
 948   return n_conc_workers;
 949 }
 950 
 951 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
 952   // Currently, only survivors can be root regions.
 953   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
 954   G1RootRegionScanClosure cl(_g1h, this, worker_id);
 955 
 956   const uintx interval = PrefetchScanIntervalInBytes;
 957   HeapWord* curr = hr->bottom();
 958   const HeapWord* end = hr->top();
 959   while (curr < end) {
 960     Prefetch::read(curr, interval);
 961     oop obj = oop(curr);
 962     int size = obj->oop_iterate_size(&cl);
 963     assert(size == obj->size(), "sanity");
 964     curr += size;
 965   }
 966 }
 967 
 968 class CMRootRegionScanTask : public AbstractGangTask {
 969 private:
 970   ConcurrentMark* _cm;
 971 
 972 public:
 973   CMRootRegionScanTask(ConcurrentMark* cm) :
 974     AbstractGangTask("Root Region Scan"), _cm(cm) { }
 975 
 976   void work(uint worker_id) {
 977     assert(Thread::current()->is_ConcurrentGC_thread(),
 978            "this should only be done by a conc GC thread");
 979 
 980     CMRootRegions* root_regions = _cm->root_regions();
 981     HeapRegion* hr = root_regions->claim_next();
 982     while (hr != NULL) {
 983       _cm->scanRootRegion(hr, worker_id);
 984       hr = root_regions->claim_next();
 985     }
 986   }
 987 };
 988 
 989 void ConcurrentMark::scanRootRegions() {
 990   double scan_start = os::elapsedTime();
 991 
 992   // Start of concurrent marking.
 993   ClassLoaderDataGraph::clear_claimed_marks();
 994 
 995   // scan_in_progress() will have been set to true only if there was
 996   // at least one root region to scan. So, if it's false, we
 997   // should not attempt to do any further work.
 998   if (root_regions()->scan_in_progress()) {
 999     if (G1Log::fine()) {
1000       gclog_or_tty->gclog_stamp();
1001       gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
1002     }
1003 
1004     _parallel_marking_threads = calc_parallel_marking_threads();
1005     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1006            "Maximum number of marking threads exceeded");
1007     uint active_workers = MAX2(1U, parallel_marking_threads());
1008 
1009     CMRootRegionScanTask task(this);
1010     _parallel_workers->set_active_workers(active_workers);
1011     _parallel_workers->run_task(&task);
1012 
1013     if (G1Log::fine()) {
1014       gclog_or_tty->gclog_stamp();
1015       gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf secs]", os::elapsedTime() - scan_start);
1016     }
1017 
1018     // It's possible that has_aborted() is true here without actually
1019     // aborting the survivor scan earlier. This is OK as it's
1020     // mainly used for sanity checking.
1021     root_regions()->scan_finished();
1022   }
1023 }
1024 
1025 void ConcurrentMark::markFromRoots() {
1026   // we might be tempted to assert that:
1027   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1028   //        "inconsistent argument?");
1029   // However that wouldn't be right, because it's possible that
1030   // a safepoint is indeed in progress as a younger generation
1031   // stop-the-world GC happens even as we mark in this generation.
1032 
1033   _restart_for_overflow = false;
1034 
1035   // _g1h has _n_par_threads
1036   _parallel_marking_threads = calc_parallel_marking_threads();
1037   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1038     "Maximum number of marking threads exceeded");
1039 
1040   uint active_workers = MAX2(1U, parallel_marking_threads());
1041   assert(active_workers > 0, "Should have been set");
1042 
1043   // Parallel task terminator is set in "set_concurrency_and_phase()"
1044   set_concurrency_and_phase(active_workers, true /* concurrent */);
1045 
1046   CMConcurrentMarkingTask markingTask(this, cmThread());
1047   _parallel_workers->set_active_workers(active_workers);
1048   _parallel_workers->run_task(&markingTask);
1049   print_stats();
1050 }
1051 
1052 // Helper class to get rid of some boilerplate code.
1053 class G1CMTraceTime : public StackObj {
1054   GCTraceTimeImpl _gc_trace_time;
1055   static bool doit_and_prepend(bool doit) {
1056     if (doit) {
1057       gclog_or_tty->put(' ');
1058     }
1059     return doit;
1060   }
1061 
1062  public:
1063   G1CMTraceTime(const char* title, bool doit)
1064     : _gc_trace_time(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm()) {
1065   }
1066 };
1067 
1068 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1069   // world is stopped at this checkpoint
1070   assert(SafepointSynchronize::is_at_safepoint(),
1071          "world should be stopped");
1072 
1073   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1074 
1075   // If a full collection has happened, we shouldn't do this.
1076   if (has_aborted()) {
1077     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1078     return;
1079   }
1080 
1081   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1082 
1083   if (VerifyDuringGC) {
1084     HandleMark hm;  // handle scope
1085     g1h->prepare_for_verify();
1086     Universe::verify(VerifyOption_G1UsePrevMarking,
1087                      " VerifyDuringGC:(before)");
1088   }
1089   g1h->check_bitmaps("Remark Start");
1090 
1091   G1CollectorPolicy* g1p = g1h->g1_policy();
1092   g1p->record_concurrent_mark_remark_start();
1093 
1094   double start = os::elapsedTime();
1095 
1096   checkpointRootsFinalWork();
1097 
1098   double mark_work_end = os::elapsedTime();
1099 
1100   weakRefsWork(clear_all_soft_refs);
1101 
1102   if (has_overflown()) {
1103     // Oops.  We overflowed.  Restart concurrent marking.
1104     _restart_for_overflow = true;
1105     if (G1TraceMarkStackOverflow) {
1106       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1107     }
1108 
1109     // Verify the heap w.r.t. the previous marking bitmap.
1110     if (VerifyDuringGC) {
1111       HandleMark hm;  // handle scope
1112       g1h->prepare_for_verify();
1113       Universe::verify(VerifyOption_G1UsePrevMarking,
1114                        " VerifyDuringGC:(overflow)");
1115     }
1116 
1117     // Clear the marking state because we will be restarting
1118     // marking due to overflowing the global mark stack.
1119     reset_marking_state();
1120   } else {
1121     {
1122       G1CMTraceTime trace("GC aggregate-data", G1Log::finer());
1123 
1124       // Aggregate the per-task counting data that we have accumulated
1125       // while marking.
1126       aggregate_count_data();
1127     }
1128 
1129     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1130     // We're done with marking.
 1131     // This is the end of the marking cycle; we expect all
 1132     // threads to have SATB queues with active set to true.
1133     satb_mq_set.set_active_all_threads(false, /* new active value */
1134                                        true /* expected_active */);
1135 
1136     if (VerifyDuringGC) {
1137       HandleMark hm;  // handle scope
1138       g1h->prepare_for_verify();
1139       Universe::verify(VerifyOption_G1UseNextMarking,
1140                        " VerifyDuringGC:(after)");
1141     }
1142     g1h->check_bitmaps("Remark End");
1143     assert(!restart_for_overflow(), "sanity");
1144     // Completely reset the marking state since marking completed
1145     set_non_marking_state();
1146   }
1147 
1148   // Expand the marking stack, if we have to and if we can.
1149   if (_markStack.should_expand()) {
1150     _markStack.expand();
1151   }
1152 
1153   // Statistics
1154   double now = os::elapsedTime();
1155   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1156   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1157   _remark_times.add((now - start) * 1000.0);
1158 
1159   g1p->record_concurrent_mark_remark_end();
1160 
1161   G1CMIsAliveClosure is_alive(g1h);
1162   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1163 }
1164 
1165 // Base class of the closures that finalize and verify the
1166 // liveness counting data.
1167 class CMCountDataClosureBase: public HeapRegionClosure {
1168 protected:
1169   G1CollectedHeap* _g1h;
1170   ConcurrentMark* _cm;
1171   CardTableModRefBS* _ct_bs;
1172 
1173   BitMap* _region_bm;
1174   BitMap* _card_bm;
1175 
 1176   // Takes a region that's not empty (i.e., it has at least one
 1177   // live object in it) and sets its corresponding bit in the region
 1178   // bitmap to 1.
1179   void set_bit_for_region(HeapRegion* hr) {
1180     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1181     _region_bm->par_at_put(index, true);
1182   }
1183 
1184 public:
1185   CMCountDataClosureBase(G1CollectedHeap* g1h,
1186                          BitMap* region_bm, BitMap* card_bm):
1187     _g1h(g1h), _cm(g1h->concurrent_mark()),
1188     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
1189     _region_bm(region_bm), _card_bm(card_bm) { }
1190 };
1191 
 1192 // Closure that calculates the number of live (marked) bytes per region.
 1193 // Used for verification purposes during the cleanup pause.
1194 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1195   CMBitMapRO* _bm;
1196   size_t _region_marked_bytes;
1197 
1198 public:
1199   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1200                          BitMap* region_bm, BitMap* card_bm) :
1201     CMCountDataClosureBase(g1h, region_bm, card_bm),
1202     _bm(bm), _region_marked_bytes(0) { }
1203 
1204   bool doHeapRegion(HeapRegion* hr) {
1205     HeapWord* ntams = hr->next_top_at_mark_start();
1206     HeapWord* start = hr->bottom();
1207 
1208     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1209            "Preconditions not met - "
1210            "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
1211            p2i(start), p2i(ntams), p2i(hr->end()));
1212 
1213     // Find the first marked object at or after "start".
1214     start = _bm->getNextMarkedWordAddress(start, ntams);
1215 
1216     size_t marked_bytes = 0;
1217 
1218     while (start < ntams) {
1219       oop obj = oop(start);
1220       int obj_sz = obj->size();
1221       HeapWord* obj_end = start + obj_sz;
1222 
1223       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1224       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
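            // start_idx / end_idx are indices into the liveness card bitmap
            // (relative to the bottom of the heap, see _heap_bottom_card_num)
            // for the card containing the first word of the object and the card
            // containing obj_end; end_idx is made exclusive below if needed.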
1225 
 1226       // Note: if we're looking at the last region in the heap, obj_end
 1227       // could actually be just beyond the end of the heap; end_idx
1228       // will then correspond to a (non-existent) card that is also
1229       // just beyond the heap.
1230       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1231         // end of object is not card aligned - increment to cover
1232         // all the cards spanned by the object
1233         end_idx += 1;
1234       }
1235 
1236       // Set the bits in the card BM for the cards spanned by this object.
1237       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1238 
1239       // Add the size of this object to the number of marked bytes.
1240       marked_bytes += (size_t)obj_sz * HeapWordSize;
1241 
1242       // This will happen if we are handling a humongous object that spans
1243       // several heap regions.
1244       if (obj_end > hr->end()) {
1245         break;
1246       }
1247       // Find the next marked object after this one.
1248       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1249     }
1250 
1251     // Mark the allocated-since-marking portion...
1252     HeapWord* top = hr->top();
1253     if (ntams < top) {
1254       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1255       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1256 
 1257       // Note: if we're looking at the last region in the heap, top
 1258       // could actually be just beyond the end of the heap; end_idx
1259       // will then correspond to a (non-existent) card that is also
1260       // just beyond the heap.
1261       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1262         // end of object is not card aligned - increment to cover
1263         // all the cards spanned by the object
1264         end_idx += 1;
1265       }
1266       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1267 
1268       // This definitely means the region has live objects.
1269       set_bit_for_region(hr);
1270     }
1271 
1272     // Update the live region bitmap.
1273     if (marked_bytes > 0) {
1274       set_bit_for_region(hr);
1275     }
1276 
1277     // Set the marked bytes for the current region so that
1278     // it can be queried by a calling verification routine
1279     _region_marked_bytes = marked_bytes;
1280 
1281     return false;
1282   }
1283 
1284   size_t region_marked_bytes() const { return _region_marked_bytes; }
1285 };
1286 
1287 // Heap region closure used for verifying the counting data
1288 // that was accumulated concurrently and aggregated during
1289 // the remark pause. This closure is applied to the heap
1290 // regions during the STW cleanup pause.
1291 
1292 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1293   G1CollectedHeap* _g1h;
1294   ConcurrentMark* _cm;
1295   CalcLiveObjectsClosure _calc_cl;
1296   BitMap* _region_bm;   // Region BM to be verified
1297   BitMap* _card_bm;     // Card BM to be verified
1298 
1299   BitMap* _exp_region_bm; // Expected Region BM values
1300   BitMap* _exp_card_bm;   // Expected card BM values
1301 
1302   int _failures;
1303 
1304 public:
1305   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1306                                 BitMap* region_bm,
1307                                 BitMap* card_bm,
1308                                 BitMap* exp_region_bm,
1309                                 BitMap* exp_card_bm) :
1310     _g1h(g1h), _cm(g1h->concurrent_mark()),
1311     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1312     _region_bm(region_bm), _card_bm(card_bm),
1313     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1314     _failures(0) { }
1315 
1316   int failures() const { return _failures; }
1317 
1318   bool doHeapRegion(HeapRegion* hr) {
1319     int failures = 0;
1320 
1321     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1322     // this region and set the corresponding bits in the expected region
1323     // and card bitmaps.
1324     bool res = _calc_cl.doHeapRegion(hr);
1325     assert(res == false, "should be continuing");
1326 
1327     // Verify the marked bytes for this region.
1328     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1329     size_t act_marked_bytes = hr->next_marked_bytes();
1330 
1331     if (exp_marked_bytes > act_marked_bytes) {
1332       if (hr->is_starts_humongous()) {
1333         // For start_humongous regions, the size of the whole object will be
1334         // in exp_marked_bytes.
1335         HeapRegion* region = hr;
1336         int num_regions;
1337         for (num_regions = 0; region != NULL; num_regions++) {
1338           region = _g1h->next_region_in_humongous(region);
1339         }
1340         if ((num_regions-1) * HeapRegion::GrainBytes >= exp_marked_bytes) {
1341           failures += 1;
1342         } else if (num_regions * HeapRegion::GrainBytes < exp_marked_bytes) {
1343           failures += 1;
1344         }
1345       } else {
1346         // We're not OK if expected marked bytes > actual marked bytes. It means
1347         // we have missed accounting some objects during the actual marking.
1348         failures += 1;
1349       }
1350     }
1351 
1352     // Verify the bit, for this region, in the actual and expected
1353     // (which was just calculated) region bit maps.
1354     // We're not OK if the bit in the calculated expected region
1355     // bitmap is set and the bit in the actual region bitmap is not.
1356     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1357 
1358     bool expected = _exp_region_bm->at(index);
1359     bool actual = _region_bm->at(index);
1360     if (expected && !actual) {
1361       failures += 1;
1362     }
1363 
1364     // Verify that the card bit maps for the cards spanned by the current
1365     // region match. We have an error if we have a set bit in the expected
1366     // bit map and the corresponding bit in the actual bitmap is not set.
1367 
1368     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1369     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1370 
1371     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1372       expected = _exp_card_bm->at(i);
1373       actual = _card_bm->at(i);
1374 
1375       if (expected && !actual) {
1376         failures += 1;
1377       }
1378     }
1379 
1380     _failures += failures;
1381 
1382     // We could stop iteration over the heap when we
1383     // find the first violating region by returning true.
1384     return false;
1385   }
1386 };
1387 
1388 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1389 protected:
1390   G1CollectedHeap* _g1h;
1391   ConcurrentMark* _cm;
1392   BitMap* _actual_region_bm;
1393   BitMap* _actual_card_bm;
1394 
1395   uint    _n_workers;
1396 
1397   BitMap* _expected_region_bm;
1398   BitMap* _expected_card_bm;
1399 
1400   int  _failures;
1401 
1402   HeapRegionClaimer _hrclaimer;
1403 
1404 public:
1405   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1406                             BitMap* region_bm, BitMap* card_bm,
1407                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1408     : AbstractGangTask("G1 verify final counting"),
1409       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1410       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1411       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1412       _failures(0),
1413       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1414     assert(VerifyDuringGC, "don't call this otherwise");
1415     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1416     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1417   }
1418 
1419   void work(uint worker_id) {
1420     assert(worker_id < _n_workers, "invariant");
1421 
1422     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1423                                             _actual_region_bm, _actual_card_bm,
1424                                             _expected_region_bm,
1425                                             _expected_card_bm);
1426 
1427     _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
1428 
1429     Atomic::add(verify_cl.failures(), &_failures);
1430   }
1431 
1432   int failures() const { return _failures; }
1433 };
1434 
1435 // Closure that finalizes the liveness counting data.
1436 // Used during the cleanup pause.
1437 // Sets the bits corresponding to the interval [NTAMS, top]
1438 // (which contains the implicitly live objects) in the
1439 // card liveness bitmap. Also sets the bit for each region,
1440 // containing live data, in the region liveness bitmap.
1441 
1442 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1443  public:
1444   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1445                               BitMap* region_bm,
1446                               BitMap* card_bm) :
1447     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1448 
1449   bool doHeapRegion(HeapRegion* hr) {
1450     HeapWord* ntams = hr->next_top_at_mark_start();
1451     HeapWord* top   = hr->top();
1452 
1453     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1454 
1455     // Mark the allocated-since-marking portion...
1456     if (ntams < top) {
1457       // This definitely means the region has live objects.
1458       set_bit_for_region(hr);
1459 
1460       // Now set the bits in the card bitmap for [ntams, top)
1461       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1462       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1463 
1464       // Note: if we're looking at the last region in the heap, top
1465       // could actually be just beyond the end of the heap; end_idx
1466       // will then correspond to a (non-existent) card that is also
1467       // just beyond the heap.
1468       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1469         // end of object is not card aligned - increment to cover
1470         // all the cards spanned by the object
1471         end_idx += 1;
1472       }
1473 
1474       assert(end_idx <= _card_bm->size(),
1475              "oob: end_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1476              end_idx, _card_bm->size());
1477       assert(start_idx < _card_bm->size(),
1478              "oob: start_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1479              start_idx, _card_bm->size());
1480 
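           // Several workers may update the shared card bitmap concurrently,
           // so the range is set with the parallel-safe variant.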
1481       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1482     }
1483 
1484     // Set the bit for the region if it contains live data
1485     if (hr->next_marked_bytes() > 0) {
1486       set_bit_for_region(hr);
1487     }
1488 
1489     return false;
1490   }
1491 };
1492 
1493 class G1ParFinalCountTask: public AbstractGangTask {
1494 protected:
1495   G1CollectedHeap* _g1h;
1496   ConcurrentMark* _cm;
1497   BitMap* _actual_region_bm;
1498   BitMap* _actual_card_bm;
1499 
1500   uint    _n_workers;
1501   HeapRegionClaimer _hrclaimer;
1502 
1503 public:
1504   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1505     : AbstractGangTask("G1 final counting"),
1506       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1507       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1508       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1509   }
1510 
1511   void work(uint worker_id) {
1512     assert(worker_id < _n_workers, "invariant");
1513 
1514     FinalCountDataUpdateClosure final_update_cl(_g1h,
1515                                                 _actual_region_bm,
1516                                                 _actual_card_bm);
1517 
1518     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1519   }
1520 };
1521 
1522 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1523   G1CollectedHeap* _g1;
1524   size_t _freed_bytes;
1525   FreeRegionList* _local_cleanup_list;
1526   HeapRegionSetCount _old_regions_removed;
1527   HeapRegionSetCount _humongous_regions_removed;
1528   HRRSCleanupTask* _hrrs_cleanup_task;
1529 
1530 public:
1531   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1532                              FreeRegionList* local_cleanup_list,
1533                              HRRSCleanupTask* hrrs_cleanup_task) :
1534     _g1(g1),
1535     _freed_bytes(0),
1536     _local_cleanup_list(local_cleanup_list),
1537     _old_regions_removed(),
1538     _humongous_regions_removed(),
1539     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1540 
1541   size_t freed_bytes() { return _freed_bytes; }
1542   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1543   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1544 
1545   bool doHeapRegion(HeapRegion *hr) {
1546     if (hr->is_archive()) {
1547       return false;
1548     }
1549     // We use a claim value of zero here because all regions
1550     // were claimed with value 1 in the FinalCount task.
1551     _g1->reset_gc_time_stamps(hr);
1552     hr->note_end_of_marking();
1553 
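         // Free the region right away if, after marking, it contains no live
         // data at all. Young regions are left alone; they are reclaimed by
         // evacuation pauses.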
1554     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1555       _freed_bytes += hr->used();
1556       hr->set_containing_set(NULL);
1557       if (hr->is_humongous()) {
1558         _humongous_regions_removed.increment(1u, hr->capacity());
1559         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1560       } else {
1561         _old_regions_removed.increment(1u, hr->capacity());
1562         _g1->free_region(hr, _local_cleanup_list, true);
1563       }
1564     } else {
1565       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1566     }
1567 
1568     return false;
1569   }
1570 };
1571 
1572 class G1ParNoteEndTask: public AbstractGangTask {
1573   friend class G1NoteEndOfConcMarkClosure;
1574 
1575 protected:
1576   G1CollectedHeap* _g1h;
1577   FreeRegionList* _cleanup_list;
1578   HeapRegionClaimer _hrclaimer;
1579 
1580 public:
1581   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1582       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1583   }
1584 
1585   void work(uint worker_id) {
1586     FreeRegionList local_cleanup_list("Local Cleanup List");
1587     HRRSCleanupTask hrrs_cleanup_task;
1588     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1589                                            &hrrs_cleanup_task);
1590     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1591     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1592 
1593     // Now update the lists
1594     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1595     {
1596       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1597       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1598 
1599       // If we iterate over the global cleanup list at the end of
1600       // cleanup to do this printing we will not guarantee to only
1601       // generate output for the newly-reclaimed regions (the list
1602       // might not be empty at the beginning of cleanup; we might
1603       // still be working on its previous contents). So we do the
1604       // printing here, before we append the new regions to the global
1605       // cleanup list.
1606 
1607       G1HRPrinter* hr_printer = _g1h->hr_printer();
1608       if (hr_printer->is_active()) {
1609         FreeRegionListIterator iter(&local_cleanup_list);
1610         while (iter.more_available()) {
1611           HeapRegion* hr = iter.get_next();
1612           hr_printer->cleanup(hr);
1613         }
1614       }
1615 
1616       _cleanup_list->add_ordered(&local_cleanup_list);
1617       assert(local_cleanup_list.is_empty(), "post-condition");
1618 
1619       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1620     }
1621   }
1622 };
1623 
1624 class G1ParScrubRemSetTask: public AbstractGangTask {
1625 protected:
1626   G1RemSet* _g1rs;
1627   BitMap* _region_bm;
1628   BitMap* _card_bm;
1629   HeapRegionClaimer _hrclaimer;
1630 
1631 public:
1632   G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
1633       AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) {
1634   }
1635 
1636   void work(uint worker_id) {
1637     _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
1638   }
1639 
1640 };
1641 
1642 void ConcurrentMark::cleanup() {
1643   // world is stopped at this checkpoint
1644   assert(SafepointSynchronize::is_at_safepoint(),
1645          "world should be stopped");
1646   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1647 
1648   // If a full collection has happened, we shouldn't do this.
1649   if (has_aborted()) {
1650     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1651     return;
1652   }
1653 
1654   g1h->verify_region_sets_optional();
1655 
1656   if (VerifyDuringGC) {
1657     HandleMark hm;  // handle scope
1658     g1h->prepare_for_verify();
1659     Universe::verify(VerifyOption_G1UsePrevMarking,
1660                      " VerifyDuringGC:(before)");
1661   }
1662   g1h->check_bitmaps("Cleanup Start");
1663 
1664   G1CollectorPolicy* g1p = g1h->g1_policy();
1665   g1p->record_concurrent_mark_cleanup_start();
1666 
1667   double start = os::elapsedTime();
1668 
1669   HeapRegionRemSet::reset_for_cleanup_tasks();
1670 
1671   // Do counting once more with the world stopped for good measure.
1672   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1673 
1674   g1h->workers()->run_task(&g1_par_count_task);
1675 
1676   if (VerifyDuringGC) {
1677     // Verify that the counting data accumulated during marking matches
1678     // that calculated by walking the marking bitmap.
1679 
1680     // Bitmaps to hold expected values
1681     BitMap expected_region_bm(_region_bm.size(), true);
1682     BitMap expected_card_bm(_card_bm.size(), true);
1683 
1684     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1685                                                  &_region_bm,
1686                                                  &_card_bm,
1687                                                  &expected_region_bm,
1688                                                  &expected_card_bm);
1689 
1690     g1h->workers()->run_task(&g1_par_verify_task);
1691 
1692     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1693   }
1694 
1695   size_t start_used_bytes = g1h->used();
1696   g1h->collector_state()->set_mark_in_progress(false);
1697 
1698   double count_end = os::elapsedTime();
1699   double this_final_counting_time = (count_end - start);
1700   _total_counting_time += this_final_counting_time;
1701 
1702   if (G1PrintRegionLivenessInfo) {
1703     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1704     _g1h->heap_region_iterate(&cl);
1705   }
1706 
1707   // Install the just-completed "next" mark bitmap as "prev".
1708   swapMarkBitMaps();
1709 
1710   g1h->reset_gc_time_stamp();
1711 
1712   uint n_workers = _g1h->workers()->active_workers();
1713 
1714   // Note end of marking in all heap regions.
1715   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1716   g1h->workers()->run_task(&g1_par_note_end_task);
1717   g1h->check_gc_time_stamps();
1718 
1719   if (!cleanup_list_is_empty()) {
1720     // The cleanup list is not empty, so we'll have to process it
1721     // concurrently. Notify anyone else that might be wanting free
1722     // regions that there will be more free regions coming soon.
1723     g1h->set_free_regions_coming();
1724   }
1725 
1726   // Scrub the rem sets before the record_concurrent_mark_cleanup_end()
1727   // call below, since scrubbing affects the metric by which we sort the heap regions.
1728   if (G1ScrubRemSets) {
1729     double rs_scrub_start = os::elapsedTime();
1730     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
1731     g1h->workers()->run_task(&g1_par_scrub_rs_task);
1732 
1733     double rs_scrub_end = os::elapsedTime();
1734     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1735     _total_rs_scrub_time += this_rs_scrub_time;
1736   }
1737 
1738   // this will also free any regions totally full of garbage objects,
1739   // and sort the regions.
1740   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1741 
1742   // Statistics.
1743   double end = os::elapsedTime();
1744   _cleanup_times.add((end - start) * 1000.0);
1745 
1746   if (G1Log::fine()) {
1747     g1h->g1_policy()->print_heap_transition(start_used_bytes);
1748   }
1749 
1750   // Clean up will have freed any regions completely full of garbage.
1751   // Update the soft reference policy with the new heap occupancy.
1752   Universe::update_heap_info_at_gc();
1753 
1754   if (VerifyDuringGC) {
1755     HandleMark hm;  // handle scope
1756     g1h->prepare_for_verify();
1757     Universe::verify(VerifyOption_G1UsePrevMarking,
1758                      " VerifyDuringGC:(after)");
1759   }
1760 
1761   g1h->check_bitmaps("Cleanup End");
1762 
1763   g1h->verify_region_sets_optional();
1764 
1765   // We need to make this be a "collection" so any collection pause that
1766   // races with it goes around and waits for completeCleanup to finish.
1767   g1h->increment_total_collections();
1768 
1769   // Clean out dead classes and update Metaspace sizes.
1770   if (ClassUnloadingWithConcurrentMark) {
1771     ClassLoaderDataGraph::purge();
1772   }
1773   MetaspaceGC::compute_new_size();
1774 
1775   // We reclaimed old regions so we should calculate the sizes to make
1776   // sure we update the old gen/space data.
1777   g1h->g1mm()->update_sizes();
1778   g1h->allocation_context_stats().update_after_mark();
1779 
1780   g1h->trace_heap_after_concurrent_cycle();
1781 }
1782 
1783 void ConcurrentMark::completeCleanup() {
1784   if (has_aborted()) return;
1785 
1786   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1787 
1788   _cleanup_list.verify_optional();
1789   FreeRegionList tmp_free_list("Tmp Free List");
1790 
1791   if (G1ConcRegionFreeingVerbose) {
1792     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1793                            "cleanup list has %u entries",
1794                            _cleanup_list.length());
1795   }
1796 
1797   // No one else should be accessing the _cleanup_list at this point,
1798   // so it is not necessary to take any locks
1799   while (!_cleanup_list.is_empty()) {
1800     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1801     assert(hr != NULL, "Got NULL from a non-empty list");
1802     hr->par_clear();
1803     tmp_free_list.add_ordered(hr);
1804 
1805     // Instead of adding one region at a time to the secondary_free_list,
1806     // we accumulate them in the local list and move them a few at a
1807     // time. This also cuts down on the number of notify_all() calls
1808     // we do during this process. We'll also append the local list when
1809     // _cleanup_list is empty (which means we just removed the last
1810     // region from the _cleanup_list).
1811     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1812         _cleanup_list.is_empty()) {
1813       if (G1ConcRegionFreeingVerbose) {
1814         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1815                                "appending %u entries to the secondary_free_list, "
1816                                "cleanup list still has %u entries",
1817                                tmp_free_list.length(),
1818                                _cleanup_list.length());
1819       }
1820 
1821       {
1822         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1823         g1h->secondary_free_list_add(&tmp_free_list);
1824         SecondaryFreeList_lock->notify_all();
1825       }
1826 #ifndef PRODUCT
1827       if (G1StressConcRegionFreeing) {
1828         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1829           os::sleep(Thread::current(), (jlong) 1, false);
1830         }
1831       }
1832 #endif
1833     }
1834   }
1835   assert(tmp_free_list.is_empty(), "post-condition");
1836 }
1837 
1838 // Supporting Object and Oop closures for reference discovery
1839 // and processing during marking
1840 
1841 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1842   HeapWord* addr = (HeapWord*)obj;
1843   return addr != NULL &&
1844          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1845 }
1846 
1847 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1848 // Uses the CMTask associated with a worker thread (for serial reference
1849 // processing the CMTask for worker 0 is used) to preserve (mark) and
1850 // trace referent objects.
1851 //
1852 // Using the CMTask and embedded local queues avoids having the worker
1853 // threads operating on the global mark stack. This reduces the risk
1854 // of overflowing the stack - which we would rather avoid at this late
1855 // stage. Also, using the tasks' local queues removes the potential
1856 // of the workers interfering with each other that could occur if
1857 // operating on the global stack.
1858 
1859 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1860   ConcurrentMark* _cm;
1861   CMTask*         _task;
1862   int             _ref_counter_limit;
1863   int             _ref_counter;
1864   bool            _is_serial;
1865  public:
1866   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
1867     _cm(cm), _task(task), _is_serial(is_serial),
1868     _ref_counter_limit(G1RefProcDrainInterval) {
1869     assert(_ref_counter_limit > 0, "sanity");
1870     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1871     _ref_counter = _ref_counter_limit;
1872   }
1873 
1874   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1875   virtual void do_oop(      oop* p) { do_oop_work(p); }
1876 
1877   template <class T> void do_oop_work(T* p) {
1878     if (!_cm->has_overflown()) {
1879       oop obj = oopDesc::load_decode_heap_oop(p);
1880       _task->deal_with_reference(obj);
1881       _ref_counter--;
1882 
1883       if (_ref_counter == 0) {
1884         // We have dealt with _ref_counter_limit references, pushing them
1885         // and objects reachable from them on to the local stack (and
1886         // possibly the global stack). Call CMTask::do_marking_step() to
1887         // process these entries.
1888         //
1889         // We call CMTask::do_marking_step() in a loop, which we'll exit if
1890         // there's nothing more to do (i.e. we're done with the entries that
1891         // were pushed as a result of the CMTask::deal_with_reference() calls
1892         // above) or we overflow.
1893         //
1894         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
1895         // flag while there may still be some work to do. (See the comment at
1896         // the beginning of CMTask::do_marking_step() for those conditions -
1897         // one of which is reaching the specified time target.) It is only
1898         // when CMTask::do_marking_step() returns without setting the
1899         // has_aborted() flag that the marking step has completed.
1900         do {
1901           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1902           _task->do_marking_step(mark_step_duration_ms,
1903                                  false      /* do_termination */,
1904                                  _is_serial);
1905         } while (_task->has_aborted() && !_cm->has_overflown());
1906         _ref_counter = _ref_counter_limit;
1907       }
1908     }
1909   }
1910 };
1911 
1912 // 'Drain' oop closure used by both serial and parallel reference processing.
1913 // Uses the CMTask associated with a given worker thread (for serial
1914 // reference processing the CMTask for worker 0 is used). Calls the
1915 // do_marking_step routine, with an unbelievably large timeout value,
1916 // to drain the marking data structures of the remaining entries
1917 // added by the 'keep alive' oop closure above.
1918 
1919 class G1CMDrainMarkingStackClosure: public VoidClosure {
1920   ConcurrentMark* _cm;
1921   CMTask*         _task;
1922   bool            _is_serial;
1923  public:
1924   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
1925     _cm(cm), _task(task), _is_serial(is_serial) {
1926     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1927   }
1928 
1929   void do_void() {
1930     do {
1931       // We call CMTask::do_marking_step() to completely drain the local
1932       // and global marking stacks of entries pushed by the 'keep alive'
1933       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1934       //
1935       // CMTask::do_marking_step() is called in a loop, which we'll exit
1936       // if there's nothing more to do (i.e. we've completely drained the
1937       // entries that were pushed as a result of applying the 'keep alive'
1938       // closure to the entries on the discovered ref lists) or we overflow
1939       // the global marking stack.
1940       //
1941       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
1942       // flag while there may still be some work to do. (See the comment at
1943       // the beginning of CMTask::do_marking_step() for those conditions -
1944       // one of which is reaching the specified time target.) It is only
1945       // when CMTask::do_marking_step() returns without setting the
1946       // has_aborted() flag that the marking step has completed.
1947 
1948       _task->do_marking_step(1000000000.0 /* something very large */,
1949                              true         /* do_termination */,
1950                              _is_serial);
1951     } while (_task->has_aborted() && !_cm->has_overflown());
1952   }
1953 };
1954 
1955 // Implementation of AbstractRefProcTaskExecutor for parallel
1956 // reference processing at the end of G1 concurrent marking
1957 
1958 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
1959 private:
1960   G1CollectedHeap* _g1h;
1961   ConcurrentMark*  _cm;
1962   WorkGang*        _workers;
1963   uint             _active_workers;
1964 
1965 public:
1966   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1967                           ConcurrentMark* cm,
1968                           WorkGang* workers,
1969                           uint n_workers) :
1970     _g1h(g1h), _cm(cm),
1971     _workers(workers), _active_workers(n_workers) { }
1972 
1973   // Executes the given task using concurrent marking worker threads.
1974   virtual void execute(ProcessTask& task);
1975   virtual void execute(EnqueueTask& task);
1976 };
1977 
1978 class G1CMRefProcTaskProxy: public AbstractGangTask {
1979   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1980   ProcessTask&     _proc_task;
1981   G1CollectedHeap* _g1h;
1982   ConcurrentMark*  _cm;
1983 
1984 public:
1985   G1CMRefProcTaskProxy(ProcessTask& proc_task,
1986                      G1CollectedHeap* g1h,
1987                      ConcurrentMark* cm) :
1988     AbstractGangTask("Process reference objects in parallel"),
1989     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1990     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1991     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1992   }
1993 
1994   virtual void work(uint worker_id) {
1995     ResourceMark rm;
1996     HandleMark hm;
1997     CMTask* task = _cm->task(worker_id);
1998     G1CMIsAliveClosure g1_is_alive(_g1h);
1999     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2000     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2001 
2002     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2003   }
2004 };
2005 
2006 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2007   assert(_workers != NULL, "Need parallel worker threads.");
2008   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2009 
2010   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2011 
2012   // We need to reset the concurrency level before each
2013   // proxy task execution, so that the termination protocol
2014   // and overflow handling in CMTask::do_marking_step() knows
2015   // how many workers to wait for.
2016   _cm->set_concurrency(_active_workers);
2017   _workers->run_task(&proc_task_proxy);
2018 }
2019 
2020 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2021   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2022   EnqueueTask& _enq_task;
2023 
2024 public:
2025   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2026     AbstractGangTask("Enqueue reference objects in parallel"),
2027     _enq_task(enq_task) { }
2028 
2029   virtual void work(uint worker_id) {
2030     _enq_task.work(worker_id);
2031   }
2032 };
2033 
2034 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2035   assert(_workers != NULL, "Need parallel worker threads.");
2036   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2037 
2038   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2039 
2040   // Not strictly necessary but...
2041   //
2042   // We need to reset the concurrency level before each
2043   // proxy task execution, so that the termination protocol
2044   // and overflow handling in CMTask::do_marking_step() knows
2045   // how many workers to wait for.
2046   _cm->set_concurrency(_active_workers);
2047   _workers->run_task(&enq_task_proxy);
2048 }
2049 
2050 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2051   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2052 }
2053 
2054 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2055   if (has_overflown()) {
2056     // Skip processing the discovered references if we have
2057     // overflown the global marking stack. Reference objects
2058     // only get discovered once so it is OK to not
2059     // de-populate the discovered reference lists. We could have,
2060     // but the only benefit would be that, when marking restarts,
2061     // less reference objects are discovered.
2062     return;
2063   }
2064 
2065   ResourceMark rm;
2066   HandleMark   hm;
2067 
2068   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2069 
2070   // Is alive closure.
2071   G1CMIsAliveClosure g1_is_alive(g1h);
2072 
2073   // Inner scope to exclude the cleaning of the string and symbol
2074   // tables from the displayed time.
2075   {
2076     G1CMTraceTime t("GC ref-proc", G1Log::finer());
2077 
2078     ReferenceProcessor* rp = g1h->ref_processor_cm();
2079 
2080     // See the comment in G1CollectedHeap::ref_processing_init()
2081     // about how reference processing currently works in G1.
2082 
2083     // Set the soft reference policy
2084     rp->setup_policy(clear_all_soft_refs);
2085     assert(_markStack.isEmpty(), "mark stack should be empty");
2086 
2087     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2088     // in serial reference processing. Note these closures are also
2089     // used for serially processing (by the current thread) the
2090     // JNI references during parallel reference processing.
2091     //
2092     // These closures do not need to synchronize with the worker
2093     // threads involved in parallel reference processing as these
2094     // instances are executed serially by the current thread (e.g.
2095     // reference processing is not multi-threaded and is thus
2096     // performed by the current thread instead of a gang worker).
2097     //
2098     // The gang tasks involved in parallel reference processing create
2099     // their own instances of these closures, which do their own
2100     // synchronization among themselves.
2101     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2102     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2103 
2104     // We need at least one active thread. If reference processing
2105     // is not multi-threaded we use the current (VMThread) thread,
2106     // otherwise we use the work gang from the G1CollectedHeap and
2107     // we utilize all the worker threads we can.
2108     bool processing_is_mt = rp->processing_is_mt();
2109     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2110     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2111 
2112     // Parallel processing task executor.
2113     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2114                                               g1h->workers(), active_workers);
2115     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2116 
2117     // Set the concurrency level. The phase was already set prior to
2118     // executing the remark task.
2119     set_concurrency(active_workers);
2120 
2121     // Set the degree of MT processing here.  If the discovery was done MT,
2122     // the number of threads involved during discovery could differ from
2123     // the number of active workers.  This is OK as long as the discovered
2124     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2125     rp->set_active_mt_degree(active_workers);
2126 
2127     // Process the weak references.
2128     const ReferenceProcessorStats& stats =
2129         rp->process_discovered_references(&g1_is_alive,
2130                                           &g1_keep_alive,
2131                                           &g1_drain_mark_stack,
2132                                           executor,
2133                                           g1h->gc_timer_cm());
2134     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2135 
2136     // The do_oop work routines of the keep_alive and drain_marking_stack
2137     // oop closures will set the has_overflown flag if we overflow the
2138     // global marking stack.
2139 
2140     assert(_markStack.overflow() || _markStack.isEmpty(),
2141             "mark stack should be empty (unless it overflowed)");
2142 
2143     if (_markStack.overflow()) {
2144       // This should have been done already when we tried to push an
2145       // entry on to the global mark stack. But let's do it again.
2146       set_has_overflown();
2147     }
2148 
2149     assert(rp->num_q() == active_workers, "why not");
2150 
2151     rp->enqueue_discovered_references(executor);
2152 
2153     rp->verify_no_references_recorded();
2154     assert(!rp->discovery_enabled(), "Post condition");
2155   }
2156 
2157   if (has_overflown()) {
2158     // We cannot trust g1_is_alive if the marking stack overflowed
2159     return;
2160   }
2161 
2162   assert(_markStack.isEmpty(), "Marking should have completed");
2163 
2164   // Unload Klasses, String, Symbols, Code Cache, etc.
2165   {
2166     G1CMTraceTime trace("Unloading", G1Log::finer());
2167 
2168     if (ClassUnloadingWithConcurrentMark) {
2169       bool purged_classes;
2170 
2171       {
2172         G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest());
2173         purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2174       }
2175 
2176       {
2177         G1CMTraceTime trace("Parallel Unloading", G1Log::finest());
2178         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2179       }
2180     }
2181 
2182     if (G1StringDedup::is_enabled()) {
2183       G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest());
2184       G1StringDedup::unlink(&g1_is_alive);
2185     }
2186   }
2187 }
2188 
2189 void ConcurrentMark::swapMarkBitMaps() {
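       // The "next" bitmap holds the results of the marking that just
       // completed; it becomes the new "prev" bitmap. The old "prev" bitmap
       // will be cleared and reused as the "next" bitmap in the next cycle.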
2190   CMBitMapRO* temp = _prevMarkBitMap;
2191   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2192   _nextMarkBitMap  = (CMBitMap*)  temp;
2193 }
2194 
2195 // Closure for marking entries in SATB buffers.
2196 class CMSATBBufferClosure : public SATBBufferClosure {
2197 private:
2198   CMTask* _task;
2199   G1CollectedHeap* _g1h;
2200 
2201   // This is very similar to CMTask::deal_with_reference, but with
2202   // more relaxed requirements for the argument, so this must be more
2203   // circumspect about treating the argument as an object.
2204   void do_entry(void* entry) const {
2205     _task->increment_refs_reached();
2206     HeapRegion* hr = _g1h->heap_region_containing(entry);
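         // Only entries below NTAMS need to be marked; objects at or above
         // NTAMS are implicitly live for this marking cycle.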
2207     if (entry < hr->next_top_at_mark_start()) {
2208       // Until we get here, we don't know whether entry refers to a valid
2209       // object; it could instead have been a stale reference.
2210       oop obj = static_cast<oop>(entry);
2211       assert(obj->is_oop(true /* ignore mark word */),
2212              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2213       _task->make_reference_grey(obj, hr);
2214     }
2215   }
2216 
2217 public:
2218   CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2219     : _task(task), _g1h(g1h) { }
2220 
2221   virtual void do_buffer(void** buffer, size_t size) {
2222     for (size_t i = 0; i < size; ++i) {
2223       do_entry(buffer[i]);
2224     }
2225   }
2226 };
2227 
2228 class G1RemarkThreadsClosure : public ThreadClosure {
2229   CMSATBBufferClosure _cm_satb_cl;
2230   G1CMOopClosure _cm_cl;
2231   MarkingCodeBlobClosure _code_cl;
2232   int _thread_parity;
2233 
2234  public:
2235   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2236     _cm_satb_cl(task, g1h),
2237     _cm_cl(g1h, g1h->concurrent_mark(), task),
2238     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2239     _thread_parity(Threads::thread_claim_parity()) {}
2240 
2241   void do_thread(Thread* thread) {
2242     if (thread->is_Java_thread()) {
2243       if (thread->claim_oops_do(true, _thread_parity)) {
2244         JavaThread* jt = (JavaThread*)thread;
2245 
2246         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking.
2247         // However, oops reachable from nmethods have very complex lifecycles:
2248         // * Alive if on the stack of an executing method
2249         // * Weakly reachable otherwise
2250         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2251         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2252         jt->nmethods_do(&_code_cl);
2253 
2254         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2255       }
2256     } else if (thread->is_VM_thread()) {
2257       if (thread->claim_oops_do(true, _thread_parity)) {
2258         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2259       }
2260     }
2261   }
2262 };
2263 
2264 class CMRemarkTask: public AbstractGangTask {
2265 private:
2266   ConcurrentMark* _cm;
2267 public:
2268   void work(uint worker_id) {
2269     // Since all available tasks are actually started, we should
2270     // only proceed if we're supposed to be active.
2271     if (worker_id < _cm->active_tasks()) {
2272       CMTask* task = _cm->task(worker_id);
2273       task->record_start_time();
2274       {
2275         ResourceMark rm;
2276         HandleMark hm;
2277 
2278         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2279         Threads::threads_do(&threads_f);
2280       }
2281 
2282       do {
2283         task->do_marking_step(1000000000.0 /* something very large */,
2284                               true         /* do_termination       */,
2285                               false        /* is_serial            */);
2286       } while (task->has_aborted() && !_cm->has_overflown());
2287       // If we overflow, then we do not want to restart. We instead
2288       // want to abort remark and do concurrent marking again.
2289       task->record_end_time();
2290     }
2291   }
2292 
2293   CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2294     AbstractGangTask("Par Remark"), _cm(cm) {
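         // Re-initialize the parallel task terminator for the number of
         // workers that will actually participate in the remark.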
2295     _cm->terminator()->reset_for_reuse(active_workers);
2296   }
2297 };
2298 
2299 void ConcurrentMark::checkpointRootsFinalWork() {
2300   ResourceMark rm;
2301   HandleMark   hm;
2302   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2303 
2304   G1CMTraceTime trace("Finalize Marking", G1Log::finer());
2305 
2306   g1h->ensure_parsability(false);
2307 
2308   // this is remark, so we'll use up all active threads
2309   uint active_workers = g1h->workers()->active_workers();
2310   set_concurrency_and_phase(active_workers, false /* concurrent */);
2311   // Leave _parallel_marking_threads at its
2312   // value originally calculated in the ConcurrentMark
2313   // constructor and pass the values of the active workers
2314   // through the gang in the task.
2315 
2316   {
2317     StrongRootsScope srs(active_workers);
2318 
2319     CMRemarkTask remarkTask(this, active_workers);
2320     // We will start all available threads, even if we decide that the
2321     // active_workers will be fewer. The extra ones will just bail out
2322     // immediately.
2323     g1h->workers()->run_task(&remarkTask);
2324   }
2325 
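       // Unless marking overflowed (in which case it will be restarted),
       // remark must have drained all completed SATB buffers.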
2326   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2327   guarantee(has_overflown() ||
2328             satb_mq_set.completed_buffers_num() == 0,
2329             "Invariant: has_overflown = %s, num buffers = %d",
2330             BOOL_TO_STR(has_overflown()),
2331             satb_mq_set.completed_buffers_num());
2332 
2333   print_stats();
2334 }
2335 
2336 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2337   // Note we are overriding the read-only view of the prev map here, via
2338   // the cast.
2339   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2340 }
2341 
2342 HeapRegion*
2343 ConcurrentMark::claim_region(uint worker_id) {
2344   // "checkpoint" the finger
2345   HeapWord* finger = _finger;
2346 
2347   // _heap_end will not change underneath our feet; it only changes at
2348   // yield points.
2349   while (finger < _heap_end) {
2350     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2351 
2352     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2353 
2354     // heap_region_containing() above may return NULL as we always claim
2355     // regions up to the end of the heap. In this case, just jump to the next region.
2356     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2357 
2358     // Is the gap between reading the finger and doing the CAS too long?
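         // Try to atomically advance the global finger from 'finger' to 'end'.
         // If the CAS returns the value we read, this worker has claimed the
         // range [finger, end), i.e. curr_region (if it exists).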
2359     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2360     if (res == finger && curr_region != NULL) {
2361       // we succeeded
2362       HeapWord*   bottom        = curr_region->bottom();
2363       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2364 
2365       // Notice that _finger == end cannot be guaranteed here, since
2366       // someone else might have moved the finger even further.
2367       assert(_finger >= end, "the finger should have moved forward");
2368 
2369       if (limit > bottom) {
2370         return curr_region;
2371       } else {
2372         assert(limit == bottom,
2373                "the region limit should be at bottom");
2374         // we return NULL and the caller should try calling
2375         // claim_region() again.
2376         return NULL;
2377       }
2378     } else {
2379       assert(_finger > finger, "the finger should have moved forward");
2380       // read it again
2381       finger = _finger;
2382     }
2383   }
2384 
2385   return NULL;
2386 }
2387 
2388 #ifndef PRODUCT
2389 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
2390 private:
2391   G1CollectedHeap* _g1h;
2392   const char* _phase;
2393   int _info;
2394 
2395 public:
2396   VerifyNoCSetOops(const char* phase, int info = -1) :
2397     _g1h(G1CollectedHeap::heap()),
2398     _phase(phase),
2399     _info(info)
2400   { }
2401 
2402   void operator()(oop obj) const {
2403     guarantee(obj->is_oop(),
2404               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2405               p2i(obj), _phase, _info);
2406     guarantee(!_g1h->obj_in_cs(obj),
2407               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2408               p2i(obj), _phase, _info);
2409   }
2410 };
2411 
2412 void ConcurrentMark::verify_no_cset_oops() {
2413   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2414   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2415     return;
2416   }
2417 
2418   // Verify entries on the global mark stack
2419   _markStack.iterate(VerifyNoCSetOops("Stack"));
2420 
2421   // Verify entries on the task queues
2422   for (uint i = 0; i < _max_worker_id; ++i) {
2423     CMTaskQueue* queue = _task_queues->queue(i);
2424     queue->iterate(VerifyNoCSetOops("Queue", i));
2425   }
2426 
2427   // Verify the global finger
2428   HeapWord* global_finger = finger();
2429   if (global_finger != NULL && global_finger < _heap_end) {
2430     // Since we always iterate over all regions, we might get a NULL HeapRegion
2431     // here.
2432     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
2433     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2434               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
2435               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
2436   }
2437 
2438   // Verify the task fingers
2439   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2440   for (uint i = 0; i < parallel_marking_threads(); ++i) {
2441     CMTask* task = _tasks[i];
2442     HeapWord* task_finger = task->finger();
2443     if (task_finger != NULL && task_finger < _heap_end) {
2444       // See above note on the global finger verification.
2445       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
2446       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2447                 !task_hr->in_collection_set(),
2448                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2449                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
2450     }
2451   }
2452 }
2453 #endif // PRODUCT
2454 
2455 // Aggregate the counting data that was constructed concurrently
2456 // with marking.
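     // Each worker accumulated liveness data into its own card bitmap and
     // per-region marked-bytes array; this closure merges those per-worker
     // "stripes" into the global card bitmap and the regions' marked-bytes counts.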
2457 class AggregateCountDataHRClosure: public HeapRegionClosure {
2458   G1CollectedHeap* _g1h;
2459   ConcurrentMark* _cm;
2460   CardTableModRefBS* _ct_bs;
2461   BitMap* _cm_card_bm;
2462   uint _max_worker_id;
2463 
2464  public:
2465   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2466                               BitMap* cm_card_bm,
2467                               uint max_worker_id) :
2468     _g1h(g1h), _cm(g1h->concurrent_mark()),
2469     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2470     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2471 
2472   bool doHeapRegion(HeapRegion* hr) {
2473     HeapWord* start = hr->bottom();
2474     HeapWord* limit = hr->next_top_at_mark_start();
2475     HeapWord* end = hr->end();
2476 
2477     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2478            "Preconditions not met - "
2479            "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2480            "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2481            p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2482 
2483     assert(hr->next_marked_bytes() == 0, "Precondition");
2484 
2485     if (start == limit) {
2486       // NTAMS of this region has not been set so nothing to do.
2487       return false;
2488     }
2489 
2490     // 'start' should be in the heap.
2491     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2492     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2493     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2494 
2495     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2496     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2497     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2498 
2499     // If ntams is not card aligned then we bump the card bitmap index
2500     // for limit so that we get all the cards spanned by
2501     // the object ending at ntams.
2502     // Note: if this is the last region in the heap then ntams
2503     // could actually be just beyond the end of the heap;
2504     // limit_idx will then correspond to a (non-existent) card
2505     // that is also outside the heap.
2506     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2507       limit_idx += 1;
2508     }
2509 
2510     assert(limit_idx <= end_idx, "or else use atomics");
2511 
2512     // Aggregate the "stripe" in the count data associated with hr.
2513     uint hrm_index = hr->hrm_index();
2514     size_t marked_bytes = 0;
2515 
2516     for (uint i = 0; i < _max_worker_id; i += 1) {
2517       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2518       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2519 
2520       // Fetch the marked_bytes in this region for task i and
2521       // add it to the running total for this region.
2522       marked_bytes += marked_bytes_array[hrm_index];
2523 
2524       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2525       // into the global card bitmap.
2526       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2527 
2528       while (scan_idx < limit_idx) {
2529         assert(task_card_bm->at(scan_idx) == true, "should be");
2530         _cm_card_bm->set_bit(scan_idx);
2531         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2532 
2533         // BitMap::get_next_one_offset() can handle the case when
2534         // its left_offset parameter is greater than its right_offset
2535         // parameter. It does, however, have an early exit if
2536         // left_offset == right_offset. So let's limit the value
2537         // passed in for left offset here.
2538         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2539         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2540       }
2541     }
2542 
2543     // Update the marked bytes for this region.
2544     hr->add_to_marked_bytes(marked_bytes);
2545 
2546     // Next heap region
2547     return false;
2548   }
2549 };
2550 
2551 class G1AggregateCountDataTask: public AbstractGangTask {
2552 protected:
2553   G1CollectedHeap* _g1h;
2554   ConcurrentMark* _cm;
2555   BitMap* _cm_card_bm;
2556   uint _max_worker_id;
2557   uint _active_workers;
2558   HeapRegionClaimer _hrclaimer;
2559 
2560 public:
2561   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2562                            ConcurrentMark* cm,
2563                            BitMap* cm_card_bm,
2564                            uint max_worker_id,
2565                            uint n_workers) :
2566       AbstractGangTask("Count Aggregation"),
2567       _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2568       _max_worker_id(max_worker_id),
2569       _active_workers(n_workers),
2570       _hrclaimer(_active_workers) {
2571   }
2572 
2573   void work(uint worker_id) {
2574     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
2575 
2576     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
2577   }
2578 };
2579 
2580 
2581 void ConcurrentMark::aggregate_count_data() {
2582   uint n_workers = _g1h->workers()->active_workers();
2583 
2584   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
2585                                            _max_worker_id, n_workers);
2586 
2587   _g1h->workers()->run_task(&g1_par_agg_task);
2588 }
2589 
2590 // Clear the per-worker arrays used to store the per-region counting data
2591 void ConcurrentMark::clear_all_count_data() {
2592   // Clear the global card bitmap - it will be filled during
2593   // liveness count aggregation (during remark) and the
2594   // final counting task.
2595   _card_bm.clear();
2596 
2597   // Clear the global region bitmap - it will be filled as part
2598   // of the final counting task.
2599   _region_bm.clear();
2600 
2601   uint max_regions = _g1h->max_regions();
2602   assert(_max_worker_id > 0, "uninitialized");
2603 
2604   for (uint i = 0; i < _max_worker_id; i += 1) {
2605     BitMap* task_card_bm = count_card_bitmap_for(i);
2606     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
2607 
2608     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
2609     assert(marked_bytes_array != NULL, "uninitialized");
2610 
2611     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
2612     task_card_bm->clear();
2613   }
2614 }
2615 
2616 void ConcurrentMark::print_stats() {
2617   if (G1MarkingVerboseLevel > 0) {
2618     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2619     for (size_t i = 0; i < _active_tasks; ++i) {
2620       _tasks[i]->print_stats();
2621       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2622     }
2623   }
2624 }
2625 
2626 // abandon current marking iteration due to a Full GC
2627 void ConcurrentMark::abort() {
2628   if (!cmThread()->during_cycle() || _has_aborted) {
2629     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2630     return;
2631   }
2632 
2633   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2634   // concurrent bitmap clearing.
2635   _nextMarkBitMap->clearAll();
2636 
2637   // Note we cannot clear the previous marking bitmap here
2638   // since VerifyDuringGC verifies the objects marked during
2639   // a full GC against the previous bitmap.
2640 
2641   // Clear the liveness counting data
2642   clear_all_count_data();
2643   // Empty mark stack
2644   reset_marking_state();
2645   for (uint i = 0; i < _max_worker_id; ++i) {
2646     _tasks[i]->clear_region_fields();
2647   }
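       // Abort the overflow barriers so that any workers currently blocked on
       // them wake up and observe the abort rather than waiting indefinitely.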
2648   _first_overflow_barrier_sync.abort();
2649   _second_overflow_barrier_sync.abort();
2650   _has_aborted = true;
2651 
2652   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2653   satb_mq_set.abandon_partial_marking();
2654   // This can be called either during or outside marking, we'll read
2655   // the expected_active value from the SATB queue set.
2656   satb_mq_set.set_active_all_threads(
2657                                  false, /* new active value */
2658                                  satb_mq_set.is_active() /* expected_active */);
2659 
2660   _g1h->trace_heap_after_concurrent_cycle();
2661   _g1h->register_concurrent_cycle_end();
2662 }
2663 
2664 static void print_ms_time_info(const char* prefix, const char* name,
2665                                NumberSeq& ns) {
2666   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2667                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2668   if (ns.num() > 0) {
2669     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2670                            prefix, ns.sd(), ns.maximum());
2671   }
2672 }
2673 
2674 void ConcurrentMark::print_summary_info() {
2675   gclog_or_tty->print_cr(" Concurrent marking:");
2676   print_ms_time_info("  ", "init marks", _init_times);
2677   print_ms_time_info("  ", "remarks", _remark_times);
2678   {
2679     print_ms_time_info("     ", "final marks", _remark_mark_times);
2680     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2681 
2682   }
2683   print_ms_time_info("  ", "cleanups", _cleanup_times);
2684   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
2685                          _total_counting_time,
2686                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2687                           (double)_cleanup_times.num()
2688                          : 0.0));
2689   if (G1ScrubRemSets) {
2690     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2691                            _total_rs_scrub_time,
2692                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2693                             (double)_cleanup_times.num()
2694                            : 0.0));
2695   }
2696   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
2697                          (_init_times.sum() + _remark_times.sum() +
2698                           _cleanup_times.sum())/1000.0);
2699   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
2700                 "(%8.2f s marking).",
2701                 cmThread()->vtime_accum(),
2702                 cmThread()->vtime_mark_accum());
2703 }
2704 
2705 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2706   _parallel_workers->print_worker_threads_on(st);
2707 }
2708 
2709 void ConcurrentMark::print_on_error(outputStream* st) const {
2710   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2711       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
2712   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
2713   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
2714 }
2715 
2716 // We take a break if someone is trying to stop the world.
2717 bool ConcurrentMark::do_yield_check(uint worker_id) {
2718   if (SuspendibleThreadSet::should_yield()) {
2719     if (worker_id == 0) {
2720       _g1h->g1_policy()->record_concurrent_pause();
2721     }
2722     SuspendibleThreadSet::yield();
2723     return true;
2724   } else {
2725     return false;
2726   }
2727 }
2728 
2729 // Closure for iteration over bitmaps
2730 class CMBitMapClosure : public BitMapClosure {
2731 private:
2732   // the bitmap that is being iterated over
2733   CMBitMap*                   _nextMarkBitMap;
2734   ConcurrentMark*             _cm;
2735   CMTask*                     _task;
2736 
2737 public:
2738   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
2739     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2740 
2741   bool do_bit(size_t offset) {
2742     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2743     assert(_nextMarkBitMap->isMarked(addr), "invariant");
2744     assert( addr < _cm->finger(), "invariant");
2745     assert(addr >= _task->finger(), "invariant");
2746 
2747     // We move that task's local finger along.
2748     _task->move_finger_to(addr);
2749 
2750     _task->scan_object(oop(addr));
2751     // we only partially drain the local queue and global stack
2752     _task->drain_local_queue(true);
2753     _task->drain_global_stack(true);
2754 
2755     // if the has_aborted flag has been raised, we need to bail out of
2756     // the iteration
2757     return !_task->has_aborted();
2758   }
2759 };
2760 
2761 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2762   ReferenceProcessor* result = NULL;
2763   if (G1UseConcMarkReferenceProcessing) {
2764     result = g1h->ref_processor_cm();
2765     assert(result != NULL, "should not be NULL");
2766   }
2767   return result;
2768 }
2769 
2770 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2771                                ConcurrentMark* cm,
2772                                CMTask* task)
2773   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2774     _g1h(g1h), _cm(cm), _task(task)
2775 { }
2776 
2777 void CMTask::setup_for_region(HeapRegion* hr) {
2778   assert(hr != NULL,
2779         "claim_region() should have filtered out NULL regions");
2780   _curr_region  = hr;
2781   _finger       = hr->bottom();
2782   update_region_limit();
2783 }
2784 
2785 void CMTask::update_region_limit() {
2786   HeapRegion* hr            = _curr_region;
2787   HeapWord* bottom          = hr->bottom();
2788   HeapWord* limit           = hr->next_top_at_mark_start();
2789 
2790   if (limit == bottom) {
2791     // The region was collected underneath our feet.
2792     // We set the finger to bottom to ensure that the bitmap
2793     // iteration that follows will not do anything.
2794     // (This is not a condition that holds when we set the region up,
2795     // as the region is not supposed to be empty in the first place.)
2796     _finger = bottom;
2797   } else if (limit >= _region_limit) {
2798     assert(limit >= _finger, "peace of mind");
2799   } else {
2800     assert(limit < _region_limit, "only way to get here");
2801     // This can happen under some pretty unusual circumstances.  An
2802     // evacuation pause empties the region underneath our feet (NTAMS
2803     // at bottom). We then do some allocation in the region (NTAMS
2804     // stays at bottom), followed by the region being used as a GC
2805     // alloc region (NTAMS will move to top() and the objects
2806     // originally below it will be grayed). All objects now marked in
2807     // the region are explicitly grayed, if below the global finger,
2808     // and in fact we do not need to scan anything else. So, we simply
2809     // set _finger to be limit to ensure that the bitmap iteration
2810     // doesn't do anything.
2811     _finger = limit;
2812   }
2813 
2814   _region_limit = limit;
2815 }
2816 
2817 void CMTask::giveup_current_region() {
2818   assert(_curr_region != NULL, "invariant");
2819   clear_region_fields();
2820 }
2821 
2822 void CMTask::clear_region_fields() {
2823   // Values for these three fields that indicate that we're not
2824   // holding on to a region.
2825   _curr_region   = NULL;
2826   _finger        = NULL;
2827   _region_limit  = NULL;
2828 }
2829 
2830 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2831   if (cm_oop_closure == NULL) {
2832     assert(_cm_oop_closure != NULL, "invariant");
2833   } else {
2834     assert(_cm_oop_closure == NULL, "invariant");
2835   }
2836   _cm_oop_closure = cm_oop_closure;
2837 }
2838 
2839 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2840   guarantee(nextMarkBitMap != NULL, "invariant");
2841   _nextMarkBitMap                = nextMarkBitMap;
2842   clear_region_fields();
2843 
2844   _calls                         = 0;
2845   _elapsed_time_ms               = 0.0;
2846   _termination_time_ms           = 0.0;
2847   _termination_start_time_ms     = 0.0;
2848 }
2849 
2850 bool CMTask::should_exit_termination() {
2851   regular_clock_call();
2852   // This is called when we are in the termination protocol. We should
2853   // quit if, for some reason, this task wants to abort or the global
2854   // stack is not empty (this means that we can get work from it).
2855   return !_cm->mark_stack_empty() || has_aborted();
2856 }
2857 
2858 void CMTask::reached_limit() {
2859   assert(_words_scanned >= _words_scanned_limit ||
2860          _refs_reached >= _refs_reached_limit,
2861          "shouldn't have been called otherwise");
2862   regular_clock_call();
2863 }
2864 
2865 void CMTask::regular_clock_call() {
2866   if (has_aborted()) return;
2867 
2868   // First, we need to recalculate the words scanned and refs reached
2869   // limits for the next clock call.
2870   recalculate_limits();
2871 
2872   // During the regular clock call we do the following
2873 
2874   // (1) If an overflow has been flagged, then we abort.
2875   if (_cm->has_overflown()) {
2876     set_has_aborted();
2877     return;
2878   }
2879 
2880   // If we are not concurrent (i.e. we're doing remark) we don't need
2881   // to check anything else. The other steps are only needed during
2882   // the concurrent marking phase.
2883   if (!concurrent()) return;
2884 
2885   // (2) If marking has been aborted for Full GC, then we also abort.
2886   if (_cm->has_aborted()) {
2887     set_has_aborted();
2888     return;
2889   }
2890 
2891   double curr_time_ms = os::elapsedVTime() * 1000.0;
2892 
2893   // (3) We check whether we should yield. If we have to, then we abort.
2894   if (SuspendibleThreadSet::should_yield()) {
2895     // We should yield. To do this we abort the task. The caller is
2896     // responsible for yielding.
2897     set_has_aborted();
2898     return;
2899   }
2900 
2901   // (4) We check whether we've reached our time quota. If we have,
2902   // then we abort.
2903   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2904   if (elapsed_time_ms > _time_target_ms) {
2905     set_has_aborted();
2906     _has_timed_out = true;
2907     return;
2908   }
2909 
2910   // (5) Finally, we check whether there are enough completed SATB
2911   // buffers available for processing. If there are, we abort.
2912   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2913   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2914     // We do need to process SATB buffers; we'll abort and restart
2915     // the marking task to do so
2916     set_has_aborted();
2917     return;
2918   }
2919 }
2920 
2921 void CMTask::recalculate_limits() {
2922   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2923   _words_scanned_limit      = _real_words_scanned_limit;
2924 
2925   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2926   _refs_reached_limit       = _real_refs_reached_limit;
2927 }
2928 
2929 void CMTask::decrease_limits() {
2930   // This is called when we believe that we're going to do an infrequent
2931   // operation which will increase the per-byte scanning cost (i.e. move
2932   // entries to/from the global stack). It basically tries to decrease the
2933   // scanning limit so that the clock is called earlier.
2934 
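       // Pull the limits back by three quarters of a period so that the next
       // regular_clock_call() is reached sooner.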
2935   _words_scanned_limit = _real_words_scanned_limit -
2936     3 * words_scanned_period / 4;
2937   _refs_reached_limit  = _real_refs_reached_limit -
2938     3 * refs_reached_period / 4;
2939 }
2940 
2941 void CMTask::move_entries_to_global_stack() {
2942   // local array where we'll store the entries that will be popped
2943   // from the local queue
2944   oop buffer[global_stack_transfer_size];
2945 
2946   int n = 0;
2947   oop obj;
2948   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
2949     buffer[n] = obj;
2950     ++n;
2951   }
2952 
2953   if (n > 0) {
2954     // we popped at least one entry from the local queue
2955 
2956     if (!_cm->mark_stack_push(buffer, n)) {
2957       set_has_aborted();
2958     }
2959   }
2960 
2961   // this operation was quite expensive, so decrease the limits
2962   decrease_limits();
2963 }
2964 
2965 void CMTask::get_entries_from_global_stack() {
2966   // local array where we'll store the entries that will be popped
2967   // from the global stack.
2968   oop buffer[global_stack_transfer_size];
2969   int n;
2970   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
2971   assert(n <= global_stack_transfer_size,
2972          "we should not pop more than the given limit");
2973   if (n > 0) {
2974     // yes, we did actually pop at least one entry
2975     for (int i = 0; i < n; ++i) {
2976       bool success = _task_queue->push(buffer[i]);
2977       // We only call this when the local queue is empty or under a
2978       // given target limit. So, we do not expect this push to fail.
2979       assert(success, "invariant");
2980     }
2981   }
2982 
2983   // this operation was quite expensive, so decrease the limits
2984   decrease_limits();
2985 }
2986 
2987 void CMTask::drain_local_queue(bool partially) {
2988   if (has_aborted()) return;
2989 
2990   // Decide what the target size is, depending on whether we're going to
2991   // drain it partially (so that other tasks can steal if they run out
2992   // of things to do) or totally (at the very end).
2993   size_t target_size;
2994   if (partially) {
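         // Leave up to a third of the queue's capacity (capped by
         // GCDrainStackTargetSize) on the queue so that other tasks still
         // have entries to steal.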
2995     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2996   } else {
2997     target_size = 0;
2998   }
2999 
3000   if (_task_queue->size() > target_size) {
3001     oop obj;
3002     bool ret = _task_queue->pop_local(obj);
3003     while (ret) {
3004       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3005       assert(!_g1h->is_on_master_free_list(
3006                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3007 
3008       scan_object(obj);
3009 
3010       if (_task_queue->size() <= target_size || has_aborted()) {
3011         ret = false;
3012       } else {
3013         ret = _task_queue->pop_local(obj);
3014       }
3015     }
3016   }
3017 }
3018 
3019 void CMTask::drain_global_stack(bool partially) {
3020   if (has_aborted()) return;
3021 
3022   // We have a policy to drain the local queue before we attempt to
3023   // drain the global stack.
3024   assert(partially || _task_queue->size() == 0, "invariant");
3025 
3026   // Decide what the target size is, depending on whether we're going to
3027   // drain it partially (so that other tasks can steal if they run out
3028   // of things to do) or totally (at the very end).  Notice that,
3029   // because we move entries from the global stack in chunks or
3030   // because another task might be doing the same, we might in fact
3031   // drop below the target. But this is not a problem.
3032   size_t target_size;
3033   if (partially) {
3034     target_size = _cm->partial_mark_stack_size_target();
3035   } else {
3036     target_size = 0;
3037   }
3038 
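       // Repeatedly move a batch of entries from the global stack to the
       // local queue and process them until the global stack drops to the
       // target size (or we abort).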
3039   if (_cm->mark_stack_size() > target_size) {
3040     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3041       get_entries_from_global_stack();
3042       drain_local_queue(partially);
3043     }
3044   }
3045 }
3046 
3047 // The SATB queue set makes several assumptions about whether to call the
3048 // par or non-par versions of its methods. This is why some of the code is
3049 // replicated. We should really get rid of the single-threaded version
3050 // of the code to simplify things.
3051 void CMTask::drain_satb_buffers() {
3052   if (has_aborted()) return;
3053 
3054   // We set this so that the regular clock knows that we're in the
3055   // middle of draining buffers and doesn't set the abort flag when it
3056   // notices that SATB buffers are available for draining. It'd be
3057   // very counterproductive if it did that. :-)
3058   _draining_satb_buffers = true;
3059 
3060   CMSATBBufferClosure satb_cl(this, _g1h);
3061   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3062 
3063   // This keeps claiming and applying the closure to completed buffers
3064   // until we run out of buffers or we need to abort.
3065   while (!has_aborted() &&
3066          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3067     regular_clock_call();
3068   }
3069 
3070   _draining_satb_buffers = false;
3071 
3072   assert(has_aborted() ||
3073          concurrent() ||
3074          satb_mq_set.completed_buffers_num() == 0, "invariant");
3075 
3076   // again, this was a potentially expensive operation, decrease the
3077   // limits to get the regular clock call early
3078   decrease_limits();
3079 }
3080 
3081 void CMTask::print_stats() {
3082   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3083                          _worker_id, _calls);
3084   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3085                          _elapsed_time_ms, _termination_time_ms);
3086   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3087                          _step_times_ms.num(), _step_times_ms.avg(),
3088                          _step_times_ms.sd());
3089   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3090                          _step_times_ms.maximum(), _step_times_ms.sum());
3091 }
3092 
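     // Attempts to steal an entry from another task's queue; returns true
     // and sets obj if the steal succeeded.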
3093 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3094   return _task_queues->steal(worker_id, hash_seed, obj);
3095 }
3096 
3097 /*****************************************************************************
3098 
3099     The do_marking_step(time_target_ms, ...) method is the building
3100     block of the parallel marking framework. It can be called in parallel
3101     with other invocations of do_marking_step() on different tasks
3102     (but only one per task, obviously) and concurrently with the
3103     mutator threads, or during remark, hence it eliminates the need
3104     for two versions of the code. When called during remark, it will
3105     pick up from where the task left off during the concurrent marking
3106     phase. Interestingly, tasks are also claimable during evacuation
3107     pauses, since do_marking_step() ensures that it aborts before
3108     it needs to yield.
3109 
3110     The data structures that it uses to do marking work are the
3111     following:
3112 
3113       (1) Marking Bitmap. If there are gray objects that appear only
3114       on the bitmap (this happens either when dealing with an overflow
3115       or when the initial marking phase has simply marked the roots
3116       and didn't push them on the stack), then tasks claim heap
3117       regions whose bitmap they then scan to find gray objects. A
3118       global finger indicates where the end of the last claimed region
3119       is. A local finger indicates how far into the region a task has
3120       scanned. The two fingers are used to determine how to gray an
3121       object (i.e. whether simply marking it is OK, as it will be
3122       visited by a task in the future, or whether it also needs to be
3123       pushed on a stack).
3124 
3125       (2) Local Queue. The task's local queue, which the task can
3126       access reasonably efficiently. Other tasks can steal from
3127       it when they run out of work. Throughout the marking phase, a
3128       task attempts to keep its local queue short but not totally
3129       empty, so that entries are available for stealing by other
3130       tasks. Only when there is no more work will a task totally
3131       drain its local queue.
3132 
3133       (3) Global Mark Stack. This handles local queue overflow. During
3134       marking, only sets of entries are moved between it and the local
3135       queues, as access to it requires a mutex, and more fine-grained
3136       interaction with it might cause contention. If it
3137       overflows, then the marking phase should restart and iterate
3138       over the bitmap to identify gray objects. Throughout the marking
3139       phase, tasks attempt to keep the global mark stack at a small
3140       length but not totally empty, so that entries are available for
3141       popping by other tasks. Only when there is no more work will tasks
3142       totally drain the global mark stack.
3143 
3144       (4) SATB Buffer Queue. This is where completed SATB buffers are
3145       made available. Buffers are regularly removed from this queue
3146       and scanned for roots, so that the queue doesn't get too
3147       long. During remark, all completed buffers are processed, as
3148       well as the filled-in parts of any uncompleted buffers.
3149 
3150     The do_marking_step() method tries to abort when the time target
3151     has been reached. There are a few other cases when the
3152     do_marking_step() method also aborts:
3153 
3154       (1) When the marking phase has been aborted (after a Full GC).
3155 
3156       (2) When a global overflow (on the global stack) has been
3157       triggered. Before the task aborts, it will actually sync up with
3158       the other tasks to ensure that all the marking data structures
3159       (local queues, stacks, fingers etc.)  are re-initialized so that
3160       when do_marking_step() completes, the marking phase can
3161       immediately restart.
3162 
3163       (3) When enough completed SATB buffers are available. The
3164       do_marking_step() method only tries to drain SATB buffers right
3165       at the beginning. So, if enough buffers are available, the
3166       marking step aborts and the SATB buffers are processed at
3167       the beginning of the next invocation.
3168 
3169       (4) To yield. When we have to yield, we abort and yield
3170       right at the end of do_marking_step(). This saves us from a lot
3171       of hassle as, by yielding, we might allow a Full GC. If this
3172       happens then objects will be compacted underneath our feet, the
3173       heap might shrink, etc. We save checking for this by just
3174       aborting and doing the yield right at the end.
3175 
3176     From the above it follows that the do_marking_step() method should
3177     be called in a loop (or, otherwise, regularly) until it completes.
3178 
3179     If a marking step completes without its has_aborted() flag being
3180     true, it means it has completed the current marking phase (and
3181     also all other marking tasks have done so and have all synced up).
3182 
3183     A method called regular_clock_call() is invoked "regularly" (at
3184     sub-millisecond intervals) throughout marking. It is this clock method
3185     that checks all the abort conditions which were mentioned above and
3186     decides when the task should abort. A work-based scheme is used to
3187     trigger this clock method: it is called when the number of object words
3188     the marking phase has scanned or the number of references the marking
3189     phase has visited reaches a given limit. Additional invocations of
3190     the clock method have been planted in a few other strategic places
3191     too. The initial reason for the clock method was to avoid calling
3192     vtime too regularly, as it is quite expensive. So, once it was in
3193     place, it was natural to piggy-back all the other conditions on it
3194     too and not constantly check them throughout the code.
3195 
3196     If do_termination is true then do_marking_step will enter its
3197     termination protocol.
3198 
3199     The value of is_serial must be true when do_marking_step is being
3200     called serially (i.e. by the VMThread) and do_marking_step should
3201     skip any synchronization in the termination and overflow code.
3202     Examples include the serial remark code and the serial reference
3203     processing closures.
3204 
3205     The value of is_serial must be false when do_marking_step is
3206     being called by any of the worker threads in a work gang.
3207     Examples include the concurrent marking code (CMMarkingTask),
3208     the MT remark code, and the MT reference processing closures.
3209 
3210  *****************************************************************************/
3211 
3212 void CMTask::do_marking_step(double time_target_ms,
3213                              bool do_termination,
3214                              bool is_serial) {
3215   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3216   assert(concurrent() == _cm->concurrent(), "they should be the same");
3217 
3218   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3219   assert(_task_queues != NULL, "invariant");
3220   assert(_task_queue != NULL, "invariant");
3221   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3222 
3223   assert(!_claimed,
3224          "only one thread should claim this task at any one time");
3225 
3226   // OK, this doesn't safeguard against all possible scenarios, as it is
3227   // possible for two threads to set the _claimed flag at the same
3228   // time. But it is only for debugging purposes anyway and it will
3229   // catch most problems.
3230   _claimed = true;
3231 
3232   _start_time_ms = os::elapsedVTime() * 1000.0;
3233 
3234   // If do_stealing is true then do_marking_step will attempt to
3235   // steal work from the other CMTasks. It only makes sense to
3236   // enable stealing when the termination protocol is enabled
3237   // and do_marking_step() is not being called serially.
3238   bool do_stealing = do_termination && !is_serial;
3239 
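       // Reduce the time target by the predicted difference between target
       // and actual step times (recorded in _marking_step_diffs_ms below), so
       // that we tend to finish close to the requested target.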
3240   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
3241   _time_target_ms = time_target_ms - diff_prediction_ms;
3242 
3243   // set up the variables that are used in the work-based scheme to
3244   // call the regular clock method
3245   _words_scanned = 0;
3246   _refs_reached  = 0;
3247   recalculate_limits();
3248 
3249   // clear all flags
3250   clear_has_aborted();
3251   _has_timed_out = false;
3252   _draining_satb_buffers = false;
3253 
3254   ++_calls;
3255 
3256   // Set up the bitmap and oop closures. Anything that uses them is
3257   // eventually called from this method, so it is OK to allocate these
3258   // statically.
3259   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3260   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3261   set_cm_oop_closure(&cm_oop_closure);
3262 
3263   if (_cm->has_overflown()) {
3264     // This can happen if the mark stack overflows during a GC pause
3265     // and this task, after a yield point, restarts. We have to abort
3266     // as we need to get into the overflow protocol which happens
3267     // right at the end of this task.
3268     set_has_aborted();
3269   }
3270 
3271   // First drain any available SATB buffers. After this, we will not
3272   // look at SATB buffers before the next invocation of this method.
3273   // If enough completed SATB buffers are queued up, the regular clock
3274   // will abort this task so that it restarts.
3275   drain_satb_buffers();
3276   // ...then partially drain the local queue and the global stack
3277   drain_local_queue(true);
3278   drain_global_stack(true);
3279 
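       // Main loop: scan the part of the current region below its limit,
       // drain the local queue and global stack, and claim a new region,
       // until we run out of regions or abort.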
3280   do {
3281     if (!has_aborted() && _curr_region != NULL) {
3282       // This means that we're already holding on to a region.
3283       assert(_finger != NULL, "if region is not NULL, then the finger "
3284              "should not be NULL either");
3285 
3286       // We might have restarted this task after an evacuation pause
3287       // which might have evacuated the region we're holding on to
3288       // underneath our feet. Let's read its limit again to make sure
3289       // that we do not iterate over a region of the heap that
3290       // contains garbage (update_region_limit() will also move
3291       // _finger to the start of the region if it is found empty).
3292       update_region_limit();
3293       // We will start from _finger not from the start of the region,
3294       // as we might be restarting this task after aborting half-way
3295       // through scanning this region. In this case, _finger points to
3296       // the address where we last found a marked object. If this is a
3297       // fresh region, _finger points to start().
3298       MemRegion mr = MemRegion(_finger, _region_limit);
3299 
3300       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
3301              "humongous regions should go around loop once only");
3302 
3303       // Some special cases:
3304       // If the memory region is empty, we can just give up the region.
3305       // If the current region is humongous then we only need to check
3306       // the bitmap for the bit associated with the start of the object,
3307       // scan the object if it's live, and give up the region.
3308       // Otherwise, let's iterate over the bitmap of the part of the region
3309       // that is left.
3310       // If the iteration is successful, give up the region.
3311       if (mr.is_empty()) {
3312         giveup_current_region();
3313         regular_clock_call();
3314       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
3315         if (_nextMarkBitMap->isMarked(mr.start())) {
3316           // The object is marked - apply the closure
3317           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
3318           bitmap_closure.do_bit(offset);
3319         }
3320         // Even if this task aborted while scanning the humongous object
3321         // we can (and should) give up the current region.
3322         giveup_current_region();
3323         regular_clock_call();
3324       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3325         giveup_current_region();
3326         regular_clock_call();
3327       } else {
3328         assert(has_aborted(), "currently the only way to do so");
3329         // The only way to abort the bitmap iteration is to return
3330         // false from the do_bit() method. However, inside the
3331         // do_bit() method we move the _finger to point to the
3332         // object currently being looked at. So, if we bail out, we
3333         // have definitely set _finger to something non-null.
3334         assert(_finger != NULL, "invariant");
3335 
3336         // Region iteration was actually aborted. So now _finger
3337         // points to the address of the object we last scanned. If we
3338         // leave it there, when we restart this task, we will rescan
3339         // the object. It is easy to avoid this. We move the finger by
3340         // enough to point to the next possible object header (the
3341         // bitmap knows by how much we need to move it as it knows its
3342         // granularity).
3343         assert(_finger < _region_limit, "invariant");
3344         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
3345         // Check if bitmap iteration was aborted while scanning the last object
3346         if (new_finger >= _region_limit) {
3347           giveup_current_region();
3348         } else {
3349           move_finger_to(new_finger);
3350         }
3351       }
3352     }
3353     // At this point we have either completed iterating over the
3354     // region we were holding on to, or we have aborted.
3355 
3356     // We then partially drain the local queue and the global stack.
3357     // (Do we really need this?)
3358     drain_local_queue(true);
3359     drain_global_stack(true);
3360 
3361     // Read the note on the claim_region() method on why it might
3362     // return NULL with potentially more regions available for
3363     // claiming and why we have to check out_of_regions() to determine
3364     // whether we're done or not.
3365     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3366       // We are going to try to claim a new region. We should have
3367       // given up on the previous one.
3368       // Separated the asserts so that we know which one fires.
3369       assert(_curr_region  == NULL, "invariant");
3370       assert(_finger       == NULL, "invariant");
3371       assert(_region_limit == NULL, "invariant");
3372       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3373       if (claimed_region != NULL) {
3374         // Yes, we managed to claim one
3375         setup_for_region(claimed_region);
3376         assert(_curr_region == claimed_region, "invariant");
3377       }
3378       // It is important to call the regular clock here. It might take
3379       // a while to claim a region if, for example, we hit a large
3380       // block of empty regions. So we need to call the regular clock
3381       // method once round the loop to make sure it's called
3382       // frequently enough.
3383       regular_clock_call();
3384     }
3385 
3386     if (!has_aborted() && _curr_region == NULL) {
3387       assert(_cm->out_of_regions(),
3388              "at this point we should be out of regions");
3389     }
3390   } while (_curr_region != NULL && !has_aborted());
3391 
3392   if (!has_aborted()) {
3393     // We cannot check whether the global stack is empty, since other
3394     // tasks might be pushing objects to it concurrently.
3395     assert(_cm->out_of_regions(),
3396            "at this point we should be out of regions");
3397     // Try to reduce the number of available SATB buffers so that
3398     // remark has less work to do.
3399     drain_satb_buffers();
3400   }
3401 
3402   // Since we've done everything else, we can now totally drain the
3403   // local queue and global stack.
3404   drain_local_queue(false);
3405   drain_global_stack(false);
3406 
3407   // Attempt to steal work from other tasks' queues.
3408   if (do_stealing && !has_aborted()) {
3409     // We have not aborted. This means that we have finished all that
3410     // we could. Let's try to do some stealing...
3411 
3412     // We cannot check whether the global stack is empty, since other
3413     // tasks might be pushing objects to it concurrently.
3414     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3415            "only way to reach here");
3416     while (!has_aborted()) {
3417       oop obj;
3418       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
3419         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3420                "any stolen object should be marked");
3421         scan_object(obj);
3422 
3423         // And since we're towards the end, let's totally drain the
3424         // local queue and global stack.
3425         drain_local_queue(false);
3426         drain_global_stack(false);
3427       } else {
3428         break;
3429       }
3430     }
3431   }
3432 
3433   // We still haven't aborted. Now, let's try to get into the
3434   // termination protocol.
3435   if (do_termination && !has_aborted()) {
3436     // We cannot check whether the global stack is empty, since other
3437     // tasks might be concurrently pushing objects on it.
3438     // Separated the asserts so that we know which one fires.
3439     assert(_cm->out_of_regions(), "only way to reach here");
3440     assert(_task_queue->size() == 0, "only way to reach here");
3441     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3442 
3443     // The CMTask class also extends the TerminatorTerminator class,
3444     // hence its should_exit_termination() method will also decide
3445     // whether to exit the termination protocol or not.
3446     bool finished = (is_serial ||
3447                      _cm->terminator()->offer_termination(this));
3448     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3449     _termination_time_ms +=
3450       termination_end_time_ms - _termination_start_time_ms;
3451 
3452     if (finished) {
3453       // We're all done.
3454 
3455       if (_worker_id == 0) {
3456         // let's allow task 0 to do this
3457         if (concurrent()) {
3458           assert(_cm->concurrent_marking_in_progress(), "invariant");
3459           // we need to set this to false before the next
3460           // safepoint. This way we ensure that the marking phase
3461           // doesn't observe any more heap expansions.
3462           _cm->clear_concurrent_marking_in_progress();
3463         }
3464       }
3465 
3466       // We can now guarantee that the global stack is empty, since
3467       // all other tasks have finished. We separated the guarantees so
3468       // that, if a condition is false, we can immediately find out
3469       // which one.
3470       guarantee(_cm->out_of_regions(), "only way to reach here");
3471       guarantee(_cm->mark_stack_empty(), "only way to reach here");
3472       guarantee(_task_queue->size() == 0, "only way to reach here");
3473       guarantee(!_cm->has_overflown(), "only way to reach here");
3474       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3475     } else {
3476       // Apparently there's more work to do. Let's abort this task. The
3477       // caller will restart it and we can hopefully find more things to do.
3478       set_has_aborted();
3479     }
3480   }
3481 
3482   // Mainly for debugging purposes to make sure that a pointer to the
3483   // closure which was statically allocated in this frame doesn't
3484   // escape it by accident.
3485   set_cm_oop_closure(NULL);
3486   double end_time_ms = os::elapsedVTime() * 1000.0;
3487   double elapsed_time_ms = end_time_ms - _start_time_ms;
3488   // Update the step history.
3489   _step_times_ms.add(elapsed_time_ms);
3490 
3491   if (has_aborted()) {
3492     // The task was aborted for some reason.
3493     if (_has_timed_out) {
3494       double diff_ms = elapsed_time_ms - _time_target_ms;
3495       // Keep statistics of how well we did with respect to hitting
3496       // our target only if we actually timed out (if we aborted for
3497       // other reasons, then the results might get skewed).
3498       _marking_step_diffs_ms.add(diff_ms);
3499     }
3500 
3501     if (_cm->has_overflown()) {
3502       // This is the interesting one. We aborted because a global
3503       // overflow was raised. This means we have to restart the
3504       // marking phase and start iterating over regions. However, in
3505       // order to do this we have to make sure that all tasks stop
3506       // what they are doing and re-initialize in a safe manner. We
3507       // will achieve this with the use of two barrier sync points.
3508 
3509       if (!is_serial) {
3510         // We only need to enter the sync barrier if being called
3511         // from a parallel context
3512         _cm->enter_first_sync_barrier(_worker_id);
3513 
3514         // When we exit this sync barrier we know that all tasks have
3515         // stopped doing marking work. So, it's now safe to
3516         // re-initialize our data structures. At the end of this method,
3517         // task 0 will clear the global data structures.
3518       }
3519 
3520       // We clear the local state of this task...
3521       clear_region_fields();
3522 
3523       if (!is_serial) {
3524         // ...and enter the second barrier.
3525         _cm->enter_second_sync_barrier(_worker_id);
3526       }
3527       // At this point, if we're in the concurrent phase of
3528       // marking, everything has been re-initialized and we're
3529       // ready to restart.
3530     }
3531   }
3532 
3533   _claimed = false;
3534 }
3535 
3536 CMTask::CMTask(uint worker_id,
3537                ConcurrentMark* cm,
3538                size_t* marked_bytes,
3539                BitMap* card_bm,
3540                CMTaskQueue* task_queue,
3541                CMTaskQueueSet* task_queues)
3542   : _g1h(G1CollectedHeap::heap()),
3543     _worker_id(worker_id), _cm(cm),
3544     _claimed(false),
3545     _nextMarkBitMap(NULL), _hash_seed(17),
3546     _task_queue(task_queue),
3547     _task_queues(task_queues),
3548     _cm_oop_closure(NULL),
3549     _marked_bytes_array(marked_bytes),
3550     _card_bm(card_bm) {
3551   guarantee(task_queue != NULL, "invariant");
3552   guarantee(task_queues != NULL, "invariant");
3553 
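       // Seed the step time difference predictor (used by do_marking_step())
       // with a small initial sample.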
3554   _marking_step_diffs_ms.add(0.5);
3555 }
3556 
3557 // These are formatting macros that are used below to ensure
3558 // consistent formatting. The *_H_* versions are used to format the
3559 // header for a particular value and they should be kept consistent
3560 // with the corresponding macro. Also note that most of the macros add
3561 // the necessary white space (as a prefix) which makes them a bit
3562 // easier to compose.
3563 
3564 // All the output lines are prefixed with this string so that they can
3565 // be identified easily in a large log file.
3566 #define G1PPRL_LINE_PREFIX            "###"
3567 
3568 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
3569 #ifdef _LP64
3570 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
3571 #else // _LP64
3572 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
3573 #endif // _LP64
3574 
3575 // For per-region info
3576 #define G1PPRL_TYPE_FORMAT            "   %-4s"
3577 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
3578 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
3579 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
3580 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
3581 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
3582 
3583 // For summary info
3584 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
3585 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
3586 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
3587 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
3588 
3589 G1PrintRegionLivenessInfoClosure::
3590 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
3591   : _out(out),
3592     _total_used_bytes(0), _total_capacity_bytes(0),
3593     _total_prev_live_bytes(0), _total_next_live_bytes(0),
3594     _hum_used_bytes(0), _hum_capacity_bytes(0),
3595     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
3596     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
3597   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3598   MemRegion g1_reserved = g1h->g1_reserved();
3599   double now = os::elapsedTime();
3600 
3601   // Print the header of the output.
3602   _out->cr();
3603   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3604   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
3605                  G1PPRL_SUM_ADDR_FORMAT("reserved")
3606                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
3607                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
3608                  HeapRegion::GrainBytes);
3609   _out->print_cr(G1PPRL_LINE_PREFIX);
3610   _out->print_cr(G1PPRL_LINE_PREFIX
3611                 G1PPRL_TYPE_H_FORMAT
3612                 G1PPRL_ADDR_BASE_H_FORMAT
3613                 G1PPRL_BYTE_H_FORMAT
3614                 G1PPRL_BYTE_H_FORMAT
3615                 G1PPRL_BYTE_H_FORMAT
3616                 G1PPRL_DOUBLE_H_FORMAT
3617                 G1PPRL_BYTE_H_FORMAT
3618                 G1PPRL_BYTE_H_FORMAT,
3619                 "type", "address-range",
3620                 "used", "prev-live", "next-live", "gc-eff",
3621                 "remset", "code-roots");
3622   _out->print_cr(G1PPRL_LINE_PREFIX
3623                 G1PPRL_TYPE_H_FORMAT
3624                 G1PPRL_ADDR_BASE_H_FORMAT
3625                 G1PPRL_BYTE_H_FORMAT
3626                 G1PPRL_BYTE_H_FORMAT
3627                 G1PPRL_BYTE_H_FORMAT
3628                 G1PPRL_DOUBLE_H_FORMAT
3629                 G1PPRL_BYTE_H_FORMAT
3630                 G1PPRL_BYTE_H_FORMAT,
3631                 "", "",
3632                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
3633                 "(bytes)", "(bytes)");
3634 }
3635 
3636 // It takes as a parameter a pointer to one of the _hum_* fields,
3637 // deduces the corresponding value for a region in a humongous region
3638 // series (either the region size, or what's left if the _hum_* field
3639 // is < the region size), and updates the _hum_* field accordingly.
3640 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
3641   size_t bytes = 0;
3642   // The > 0 check is to deal with the prev and next live bytes which
3643   // could be 0.
3644   if (*hum_bytes > 0) {
3645     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
3646     *hum_bytes -= bytes;
3647   }
3648   return bytes;
3649 }
3650 
3651 // It deduces the values for a region in a humongous region series
3652 // from the _hum_* fields and updates those accordingly. It assumes
3653 // that the _hum_* fields have already been set up from the "starts
3654 // humongous" region and that we visit the regions in address order.
3655 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
3656                                                      size_t* capacity_bytes,
3657                                                      size_t* prev_live_bytes,
3658                                                      size_t* next_live_bytes) {
3659   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
3660   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
3661   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
3662   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
3663   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
3664 }
3665 
3666 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
3667   const char* type       = r->get_type_str();
3668   HeapWord* bottom       = r->bottom();
3669   HeapWord* end          = r->end();
3670   size_t capacity_bytes  = r->capacity();
3671   size_t used_bytes      = r->used();
3672   size_t prev_live_bytes = r->live_bytes();
3673   size_t next_live_bytes = r->next_live_bytes();
3674   double gc_eff          = r->gc_efficiency();
3675   size_t remset_bytes    = r->rem_set()->mem_size();
3676   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
3677 
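       // A "starts humongous" region carries the values (used, capacity,
       // live bytes) for the whole humongous series; stash them in the
       // _hum_* fields and spread them over the individual regions as they
       // are visited in address order.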
3678   if (r->is_starts_humongous()) {
3679     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
3680            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
3681            "they should have been zeroed after the last time we used them");
3682     // Set up the _hum_* fields.
3683     _hum_capacity_bytes  = capacity_bytes;
3684     _hum_used_bytes      = used_bytes;
3685     _hum_prev_live_bytes = prev_live_bytes;
3686     _hum_next_live_bytes = next_live_bytes;
3687     get_hum_bytes(&used_bytes, &capacity_bytes,
3688                   &prev_live_bytes, &next_live_bytes);
3689     end = bottom + HeapRegion::GrainWords;
3690   } else if (r->is_continues_humongous()) {
3691     get_hum_bytes(&used_bytes, &capacity_bytes,
3692                   &prev_live_bytes, &next_live_bytes);
3693     assert(end == bottom + HeapRegion::GrainWords, "invariant");
3694   }
3695 
3696   _total_used_bytes      += used_bytes;
3697   _total_capacity_bytes  += capacity_bytes;
3698   _total_prev_live_bytes += prev_live_bytes;
3699   _total_next_live_bytes += next_live_bytes;
3700   _total_remset_bytes    += remset_bytes;
3701   _total_strong_code_roots_bytes += strong_code_roots_bytes;
3702 
3703   // Print a line for this particular region.
3704   _out->print_cr(G1PPRL_LINE_PREFIX
3705                  G1PPRL_TYPE_FORMAT
3706                  G1PPRL_ADDR_BASE_FORMAT
3707                  G1PPRL_BYTE_FORMAT
3708                  G1PPRL_BYTE_FORMAT
3709                  G1PPRL_BYTE_FORMAT
3710                  G1PPRL_DOUBLE_FORMAT
3711                  G1PPRL_BYTE_FORMAT
3712                  G1PPRL_BYTE_FORMAT,
3713                  type, p2i(bottom), p2i(end),
3714                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
3715                  remset_bytes, strong_code_roots_bytes);
3716 
3717   return false;
3718 }
3719 
3720 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3721   // Add static memory usage to the remembered set sizes.
3722   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
3723   // Print the footer of the output.
3724   _out->print_cr(G1PPRL_LINE_PREFIX);
3725   _out->print_cr(G1PPRL_LINE_PREFIX
3726                  " SUMMARY"
3727                  G1PPRL_SUM_MB_FORMAT("capacity")
3728                  G1PPRL_SUM_MB_PERC_FORMAT("used")
3729                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
3730                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
3731                  G1PPRL_SUM_MB_FORMAT("remset")
3732                  G1PPRL_SUM_MB_FORMAT("code-roots"),
3733                  bytes_to_mb(_total_capacity_bytes),
3734                  bytes_to_mb(_total_used_bytes),
3735                  perc(_total_used_bytes, _total_capacity_bytes),
3736                  bytes_to_mb(_total_prev_live_bytes),
3737                  perc(_total_prev_live_bytes, _total_capacity_bytes),
3738                  bytes_to_mb(_total_next_live_bytes),
3739                  perc(_total_next_live_bytes, _total_capacity_bytes),
3740                  bytes_to_mb(_total_remset_bytes),
3741                  bytes_to_mb(_total_strong_code_roots_bytes));
3742   _out->cr();
3743 }