1 /*
   2  * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMark.inline.hpp"
  30 #include "gc/g1/concurrentMarkThread.inline.hpp"
  31 #include "gc/g1/g1CollectedHeap.inline.hpp"
  32 #include "gc/g1/g1CollectorPolicy.hpp"
  33 #include "gc/g1/g1CollectorState.hpp"
  34 #include "gc/g1/g1OopClosures.inline.hpp"
  35 #include "gc/g1/g1RemSet.hpp"
  36 #include "gc/g1/g1StringDedup.hpp"
  37 #include "gc/g1/heapRegion.inline.hpp"
  38 #include "gc/g1/heapRegionManager.inline.hpp"
  39 #include "gc/g1/heapRegionRemSet.hpp"
  40 #include "gc/g1/heapRegionSet.inline.hpp"
  41 #include "gc/g1/suspendibleThreadSet.hpp"
  42 #include "gc/shared/gcId.hpp"
  43 #include "gc/shared/gcTimer.hpp"
  44 #include "gc/shared/gcTrace.hpp"
  45 #include "gc/shared/gcTraceTime.inline.hpp"
  46 #include "gc/shared/genOopClosures.inline.hpp"
  47 #include "gc/shared/referencePolicy.hpp"
  48 #include "gc/shared/strongRootsScope.hpp"
  49 #include "gc/shared/taskqueue.inline.hpp"
  50 #include "gc/shared/vmGCOperations.hpp"
  51 #include "logging/log.hpp"
  52 #include "memory/allocation.hpp"
  53 #include "memory/resourceArea.hpp"
  54 #include "oops/oop.inline.hpp"
  55 #include "runtime/atomic.inline.hpp"
  56 #include "runtime/handles.inline.hpp"
  57 #include "runtime/java.hpp"
  58 #include "runtime/prefetch.inline.hpp"
  59 #include "services/memTracker.hpp"
  60 
  61 // Concurrent marking bit map wrapper
  62 
  63 CMBitMapRO::CMBitMapRO(int shifter) :
  64   _bm(),
  65   _shifter(shifter) {
  66   _bmStartWord = 0;
  67   _bmWordSize = 0;
  68 }
  69 
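// Return the address of the first marked word at or after addr (rounded up
// to a possible object boundary), or limit if there is no marked word
// before limit.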
  70 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  71                                                const HeapWord* limit) const {
  72   // First we must round addr *up* to a possible object boundary.
  73   addr = (HeapWord*)align_size_up((intptr_t)addr,
  74                                   HeapWordSize << _shifter);
  75   size_t addrOffset = heapWordToOffset(addr);
  76   assert(limit != NULL, "limit must not be NULL");
  77   size_t limitOffset = heapWordToOffset(limit);
  78   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  79   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  80   assert(nextAddr >= addr, "get_next_one postcondition");
  81   assert(nextAddr == limit || isMarked(nextAddr),
  82          "get_next_one postcondition");
  83   return nextAddr;
  84 }
  85 
  86 #ifndef PRODUCT
  87 bool CMBitMapRO::covers(MemRegion heap_rs) const {
  88   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  89   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
  90          "size inconsistency");
  91   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
  92          _bmWordSize  == heap_rs.word_size();
  93 }
  94 #endif
  95 
  96 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  97   _bm.print_on_error(st, prefix);
  98 }
  99 
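// The bitmap uses one bit per MinObjAlignmentInBytes bytes of heap:
// mark_distance() is the number of heap bytes covered by a single bitmap
// byte, and compute_size() returns the (allocation aligned) number of bitmap
// bytes needed to cover a heap of the given size. For example, with 8-byte
// object alignment a 1 GB heap needs 1G / (8 * 8) = 16 MB of bitmap.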
 100 size_t CMBitMap::compute_size(size_t heap_size) {
 101   return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
 102 }
 103 
 104 size_t CMBitMap::mark_distance() {
 105   return MinObjAlignmentInBytes * BitsPerByte;
 106 }
 107 
 108 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 109   _bmStartWord = heap.start();
 110   _bmWordSize = heap.word_size();
 111 
 112   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
 113   _bm.set_size(_bmWordSize >> _shifter);
 114 
 115   storage->set_mapping_changed_listener(&_listener);
 116 }
 117 
 118 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 119   if (zero_filled) {
 120     return;
 121   }
 122   // We need to clear the bitmap on commit, removing any existing information.
 123   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 124   _bm->clearRange(mr);
 125 }
 126 
 127 // Closure used for clearing the given mark bitmap.
 128 class ClearBitmapHRClosure : public HeapRegionClosure {
 129  private:
 130   ConcurrentMark* _cm;
 131   CMBitMap* _bitmap;
 132   bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 133  public:
 134   ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
 135     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
 136   }
 137 
 138   virtual bool doHeapRegion(HeapRegion* r) {
 139     size_t const chunk_size_in_words = M / HeapWordSize;
 140 
 141     HeapWord* cur = r->bottom();
 142     HeapWord* const end = r->end();
 143 
 144     while (cur < end) {
 145       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 146       _bitmap->clearRange(mr);
 147 
 148       cur += chunk_size_in_words;
 149 
      // Abort the iteration if, after yielding, the marking has been aborted.
      if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
        return true;
      }
      // Repeat the asserts from before the start of the closure. We do them
      // as asserts here to minimize their overhead in product builds. However, we
      // will have them as guarantees at the beginning / end of the bitmap
      // clearing to get some checking in product builds.
 158       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
 159       assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
 160     }
 161 
 162     return false;
 163   }
 164 };
 165 
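// Parallel task that clears a mark bitmap by applying ClearBitmapHRClosure
// to each heap region. If the task is suspendible, the workers join the
// suspendible thread set so that the clearing can yield to safepoints.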
 166 class ParClearNextMarkBitmapTask : public AbstractGangTask {
 167   ClearBitmapHRClosure* _cl;
 168   HeapRegionClaimer     _hrclaimer;
 169   bool                  _suspendible; // If the task is suspendible, workers must join the STS.
 170 
 171 public:
  ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
      AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}
 174 
 175   void work(uint worker_id) {
 176     SuspendibleThreadSetJoiner sts_join(_suspendible);
 177     G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
 178   }
 179 };
 180 
 181 void CMBitMap::clearAll() {
 182   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 183   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
 184   uint n_workers = g1h->workers()->active_workers();
 185   ParClearNextMarkBitmapTask task(&cl, n_workers, false);
 186   g1h->workers()->run_task(&task);
 187   guarantee(cl.complete(), "Must have completed iteration.");
 188   return;
 189 }
 190 
 191 void CMBitMap::clearRange(MemRegion mr) {
  // Clamp the range to the part of the heap covered by this bitmap.
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 193   assert(!mr.is_empty(), "unexpected empty region");
 194   // convert address range into offset range
 195   _bm.at_put_range(heapWordToOffset(mr.start()),
 196                    heapWordToOffset(mr.end()), false);
 197 }
 198 
 199 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 200   _base(NULL), _cm(cm)
 201 {}
 202 
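// Reserve and commit the backing store for a mark stack of 'capacity' oop
// entries. Returns false (after printing a warning) if either the
// reservation or the commit fails.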
 203 bool CMMarkStack::allocate(size_t capacity) {
 204   // allocate a stack of the requisite depth
 205   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 206   if (!rs.is_reserved()) {
 207     warning("ConcurrentMark MarkStack allocation failure");
 208     return false;
 209   }
 210   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 211   if (!_virtual_space.initialize(rs, rs.size())) {
 212     warning("ConcurrentMark MarkStack backing store failure");
 213     // Release the virtual memory reserved for the marking stack
 214     rs.release();
 215     return false;
 216   }
 217   assert(_virtual_space.committed_size() == rs.size(),
 218          "Didn't reserve backing store for all of ConcurrentMark stack?");
 219   _base = (oop*) _virtual_space.low();
 220   setEmpty();
 221   _capacity = (jint) capacity;
 222   _saved_index = -1;
 223   _should_expand = false;
 224   return true;
 225 }
 226 
 227 void CMMarkStack::expand() {
 228   // Called, during remark, if we've overflown the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
 230   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 231   // Clear expansion flag
 232   _should_expand = false;
 233   if (_capacity == (jint) MarkStackSizeMax) {
 234     log_trace(gc)("(benign) Can't expand marking stack capacity, at max size limit");
 235     return;
 236   }
 237   // Double capacity if possible
 238   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
 239   // Do not give up existing stack until we have managed to
 240   // get the double capacity that we desired.
 241   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 242                                                            sizeof(oop)));
 243   if (rs.is_reserved()) {
 244     // Release the backing store associated with old stack
 245     _virtual_space.release();
 246     // Reinitialize virtual space for new stack
 247     if (!_virtual_space.initialize(rs, rs.size())) {
 248       fatal("Not enough swap for expanded marking stack capacity");
 249     }
 250     _base = (oop*)(_virtual_space.low());
 251     _index = 0;
 252     _capacity = new_capacity;
 253   } else {
 254     // Failed to double capacity, continue;
 255     log_trace(gc)("(benign) Failed to expand marking stack capacity from " SIZE_FORMAT "K to " SIZE_FORMAT "K",
 256                   _capacity / K, new_capacity / K);
 257   }
 258 }
 259 
 260 void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
 262   // marking stack overflow, record that we should, if
 263   // possible, expand the stack.
 264   _should_expand = _cm->has_overflown();
 265 }
 266 
 267 CMMarkStack::~CMMarkStack() {
 268   if (_base != NULL) {
 269     _base = NULL;
 270     _virtual_space.release();
 271   }
 272 }
 273 
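// Push 'n' entries from ptr_arr onto the global mark stack, under the
// ParGCRareEvent_lock. If the entries do not fit, the stack is left
// unchanged and the overflow flag is set instead.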
 274 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 275   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 276   jint start = _index;
 277   jint next_index = start + n;
 278   if (next_index > _capacity) {
 279     _overflow = true;
 280     return;
 281   }
 282   // Otherwise.
 283   _index = next_index;
 284   for (int i = 0; i < n; i++) {
 285     int ind = start + i;
 286     assert(ind < _capacity, "By overflow test above.");
 287     _base[ind] = ptr_arr[i];
 288   }
 289 }
 290 
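// Pop up to 'max' entries from the global mark stack into ptr_arr, under
// the ParGCRareEvent_lock. Returns true and sets *n to the number of
// entries transferred, or returns false (with *n == 0) if the stack is empty.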
 291 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 292   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 293   jint index = _index;
 294   if (index == 0) {
 295     *n = 0;
 296     return false;
 297   } else {
 298     int k = MIN2(max, index);
 299     jint  new_ind = index - k;
 300     for (int j = 0; j < k; j++) {
 301       ptr_arr[j] = _base[new_ind + j];
 302     }
 303     _index = new_ind;
 304     *n = k;
 305     return true;
 306   }
 307 }
 308 
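// Record the current stack index so that note_end_of_gc() can verify that
// nothing was pushed onto the mark stack while the world was stopped.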
 309 void CMMarkStack::note_start_of_gc() {
 310   assert(_saved_index == -1,
 311          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 312   _saved_index = _index;
 313 }
 314 
 315 void CMMarkStack::note_end_of_gc() {
 316   // This is intentionally a guarantee, instead of an assert. If we
 317   // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
 319   // only check this once per GC anyway, so it won't be a performance
 320   // issue in any way.
 321   guarantee(_saved_index == _index,
 322             "saved index: %d index: %d", _saved_index, _index);
 323   _saved_index = -1;
 324 }
 325 
 326 CMRootRegions::CMRootRegions() :
 327   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 328   _should_abort(false),  _next_survivor(NULL) { }
 329 
 330 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 331   _young_list = g1h->young_list();
 332   _cm = cm;
 333 }
 334 
 335 void CMRootRegions::prepare_for_scan() {
 336   assert(!scan_in_progress(), "pre-condition");
 337 
 338   // Currently, only survivors can be root regions.
 339   assert(_next_survivor == NULL, "pre-condition");
 340   _next_survivor = _young_list->first_survivor_region();
 341   _scan_in_progress = (_next_survivor != NULL);
 342   _should_abort = false;
 343 }
 344 
 345 HeapRegion* CMRootRegions::claim_next() {
 346   if (_should_abort) {
 347     // If someone has set the should_abort flag, we return NULL to
 348     // force the caller to bail out of their loop.
 349     return NULL;
 350   }
 351 
 352   // Currently, only survivors can be root regions.
 353   HeapRegion* res = _next_survivor;
 354   if (res != NULL) {
 355     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 356     // Read it again in case it changed while we were waiting for the lock.
 357     res = _next_survivor;
 358     if (res != NULL) {
 359       if (res == _young_list->last_survivor_region()) {
 360         // We just claimed the last survivor so store NULL to indicate
 361         // that we're done.
 362         _next_survivor = NULL;
 363       } else {
 364         _next_survivor = res->get_next_young_region();
 365       }
 366     } else {
 367       // Someone else claimed the last survivor while we were trying
 368       // to take the lock so nothing else to do.
 369     }
 370   }
 371   assert(res == NULL || res->is_survivor(), "post-condition");
 372 
 373   return res;
 374 }
 375 
 376 void CMRootRegions::scan_finished() {
 377   assert(scan_in_progress(), "pre-condition");
 378 
 379   // Currently, only survivors can be root regions.
 380   if (!_should_abort) {
 381     assert(_next_survivor == NULL, "we should have claimed all survivors");
 382   }
 383   _next_survivor = NULL;
 384 
 385   {
 386     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 387     _scan_in_progress = false;
 388     RootRegionScan_lock->notify_all();
 389   }
 390 }
 391 
 392 bool CMRootRegions::wait_until_scan_finished() {
 393   if (!scan_in_progress()) return false;
 394 
 395   {
 396     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 397     while (scan_in_progress()) {
 398       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 399     }
 400   }
 401   return true;
 402 }
 403 
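// Scale the number of concurrent marking threads from the number of
// parallel GC threads: roughly one quarter of them, but always at least one.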
 404 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 405   return MAX2((n_par_threads + 2) / 4, 1U);
 406 }
 407 
 408 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 409   _g1h(g1h),
 410   _markBitMap1(),
 411   _markBitMap2(),
 412   _parallel_marking_threads(0),
 413   _max_parallel_marking_threads(0),
 414   _sleep_factor(0.0),
 415   _marking_task_overhead(1.0),
 416   _cleanup_list("Cleanup List"),
 417   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 418   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
 419             CardTableModRefBS::card_shift,
 420             false /* in_resource_area*/),
 421 
 422   _prevMarkBitMap(&_markBitMap1),
 423   _nextMarkBitMap(&_markBitMap2),
 424 
 425   _markStack(this),
 426   // _finger set in set_non_marking_state
 427 
 428   _max_worker_id(ParallelGCThreads),
 429   // _active_tasks set in set_non_marking_state
 430   // _tasks set inside the constructor
 431   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 432   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 433 
 434   _has_overflown(false),
 435   _concurrent(false),
 436   _has_aborted(false),
 437   _restart_for_overflow(false),
 438   _concurrent_marking_in_progress(false),
 439 
 440   // _verbose_level set below
 441 
 442   _init_times(),
 443   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 444   _cleanup_times(),
 445   _total_counting_time(0.0),
 446   _total_rs_scrub_time(0.0),
 447 
 448   _parallel_workers(NULL),
 449 
 450   _count_card_bitmaps(NULL),
 451   _count_marked_bytes(NULL),
 452   _completed_initialization(false) {
 453 
 454   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 455   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 456 
 457   // Create & start a ConcurrentMark thread.
 458   _cmThread = new ConcurrentMarkThread(this);
 459   assert(cmThread() != NULL, "CM Thread should have been created");
 460   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 461   if (_cmThread->osthread() == NULL) {
 462       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 463   }
 464 
 465   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 466   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 467   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 468 
 469   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 470   satb_qs.set_buffer_size(G1SATBBufferSize);
 471 
 472   _root_regions.init(_g1h, this);
 473 
 474   if (ConcGCThreads > ParallelGCThreads) {
 475     warning("Can't have more ConcGCThreads (%u) "
 476             "than ParallelGCThreads (%u).",
 477             ConcGCThreads, ParallelGCThreads);
 478     return;
 479   }
 480   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 481     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 482     // if both are set
 483     _sleep_factor             = 0.0;
 484     _marking_task_overhead    = 1.0;
 485   } else if (G1MarkingOverheadPercent > 0) {
 486     // We will calculate the number of parallel marking threads based
 487     // on a target overhead with respect to the soft real-time goal
 488     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 489     double overall_cm_overhead =
 490       (double) MaxGCPauseMillis * marking_overhead /
 491       (double) GCPauseIntervalMillis;
 492     double cpu_ratio = 1.0 / (double) os::processor_count();
 493     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 494     double marking_task_overhead =
 495       overall_cm_overhead / marking_thread_num *
 496                                               (double) os::processor_count();
 497     double sleep_factor =
 498                        (1.0 - marking_task_overhead) / marking_task_overhead;
 499 
 500     FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
 501     _sleep_factor             = sleep_factor;
 502     _marking_task_overhead    = marking_task_overhead;
 503   } else {
 504     // Calculate the number of parallel marking threads by scaling
 505     // the number of parallel GC threads.
 506     uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
 507     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 508     _sleep_factor             = 0.0;
 509     _marking_task_overhead    = 1.0;
 510   }
 511 
 512   assert(ConcGCThreads > 0, "Should have been set");
 513   _parallel_marking_threads = ConcGCThreads;
 514   _max_parallel_marking_threads = _parallel_marking_threads;
 515 
 516   _parallel_workers = new WorkGang("G1 Marker",
 517        _max_parallel_marking_threads, false, true);
 518   if (_parallel_workers == NULL) {
 519     vm_exit_during_initialization("Failed necessary allocation.");
 520   } else {
 521     _parallel_workers->initialize_workers();
 522   }
 523 
 524   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 525     size_t mark_stack_size =
 526       MIN2(MarkStackSizeMax,
 527           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 528     // Verify that the calculated value for MarkStackSize is in range.
 529     // It would be nice to use the private utility routine from Arguments.
 530     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 531       warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 532               "must be between 1 and " SIZE_FORMAT,
 533               mark_stack_size, MarkStackSizeMax);
 534       return;
 535     }
 536     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 537   } else {
 538     // Verify MarkStackSize is in range.
 539     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 540       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 541         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 542           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 543                   "must be between 1 and " SIZE_FORMAT,
 544                   MarkStackSize, MarkStackSizeMax);
 545           return;
 546         }
 547       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 548         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 549           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 550                   " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 551                   MarkStackSize, MarkStackSizeMax);
 552           return;
 553         }
 554       }
 555     }
 556   }
 557 
 558   if (!_markStack.allocate(MarkStackSize)) {
 559     warning("Failed to allocate CM marking stack");
 560     return;
 561   }
 562 
 563   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 564   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 565 
 566   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 567   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 568 
 569   BitMap::idx_t card_bm_size = _card_bm.size();
 570 
 571   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 572   _active_tasks = _max_worker_id;
 573 
 574   uint max_regions = _g1h->max_regions();
 575   for (uint i = 0; i < _max_worker_id; ++i) {
 576     CMTaskQueue* task_queue = new CMTaskQueue();
 577     task_queue->initialize();
 578     _task_queues->register_queue(i, task_queue);
 579 
 580     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 581     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 582 
 583     _tasks[i] = new CMTask(i, this,
 584                            _count_marked_bytes[i],
 585                            &_count_card_bitmaps[i],
 586                            task_queue, _task_queues);
 587 
 588     _accum_task_vtime[i] = 0.0;
 589   }
 590 
 591   // Calculate the card number for the bottom of the heap. Used
 592   // in biasing indexes into the accounting card bitmaps.
 593   _heap_bottom_card_num =
 594     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 595                                 CardTableModRefBS::card_shift);
 596 
 597   // Clear all the liveness counting data
 598   clear_all_count_data();
 599 
 600   // so that the call below can read a sensible value
 601   _heap_start = g1h->reserved_region().start();
 602   set_non_marking_state();
 603   _completed_initialization = true;
 604 }
 605 
 606 void ConcurrentMark::reset() {
 607   // Starting values for these two. This should be called in a STW
 608   // phase.
 609   MemRegion reserved = _g1h->g1_reserved();
 610   _heap_start = reserved.start();
 611   _heap_end   = reserved.end();
 612 
 613   // Separated the asserts so that we know which one fires.
 614   assert(_heap_start != NULL, "heap bounds should look ok");
 615   assert(_heap_end != NULL, "heap bounds should look ok");
 616   assert(_heap_start < _heap_end, "heap bounds should look ok");
 617 
 618   // Reset all the marking data structures and any necessary flags
 619   reset_marking_state();
 620 
 621   // We do reset all of them, since different phases will use
  // different numbers of active threads. So, it's easiest to have all
 623   // of them ready.
 624   for (uint i = 0; i < _max_worker_id; ++i) {
 625     _tasks[i]->reset(_nextMarkBitMap);
 626   }
 627 
  // We need this to make sure that the flag is on during the
  // evacuation pause with the initial mark piggy-backed on it.
 630   set_concurrent_marking_in_progress();
 631 }
 632 
 633 
 634 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 635   _markStack.set_should_expand();
 636   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 637   if (clear_overflow) {
 638     clear_has_overflown();
 639   } else {
 640     assert(has_overflown(), "pre-condition");
 641   }
 642   _finger = _heap_start;
 643 
 644   for (uint i = 0; i < _max_worker_id; ++i) {
 645     CMTaskQueue* queue = _task_queues->queue(i);
 646     queue->set_empty();
 647   }
 648 }
 649 
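// Update the number of active marking tasks for the upcoming phase and
// resize the parallel task terminator and the two overflow barriers to match.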
 650 void ConcurrentMark::set_concurrency(uint active_tasks) {
 651   assert(active_tasks <= _max_worker_id, "we should not have more");
 652 
 653   _active_tasks = active_tasks;
 654   // Need to update the three data structures below according to the
 655   // number of active threads for this phase.
 656   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 657   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 658   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 659 }
 660 
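// As set_concurrency(), but also record whether the upcoming phase runs
// concurrently and propagate that to all tasks. When switching to a
// non-concurrent (remark) phase, marking must already have run out of
// regions to claim.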
 661 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 662   set_concurrency(active_tasks);
 663 
 664   _concurrent = concurrent;
 665   // We propagate this to all tasks, not just the active ones.
 666   for (uint i = 0; i < _max_worker_id; ++i)
 667     _tasks[i]->set_concurrent(concurrent);
 668 
 669   if (concurrent) {
 670     set_concurrent_marking_in_progress();
 671   } else {
 672     // We currently assume that the concurrent flag has been set to
 673     // false before we start remark. At this point we should also be
 674     // in a STW phase.
 675     assert(!concurrent_marking_in_progress(), "invariant");
 676     assert(out_of_regions(),
 677            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 678            p2i(_finger), p2i(_heap_end));
 679   }
 680 }
 681 
 682 void ConcurrentMark::set_non_marking_state() {
 683   // We set the global marking state to some default values when we're
 684   // not doing marking.
 685   reset_marking_state();
 686   _active_tasks = 0;
 687   clear_concurrent_marking_in_progress();
 688 }
 689 
 690 ConcurrentMark::~ConcurrentMark() {
 691   // The ConcurrentMark instance is never freed.
 692   ShouldNotReachHere();
 693 }
 694 
 695 void ConcurrentMark::clearNextBitmap() {
 696   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 697 
  // Make sure that the concurrent mark thread still appears to be
  // in the current cycle.
 700   guarantee(cmThread()->during_cycle(), "invariant");
 701 
 702   // We are finishing up the current cycle by clearing the next
 703   // marking bitmap and getting it ready for the next cycle. During
 704   // this time no other cycle can start. So, let's make sure that this
 705   // is the case.
 706   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 707 
 708   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
 709   ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
 710   _parallel_workers->run_task(&task);
 711 
 712   // Clear the liveness counting data. If the marking has been aborted, the abort()
 713   // call already did that.
 714   if (cl.complete()) {
 715     clear_all_count_data();
 716   }
 717 
 718   // Repeat the asserts from above.
 719   guarantee(cmThread()->during_cycle(), "invariant");
 720   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 721 }
 722 
 723 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 724   CMBitMap* _bitmap;
 725   bool _error;
 726  public:
 727   CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
 728   }
 729 
 730   virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently with the mutator, so we must make sure
 732     // that the result of the getNextMarkedWordAddress() call is compared to the
 733     // value passed to it as limit to detect any found bits.
 734     // end never changes in G1.
 735     HeapWord* end = r->end();
 736     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 737   }
 738 };
 739 
 740 bool ConcurrentMark::nextMarkBitmapIsClear() {
 741   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 742   _g1h->heap_region_iterate(&cl);
 743   return cl.complete();
 744 }
 745 
 746 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 747 public:
 748   bool doHeapRegion(HeapRegion* r) {
 749     r->note_start_of_marking();
 750     return false;
 751   }
 752 };
 753 
 754 void ConcurrentMark::checkpointRootsInitialPre() {
 755   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 756   G1CollectorPolicy* g1p = g1h->g1_policy();
 757 
 758   _has_aborted = false;
 759 
 760   // Initialize marking structures. This has to be done in a STW phase.
 761   reset();
 762 
 763   // For each region note start of marking.
 764   NoteStartOfMarkHRClosure startcl;
 765   g1h->heap_region_iterate(&startcl);
 766 }
 767 
 768 
 769 void ConcurrentMark::checkpointRootsInitialPost() {
 770   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 771 
 772   // Start Concurrent Marking weak-reference discovery.
 773   ReferenceProcessor* rp = g1h->ref_processor_cm();
 774   // enable ("weak") refs discovery
 775   rp->enable_discovery();
 776   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 777 
 778   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
 781   satb_mq_set.set_active_all_threads(true, /* new active value */
 782                                      false /* expected_active */);
 783 
 784   _root_regions.prepare_for_scan();
 785 
 786   // update_g1_committed() will be called at the end of an evac pause
 787   // when marking is on. So, it's also called at the end of the
 788   // initial-mark pause to update the heap end, if the heap expands
 789   // during it. No need to call it here.
 790 }
 791 
 792 /*
 793  * Notice that in the next two methods, we actually leave the STS
 794  * during the barrier sync and join it immediately afterwards. If we
 795  * do not do this, the following deadlock can occur: one thread could
 796  * be in the barrier sync code, waiting for the other thread to also
 797  * sync up, whereas another one could be trying to yield, while also
 798  * waiting for the other threads to sync up too.
 799  *
 800  * Note, however, that this code is also used during remark and in
 801  * this case we should not attempt to leave / enter the STS, otherwise
 802  * we'll either hit an assert (debug / fastdebug) or deadlock
 803  * (product). So we should only leave / enter the STS if we are
 804  * operating concurrently.
 805  *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended for a Full GC or for an evacuation
 * pause to occur. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 811  */
 812 
 813 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 814   bool barrier_aborted;
 815   {
 816     SuspendibleThreadSetLeaver sts_leave(concurrent());
 817     barrier_aborted = !_first_overflow_barrier_sync.enter();
 818   }
 819 
 820   // at this point everyone should have synced up and not be doing any
 821   // more work
 822 
 823   if (barrier_aborted) {
 824     // If the barrier aborted we ignore the overflow condition and
 825     // just abort the whole marking phase as quickly as possible.
 826     return;
 827   }
 828 
 829   // If we're executing the concurrent phase of marking, reset the marking
 830   // state; otherwise the marking state is reset after reference processing,
 831   // during the remark pause.
 832   // If we reset here as a result of an overflow during the remark we will
 833   // see assertion failures from any subsequent set_concurrency_and_phase()
 834   // calls.
 835   if (concurrent()) {
    // let the task associated with worker 0 do this
 837     if (worker_id == 0) {
 838       // task 0 is responsible for clearing the global data structures
 839       // We should be here because of an overflow. During STW we should
 840       // not clear the overflow flag since we rely on it being true when
 841       // we exit this method to abort the pause and restart concurrent
 842       // marking.
 843       reset_marking_state(true /* clear_overflow */);
 844 
 845       log_info(gc)("Concurrent Mark reset for overflow");
 846     }
 847   }
 848 
  // after this, each task should reset its own data structures and
  // then go into the second barrier
 851 }
 852 
 853 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 854   SuspendibleThreadSetLeaver sts_leave(concurrent());
 855   _second_overflow_barrier_sync.enter();
 856 
 857   // at this point everything should be re-initialized and ready to go
 858 }
 859 
 860 class CMConcurrentMarkingTask: public AbstractGangTask {
 861 private:
 862   ConcurrentMark*       _cm;
 863   ConcurrentMarkThread* _cmt;
 864 
 865 public:
 866   void work(uint worker_id) {
 867     assert(Thread::current()->is_ConcurrentGC_thread(),
 868            "this should only be done by a conc GC thread");
 869     ResourceMark rm;
 870 
 871     double start_vtime = os::elapsedVTime();
 872 
 873     {
 874       SuspendibleThreadSetJoiner sts_join;
 875 
 876       assert(worker_id < _cm->active_tasks(), "invariant");
 877       CMTask* the_task = _cm->task(worker_id);
 878       the_task->record_start_time();
 879       if (!_cm->has_aborted()) {
 880         do {
 881           double start_vtime_sec = os::elapsedVTime();
 882           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 883 
 884           the_task->do_marking_step(mark_step_duration_ms,
 885                                     true  /* do_termination */,
 886                                     false /* is_serial*/);
 887 
 888           double end_vtime_sec = os::elapsedVTime();
 889           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
 890           _cm->clear_has_overflown();
 891 
 892           _cm->do_yield_check(worker_id);
 893 
 894           jlong sleep_time_ms;
 895           if (!_cm->has_aborted() && the_task->has_aborted()) {
 896             sleep_time_ms =
 897               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
 898             {
 899               SuspendibleThreadSetLeaver sts_leave;
 900               os::sleep(Thread::current(), sleep_time_ms, false);
 901             }
 902           }
 903         } while (!_cm->has_aborted() && the_task->has_aborted());
 904       }
 905       the_task->record_end_time();
 906       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
 907     }
 908 
 909     double end_vtime = os::elapsedVTime();
 910     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 911   }
 912 
 913   CMConcurrentMarkingTask(ConcurrentMark* cm,
 914                           ConcurrentMarkThread* cmt) :
 915       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
 916 
 917   ~CMConcurrentMarkingTask() { }
 918 };
 919 
 920 // Calculates the number of active workers for a concurrent
 921 // phase.
 922 uint ConcurrentMark::calc_parallel_marking_threads() {
 923   uint n_conc_workers = 0;
 924   if (!UseDynamicNumberOfGCThreads ||
 925       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
 926        !ForceDynamicNumberOfGCThreads)) {
 927     n_conc_workers = max_parallel_marking_threads();
 928   } else {
 929     n_conc_workers =
 930       AdaptiveSizePolicy::calc_default_active_workers(
 931                                    max_parallel_marking_threads(),
 932                                    1, /* Minimum workers */
 933                                    parallel_marking_threads(),
 934                                    Threads::number_of_non_daemon_threads());
 935     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
 936     // that scaling has already gone into "_max_parallel_marking_threads".
 937   }
 938   assert(n_conc_workers > 0, "Always need at least 1");
 939   return n_conc_workers;
 940 }
 941 
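// Scan all objects in the given root region [bottom, top), marking
// everything they reference via the root region scan closure.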
 942 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
 943   // Currently, only survivors can be root regions.
 944   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
 945   G1RootRegionScanClosure cl(_g1h, this, worker_id);
 946 
 947   const uintx interval = PrefetchScanIntervalInBytes;
 948   HeapWord* curr = hr->bottom();
 949   const HeapWord* end = hr->top();
 950   while (curr < end) {
 951     Prefetch::read(curr, interval);
 952     oop obj = oop(curr);
 953     int size = obj->oop_iterate_size(&cl);
 954     assert(size == obj->size(), "sanity");
 955     curr += size;
 956   }
 957 }
 958 
 959 class CMRootRegionScanTask : public AbstractGangTask {
 960 private:
 961   ConcurrentMark* _cm;
 962 
 963 public:
 964   CMRootRegionScanTask(ConcurrentMark* cm) :
 965     AbstractGangTask("Root Region Scan"), _cm(cm) { }
 966 
 967   void work(uint worker_id) {
 968     assert(Thread::current()->is_ConcurrentGC_thread(),
 969            "this should only be done by a conc GC thread");
 970 
 971     CMRootRegions* root_regions = _cm->root_regions();
 972     HeapRegion* hr = root_regions->claim_next();
 973     while (hr != NULL) {
 974       _cm->scanRootRegion(hr, worker_id);
 975       hr = root_regions->claim_next();
 976     }
 977   }
 978 };
 979 
 980 void ConcurrentMark::scanRootRegions() {
 981   // Start of concurrent marking.
 982   ClassLoaderDataGraph::clear_claimed_marks();
 983 
 984   // scan_in_progress() will have been set to true only if there was
 985   // at least one root region to scan. So, if it's false, we
 986   // should not attempt to do any further work.
 987   if (root_regions()->scan_in_progress()) {
 988     GCTraceConcTime(Info, gc) tt("Concurrent Root Region Scan");
 989 
 990     _parallel_marking_threads = calc_parallel_marking_threads();
 991     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
 992            "Maximum number of marking threads exceeded");
 993     uint active_workers = MAX2(1U, parallel_marking_threads());
 994 
 995     CMRootRegionScanTask task(this);
 996     _parallel_workers->set_active_workers(active_workers);
 997     _parallel_workers->run_task(&task);
 998 
 999     // It's possible that has_aborted() is true here without actually
1000     // aborting the survivor scan earlier. This is OK as it's
1001     // mainly used for sanity checking.
1002     root_regions()->scan_finished();
1003   }
1004 }
1005 
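// Run the concurrent phase of marking: recompute the number of marking
// threads, set the concurrency level, and have the worker gang execute
// CMConcurrentMarkingTask until marking completes, overflows the mark
// stack, or is aborted.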
1006 void ConcurrentMark::markFromRoots() {
1007   // we might be tempted to assert that:
1008   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1009   //        "inconsistent argument?");
1010   // However that wouldn't be right, because it's possible that
1011   // a safepoint is indeed in progress as a younger generation
1012   // stop-the-world GC happens even as we mark in this generation.
1013 
1014   _restart_for_overflow = false;
1015 
1016   // _g1h has _n_par_threads
1017   _parallel_marking_threads = calc_parallel_marking_threads();
1018   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1019     "Maximum number of marking threads exceeded");
1020 
1021   uint active_workers = MAX2(1U, parallel_marking_threads());
1022   assert(active_workers > 0, "Should have been set");
1023 
1024   // Parallel task terminator is set in "set_concurrency_and_phase()"
1025   set_concurrency_and_phase(active_workers, true /* concurrent */);
1026 
1027   CMConcurrentMarkingTask markingTask(this, cmThread());
1028   _parallel_workers->set_active_workers(active_workers);
1029   _parallel_workers->run_task(&markingTask);
1030   print_stats();
1031 }
1032 
1033 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1034   // world is stopped at this checkpoint
1035   assert(SafepointSynchronize::is_at_safepoint(),
1036          "world should be stopped");
1037 
1038   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1039 
1040   // If a full collection has happened, we shouldn't do this.
1041   if (has_aborted()) {
1042     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1043     return;
1044   }
1045 
1046   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1047 
1048   if (VerifyDuringGC) {
1049     HandleMark hm;  // handle scope
1050     g1h->prepare_for_verify();
1051     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1052   }
1053   g1h->check_bitmaps("Remark Start");
1054 
1055   G1CollectorPolicy* g1p = g1h->g1_policy();
1056   g1p->record_concurrent_mark_remark_start();
1057 
1058   double start = os::elapsedTime();
1059 
1060   checkpointRootsFinalWork();
1061 
1062   double mark_work_end = os::elapsedTime();
1063 
1064   weakRefsWork(clear_all_soft_refs);
1065 
1066   if (has_overflown()) {
1067     // Oops.  We overflowed.  Restart concurrent marking.
1068     _restart_for_overflow = true;
1069     log_develop_trace(gc)("Remark led to restart for overflow.");
1070 
1071     // Verify the heap w.r.t. the previous marking bitmap.
1072     if (VerifyDuringGC) {
1073       HandleMark hm;  // handle scope
1074       g1h->prepare_for_verify();
1075       Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
1076     }
1077 
1078     // Clear the marking state because we will be restarting
1079     // marking due to overflowing the global mark stack.
1080     reset_marking_state();
1081   } else {
1082     {
1083       GCTraceTime(Debug, gc) trace("GC Aggregate Data", g1h->gc_timer_cm());
1084 
1085       // Aggregate the per-task counting data that we have accumulated
1086       // while marking.
1087       aggregate_count_data();
1088     }
1089 
1090     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1091     // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
1094     satb_mq_set.set_active_all_threads(false, /* new active value */
1095                                        true /* expected_active */);
1096 
1097     if (VerifyDuringGC) {
1098       HandleMark hm;  // handle scope
1099       g1h->prepare_for_verify();
1100       Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
1101     }
1102     g1h->check_bitmaps("Remark End");
1103     assert(!restart_for_overflow(), "sanity");
1104     // Completely reset the marking state since marking completed
1105     set_non_marking_state();
1106   }
1107 
1108   // Expand the marking stack, if we have to and if we can.
1109   if (_markStack.should_expand()) {
1110     _markStack.expand();
1111   }
1112 
1113   // Statistics
1114   double now = os::elapsedTime();
1115   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1116   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1117   _remark_times.add((now - start) * 1000.0);
1118 
1119   g1p->record_concurrent_mark_remark_end();
1120 
1121   G1CMIsAliveClosure is_alive(g1h);
1122   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1123 }
1124 
1125 // Base class of the closures that finalize and verify the
1126 // liveness counting data.
1127 class CMCountDataClosureBase: public HeapRegionClosure {
1128 protected:
1129   G1CollectedHeap* _g1h;
1130   ConcurrentMark* _cm;
1131   CardTableModRefBS* _ct_bs;
1132 
1133   BitMap* _region_bm;
1134   BitMap* _card_bm;
1135 
  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit in the region
  // bitmap to 1.
1139   void set_bit_for_region(HeapRegion* hr) {
1140     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1141     _region_bm->par_at_put(index, true);
1142   }
1143 
1144 public:
1145   CMCountDataClosureBase(G1CollectedHeap* g1h,
1146                          BitMap* region_bm, BitMap* card_bm):
1147     _g1h(g1h), _cm(g1h->concurrent_mark()),
1148     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
1149     _region_bm(region_bm), _card_bm(card_bm) { }
1150 };
1151 
1152 // Closure that calculates the # live objects per region. Used
1153 // for verification purposes during the cleanup pause.
1154 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1155   CMBitMapRO* _bm;
1156   size_t _region_marked_bytes;
1157 
1158 public:
1159   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1160                          BitMap* region_bm, BitMap* card_bm) :
1161     CMCountDataClosureBase(g1h, region_bm, card_bm),
1162     _bm(bm), _region_marked_bytes(0) { }
1163 
1164   bool doHeapRegion(HeapRegion* hr) {
1165     HeapWord* ntams = hr->next_top_at_mark_start();
1166     HeapWord* start = hr->bottom();
1167 
1168     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1169            "Preconditions not met - "
1170            "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
1171            p2i(start), p2i(ntams), p2i(hr->end()));
1172 
1173     // Find the first marked object at or after "start".
1174     start = _bm->getNextMarkedWordAddress(start, ntams);
1175 
1176     size_t marked_bytes = 0;
1177 
1178     while (start < ntams) {
1179       oop obj = oop(start);
1180       int obj_sz = obj->size();
1181       HeapWord* obj_end = start + obj_sz;
1182 
1183       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1184       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1185 
      // Note: if we're looking at the last region in the heap, obj_end
      // could actually be just beyond the end of the heap; end_idx
1188       // will then correspond to a (non-existent) card that is also
1189       // just beyond the heap.
1190       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1191         // end of object is not card aligned - increment to cover
1192         // all the cards spanned by the object
1193         end_idx += 1;
1194       }
1195 
1196       // Set the bits in the card BM for the cards spanned by this object.
1197       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1198 
1199       // Add the size of this object to the number of marked bytes.
1200       marked_bytes += (size_t)obj_sz * HeapWordSize;
1201 
1202       // This will happen if we are handling a humongous object that spans
1203       // several heap regions.
1204       if (obj_end > hr->end()) {
1205         break;
1206       }
1207       // Find the next marked object after this one.
1208       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1209     }
1210 
1211     // Mark the allocated-since-marking portion...
1212     HeapWord* top = hr->top();
1213     if (ntams < top) {
1214       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1215       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1216 
      // Note: if we're looking at the last region in the heap, top
      // could actually be just beyond the end of the heap; end_idx
1219       // will then correspond to a (non-existent) card that is also
1220       // just beyond the heap.
1221       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1222         // end of object is not card aligned - increment to cover
1223         // all the cards spanned by the object
1224         end_idx += 1;
1225       }
1226       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1227 
1228       // This definitely means the region has live objects.
1229       set_bit_for_region(hr);
1230     }
1231 
1232     // Update the live region bitmap.
1233     if (marked_bytes > 0) {
1234       set_bit_for_region(hr);
1235     }
1236 
1237     // Set the marked bytes for the current region so that
1238     // it can be queried by a calling verification routine
1239     _region_marked_bytes = marked_bytes;
1240 
1241     return false;
1242   }
1243 
1244   size_t region_marked_bytes() const { return _region_marked_bytes; }
1245 };
1246 
1247 // Heap region closure used for verifying the counting data
1248 // that was accumulated concurrently and aggregated during
1249 // the remark pause. This closure is applied to the heap
1250 // regions during the STW cleanup pause.
1251 
1252 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1253   G1CollectedHeap* _g1h;
1254   ConcurrentMark* _cm;
1255   CalcLiveObjectsClosure _calc_cl;
1256   BitMap* _region_bm;   // Region BM to be verified
1257   BitMap* _card_bm;     // Card BM to be verified
1258 
1259   BitMap* _exp_region_bm; // Expected Region BM values
1260   BitMap* _exp_card_bm;   // Expected card BM values
1261 
1262   int _failures;
1263 
1264 public:
1265   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1266                                 BitMap* region_bm,
1267                                 BitMap* card_bm,
1268                                 BitMap* exp_region_bm,
1269                                 BitMap* exp_card_bm) :
1270     _g1h(g1h), _cm(g1h->concurrent_mark()),
1271     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1272     _region_bm(region_bm), _card_bm(card_bm),
1273     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1274     _failures(0) { }
1275 
1276   int failures() const { return _failures; }
1277 
1278   bool doHeapRegion(HeapRegion* hr) {
1279     int failures = 0;
1280 
1281     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1282     // this region and set the corresponding bits in the expected region
1283     // and card bitmaps.
1284     bool res = _calc_cl.doHeapRegion(hr);
1285     assert(res == false, "should be continuing");
1286 
1287     // Verify the marked bytes for this region.
1288     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1289     size_t act_marked_bytes = hr->next_marked_bytes();
1290 
1291     if (exp_marked_bytes > act_marked_bytes) {
1292       if (hr->is_starts_humongous()) {
        // For starts_humongous regions, the size of the whole object will be
1294         // in exp_marked_bytes.
1295         HeapRegion* region = hr;
1296         int num_regions;
1297         for (num_regions = 0; region != NULL; num_regions++) {
1298           region = _g1h->next_region_in_humongous(region);
1299         }
1300         if ((num_regions-1) * HeapRegion::GrainBytes >= exp_marked_bytes) {
1301           failures += 1;
1302         } else if (num_regions * HeapRegion::GrainBytes < exp_marked_bytes) {
1303           failures += 1;
1304         }
1305       } else {
        // We're not OK if expected marked bytes > actual marked bytes. It means
        // we have missed accounting for some objects during the actual marking.
1308         failures += 1;
1309       }
1310     }
1311 
1312     // Verify the bit, for this region, in the actual and expected
1313     // (which was just calculated) region bit maps.
1314     // We're not OK if the bit in the calculated expected region
1315     // bitmap is set and the bit in the actual region bitmap is not.
1316     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1317 
1318     bool expected = _exp_region_bm->at(index);
1319     bool actual = _region_bm->at(index);
1320     if (expected && !actual) {
1321       failures += 1;
1322     }
1323 
1324     // Verify that the card bit maps for the cards spanned by the current
1325     // region match. We have an error if we have a set bit in the expected
1326     // bit map and the corresponding bit in the actual bitmap is not set.
1327 
1328     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1329     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1330 
1331     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1332       expected = _exp_card_bm->at(i);
1333       actual = _card_bm->at(i);
1334 
1335       if (expected && !actual) {
1336         failures += 1;
1337       }
1338     }
1339 
1340     _failures += failures;
1341 
1342     // We could stop iteration over the heap when we
1343     // find the first violating region by returning true.
1344     return false;
1345   }
1346 };
1347 
1348 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1349 protected:
1350   G1CollectedHeap* _g1h;
1351   ConcurrentMark* _cm;
1352   BitMap* _actual_region_bm;
1353   BitMap* _actual_card_bm;
1354 
1355   uint    _n_workers;
1356 
1357   BitMap* _expected_region_bm;
1358   BitMap* _expected_card_bm;
1359 
1360   int  _failures;
1361 
1362   HeapRegionClaimer _hrclaimer;
1363 
1364 public:
1365   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1366                             BitMap* region_bm, BitMap* card_bm,
1367                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1368     : AbstractGangTask("G1 verify final counting"),
1369       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1370       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1371       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1372       _failures(0),
1373       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1374     assert(VerifyDuringGC, "don't call this otherwise");
1375     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1376     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1377   }
1378 
1379   void work(uint worker_id) {
1380     assert(worker_id < _n_workers, "invariant");
1381 
1382     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1383                                             _actual_region_bm, _actual_card_bm,
1384                                             _expected_region_bm,
1385                                             _expected_card_bm);
1386 
1387     _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
1388 
1389     Atomic::add(verify_cl.failures(), &_failures);
1390   }
1391 
1392   int failures() const { return _failures; }
1393 };
1394 
1395 // Closure that finalizes the liveness counting data.
1396 // Used during the cleanup pause.
1397 // Sets the bits corresponding to the interval [NTAMS, top]
1398 // (which contains the implicitly live objects) in the
1399 // card liveness bitmap. Also sets the bit for each region,
1400 // containing live data, in the region liveness bitmap.
1401 
1402 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1403  public:
1404   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1405                               BitMap* region_bm,
1406                               BitMap* card_bm) :
1407     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1408 
1409   bool doHeapRegion(HeapRegion* hr) {
1410     HeapWord* ntams = hr->next_top_at_mark_start();
1411     HeapWord* top   = hr->top();
1412 
1413     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1414 
1415     // Mark the allocated-since-marking portion...
1416     if (ntams < top) {
1417       // This definitely means the region has live objects.
1418       set_bit_for_region(hr);
1419 
1420       // Now set the bits in the card bitmap for [ntams, top)
1421       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1422       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1423 
1424       // Note: if we're looking at the last region in heap - top
1425       // could be actually just beyond the end of the heap; end_idx
1426       // will then correspond to a (non-existent) card that is also
1427       // just beyond the heap.
1428       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1429         // end of object is not card aligned - increment to cover
1430         // all the cards spanned by the object
1431         end_idx += 1;
1432       }
1433 
1434       assert(end_idx <= _card_bm->size(),
1435              "oob: end_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1436              end_idx, _card_bm->size());
1437       assert(start_idx < _card_bm->size(),
1438              "oob: start_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1439              start_idx, _card_bm->size());
1440 
1441       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1442     }
1443 
1444     // Set the bit for the region if it contains live data
1445     if (hr->next_marked_bytes() > 0) {
1446       set_bit_for_region(hr);
1447     }
1448 
1449     return false;
1450   }
1451 };
1452 
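     // Parallel task that applies FinalCountDataUpdateClosure (above) to every
     // region during the cleanup pause to finalize the liveness counting data.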
1453 class G1ParFinalCountTask: public AbstractGangTask {
1454 protected:
1455   G1CollectedHeap* _g1h;
1456   ConcurrentMark* _cm;
1457   BitMap* _actual_region_bm;
1458   BitMap* _actual_card_bm;
1459 
1460   uint    _n_workers;
1461   HeapRegionClaimer _hrclaimer;
1462 
1463 public:
1464   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1465     : AbstractGangTask("G1 final counting"),
1466       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1467       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1468       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1469   }
1470 
1471   void work(uint worker_id) {
1472     assert(worker_id < _n_workers, "invariant");
1473 
1474     FinalCountDataUpdateClosure final_update_cl(_g1h,
1475                                                 _actual_region_bm,
1476                                                 _actual_card_bm);
1477 
1478     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1479   }
1480 };
1481 
1482 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1483   G1CollectedHeap* _g1;
1484   size_t _freed_bytes;
1485   FreeRegionList* _local_cleanup_list;
1486   HeapRegionSetCount _old_regions_removed;
1487   HeapRegionSetCount _humongous_regions_removed;
1488   HRRSCleanupTask* _hrrs_cleanup_task;
1489 
1490 public:
1491   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1492                              FreeRegionList* local_cleanup_list,
1493                              HRRSCleanupTask* hrrs_cleanup_task) :
1494     _g1(g1),
1495     _freed_bytes(0),
1496     _local_cleanup_list(local_cleanup_list),
1497     _old_regions_removed(),
1498     _humongous_regions_removed(),
1499     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1500 
1501   size_t freed_bytes() { return _freed_bytes; }
1502   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1503   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1504 
1505   bool doHeapRegion(HeapRegion *hr) {
1506     if (hr->is_archive()) {
1507       return false;
1508     }
1509     // We use a claim value of zero here because all regions
1510     // were claimed with value 1 in the FinalCount task.
1511     _g1->reset_gc_time_stamps(hr);
1512     hr->note_end_of_marking();
1513 
1514     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1515       _freed_bytes += hr->used();
1516       hr->set_containing_set(NULL);
1517       if (hr->is_humongous()) {
1518         _humongous_regions_removed.increment(1u, hr->capacity());
1519         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1520       } else {
1521         _old_regions_removed.increment(1u, hr->capacity());
1522         _g1->free_region(hr, _local_cleanup_list, true);
1523       }
1524     } else {
1525       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1526     }
1527 
1528     return false;
1529   }
1530 };
1531 
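     // Parallel task run during the cleanup pause. Each worker notes the end
     // of marking for its regions, frees non-young regions that contain no
     // live data onto a local cleanup list, and appends that list to the
     // global cleanup list.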
1532 class G1ParNoteEndTask: public AbstractGangTask {
1533   friend class G1NoteEndOfConcMarkClosure;
1534 
1535 protected:
1536   G1CollectedHeap* _g1h;
1537   FreeRegionList* _cleanup_list;
1538   HeapRegionClaimer _hrclaimer;
1539 
1540 public:
1541   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1542       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1543   }
1544 
1545   void work(uint worker_id) {
1546     FreeRegionList local_cleanup_list("Local Cleanup List");
1547     HRRSCleanupTask hrrs_cleanup_task;
1548     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1549                                            &hrrs_cleanup_task);
1550     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1551     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1552 
1553     // Now update the lists
1554     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1555     {
1556       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1557       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1558 
1559       // If we iterate over the global cleanup list at the end of
1560       // cleanup to do this printing we will not guarantee to only
1561       // generate output for the newly-reclaimed regions (the list
1562       // might not be empty at the beginning of cleanup; we might
1563       // still be working on its previous contents). So we do the
1564       // printing here, before we append the new regions to the global
1565       // cleanup list.
1566 
1567       G1HRPrinter* hr_printer = _g1h->hr_printer();
1568       if (hr_printer->is_active()) {
1569         FreeRegionListIterator iter(&local_cleanup_list);
1570         while (iter.more_available()) {
1571           HeapRegion* hr = iter.get_next();
1572           hr_printer->cleanup(hr);
1573         }
1574       }
1575 
1576       _cleanup_list->add_ordered(&local_cleanup_list);
1577       assert(local_cleanup_list.is_empty(), "post-condition");
1578 
1579       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1580     }
1581   }
1582 };
1583 
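     // Parallel task that scrubs the remembered sets, using the region and
     // card liveness bitmaps produced by the final counting pass.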
1584 class G1ParScrubRemSetTask: public AbstractGangTask {
1585 protected:
1586   G1RemSet* _g1rs;
1587   BitMap* _region_bm;
1588   BitMap* _card_bm;
1589   HeapRegionClaimer _hrclaimer;
1590 
1591 public:
1592   G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
1593       AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) {
1594   }
1595 
1596   void work(uint worker_id) {
1597     _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
1598   }
1599 
1600 };
1601 
1602 void ConcurrentMark::cleanup() {
1603   // world is stopped at this checkpoint
1604   assert(SafepointSynchronize::is_at_safepoint(),
1605          "world should be stopped");
1606   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1607 
1608   // If a full collection has happened, we shouldn't do this.
1609   if (has_aborted()) {
1610     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1611     return;
1612   }
1613 
1614   g1h->verify_region_sets_optional();
1615 
1616   if (VerifyDuringGC) {
1617     HandleMark hm;  // handle scope
1618     g1h->prepare_for_verify();
1619     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1620   }
1621   g1h->check_bitmaps("Cleanup Start");
1622 
1623   G1CollectorPolicy* g1p = g1h->g1_policy();
1624   g1p->record_concurrent_mark_cleanup_start();
1625 
1626   double start = os::elapsedTime();
1627 
1628   HeapRegionRemSet::reset_for_cleanup_tasks();
1629 
1630   // Do counting once more with the world stopped for good measure.
1631   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1632 
1633   g1h->workers()->run_task(&g1_par_count_task);
1634 
1635   if (VerifyDuringGC) {
1636     // Verify that the counting data accumulated during marking matches
1637     // that calculated by walking the marking bitmap.
1638 
1639     // Bitmaps to hold expected values
1640     BitMap expected_region_bm(_region_bm.size(), true);
1641     BitMap expected_card_bm(_card_bm.size(), true);
1642 
1643     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1644                                                  &_region_bm,
1645                                                  &_card_bm,
1646                                                  &expected_region_bm,
1647                                                  &expected_card_bm);
1648 
1649     g1h->workers()->run_task(&g1_par_verify_task);
1650 
1651     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1652   }
1653 
1654   size_t start_used_bytes = g1h->used();
1655   g1h->collector_state()->set_mark_in_progress(false);
1656 
1657   double count_end = os::elapsedTime();
1658   double this_final_counting_time = (count_end - start);
1659   _total_counting_time += this_final_counting_time;
1660 
1661   if (log_is_enabled(Trace, gc, liveness)) {
1662     G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1663     _g1h->heap_region_iterate(&cl);
1664   }
1665 
1666   // Install newly created mark bitMap as "prev".
1667   swapMarkBitMaps();
1668 
1669   g1h->reset_gc_time_stamp();
1670 
1671   uint n_workers = _g1h->workers()->active_workers();
1672 
1673   // Note end of marking in all heap regions.
1674   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1675   g1h->workers()->run_task(&g1_par_note_end_task);
1676   g1h->check_gc_time_stamps();
1677 
1678   if (!cleanup_list_is_empty()) {
1679     // The cleanup list is not empty, so we'll have to process it
1680     // concurrently. Notify anyone else that might be wanting free
1681     // regions that there will be more free regions coming soon.
1682     g1h->set_free_regions_coming();
1683   }
1684 
1685   // Note: the RSet scrubbing must be done before the record_concurrent_mark_cleanup_end()
1686   // call below, since it affects the metric by which we sort the heap regions.
1687   if (G1ScrubRemSets) {
1688     double rs_scrub_start = os::elapsedTime();
1689     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
1690     g1h->workers()->run_task(&g1_par_scrub_rs_task);
1691 
1692     double rs_scrub_end = os::elapsedTime();
1693     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1694     _total_rs_scrub_time += this_rs_scrub_time;
1695   }
1696 
1697   // this will also free any regions totally full of garbage objects,
1698   // and sort the regions.
1699   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1700 
1701   // Statistics.
1702   double end = os::elapsedTime();
1703   _cleanup_times.add((end - start) * 1000.0);
1704 
1705   // Clean up will have freed any regions completely full of garbage.
1706   // Update the soft reference policy with the new heap occupancy.
1707   Universe::update_heap_info_at_gc();
1708 
1709   if (VerifyDuringGC) {
1710     HandleMark hm;  // handle scope
1711     g1h->prepare_for_verify();
1712     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
1713   }
1714 
1715   g1h->check_bitmaps("Cleanup End");
1716 
1717   g1h->verify_region_sets_optional();
1718 
1719   // We need to make this be a "collection" so any collection pause that
1720   // races with it goes around and waits for completeCleanup to finish.
1721   g1h->increment_total_collections();
1722 
1723   // Clean out dead classes and update Metaspace sizes.
1724   if (ClassUnloadingWithConcurrentMark) {
1725     ClassLoaderDataGraph::purge();
1726   }
1727   MetaspaceGC::compute_new_size();
1728 
1729   // We reclaimed old regions so we should calculate the sizes to make
1730   // sure we update the old gen/space data.
1731   g1h->g1mm()->update_sizes();
1732   g1h->allocation_context_stats().update_after_mark();
1733 
1734   g1h->trace_heap_after_concurrent_cycle();
1735 }
1736 
1737 void ConcurrentMark::completeCleanup() {
1738   if (has_aborted()) return;
1739 
1740   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1741 
1742   _cleanup_list.verify_optional();
1743   FreeRegionList tmp_free_list("Tmp Free List");
1744 
1745   log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1746                                   "cleanup list has %u entries",
1747                                   _cleanup_list.length());
1748 
1749   // No one else should be accessing the _cleanup_list at this point,
1750   // so it is not necessary to take any locks
1751   while (!_cleanup_list.is_empty()) {
1752     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1753     assert(hr != NULL, "Got NULL from a non-empty list");
1754     hr->par_clear();
1755     tmp_free_list.add_ordered(hr);
1756 
1757     // Instead of adding one region at a time to the secondary_free_list,
1758     // we accumulate them in the local list and move them a few at a
1759     // time. This also cuts down on the number of notify_all() calls
1760     // we do during this process. We'll also append the local list when
1761     // _cleanup_list is empty (which means we just removed the last
1762     // region from the _cleanup_list).
1763     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1764         _cleanup_list.is_empty()) {
1765       log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1766                                       "appending %u entries to the secondary_free_list, "
1767                                       "cleanup list still has %u entries",
1768                                       tmp_free_list.length(),
1769                                       _cleanup_list.length());
1770 
1771       {
1772         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1773         g1h->secondary_free_list_add(&tmp_free_list);
1774         SecondaryFreeList_lock->notify_all();
1775       }
1776 #ifndef PRODUCT
1777       if (G1StressConcRegionFreeing) {
1778         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1779           os::sleep(Thread::current(), (jlong) 1, false);
1780         }
1781       }
1782 #endif
1783     }
1784   }
1785   assert(tmp_free_list.is_empty(), "post-condition");
1786 }
1787 
1788 // Supporting Object and Oop closures for reference discovery
1789 // and processing during marking.
1790 
1791 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1792   HeapWord* addr = (HeapWord*)obj;
1793   return addr != NULL &&
1794          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1795 }
1796 
1797 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1798 // Uses the CMTask associated with a worker thread (for serial reference
1799 // processing the CMTask for worker 0 is used) to preserve (mark) and
1800 // trace referent objects.
1801 //
1802 // Using the CMTask and embedded local queues avoids having the worker
1803 // threads operating on the global mark stack. This reduces the risk
1804 // of overflowing the stack - which we would rather avoid at this late
1805 // stage. Also using the tasks' local queues removes the potential
1806 // of the workers interfering with each other that could occur if
1807 // operating on the global stack.
1808 
1809 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1810   ConcurrentMark* _cm;
1811   CMTask*         _task;
1812   int             _ref_counter_limit;
1813   int             _ref_counter;
1814   bool            _is_serial;
1815  public:
1816   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
1817     _cm(cm), _task(task), _is_serial(is_serial),
1818     _ref_counter_limit(G1RefProcDrainInterval) {
1819     assert(_ref_counter_limit > 0, "sanity");
1820     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1821     _ref_counter = _ref_counter_limit;
1822   }
1823 
1824   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1825   virtual void do_oop(      oop* p) { do_oop_work(p); }
1826 
1827   template <class T> void do_oop_work(T* p) {
1828     if (!_cm->has_overflown()) {
1829       oop obj = oopDesc::load_decode_heap_oop(p);
1830       _task->deal_with_reference(obj);
1831       _ref_counter--;
1832 
1833       if (_ref_counter == 0) {
1834         // We have dealt with _ref_counter_limit references, pushing them
1835         // and objects reachable from them on to the local stack (and
1836         // possibly the global stack). Call CMTask::do_marking_step() to
1837         // process these entries.
1838         //
1839         // We call CMTask::do_marking_step() in a loop, which we'll exit if
1840         // there's nothing more to do (i.e. we're done with the entries that
1841         // were pushed as a result of the CMTask::deal_with_reference() calls
1842         // above) or we overflow.
1843         //
1844         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
1845         // flag while there may still be some work to do. (See the comment at
1846         // the beginning of CMTask::do_marking_step() for those conditions -
1847         // one of which is reaching the specified time target.) It is only
1848         // when CMTask::do_marking_step() returns without setting the
1849         // has_aborted() flag that the marking step has completed.
1850         do {
1851           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1852           _task->do_marking_step(mark_step_duration_ms,
1853                                  false      /* do_termination */,
1854                                  _is_serial);
1855         } while (_task->has_aborted() && !_cm->has_overflown());
1856         _ref_counter = _ref_counter_limit;
1857       }
1858     }
1859   }
1860 };
1861 
1862 // 'Drain' oop closure used by both serial and parallel reference processing.
1863 // Uses the CMTask associated with a given worker thread (for serial
1864 // reference processing the CMTask for worker 0 is used). Calls the
1865 // do_marking_step routine, with an unbelievably large timeout value,
1866 // to drain the marking data structures of the remaining entries
1867 // added by the 'keep alive' oop closure above.
1868 
1869 class G1CMDrainMarkingStackClosure: public VoidClosure {
1870   ConcurrentMark* _cm;
1871   CMTask*         _task;
1872   bool            _is_serial;
1873  public:
1874   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
1875     _cm(cm), _task(task), _is_serial(is_serial) {
1876     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1877   }
1878 
1879   void do_void() {
1880     do {
1881       // We call CMTask::do_marking_step() to completely drain the local
1882       // and global marking stacks of entries pushed by the 'keep alive'
1883       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1884       //
1885       // CMTask::do_marking_step() is called in a loop, which we'll exit
1886       // if there's nothing more to do (i.e. we've completely drained the
1887       // entries that were pushed as a result of applying the 'keep alive'
1888       // closure to the entries on the discovered ref lists) or we overflow
1889       // the global marking stack.
1890       //
1891       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
1892       // flag while there may still be some work to do. (See the comment at
1893       // the beginning of CMTask::do_marking_step() for those conditions -
1894       // one of which is reaching the specified time target.) It is only
1895       // when CMTask::do_marking_step() returns without setting the
1896       // has_aborted() flag that the marking step has completed.
1897 
1898       _task->do_marking_step(1000000000.0 /* something very large */,
1899                              true         /* do_termination */,
1900                              _is_serial);
1901     } while (_task->has_aborted() && !_cm->has_overflown());
1902   }
1903 };
1904 
1905 // Implementation of AbstractRefProcTaskExecutor for parallel
1906 // reference processing at the end of G1 concurrent marking
1907 
1908 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
1909 private:
1910   G1CollectedHeap* _g1h;
1911   ConcurrentMark*  _cm;
1912   WorkGang*        _workers;
1913   uint             _active_workers;
1914 
1915 public:
1916   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1917                           ConcurrentMark* cm,
1918                           WorkGang* workers,
1919                           uint n_workers) :
1920     _g1h(g1h), _cm(cm),
1921     _workers(workers), _active_workers(n_workers) { }
1922 
1923   // Executes the given task using concurrent marking worker threads.
1924   virtual void execute(ProcessTask& task);
1925   virtual void execute(EnqueueTask& task);
1926 };
1927 
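     // Gang task that wraps a reference ProcessTask so that each worker runs
     // it with its own 'keep alive' and 'drain' closures.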
1928 class G1CMRefProcTaskProxy: public AbstractGangTask {
1929   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1930   ProcessTask&     _proc_task;
1931   G1CollectedHeap* _g1h;
1932   ConcurrentMark*  _cm;
1933 
1934 public:
1935   G1CMRefProcTaskProxy(ProcessTask& proc_task,
1936                      G1CollectedHeap* g1h,
1937                      ConcurrentMark* cm) :
1938     AbstractGangTask("Process reference objects in parallel"),
1939     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1940     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1941     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1942   }
1943 
1944   virtual void work(uint worker_id) {
1945     ResourceMark rm;
1946     HandleMark hm;
1947     CMTask* task = _cm->task(worker_id);
1948     G1CMIsAliveClosure g1_is_alive(_g1h);
1949     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1950     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1951 
1952     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1953   }
1954 };
1955 
1956 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
1957   assert(_workers != NULL, "Need parallel worker threads.");
1958   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1959 
1960   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1961 
1962   // We need to reset the concurrency level before each
1963   // proxy task execution, so that the termination protocol
1964   // and overflow handling in CMTask::do_marking_step() knows
1965   // how many workers to wait for.
1966   _cm->set_concurrency(_active_workers);
1967   _workers->run_task(&proc_task_proxy);
1968 }
1969 
1970 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
1971   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
1972   EnqueueTask& _enq_task;
1973 
1974 public:
1975   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
1976     AbstractGangTask("Enqueue reference objects in parallel"),
1977     _enq_task(enq_task) { }
1978 
1979   virtual void work(uint worker_id) {
1980     _enq_task.work(worker_id);
1981   }
1982 };
1983 
1984 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
1985   assert(_workers != NULL, "Need parallel worker threads.");
1986   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1987 
1988   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
1989 
1990   // Not strictly necessary but...
1991   //
1992   // We need to reset the concurrency level before each
1993   // proxy task execution, so that the termination protocol
1994   // and overflow handling in CMTask::do_marking_step() knows
1995   // how many workers to wait for.
1996   _cm->set_concurrency(_active_workers);
1997   _workers->run_task(&enq_task_proxy);
1998 }
1999 
2000 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2001   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2002 }
2003 
2004 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2005   if (has_overflown()) {
2006     // Skip processing the discovered references if we have
2007     // overflown the global marking stack. Reference objects
2008     // only get discovered once so it is OK not to
2009     // de-populate the discovered reference lists. We could have done so,
2010     // but the only benefit would be that, when marking restarts,
2011     // fewer reference objects are discovered.
2012     return;
2013   }
2014 
2015   ResourceMark rm;
2016   HandleMark   hm;
2017 
2018   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2019 
2020   // Is alive closure.
2021   G1CMIsAliveClosure g1_is_alive(g1h);
2022 
2023   // Inner scope to exclude the cleaning of the string and symbol
2024   // tables from the displayed time.
2025   {
2026     GCTraceTime(Debug, gc) trace("GC Ref Proc", g1h->gc_timer_cm());
2027 
2028     ReferenceProcessor* rp = g1h->ref_processor_cm();
2029 
2030     // See the comment in G1CollectedHeap::ref_processing_init()
2031     // about how reference processing currently works in G1.
2032 
2033     // Set the soft reference policy
2034     rp->setup_policy(clear_all_soft_refs);
2035     assert(_markStack.isEmpty(), "mark stack should be empty");
2036 
2037     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2038     // in serial reference processing. Note these closures are also
2039     // used for serially processing (by the current thread) the
2040     // JNI references during parallel reference processing.
2041     //
2042     // These closures do not need to synchronize with the worker
2043     // threads involved in parallel reference processing as these
2044     // instances are executed serially by the current thread (i.e.
2045     // reference processing is not multi-threaded and is thus
2046     // performed by the current thread instead of a gang worker).
2047     //
2048     // The gang tasks involved in parallel reference processing create
2049     // their own instances of these closures, which do their own
2050     // synchronization among themselves.
2051     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2052     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2053 
2054     // We need at least one active thread. If reference processing
2055     // is not multi-threaded we use the current (VMThread) thread,
2056     // otherwise we use the work gang from the G1CollectedHeap and
2057     // we utilize all the worker threads we can.
2058     bool processing_is_mt = rp->processing_is_mt();
2059     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2060     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2061 
2062     // Parallel processing task executor.
2063     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2064                                               g1h->workers(), active_workers);
2065     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2066 
2067     // Set the concurrency level. The phase was already set prior to
2068     // executing the remark task.
2069     set_concurrency(active_workers);
2070 
2071     // Set the degree of MT processing here.  If the discovery was done MT,
2072     // the number of threads involved during discovery could differ from
2073     // the number of active workers.  This is OK as long as the discovered
2074     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2075     rp->set_active_mt_degree(active_workers);
2076 
2077     // Process the weak references.
2078     const ReferenceProcessorStats& stats =
2079         rp->process_discovered_references(&g1_is_alive,
2080                                           &g1_keep_alive,
2081                                           &g1_drain_mark_stack,
2082                                           executor,
2083                                           g1h->gc_timer_cm());
2084     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2085 
2086     // The do_oop work routines of the keep_alive and drain_marking_stack
2087     // oop closures will set the has_overflown flag if we overflow the
2088     // global marking stack.
2089 
2090     assert(_markStack.overflow() || _markStack.isEmpty(),
2091             "mark stack should be empty (unless it overflowed)");
2092 
2093     if (_markStack.overflow()) {
2094       // This should have been done already when we tried to push an
2095       // entry on to the global mark stack. But let's do it again.
2096       set_has_overflown();
2097     }
2098 
2099     assert(rp->num_q() == active_workers, "why not");
2100 
2101     rp->enqueue_discovered_references(executor);
2102 
2103     rp->verify_no_references_recorded();
2104     assert(!rp->discovery_enabled(), "Post condition");
2105   }
2106 
2107   if (has_overflown()) {
2108     // We can not trust g1_is_alive if the marking stack overflowed
2109     return;
2110   }
2111 
2112   assert(_markStack.isEmpty(), "Marking should have completed");
2113 
2114   // Unload Klasses, String, Symbols, Code Cache, etc.
2115   {
2116     GCTraceTime(Debug, gc) trace("Unloading", g1h->gc_timer_cm());
2117 
2118     if (ClassUnloadingWithConcurrentMark) {
2119       bool purged_classes;
2120 
2121       {
2122         GCTraceTime(Trace, gc) trace("System Dictionary Unloading", g1h->gc_timer_cm());
2123         purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2124       }
2125 
2126       {
2127         GCTraceTime(Trace, gc) trace("Parallel Unloading", g1h->gc_timer_cm());
2128         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2129       }
2130     }
2131 
2132     if (G1StringDedup::is_enabled()) {
2133       GCTraceTime(Trace, gc) trace("String Deduplication Unlink", g1h->gc_timer_cm());
2134       G1StringDedup::unlink(&g1_is_alive);
2135     }
2136   }
2137 }
2138 
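     // Exchange the roles of the two marking bitmaps: the bitmap just
     // completed by marking becomes the new "prev" bitmap, while the old
     // "prev" bitmap will be cleared and reused as "next" for the next cycle.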
2139 void ConcurrentMark::swapMarkBitMaps() {
2140   CMBitMapRO* temp = _prevMarkBitMap;
2141   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2142   _nextMarkBitMap  = (CMBitMap*)  temp;
2143 }
2144 
2145 // Closure for marking entries in SATB buffers.
2146 class CMSATBBufferClosure : public SATBBufferClosure {
2147 private:
2148   CMTask* _task;
2149   G1CollectedHeap* _g1h;
2150 
2151   // This is very similar to CMTask::deal_with_reference, but with
2152   // more relaxed requirements for the argument, so this must be more
2153   // circumspect about treating the argument as an object.
2154   void do_entry(void* entry) const {
2155     _task->increment_refs_reached();
2156     HeapRegion* hr = _g1h->heap_region_containing(entry);
2157     if (entry < hr->next_top_at_mark_start()) {
2158       // Until we get here, we don't know whether entry refers to a valid
2159       // object; it could instead have been a stale reference.
2160       oop obj = static_cast<oop>(entry);
2161       assert(obj->is_oop(true /* ignore mark word */),
2162              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2163       _task->make_reference_grey(obj, hr);
2164     }
2165   }
2166 
2167 public:
2168   CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2169     : _task(task), _g1h(g1h) { }
2170 
2171   virtual void do_buffer(void** buffer, size_t size) {
2172     for (size_t i = 0; i < size; ++i) {
2173       do_entry(buffer[i]);
2174     }
2175   }
2176 };
2177 
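     // Thread closure used during remark. For each Java thread it walks the
     // thread's nmethods and drains the thread's SATB buffer; for the VM
     // thread it drains the shared SATB queue.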
2178 class G1RemarkThreadsClosure : public ThreadClosure {
2179   CMSATBBufferClosure _cm_satb_cl;
2180   G1CMOopClosure _cm_cl;
2181   MarkingCodeBlobClosure _code_cl;
2182   int _thread_parity;
2183 
2184  public:
2185   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2186     _cm_satb_cl(task, g1h),
2187     _cm_cl(g1h, g1h->concurrent_mark(), task),
2188     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2189     _thread_parity(Threads::thread_claim_parity()) {}
2190 
2191   void do_thread(Thread* thread) {
2192     if (thread->is_Java_thread()) {
2193       if (thread->claim_oops_do(true, _thread_parity)) {
2194         JavaThread* jt = (JavaThread*)thread;
2195 
2196         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2197         // however, oops reachable from nmethods have very complex lifecycles:
2198         // * Alive if on the stack of an executing method
2199         // * Weakly reachable otherwise
2200         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2201         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2202         jt->nmethods_do(&_code_cl);
2203 
2204         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2205       }
2206     } else if (thread->is_VM_thread()) {
2207       if (thread->claim_oops_do(true, _thread_parity)) {
2208         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2209       }
2210     }
2211   }
2212 };
2213 
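     // Parallel remark task. Each worker first processes the per-thread SATB
     // buffers and nmethod roots via G1RemarkThreadsClosure, then calls
     // do_marking_step() repeatedly until it either finishes or the global
     // mark stack overflows.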
2214 class CMRemarkTask: public AbstractGangTask {
2215 private:
2216   ConcurrentMark* _cm;
2217 public:
2218   void work(uint worker_id) {
2219     // Since all available tasks are actually started, we should
2220     // only proceed if we're supposed to be active.
2221     if (worker_id < _cm->active_tasks()) {
2222       CMTask* task = _cm->task(worker_id);
2223       task->record_start_time();
2224       {
2225         ResourceMark rm;
2226         HandleMark hm;
2227 
2228         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2229         Threads::threads_do(&threads_f);
2230       }
2231 
2232       do {
2233         task->do_marking_step(1000000000.0 /* something very large */,
2234                               true         /* do_termination       */,
2235                               false        /* is_serial            */);
2236       } while (task->has_aborted() && !_cm->has_overflown());
2237       // If we overflow, then we do not want to restart. We instead
2238       // want to abort remark and do concurrent marking again.
2239       task->record_end_time();
2240     }
2241   }
2242 
2243   CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2244     AbstractGangTask("Par Remark"), _cm(cm) {
2245     _cm->terminator()->reset_for_reuse(active_workers);
2246   }
2247 };
2248 
2249 void ConcurrentMark::checkpointRootsFinalWork() {
2250   ResourceMark rm;
2251   HandleMark   hm;
2252   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2253 
2254   GCTraceTime(Debug, gc) trace("Finalize Marking", g1h->gc_timer_cm());
2255 
2256   g1h->ensure_parsability(false);
2257 
2258   // this is remark, so we'll use up all active threads
2259   uint active_workers = g1h->workers()->active_workers();
2260   set_concurrency_and_phase(active_workers, false /* concurrent */);
2261   // Leave _parallel_marking_threads at its
2262   // value originally calculated in the ConcurrentMark
2263   // constructor and pass the number of active workers
2264   // to the task through the gang.
2265 
2266   {
2267     StrongRootsScope srs(active_workers);
2268 
2269     CMRemarkTask remarkTask(this, active_workers);
2270     // We will start all available threads, even if we decide that the
2271     // active_workers will be fewer. The extra ones will just bail out
2272     // immediately.
2273     g1h->workers()->run_task(&remarkTask);
2274   }
2275 
2276   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2277   guarantee(has_overflown() ||
2278             satb_mq_set.completed_buffers_num() == 0,
2279             "Invariant: has_overflown = %s, num buffers = %d",
2280             BOOL_TO_STR(has_overflown()),
2281             satb_mq_set.completed_buffers_num());
2282 
2283   print_stats();
2284 }
2285 
2286 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2287   // Note we are overriding the read-only view of the prev map here, via
2288   // the cast.
2289   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2290 }
2291 
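     // Claim a region for the given worker by atomically moving the global
     // finger from the start of that region to its end; if the CAS fails,
     // another worker moved the finger first and we retry from the new
     // finger value.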
2292 HeapRegion*
2293 ConcurrentMark::claim_region(uint worker_id) {
2294   // "checkpoint" the finger
2295   HeapWord* finger = _finger;
2296 
2297   // _heap_end will not change underneath our feet; it only changes at
2298   // yield points.
2299   while (finger < _heap_end) {
2300     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2301 
2302     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2303 
2304     // heap_region_containing() above may return NULL, as we always claim
2305     // regions up to the end of the heap. In this case, just jump to the next region.
2306     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2307 
2308     // Is the gap between reading the finger and doing the CAS too long?
2309     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2310     if (res == finger && curr_region != NULL) {
2311       // we succeeded
2312       HeapWord*   bottom        = curr_region->bottom();
2313       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2314 
2315       // notice that _finger == end cannot be guaranteed here since
2316       // someone else might have moved the finger even further
2317       assert(_finger >= end, "the finger should have moved forward");
2318 
2319       if (limit > bottom) {
2320         return curr_region;
2321       } else {
2322         assert(limit == bottom,
2323                "the region limit should be at bottom");
2324         // we return NULL and the caller should try calling
2325         // claim_region() again.
2326         return NULL;
2327       }
2328     } else {
2329       assert(_finger > finger, "the finger should have moved forward");
2330       // read it again
2331       finger = _finger;
2332     }
2333   }
2334 
2335   return NULL;
2336 }
2337 
2338 #ifndef PRODUCT
2339 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
2340 private:
2341   G1CollectedHeap* _g1h;
2342   const char* _phase;
2343   int _info;
2344 
2345 public:
2346   VerifyNoCSetOops(const char* phase, int info = -1) :
2347     _g1h(G1CollectedHeap::heap()),
2348     _phase(phase),
2349     _info(info)
2350   { }
2351 
2352   void operator()(oop obj) const {
2353     guarantee(obj->is_oop(),
2354               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2355               p2i(obj), _phase, _info);
2356     guarantee(!_g1h->obj_in_cs(obj),
2357               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2358               p2i(obj), _phase, _info);
2359   }
2360 };
2361 
2362 void ConcurrentMark::verify_no_cset_oops() {
2363   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2364   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2365     return;
2366   }
2367 
2368   // Verify entries on the global mark stack
2369   _markStack.iterate(VerifyNoCSetOops("Stack"));
2370 
2371   // Verify entries on the task queues
2372   for (uint i = 0; i < _max_worker_id; ++i) {
2373     CMTaskQueue* queue = _task_queues->queue(i);
2374     queue->iterate(VerifyNoCSetOops("Queue", i));
2375   }
2376 
2377   // Verify the global finger
2378   HeapWord* global_finger = finger();
2379   if (global_finger != NULL && global_finger < _heap_end) {
2380     // Since we always iterate over all regions, we might get a NULL HeapRegion
2381     // here.
2382     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
2383     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2384               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
2385               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
2386   }
2387 
2388   // Verify the task fingers
2389   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2390   for (uint i = 0; i < parallel_marking_threads(); ++i) {
2391     CMTask* task = _tasks[i];
2392     HeapWord* task_finger = task->finger();
2393     if (task_finger != NULL && task_finger < _heap_end) {
2394       // See above note on the global finger verification.
2395       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
2396       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2397                 !task_hr->in_collection_set(),
2398                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2399                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
2400     }
2401   }
2402 }
2403 #endif // PRODUCT
2404 
2405 // Aggregate the counting data that was constructed concurrently
2406 // with marking.
2407 class AggregateCountDataHRClosure: public HeapRegionClosure {
2408   G1CollectedHeap* _g1h;
2409   ConcurrentMark* _cm;
2410   CardTableModRefBS* _ct_bs;
2411   BitMap* _cm_card_bm;
2412   uint _max_worker_id;
2413 
2414  public:
2415   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2416                               BitMap* cm_card_bm,
2417                               uint max_worker_id) :
2418     _g1h(g1h), _cm(g1h->concurrent_mark()),
2419     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2420     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2421 
2422   bool doHeapRegion(HeapRegion* hr) {
2423     HeapWord* start = hr->bottom();
2424     HeapWord* limit = hr->next_top_at_mark_start();
2425     HeapWord* end = hr->end();
2426 
2427     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2428            "Preconditions not met - "
2429            "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2430            "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2431            p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2432 
2433     assert(hr->next_marked_bytes() == 0, "Precondition");
2434 
2435     if (start == limit) {
2436       // NTAMS of this region has not been set so nothing to do.
2437       return false;
2438     }
2439 
2440     // 'start' should be in the heap.
2441     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2442     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2443     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2444 
2445     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2446     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2447     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2448 
2449     // If ntams is not card aligned then we bump the card bitmap index
2450     // for limit so that we get all the cards spanned by
2451     // the object ending at ntams.
2452     // Note: if this is the last region in the heap then ntams
2453     // could actually be just beyond the end of the heap;
2454     // limit_idx will then correspond to a (non-existent) card
2455     // that is also outside the heap.
2456     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2457       limit_idx += 1;
2458     }
2459 
2460     assert(limit_idx <= end_idx, "or else use atomics");
2461 
2462     // Aggregate the "stripe" in the count data associated with hr.
2463     uint hrm_index = hr->hrm_index();
2464     size_t marked_bytes = 0;
2465 
2466     for (uint i = 0; i < _max_worker_id; i += 1) {
2467       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2468       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2469 
2470       // Fetch the marked_bytes in this region for task i and
2471       // add it to the running total for this region.
2472       marked_bytes += marked_bytes_array[hrm_index];
2473 
2474       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2475       // into the global card bitmap.
2476       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2477 
2478       while (scan_idx < limit_idx) {
2479         assert(task_card_bm->at(scan_idx) == true, "should be");
2480         _cm_card_bm->set_bit(scan_idx);
2481         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2482 
2483         // BitMap::get_next_one_offset() can handle the case when
2484         // its left_offset parameter is greater than its right_offset
2485         // parameter. It does, however, have an early exit if
2486         // left_offset == right_offset. So let's limit the value
2487         // passed in for left offset here.
2488         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2489         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2490       }
2491     }
2492 
2493     // Update the marked bytes for this region.
2494     hr->add_to_marked_bytes(marked_bytes);
2495 
2496     // Next heap region
2497     return false;
2498   }
2499 };
2500 
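     // Parallel task that applies AggregateCountDataHRClosure to every region,
     // folding each worker's marked-bytes array and card bitmap into the
     // global liveness counting data.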
2501 class G1AggregateCountDataTask: public AbstractGangTask {
2502 protected:
2503   G1CollectedHeap* _g1h;
2504   ConcurrentMark* _cm;
2505   BitMap* _cm_card_bm;
2506   uint _max_worker_id;
2507   uint _active_workers;
2508   HeapRegionClaimer _hrclaimer;
2509 
2510 public:
2511   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2512                            ConcurrentMark* cm,
2513                            BitMap* cm_card_bm,
2514                            uint max_worker_id,
2515                            uint n_workers) :
2516       AbstractGangTask("Count Aggregation"),
2517       _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2518       _max_worker_id(max_worker_id),
2519       _active_workers(n_workers),
2520       _hrclaimer(_active_workers) {
2521   }
2522 
2523   void work(uint worker_id) {
2524     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
2525 
2526     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
2527   }
2528 };
2529 
2530 
2531 void ConcurrentMark::aggregate_count_data() {
2532   uint n_workers = _g1h->workers()->active_workers();
2533 
2534   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
2535                                            _max_worker_id, n_workers);
2536 
2537   _g1h->workers()->run_task(&g1_par_agg_task);
2538 }
2539 
2540 // Clear the per-worker arrays used to store the per-region counting data
2541 void ConcurrentMark::clear_all_count_data() {
2542   // Clear the global card bitmap - it will be filled during
2543   // liveness count aggregation (during remark) and the
2544   // final counting task.
2545   _card_bm.clear();
2546 
2547   // Clear the global region bitmap - it will be filled as part
2548   // of the final counting task.
2549   _region_bm.clear();
2550 
2551   uint max_regions = _g1h->max_regions();
2552   assert(_max_worker_id > 0, "uninitialized");
2553 
2554   for (uint i = 0; i < _max_worker_id; i += 1) {
2555     BitMap* task_card_bm = count_card_bitmap_for(i);
2556     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
2557 
2558     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
2559     assert(marked_bytes_array != NULL, "uninitialized");
2560 
2561     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
2562     task_card_bm->clear();
2563   }
2564 }
2565 
2566 void ConcurrentMark::print_stats() {
2567   if (!log_is_enabled(Debug, gc, stats)) {
2568     return;
2569   }
2570   log_debug(gc, stats)("---------------------------------------------------------------------");
2571   for (size_t i = 0; i < _active_tasks; ++i) {
2572     _tasks[i]->print_stats();
2573     log_debug(gc, stats)("---------------------------------------------------------------------");
2574   }
2575 }
2576 
2577 // abandon current marking iteration due to a Full GC
2578 void ConcurrentMark::abort() {
2579   if (!cmThread()->during_cycle() || _has_aborted) {
2580     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2581     return;
2582   }
2583 
2584   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2585   // concurrent bitmap clearing.
2586   _nextMarkBitMap->clearAll();
2587 
2588   // Note we cannot clear the previous marking bitmap here
2589   // since VerifyDuringGC verifies the objects marked during
2590   // a full GC against the previous bitmap.
2591 
2592   // Clear the liveness counting data
2593   clear_all_count_data();
2594   // Empty mark stack
2595   reset_marking_state();
2596   for (uint i = 0; i < _max_worker_id; ++i) {
2597     _tasks[i]->clear_region_fields();
2598   }
2599   _first_overflow_barrier_sync.abort();
2600   _second_overflow_barrier_sync.abort();
2601   _has_aborted = true;
2602 
2603   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2604   satb_mq_set.abandon_partial_marking();
2605   // This can be called either during or outside marking; we'll read
2606   // the expected_active value from the SATB queue set.
2607   satb_mq_set.set_active_all_threads(
2608                                  false, /* new active value */
2609                                  satb_mq_set.is_active() /* expected_active */);
2610 
2611   _g1h->trace_heap_after_concurrent_cycle();
2612   _g1h->register_concurrent_cycle_end();
2613 }
2614 
2615 static void print_ms_time_info(const char* prefix, const char* name,
2616                                NumberSeq& ns) {
2617   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2618                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2619   if (ns.num() > 0) {
2620     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2621                            prefix, ns.sd(), ns.maximum());
2622   }
2623 }
2624 
2625 void ConcurrentMark::print_summary_info() {
2626   LogHandle(gc, marking) log;
2627   if (!log.is_trace()) {
2628     return;
2629   }
2630 
2631   log.trace(" Concurrent marking:");
2632   print_ms_time_info("  ", "init marks", _init_times);
2633   print_ms_time_info("  ", "remarks", _remark_times);
2634   {
2635     print_ms_time_info("     ", "final marks", _remark_mark_times);
2636     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2637 
2638   }
2639   print_ms_time_info("  ", "cleanups", _cleanup_times);
2640   log.trace("    Final counting total time = %8.2f s (avg = %8.2f ms).",
2641             _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2642   if (G1ScrubRemSets) {
2643     log.trace("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2644               _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2645   }
2646   log.trace("  Total stop_world time = %8.2f s.",
2647             (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
2648   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2649             cmThread()->vtime_accum(), cmThread()->vtime_mark_accum());
2650 }
2651 
2652 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2653   _parallel_workers->print_worker_threads_on(st);
2654 }
2655 
2656 void ConcurrentMark::print_on_error(outputStream* st) const {
2657   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2658       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
2659   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
2660   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
2661 }
2662 
2663 // We take a break if someone is trying to stop the world.
2664 bool ConcurrentMark::do_yield_check(uint worker_id) {
2665   if (SuspendibleThreadSet::should_yield()) {
2666     if (worker_id == 0) {
2667       _g1h->g1_policy()->record_concurrent_pause();
2668     }
2669     SuspendibleThreadSet::yield();
2670     return true;
2671   } else {
2672     return false;
2673   }
2674 }
2675 
2676 // Closure for iteration over bitmaps
2677 class CMBitMapClosure : public BitMapClosure {
2678 private:
2679   // the bitmap that is being iterated over
2680   CMBitMap*                   _nextMarkBitMap;
2681   ConcurrentMark*             _cm;
2682   CMTask*                     _task;
2683 
2684 public:
2685   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
2686     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2687 
2688   bool do_bit(size_t offset) {
2689     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2690     assert(_nextMarkBitMap->isMarked(addr), "invariant");
2691     assert( addr < _cm->finger(), "invariant");
2692     assert(addr >= _task->finger(), "invariant");
2693 
2694     // We move that task's local finger along.
2695     _task->move_finger_to(addr);
2696 
2697     _task->scan_object(oop(addr));
2698     // we only partially drain the local queue and global stack
2699     _task->drain_local_queue(true);
2700     _task->drain_global_stack(true);
2701 
2702     // if the has_aborted flag has been raised, we need to bail out of
2703     // the iteration
2704     return !_task->has_aborted();
2705   }
2706 };
2707 
2708 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2709   ReferenceProcessor* result = NULL;
2710   if (G1UseConcMarkReferenceProcessing) {
2711     result = g1h->ref_processor_cm();
2712     assert(result != NULL, "should not be NULL");
2713   }
2714   return result;
2715 }
2716 
2717 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2718                                ConcurrentMark* cm,
2719                                CMTask* task)
2720   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2721     _g1h(g1h), _cm(cm), _task(task)
2722 { }
2723 
2724 void CMTask::setup_for_region(HeapRegion* hr) {
2725   assert(hr != NULL,
2726         "claim_region() should have filtered out NULL regions");
2727   _curr_region  = hr;
2728   _finger       = hr->bottom();
2729   update_region_limit();
2730 }
2731 
2732 void CMTask::update_region_limit() {
2733   HeapRegion* hr            = _curr_region;
2734   HeapWord* bottom          = hr->bottom();
2735   HeapWord* limit           = hr->next_top_at_mark_start();
2736 
2737   if (limit == bottom) {
2738     // The region was collected underneath our feet.
2739     // We set the finger to bottom to ensure that the bitmap
2740     // iteration that will follow this will not do anything.
2741     // (this is not a condition that holds when we set the region up,
2742     // as the region is not supposed to be empty in the first place)
2743     _finger = bottom;
2744   } else if (limit >= _region_limit) {
2745     assert(limit >= _finger, "peace of mind");
2746   } else {
2747     assert(limit < _region_limit, "only way to get here");
2748     // This can happen under some pretty unusual circumstances.  An
2749     // evacuation pause empties the region underneath our feet (NTAMS
2750     // at bottom). We then do some allocation in the region (NTAMS
2751     // stays at bottom), followed by the region being used as a GC
2752     // alloc region (NTAMS will move to top() and the objects
2753     // originally below it will be grayed). All objects now marked in
2754     // the region are explicitly grayed, if below the global finger,
2755     // and in fact we do not need to scan anything else. So, we simply
2756     // set _finger to be limit to ensure that the bitmap iteration
2757     // doesn't do anything.
2758     _finger = limit;
2759   }
2760 
2761   _region_limit = limit;
2762 }
2763 
2764 void CMTask::giveup_current_region() {
2765   assert(_curr_region != NULL, "invariant");
2766   clear_region_fields();
2767 }
2768 
2769 void CMTask::clear_region_fields() {
2770   // Values for these three fields that indicate that we're not
2771   // holding on to a region.
2772   _curr_region   = NULL;
2773   _finger        = NULL;
2774   _region_limit  = NULL;
2775 }
2776 
2777 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2778   if (cm_oop_closure == NULL) {
2779     assert(_cm_oop_closure != NULL, "invariant");
2780   } else {
2781     assert(_cm_oop_closure == NULL, "invariant");
2782   }
2783   _cm_oop_closure = cm_oop_closure;
2784 }
2785 
2786 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2787   guarantee(nextMarkBitMap != NULL, "invariant");
2788   _nextMarkBitMap                = nextMarkBitMap;
2789   clear_region_fields();
2790 
2791   _calls                         = 0;
2792   _elapsed_time_ms               = 0.0;
2793   _termination_time_ms           = 0.0;
2794   _termination_start_time_ms     = 0.0;
2795 }
2796 
2797 bool CMTask::should_exit_termination() {
2798   regular_clock_call();
2799   // This is called when we are in the termination protocol. We should
2800   // quit if, for some reason, this task wants to abort or the global
2801   // stack is not empty (this means that we can get work from it).
2802   return !_cm->mark_stack_empty() || has_aborted();
2803 }
2804 
2805 void CMTask::reached_limit() {
2806   assert(_words_scanned >= _words_scanned_limit ||
2807          _refs_reached >= _refs_reached_limit,
2808          "shouldn't have been called otherwise");
2809   regular_clock_call();
2810 }
2811 
2812 void CMTask::regular_clock_call() {
2813   if (has_aborted()) return;
2814 
2815   // First, we need to recalculate the words scanned and refs reached
2816   // limits for the next clock call.
2817   recalculate_limits();
2818 
2819   // During the regular clock call we do the following:
2820 
2821   // (1) If an overflow has been flagged, then we abort.
2822   if (_cm->has_overflown()) {
2823     set_has_aborted();
2824     return;
2825   }
2826 
2827   // If we are not concurrent (i.e. we're doing remark) we don't need
2828   // to check anything else. The other steps are only needed during
2829   // the concurrent marking phase.
2830   if (!concurrent()) return;
2831 
2832   // (2) If marking has been aborted for Full GC, then we also abort.
2833   if (_cm->has_aborted()) {
2834     set_has_aborted();
2835     return;
2836   }
2837 
2838   double curr_time_ms = os::elapsedVTime() * 1000.0;
2839 
2840   // (3) We check whether we should yield. If we have to, then we abort.
2841   if (SuspendibleThreadSet::should_yield()) {
2842     // We should yield. To do this we abort the task. The caller is
2843     // responsible for yielding.
2844     set_has_aborted();
2845     return;
2846   }
2847 
2848   // (4) We check whether we've reached our time quota. If we have,
2849   // then we abort.
2850   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2851   if (elapsed_time_ms > _time_target_ms) {
2852     set_has_aborted();
2853     _has_timed_out = true;
2854     return;
2855   }
2856 
2857   // (5) Finally, we check whether there are enough completed SATB
2858   // buffers available for processing. If there are, we abort.
2859   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2860   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2861     // we need to process SATB buffers, so we'll abort and restart
2862     // the marking task to do so
2863     set_has_aborted();
2864     return;
2865   }
2866 }
2867 
2868 void CMTask::recalculate_limits() {
2869   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2870   _words_scanned_limit      = _real_words_scanned_limit;
2871 
2872   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2873   _refs_reached_limit       = _real_refs_reached_limit;
2874 }
2875 
2876 void CMTask::decrease_limits() {
2877   // This is called when we believe that we're going to do an infrequent
2878   // operation which will increase the per-byte scanned cost (i.e. move
2879   // entries to/from the global stack). It basically tries to decrease the
2880   // scanning limit so that the clock is called earlier.
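  //
  // A worked example, for illustration only (the real period constants
  // are declared in CMTask, so the numbers here are just assumed): if
  // words_scanned_period were 12*1024 words and we had just called
  // recalculate_limits(), then
  //   _real_words_scanned_limit == _words_scanned + 12*1024
  // and after decrease_limits()
  //   _words_scanned_limit == _real_words_scanned_limit - 9*1024
  // i.e. only a quarter of the usual scanning budget remains before
  // regular_clock_call() is triggered again.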
2881 
2882   _words_scanned_limit = _real_words_scanned_limit -
2883     3 * words_scanned_period / 4;
2884   _refs_reached_limit  = _real_refs_reached_limit -
2885     3 * refs_reached_period / 4;
2886 }
2887 
2888 void CMTask::move_entries_to_global_stack() {
2889   // local array where we'll store the entries that will be popped
2890   // from the local queue
2891   oop buffer[global_stack_transfer_size];
2892 
2893   int n = 0;
2894   oop obj;
2895   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
2896     buffer[n] = obj;
2897     ++n;
2898   }
2899 
2900   if (n > 0) {
2901     // we popped at least one entry from the local queue
2902 
2903     if (!_cm->mark_stack_push(buffer, n)) {
2904       set_has_aborted();
2905     }
2906   }
2907 
2908   // this operation was quite expensive, so decrease the limits
2909   decrease_limits();
2910 }
2911 
2912 void CMTask::get_entries_from_global_stack() {
2913   // local array where we'll store the entries that will be popped
2914   // from the global stack.
2915   oop buffer[global_stack_transfer_size];
2916   int n;
2917   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
2918   assert(n <= global_stack_transfer_size,
2919          "we should not pop more than the given limit");
2920   if (n > 0) {
2921     // yes, we did actually pop at least one entry
2922     for (int i = 0; i < n; ++i) {
2923       bool success = _task_queue->push(buffer[i]);
2924       // We only call this when the local queue is empty or under a
2925       // given target limit. So, we do not expect this push to fail.
2926       assert(success, "invariant");
2927     }
2928   }
2929 
2930   // this operation was quite expensive, so decrease the limits
2931   decrease_limits();
2932 }
2933 
2934 void CMTask::drain_local_queue(bool partially) {
2935   if (has_aborted()) return;
2936 
2937   // Decide what the target size is, depending on whether we're going to
2938   // drain it partially (so that other tasks can steal if they run out
2939   // of things to do) or totally (at the very end).
2940   size_t target_size;
2941   if (partially) {
2942     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2943   } else {
2944     target_size = 0;
2945   }
2946 
2947   if (_task_queue->size() > target_size) {
2948     oop obj;
2949     bool ret = _task_queue->pop_local(obj);
2950     while (ret) {
2951       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
2952       assert(!_g1h->is_on_master_free_list(
2953                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
2954 
2955       scan_object(obj);
2956 
2957       if (_task_queue->size() <= target_size || has_aborted()) {
2958         ret = false;
2959       } else {
2960         ret = _task_queue->pop_local(obj);
2961       }
2962     }
2963   }
2964 }
2965 
2966 void CMTask::drain_global_stack(bool partially) {
2967   if (has_aborted()) return;
2968 
2969   // We have a policy to drain the local queue before we attempt to
2970   // drain the global stack.
2971   assert(partially || _task_queue->size() == 0, "invariant");
2972 
2973   // Decide what the target size is, depending on whether we're going to
2974   // drain it partially (so that other tasks can steal if they run out
2975   // of things to do) or totally (at the very end).  Notice that,
2976   // because we move entries from the global stack in chunks or
2977   // because another task might be doing the same, we might in fact
2978   // drop below the target. But, this is not a problem.
2979   size_t target_size;
2980   if (partially) {
2981     target_size = _cm->partial_mark_stack_size_target();
2982   } else {
2983     target_size = 0;
2984   }
2985 
2986   if (_cm->mark_stack_size() > target_size) {
2987     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2988       get_entries_from_global_stack();
2989       drain_local_queue(partially);
2990     }
2991   }
2992 }
2993 
2994 // The SATB queue code has several assumptions about whether to call the par
2995 // or non-par versions of the methods. This is why some of the code is
2996 // replicated. We should really get rid of the single-threaded version
2997 // of the code to simplify things.
2998 void CMTask::drain_satb_buffers() {
2999   if (has_aborted()) return;
3000 
3001   // We set this so that the regular clock knows that we're in the
3002   // middle of draining buffers and doesn't set the abort flag when it
3003   // notices that SATB buffers are available for draining. It'd be
3004   // very counterproductive if it did that. :-)
3005   _draining_satb_buffers = true;
3006 
3007   CMSATBBufferClosure satb_cl(this, _g1h);
3008   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3009 
3010   // This keeps claiming and applying the closure to completed buffers
3011   // until we run out of buffers or we need to abort.
3012   while (!has_aborted() &&
3013          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3014     regular_clock_call();
3015   }
3016 
3017   _draining_satb_buffers = false;
3018 
3019   assert(has_aborted() ||
3020          concurrent() ||
3021          satb_mq_set.completed_buffers_num() == 0, "invariant");
3022 
3023   // again, this was a potentially expensive operation, so decrease the
3024   // limits to get the regular clock call early
3025   decrease_limits();
3026 }
3027 
3028 void CMTask::print_stats() {
3029   log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
3030                        _worker_id, _calls);
3031   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3032                        _elapsed_time_ms, _termination_time_ms);
3033   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3034                        _step_times_ms.num(), _step_times_ms.avg(),
3035                        _step_times_ms.sd());
3036   log_debug(gc, stats)("                    max = %1.2lfms, total = %1.2lfms",
3037                        _step_times_ms.maximum(), _step_times_ms.sum());
3038 }
3039 
3040 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3041   return _task_queues->steal(worker_id, hash_seed, obj);
3042 }
3043 
3044 /*****************************************************************************
3045 
3046     The do_marking_step(time_target_ms, ...) method is the building
3047     block of the parallel marking framework. It can be called in parallel
3048     with other invocations of do_marking_step() on different tasks
3049     (but only one per task, obviously) and concurrently with the
3050     mutator threads, or during remark, hence it eliminates the need
3051     for two versions of the code. When called during remark, it will
3052     pick up from where the task left off during the concurrent marking
3053     phase. Interestingly, tasks are also claimable during evacuation
3054     pauses, since do_marking_step() ensures that it aborts before
3055     it needs to yield.
3056 
3057     The data structures that it uses to do marking work are the
3058     following:
3059 
3060       (1) Marking Bitmap. If there are gray objects that appear only
3061       on the bitmap (this happens either when dealing with an overflow
3062       or when the initial marking phase has simply marked the roots
3063       and didn't push them on the stack), then tasks claim heap
3064       regions whose bitmap they then scan to find gray objects. A
3065       global finger indicates where the end of the last claimed region
3066       is. A local finger indicates how far into the region a task has
3067       scanned. The two fingers are used to determine how to gray an
3068       object (i.e. whether simply marking it is OK, as it will be
3069       visited by a task in the future, or whether it also needs to be
3070       pushed on a stack).
3071 
3072       (2) Local Queue. The task's local queue, which the task can
3073       access reasonably efficiently. Other tasks can steal from
3074       it when they run out of work. Throughout the marking phase, a
3075       task attempts to keep its local queue short but not totally
3076       empty, so that entries are available for stealing by other
3077       tasks. Only when there is no more work will a task totally
3078       drain its local queue.
3079 
3080       (3) Global Mark Stack. This handles local queue overflow. During
3081       marking only sets of entries are moved between it and the local
3082       queues, as access to it requires a mutex, and more fine-grained
3083       interaction with it might cause contention. If it
3084       overflows, then the marking phase should restart and iterate
3085       over the bitmap to identify gray objects. Throughout the marking
3086       phase, tasks attempt to keep the global mark stack at a small
3087       length but not totally empty, so that entries are available for
3088       popping by other tasks. Only when there is no more work will
3089       tasks totally drain the global mark stack.
3090 
3091       (4) SATB Buffer Queue. This is where completed SATB buffers are
3092       made available. Buffers are regularly removed from this queue
3093       and scanned for roots, so that the queue doesn't get too
3094       long. During remark, all completed buffers are processed, as
3095       well as the filled-in parts of any uncompleted buffers.
3096 
3097     The do_marking_step() method tries to abort when the time target
3098     has been reached. There are a few other cases when the
3099     do_marking_step() method also aborts:
3100 
3101       (1) When the marking phase has been aborted (after a Full GC).
3102 
3103       (2) When a global overflow (on the global stack) has been
3104       triggered. Before the task aborts, it will actually sync up with
3105       the other tasks to ensure that all the marking data structures
3106       (local queues, stacks, fingers etc.)  are re-initialized so that
3107       when do_marking_step() completes, the marking phase can
3108       immediately restart.
3109 
3110       (3) When enough completed SATB buffers are available. The
3111       do_marking_step() method only tries to drain SATB buffers right
3112       at the beginning. So, if enough buffers are available, the
3113       marking step aborts and the SATB buffers are processed at
3114       the beginning of the next invocation.
3115 
3116       (4) To yield. When we have to yield, we abort and yield
3117       right at the end of do_marking_step(). This saves us a lot
3118       of hassle because, by yielding, we might allow a Full GC. If this
3119       happens then objects will be compacted underneath our feet, the
3120       heap might shrink, etc. We avoid having to check for this by just
3121       aborting and doing the yield right at the end.
3122 
3123     From the above it follows that the do_marking_step() method should
3124     be called in a loop (or, otherwise, regularly) until it completes.
3125 
3126     If a marking step completes without its has_aborted() flag being
3127     true, it means it has completed the current marking phase (and
3128     also all other marking tasks have done so and have all synced up).
3129 
3130     A method called regular_clock_call() is invoked "regularly" (in
3131     sub-ms intervals) throughout marking. It is this clock method that
3132     checks all the abort conditions which were mentioned above and
3133     decides when the task should abort. A work-based scheme is used to
3134     trigger this clock method: when the number of object words the
3135     marking phase has scanned or the number of references the marking
3136     phase has visited reaches a given limit. Additional invocations of
3137     the clock method have been planted in a few other strategic places
3138     too. The initial reason for the clock method was to avoid calling
3139     vtime too regularly, as it is quite expensive. So, once it was in
3140     place, it was natural to piggy-back all the other conditions on it
3141     too and not constantly check them throughout the code.
3142 
3143     If do_termination is true then do_marking_step will enter its
3144     termination protocol.
3145 
3146     The value of is_serial must be true when do_marking_step is being
3147     called serially (i.e. by the VMThread) and do_marking_step should
3148     skip any synchronization in the termination and overflow code.
3149     Examples include the serial remark code and the serial reference
3150     processing closures.
3151 
3152     The value of is_serial must be false when do_marking_step is
3153     being called by any of the worker threads in a work gang.
3154     Examples include the concurrent marking code (CMMarkingTask),
3155     the MT remark code, and the MT reference processing closures.
3156 
3157  *****************************************************************************/
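
// The following sketch is purely illustrative and is not part of the
// build: it shows the shape of a typical caller loop. The value
// mark_step_duration_ms stands in for whatever time target the caller
// computes, and the real callers (the concurrent marking and remark
// tasks elsewhere in this file) add yielding, timing and termination
// bookkeeping on top of this pattern.
//
//   CMTask* task = ...;  // the task claimed by this worker
//   do {
//     task->do_marking_step(mark_step_duration_ms,
//                           true  /* do_termination */,
//                           false /* is_serial */);
//     // If the step aborted (overflow, yield request, time-out, etc.),
//     // deal with the reason for the abort (e.g. yield to a pending
//     // safepoint) and then simply call do_marking_step() again.
//   } while (task->has_aborted() /* and marking itself has not aborted */);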
3158 
3159 void CMTask::do_marking_step(double time_target_ms,
3160                              bool do_termination,
3161                              bool is_serial) {
3162   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3163   assert(concurrent() == _cm->concurrent(), "they should be the same");
3164 
3165   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3166   assert(_task_queues != NULL, "invariant");
3167   assert(_task_queue != NULL, "invariant");
3168   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3169 
3170   assert(!_claimed,
3171          "only one thread should claim this task at any one time");
3172 
3173   // OK, this doesn't safeguard against all possible scenarios, as it is
3174   // possible for two threads to set the _claimed flag at the same
3175   // time. But it is only for debugging purposes anyway and it will
3176   // catch most problems.
3177   _claimed = true;
3178 
3179   _start_time_ms = os::elapsedVTime() * 1000.0;
3180 
3181   // If do_stealing is true then do_marking_step will attempt to
3182   // steal work from the other CMTasks. It only makes sense to
3183   // enable stealing when the termination protocol is enabled
3184   // and do_marking_step() is not being called serially.
3185   bool do_stealing = do_termination && !is_serial;
3186 
3187   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
3188   _time_target_ms = time_target_ms - diff_prediction_ms;
3189 
3190   // set up the variables that are used in the work-based scheme to
3191   // call the regular clock method
3192   _words_scanned = 0;
3193   _refs_reached  = 0;
3194   recalculate_limits();
3195 
3196   // clear all flags
3197   clear_has_aborted();
3198   _has_timed_out = false;
3199   _draining_satb_buffers = false;
3200 
3201   ++_calls;
3202 
3203   // Set up the bitmap and oop closures. Anything that uses them is
3204   // eventually called from this method, so it is OK to allocate these
3205   // on the stack.
3206   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3207   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3208   set_cm_oop_closure(&cm_oop_closure);
3209 
3210   if (_cm->has_overflown()) {
3211     // This can happen if the mark stack overflows during a GC pause
3212     // and this task, after a yield point, restarts. We have to abort
3213     // as we need to get into the overflow protocol which happens
3214     // right at the end of this task.
3215     set_has_aborted();
3216   }
3217 
3218   // First drain any available SATB buffers. After this, we will not
3219   // look at SATB buffers before the next invocation of this method.
3220   // If enough completed SATB buffers are queued up, the regular clock
3221   // will abort this task so that it restarts.
3222   drain_satb_buffers();
3223   // ...then partially drain the local queue and the global stack
3224   drain_local_queue(true);
3225   drain_global_stack(true);
3226 
3227   do {
3228     if (!has_aborted() && _curr_region != NULL) {
3229       // This means that we're already holding on to a region.
3230       assert(_finger != NULL, "if region is not NULL, then the finger "
3231              "should not be NULL either");
3232 
3233       // We might have restarted this task after an evacuation pause
3234       // which might have evacuated the region we're holding on to
3235       // underneath our feet. Let's read its limit again to make sure
3236       // that we do not iterate over a region of the heap that
3237       // contains garbage (update_region_limit() will also move
3238       // _finger to the start of the region if it is found empty).
3239       update_region_limit();
3240       // We will start from _finger not from the start of the region,
3241       // as we might be restarting this task after aborting half-way
3242       // through scanning this region. In this case, _finger points to
3243       // the address where we last found a marked object. If this is a
3244       // fresh region, _finger points to the region's bottom().
3245       MemRegion mr = MemRegion(_finger, _region_limit);
3246 
3247       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
3248              "humongous regions should go around loop once only");
3249 
3250       // Some special cases:
3251       // If the memory region is empty, we can just give up the region.
3252       // If the current region is humongous then we only need to check
3253       // the bitmap for the bit associated with the start of the object,
3254       // scan the object if it's live, and give up the region.
3255       // Otherwise, let's iterate over the bitmap of the part of the region
3256       // that is left.
3257       // If the iteration is successful, give up the region.
3258       if (mr.is_empty()) {
3259         giveup_current_region();
3260         regular_clock_call();
3261       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
3262         if (_nextMarkBitMap->isMarked(mr.start())) {
3263           // The object is marked - apply the closure
3264           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
3265           bitmap_closure.do_bit(offset);
3266         }
3267         // Even if this task aborted while scanning the humongous object
3268         // we can (and should) give up the current region.
3269         giveup_current_region();
3270         regular_clock_call();
3271       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3272         giveup_current_region();
3273         regular_clock_call();
3274       } else {
3275         assert(has_aborted(), "currently the only way to do so");
3276         // The only way to abort the bitmap iteration is to return
3277         // false from the do_bit() method. However, inside the
3278         // do_bit() method we move the _finger to point to the
3279         // object currently being looked at. So, if we bail out, we
3280         // have definitely set _finger to something non-null.
3281         assert(_finger != NULL, "invariant");
3282 
3283         // Region iteration was actually aborted. So now _finger
3284         // points to the address of the object we last scanned. If we
3285         // leave it there, when we restart this task, we will rescan
3286         // the object. It is easy to avoid this. We move the finger by
3287         // enough to point to the next possible object header (the
3288         // bitmap knows by how much we need to move it as it knows its
3289         // granularity).
3290         assert(_finger < _region_limit, "invariant");
3291         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
3292         // Check if bitmap iteration was aborted while scanning the last object
3293         if (new_finger >= _region_limit) {
3294           giveup_current_region();
3295         } else {
3296           move_finger_to(new_finger);
3297         }
3298       }
3299     }
3300     // At this point we have either completed iterating over the
3301     // region we were holding on to, or we have aborted.
3302 
3303     // We then partially drain the local queue and the global stack.
3304     // (Do we really need this?)
3305     drain_local_queue(true);
3306     drain_global_stack(true);
3307 
3308     // Read the note on the claim_region() method on why it might
3309     // return NULL with potentially more regions available for
3310     // claiming and why we have to check out_of_regions() to determine
3311     // whether we're done or not.
3312     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3313       // We are going to try to claim a new region. We should have
3314       // given up on the previous one.
3315       // Separated the asserts so that we know which one fires.
3316       assert(_curr_region  == NULL, "invariant");
3317       assert(_finger       == NULL, "invariant");
3318       assert(_region_limit == NULL, "invariant");
3319       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3320       if (claimed_region != NULL) {
3321         // Yes, we managed to claim one
3322         setup_for_region(claimed_region);
3323         assert(_curr_region == claimed_region, "invariant");
3324       }
3325       // It is important to call the regular clock here. It might take
3326       // a while to claim a region if, for example, we hit a large
3327       // block of empty regions. So we need to call the regular clock
3328       // method once round the loop to make sure it's called
3329       // frequently enough.
3330       regular_clock_call();
3331     }
3332 
3333     if (!has_aborted() && _curr_region == NULL) {
3334       assert(_cm->out_of_regions(),
3335              "at this point we should be out of regions");
3336     }
3337   } while (_curr_region != NULL && !has_aborted());
3338 
3339   if (!has_aborted()) {
3340     // We cannot check whether the global stack is empty, since other
3341     // tasks might be pushing objects to it concurrently.
3342     assert(_cm->out_of_regions(),
3343            "at this point we should be out of regions");
3344     // Try to reduce the number of available SATB buffers so that
3345     // remark has less work to do.
3346     drain_satb_buffers();
3347   }
3348 
3349   // Since we've done everything else, we can now totally drain the
3350   // local queue and global stack.
3351   drain_local_queue(false);
3352   drain_global_stack(false);
3353 
3354   // Attempt at work stealing from other tasks' queues.
3355   if (do_stealing && !has_aborted()) {
3356     // We have not aborted. This means that we have finished all that
3357     // we could. Let's try to do some stealing...
3358 
3359     // We cannot check whether the global stack is empty, since other
3360     // tasks might be pushing objects to it concurrently.
3361     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3362            "only way to reach here");
3363     while (!has_aborted()) {
3364       oop obj;
3365       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
3366         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3367                "any stolen object should be marked");
3368         scan_object(obj);
3369 
3370         // And since we're towards the end, let's totally drain the
3371         // local queue and global stack.
3372         drain_local_queue(false);
3373         drain_global_stack(false);
3374       } else {
3375         break;
3376       }
3377     }
3378   }
3379 
3380   // If we still haven't aborted, let's try to get into the
3381   // termination protocol.
3382   if (do_termination && !has_aborted()) {
3383     // We cannot check whether the global stack is empty, since other
3384     // tasks might be concurrently pushing objects on it.
3385     // Separated the asserts so that we know which one fires.
3386     assert(_cm->out_of_regions(), "only way to reach here");
3387     assert(_task_queue->size() == 0, "only way to reach here");
3388     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3389 
3390     // The CMTask class also extends the TerminatorTerminator class,
3391     // hence its should_exit_termination() method will also decide
3392     // whether to exit the termination protocol or not.
3393     bool finished = (is_serial ||
3394                      _cm->terminator()->offer_termination(this));
3395     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3396     _termination_time_ms +=
3397       termination_end_time_ms - _termination_start_time_ms;
3398 
3399     if (finished) {
3400       // We're all done.
3401 
3402       if (_worker_id == 0) {
3403         // let's allow task 0 to do this
3404         if (concurrent()) {
3405           assert(_cm->concurrent_marking_in_progress(), "invariant");
3406           // we need to set this to false before the next
3407           // safepoint. This way we ensure that the marking phase
3408           // doesn't observe any more heap expansions.
3409           _cm->clear_concurrent_marking_in_progress();
3410         }
3411       }
3412 
3413       // We can now guarantee that the global stack is empty, since
3414       // all other tasks have finished. We separated the guarantees so
3415       // that, if a condition is false, we can immediately find out
3416       // which one.
3417       guarantee(_cm->out_of_regions(), "only way to reach here");
3418       guarantee(_cm->mark_stack_empty(), "only way to reach here");
3419       guarantee(_task_queue->size() == 0, "only way to reach here");
3420       guarantee(!_cm->has_overflown(), "only way to reach here");
3421       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3422     } else {
3423       // Apparently there's more work to do. Let's abort this task. The
3424       // caller will restart it and we can hopefully find more things to do.
3425       set_has_aborted();
3426     }
3427   }
3428 
3429   // Mainly for debugging purposes to make sure that a pointer to the
3430   // closure which was stack-allocated in this frame doesn't
3431   // escape it by accident.
3432   set_cm_oop_closure(NULL);
3433   double end_time_ms = os::elapsedVTime() * 1000.0;
3434   double elapsed_time_ms = end_time_ms - _start_time_ms;
3435   // Update the step history.
3436   _step_times_ms.add(elapsed_time_ms);
3437 
3438   if (has_aborted()) {
3439     // The task was aborted for some reason.
3440     if (_has_timed_out) {
3441       double diff_ms = elapsed_time_ms - _time_target_ms;
3442       // Keep statistics of how well we did with respect to hitting
3443       // our target only if we actually timed out (if we aborted for
3444       // other reasons, then the results might get skewed).
3445       _marking_step_diffs_ms.add(diff_ms);
3446     }
3447 
3448     if (_cm->has_overflown()) {
3449       // This is the interesting one. We aborted because a global
3450       // overflow was raised. This means we have to restart the
3451       // marking phase and start iterating over regions. However, in
3452       // order to do this we have to make sure that all tasks stop
3453       // what they are doing and re-initialize in a safe manner. We
3454       // will achieve this with the use of two barrier sync points.
3455 
3456       if (!is_serial) {
3457         // We only need to enter the sync barrier if being called
3458         // from a parallel context
3459         _cm->enter_first_sync_barrier(_worker_id);
3460 
3461         // When we exit this sync barrier we know that all tasks have
3462         // stopped doing marking work. So, it's now safe to
3463         // re-initialize our data structures. At the end of this method,
3464         // task 0 will clear the global data structures.
3465       }
3466 
3467       // We clear the local state of this task...
3468       clear_region_fields();
3469 
3470       if (!is_serial) {
3471         // ...and enter the second barrier.
3472         _cm->enter_second_sync_barrier(_worker_id);
3473       }
3474       // At this point, if we're in the concurrent phase of
3475       // marking, everything has been re-initialized and we're
3476       // ready to restart.
3477     }
3478   }
3479 
3480   _claimed = false;
3481 }
3482 
3483 CMTask::CMTask(uint worker_id,
3484                ConcurrentMark* cm,
3485                size_t* marked_bytes,
3486                BitMap* card_bm,
3487                CMTaskQueue* task_queue,
3488                CMTaskQueueSet* task_queues)
3489   : _g1h(G1CollectedHeap::heap()),
3490     _worker_id(worker_id), _cm(cm),
3491     _claimed(false),
3492     _nextMarkBitMap(NULL), _hash_seed(17),
3493     _task_queue(task_queue),
3494     _task_queues(task_queues),
3495     _cm_oop_closure(NULL),
3496     _marked_bytes_array(marked_bytes),
3497     _card_bm(card_bm) {
3498   guarantee(task_queue != NULL, "invariant");
3499   guarantee(task_queues != NULL, "invariant");
3500 
3501   _marking_step_diffs_ms.add(0.5);
3502 }
3503 
3504 // These are formatting macros that are used below to ensure
3505 // consistent formatting. The *_H_* versions are used to format the
3506 // header for a particular value and they should be kept consistent
3507 // with the corresponding macro. Also note that most of the macros add
3508 // the necessary whitespace (as a prefix), which makes them a bit
3509 // easier to compose (see the example after the macro definitions below).
3510 
3511 // All the output lines are prefixed with this string to be able to
3512 // identify them easily in a large log file.
3513 #define G1PPRL_LINE_PREFIX            "###"
3514 
3515 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
3516 #ifdef _LP64
3517 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
3518 #else // _LP64
3519 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
3520 #endif // _LP64
3521 
3522 // For per-region info
3523 #define G1PPRL_TYPE_FORMAT            "   %-4s"
3524 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
3525 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
3526 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
3527 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
3528 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
3529 
3530 // For summary info
3531 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
3532 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
3533 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
3534 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
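
// For example (illustration only; the exact expansion of SIZE_FORMAT is
// platform dependent), the composition
//
//   G1PPRL_LINE_PREFIX G1PPRL_SUM_BYTE_FORMAT("region-size")
//
// concatenates into a single format string along the lines of
//
//   "###  region-size: %lu"
//
// which is how the header, per-region and summary lines below are built.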
3535 
3536 G1PrintRegionLivenessInfoClosure::
3537 G1PrintRegionLivenessInfoClosure(const char* phase_name)
3538   : _total_used_bytes(0), _total_capacity_bytes(0),
3539     _total_prev_live_bytes(0), _total_next_live_bytes(0),
3540     _hum_used_bytes(0), _hum_capacity_bytes(0),
3541     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
3542     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
3543   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3544   MemRegion g1_reserved = g1h->g1_reserved();
3545   double now = os::elapsedTime();
3546 
3547   // Print the header of the output.
3548   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3549   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
3550                           G1PPRL_SUM_ADDR_FORMAT("reserved")
3551                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
3552                           p2i(g1_reserved.start()), p2i(g1_reserved.end()),
3553                           HeapRegion::GrainBytes);
3554   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3555   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3556                           G1PPRL_TYPE_H_FORMAT
3557                           G1PPRL_ADDR_BASE_H_FORMAT
3558                           G1PPRL_BYTE_H_FORMAT
3559                           G1PPRL_BYTE_H_FORMAT
3560                           G1PPRL_BYTE_H_FORMAT
3561                           G1PPRL_DOUBLE_H_FORMAT
3562                           G1PPRL_BYTE_H_FORMAT
3563                           G1PPRL_BYTE_H_FORMAT,
3564                           "type", "address-range",
3565                           "used", "prev-live", "next-live", "gc-eff",
3566                           "remset", "code-roots");
3567   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3568                           G1PPRL_TYPE_H_FORMAT
3569                           G1PPRL_ADDR_BASE_H_FORMAT
3570                           G1PPRL_BYTE_H_FORMAT
3571                           G1PPRL_BYTE_H_FORMAT
3572                           G1PPRL_BYTE_H_FORMAT
3573                           G1PPRL_DOUBLE_H_FORMAT
3574                           G1PPRL_BYTE_H_FORMAT
3575                           G1PPRL_BYTE_H_FORMAT,
3576                           "", "",
3577                           "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
3578                           "(bytes)", "(bytes)");
3579 }
3580 
3581 // It takes as a parameter a pointer to one of the _hum_* fields,
3582 // deduces the corresponding value for a region in a humongous region
3583 // series (either the region size, or what's left if the _hum_* field
3584 // is < the region size), and updates the _hum_* field accordingly.
3585 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
3586   size_t bytes = 0;
3587   // The > 0 check is to deal with the prev and next live bytes which
3588   // could be 0.
3589   if (*hum_bytes > 0) {
3590     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
3591     *hum_bytes -= bytes;
3592   }
3593   return bytes;
3594 }
3595 
3596 // It deduces the values for a region in a humongous region series
3597 // from the _hum_* fields and updates those accordingly. It assumes
3598 // that the _hum_* fields have already been set up from the "starts
3599 // humongous" region and we visit the regions in address order.
3600 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
3601                                                      size_t* capacity_bytes,
3602                                                      size_t* prev_live_bytes,
3603                                                      size_t* next_live_bytes) {
3604   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
3605   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
3606   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
3607   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
3608   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
3609 }
3610 
3611 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
3612   const char* type       = r->get_type_str();
3613   HeapWord* bottom       = r->bottom();
3614   HeapWord* end          = r->end();
3615   size_t capacity_bytes  = r->capacity();
3616   size_t used_bytes      = r->used();
3617   size_t prev_live_bytes = r->live_bytes();
3618   size_t next_live_bytes = r->next_live_bytes();
3619   double gc_eff          = r->gc_efficiency();
3620   size_t remset_bytes    = r->rem_set()->mem_size();
3621   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
3622 
3623   if (r->is_starts_humongous()) {
3624     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
3625            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
3626            "they should have been zeroed after the last time we used them");
3627     // Set up the _hum_* fields.
3628     _hum_capacity_bytes  = capacity_bytes;
3629     _hum_used_bytes      = used_bytes;
3630     _hum_prev_live_bytes = prev_live_bytes;
3631     _hum_next_live_bytes = next_live_bytes;
3632     get_hum_bytes(&used_bytes, &capacity_bytes,
3633                   &prev_live_bytes, &next_live_bytes);
3634     end = bottom + HeapRegion::GrainWords;
3635   } else if (r->is_continues_humongous()) {
3636     get_hum_bytes(&used_bytes, &capacity_bytes,
3637                   &prev_live_bytes, &next_live_bytes);
3638     assert(end == bottom + HeapRegion::GrainWords, "invariant");
3639   }
3640 
3641   _total_used_bytes      += used_bytes;
3642   _total_capacity_bytes  += capacity_bytes;
3643   _total_prev_live_bytes += prev_live_bytes;
3644   _total_next_live_bytes += next_live_bytes;
3645   _total_remset_bytes    += remset_bytes;
3646   _total_strong_code_roots_bytes += strong_code_roots_bytes;
3647 
3648   // Print a line for this particular region.
3649   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3650                           G1PPRL_TYPE_FORMAT
3651                           G1PPRL_ADDR_BASE_FORMAT
3652                           G1PPRL_BYTE_FORMAT
3653                           G1PPRL_BYTE_FORMAT
3654                           G1PPRL_BYTE_FORMAT
3655                           G1PPRL_DOUBLE_FORMAT
3656                           G1PPRL_BYTE_FORMAT
3657                           G1PPRL_BYTE_FORMAT,
3658                           type, p2i(bottom), p2i(end),
3659                           used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
3660                           remset_bytes, strong_code_roots_bytes);
3661 
3662   return false;
3663 }
3664 
3665 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3666   // Add the static memory usage to the remembered set sizes.
3667   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
3668   // Print the footer of the output.
3669   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3670   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3671                          " SUMMARY"
3672                          G1PPRL_SUM_MB_FORMAT("capacity")
3673                          G1PPRL_SUM_MB_PERC_FORMAT("used")
3674                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
3675                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
3676                          G1PPRL_SUM_MB_FORMAT("remset")
3677                          G1PPRL_SUM_MB_FORMAT("code-roots"),
3678                          bytes_to_mb(_total_capacity_bytes),
3679                          bytes_to_mb(_total_used_bytes),
3680                          perc(_total_used_bytes, _total_capacity_bytes),
3681                          bytes_to_mb(_total_prev_live_bytes),
3682                          perc(_total_prev_live_bytes, _total_capacity_bytes),
3683                          bytes_to_mb(_total_next_live_bytes),
3684                          perc(_total_next_live_bytes, _total_capacity_bytes),
3685                          bytes_to_mb(_total_remset_bytes),
3686                          bytes_to_mb(_total_strong_code_roots_bytes));
3687 }