1 /*
   2  * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMark.inline.hpp"
  30 #include "gc/g1/concurrentMarkThread.inline.hpp"
  31 #include "gc/g1/g1CollectedHeap.inline.hpp"
  32 #include "gc/g1/g1CollectorPolicy.hpp"
  33 #include "gc/g1/g1CollectorState.hpp"
  34 #include "gc/g1/g1ErgoVerbose.hpp"
  35 #include "gc/g1/g1Log.hpp"
  36 #include "gc/g1/g1OopClosures.inline.hpp"
  37 #include "gc/g1/g1RemSet.hpp"
  38 #include "gc/g1/g1StringDedup.hpp"
  39 #include "gc/g1/heapRegion.inline.hpp"
  40 #include "gc/g1/heapRegionManager.inline.hpp"
  41 #include "gc/g1/heapRegionRemSet.hpp"
  42 #include "gc/g1/heapRegionSet.inline.hpp"
  43 #include "gc/g1/suspendibleThreadSet.hpp"
  44 #include "gc/shared/gcTimer.hpp"
  45 #include "gc/shared/gcTrace.hpp"
  46 #include "gc/shared/gcTraceTime.hpp"
  47 #include "gc/shared/genOopClosures.inline.hpp"
  48 #include "gc/shared/referencePolicy.hpp"
  49 #include "gc/shared/strongRootsScope.hpp"
  50 #include "gc/shared/taskqueue.inline.hpp"
  51 #include "gc/shared/vmGCOperations.hpp"
  52 #include "memory/allocation.hpp"
  53 #include "memory/resourceArea.hpp"
  54 #include "oops/oop.inline.hpp"
  55 #include "runtime/atomic.inline.hpp"
  56 #include "runtime/handles.inline.hpp"
  57 #include "runtime/java.hpp"
  58 #include "runtime/prefetch.inline.hpp"
  59 #include "services/memTracker.hpp"
  60 
  61 void G1CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  62 
  63   CMBitMap::initialize(heap, storage->reserved());
  64 
  65   storage->set_mapping_changed_listener(&_listener);
  66 }
  67 
  68 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
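       // Freshly committed pages are zero-filled, and a bitmap word of zero already
       // means "no marks set", so only re-committed regions whose backing pages may
       // contain stale data need the explicit clear below.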
  69   if (zero_filled) {
  70     return;
  71   }
  72   // We need to clear the bitmap on commit, removing any existing information.
  73   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  74   _bm->clearRange(mr);
  75 }
  76 
  77 // Closure used for clearing the given mark bitmap.
  78 class ClearBitmapHRClosure : public HeapRegionClosure {
  79  private:
  80   ConcurrentMark* _cm;
  81   CMBitMap* _bitmap;
  82   bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
  83  public:
  84   ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
  85     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
  86   }
  87 
  88   virtual bool doHeapRegion(HeapRegion* r) {
  89     size_t const chunk_size_in_words = M / HeapWordSize;
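         // Each chunk covers 1 MB of heap, so when yielding is allowed the closure
         // can react to a safepoint request at roughly megabyte granularity rather
         // than only at region boundaries.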
  90 
  91     HeapWord* cur = r->bottom();
  92     HeapWord* const end = r->end();
  93 
  94     while (cur < end) {
  95       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
  96       _bitmap->clearRange(mr);
  97 
  98       cur += chunk_size_in_words;
  99 
  100       // Abort the iteration if, after yielding, the marking has been aborted.
 101       if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
 102         return true;
 103       }
 104       // Repeat the asserts from before the start of the closure. We will do them
 105       // as asserts here to minimize their overhead on the product. However, we
 106       // will have them as guarantees at the beginning / end of the bitmap
 107       // clearing to get some checking in the product.
 108       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
 109       assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
 110     }
 111 
 112     return false;
 113   }
 114 };
 115 
 116 class ParClearNextMarkBitmapTask : public AbstractGangTask {
 117   ClearBitmapHRClosure* _cl;
 118   HeapRegionClaimer     _hrclaimer;
 119   bool                  _suspendible; // If the task is suspendible, workers must join the STS.
 120 
 121 public:
  122   ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
  123       AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}
 124 
 125   void work(uint worker_id) {
 126     SuspendibleThreadSetJoiner sts_join(_suspendible);
 127     G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
 128   }
 129 };
 130 
 131 void G1CMBitMap::clearAll() {
 132   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 133   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
 134   uint n_workers = g1h->workers()->active_workers();
 135   ParClearNextMarkBitmapTask task(&cl, n_workers, false);
 136   g1h->workers()->run_task(&task);
 137   guarantee(cl.complete(), "Must have completed iteration.");
 138   return;
 139 }
 140 
 141 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 142   _base(NULL), _cm(cm)
 143 #ifdef ASSERT
 144   , _drain_in_progress(false)
 145   , _drain_in_progress_yields(false)
 146 #endif
 147 {}
 148 
 149 bool CMMarkStack::allocate(size_t capacity) {
 150   // allocate a stack of the requisite depth
 151   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 152   if (!rs.is_reserved()) {
 153     warning("ConcurrentMark MarkStack allocation failure");
 154     return false;
 155   }
 156   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 157   if (!_virtual_space.initialize(rs, rs.size())) {
 158     warning("ConcurrentMark MarkStack backing store failure");
 159     // Release the virtual memory reserved for the marking stack
 160     rs.release();
 161     return false;
 162   }
 163   assert(_virtual_space.committed_size() == rs.size(),
 164          "Didn't reserve backing store for all of ConcurrentMark stack?");
 165   _base = (oop*) _virtual_space.low();
 166   setEmpty();
 167   _capacity = (jint) capacity;
 168   _saved_index = -1;
 169   _should_expand = false;
 170   return true;
 171 }
 172 
 173 void CMMarkStack::expand() {
  174   // Called, during remark, if we've overflowed the marking stack during marking.
  175   assert(isEmpty(), "stack should have been emptied while handling overflow");
 176   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 177   // Clear expansion flag
 178   _should_expand = false;
 179   if (_capacity == (jint) MarkStackSizeMax) {
 180     if (PrintGCDetails && Verbose) {
 181       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
 182     }
 183     return;
 184   }
 185   // Double capacity if possible
 186   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  187   // Do not give up the existing stack until we have managed to
  188   // get the doubled capacity that we want.
 189   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 190                                                            sizeof(oop)));
 191   if (rs.is_reserved()) {
 192     // Release the backing store associated with old stack
 193     _virtual_space.release();
 194     // Reinitialize virtual space for new stack
 195     if (!_virtual_space.initialize(rs, rs.size())) {
 196       fatal("Not enough swap for expanded marking stack capacity");
 197     }
 198     _base = (oop*)(_virtual_space.low());
 199     _index = 0;
 200     _capacity = new_capacity;
 201   } else {
 202     if (PrintGCDetails && Verbose) {
  203       // Failed to double the capacity; continue with the current capacity.
 204       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
 205                           SIZE_FORMAT "K to " SIZE_FORMAT "K",
 206                           _capacity / K, new_capacity / K);
 207     }
 208   }
 209 }
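
     // Illustrative sizing: a capacity of 1M entries needs 1M * sizeof(oop) = 8 MB
     // of backing store with 64-bit oops; expand() reserves the doubled space first
     // and only releases the old backing store once the new reservation succeeded.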
 210 
 211 void CMMarkStack::set_should_expand() {
  212   // If we're resetting the marking state because of a
 213   // marking stack overflow, record that we should, if
 214   // possible, expand the stack.
 215   _should_expand = _cm->has_overflown();
 216 }
 217 
 218 CMMarkStack::~CMMarkStack() {
 219   if (_base != NULL) {
 220     _base = NULL;
 221     _virtual_space.release();
 222   }
 223 }
 224 
 225 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 226   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 227   jint start = _index;
 228   jint next_index = start + n;
 229   if (next_index > _capacity) {
 230     _overflow = true;
 231     return;
 232   }
 233   // Otherwise.
 234   _index = next_index;
 235   for (int i = 0; i < n; i++) {
 236     int ind = start + i;
 237     assert(ind < _capacity, "By overflow test above.");
 238     _base[ind] = ptr_arr[i];
 239   }
 240 }
 241 
 242 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 243   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 244   jint index = _index;
 245   if (index == 0) {
 246     *n = 0;
 247     return false;
 248   } else {
 249     int k = MIN2(max, index);
 250     jint  new_ind = index - k;
 251     for (int j = 0; j < k; j++) {
 252       ptr_arr[j] = _base[new_ind + j];
 253     }
 254     _index = new_ind;
 255     *n = k;
 256     return true;
 257   }
 258 }
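
     // Both bulk operations above serialize on ParGCRareEvent_lock, so they are
     // intended for relatively rare events (e.g. overflow handling), not for the
     // per-worker fast path. A rough usage sketch (buffer size is arbitrary):
     //
     //   oop buffer[64];
     //   int n;
     //   while (stack->par_pop_arr(buffer, 64, &n)) {
     //     for (int i = 0; i < n; i++) {
     //       // ... process buffer[i] ...
     //     }
     //   }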
 259 
 260 template<class OopClosureClass>
 261 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 262   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 263          || SafepointSynchronize::is_at_safepoint(),
 264          "Drain recursion must be yield-safe.");
 265   bool res = true;
 266   debug_only(_drain_in_progress = true);
 267   debug_only(_drain_in_progress_yields = yield_after);
 268   while (!isEmpty()) {
 269     oop newOop = pop();
 270     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 271     assert(newOop->is_oop(), "Expected an oop");
 272     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 273            "only grey objects on this stack");
 274     newOop->oop_iterate(cl);
 275     if (yield_after && _cm->do_yield_check()) {
 276       res = false;
 277       break;
 278     }
 279   }
 280   debug_only(_drain_in_progress = false);
 281   return res;
 282 }
 283 
 284 void CMMarkStack::note_start_of_gc() {
 285   assert(_saved_index == -1,
 286          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 287   _saved_index = _index;
 288 }
 289 
 290 void CMMarkStack::note_end_of_gc() {
 291   // This is intentionally a guarantee, instead of an assert. If we
 292   // accidentally add something to the mark stack during GC, it
  293   // will be a correctness issue, so it's better if we crash. We'll
 294   // only check this once per GC anyway, so it won't be a performance
 295   // issue in any way.
 296   guarantee(_saved_index == _index,
 297             err_msg("saved index: %d index: %d", _saved_index, _index));
 298   _saved_index = -1;
 299 }
 300 
 301 CMRootRegions::CMRootRegions() :
 302   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 303   _should_abort(false),  _next_survivor(NULL) { }
 304 
 305 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 306   _young_list = g1h->young_list();
 307   _cm = cm;
 308 }
 309 
 310 void CMRootRegions::prepare_for_scan() {
 311   assert(!scan_in_progress(), "pre-condition");
 312 
 313   // Currently, only survivors can be root regions.
 314   assert(_next_survivor == NULL, "pre-condition");
 315   _next_survivor = _young_list->first_survivor_region();
 316   _scan_in_progress = (_next_survivor != NULL);
 317   _should_abort = false;
 318 }
 319 
 320 HeapRegion* CMRootRegions::claim_next() {
 321   if (_should_abort) {
 322     // If someone has set the should_abort flag, we return NULL to
 323     // force the caller to bail out of their loop.
 324     return NULL;
 325   }
 326 
 327   // Currently, only survivors can be root regions.
 328   HeapRegion* res = _next_survivor;
 329   if (res != NULL) {
 330     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 331     // Read it again in case it changed while we were waiting for the lock.
 332     res = _next_survivor;
 333     if (res != NULL) {
 334       if (res == _young_list->last_survivor_region()) {
 335         // We just claimed the last survivor so store NULL to indicate
 336         // that we're done.
 337         _next_survivor = NULL;
 338       } else {
 339         _next_survivor = res->get_next_young_region();
 340       }
 341     } else {
 342       // Someone else claimed the last survivor while we were trying
  343       // to take the lock, so there is nothing else to do.
 344     }
 345   }
 346   assert(res == NULL || res->is_survivor(), "post-condition");
 347 
 348   return res;
 349 }
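
     // claim_next() is a double-checked claim: _next_survivor is first read without
     // the lock as a quick exit, then re-read under RootRegionScan_lock so that each
     // survivor region is handed out to exactly one worker and the last claim leaves
     // _next_survivor == NULL.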
 350 
 351 void CMRootRegions::scan_finished() {
 352   assert(scan_in_progress(), "pre-condition");
 353 
 354   // Currently, only survivors can be root regions.
 355   if (!_should_abort) {
 356     assert(_next_survivor == NULL, "we should have claimed all survivors");
 357   }
 358   _next_survivor = NULL;
 359 
 360   {
 361     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 362     _scan_in_progress = false;
 363     RootRegionScan_lock->notify_all();
 364   }
 365 }
 366 
 367 bool CMRootRegions::wait_until_scan_finished() {
 368   if (!scan_in_progress()) return false;
 369 
 370   {
 371     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 372     while (scan_in_progress()) {
 373       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 374     }
 375   }
 376   return true;
 377 }
 378 
 379 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 380 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 381 #endif // _MSC_VER
 382 
 383 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 384   return MAX2((n_par_threads + 2) / 4, 1U);
 385 }
 386 
 387 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 388   _g1h(g1h),
 389   _markBitMap1(),
 390   _markBitMap2(),
 391   _parallel_marking_threads(0),
 392   _max_parallel_marking_threads(0),
 393   _sleep_factor(0.0),
 394   _marking_task_overhead(1.0),
 395   _cleanup_sleep_factor(0.0),
 396   _cleanup_task_overhead(1.0),
 397   _cleanup_list("Cleanup List"),
 398   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 399   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
 400             CardTableModRefBS::card_shift,
 401             false /* in_resource_area*/),
 402 
 403   _prevMarkBitMap(&_markBitMap1),
 404   _nextMarkBitMap(&_markBitMap2),
 405 
 406   _markStack(this),
 407   // _finger set in set_non_marking_state
 408 
 409   _max_worker_id(ParallelGCThreads),
 410   // _active_tasks set in set_non_marking_state
 411   // _tasks set inside the constructor
 412   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 413   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 414 
 415   _has_overflown(false),
 416   _concurrent(false),
 417   _has_aborted(false),
 418   _aborted_gc_id(GCId::undefined()),
 419   _restart_for_overflow(false),
 420   _concurrent_marking_in_progress(false),
 421 
 422   // _verbose_level set below
 423 
 424   _init_times(),
 425   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 426   _cleanup_times(),
 427   _total_counting_time(0.0),
 428   _total_rs_scrub_time(0.0),
 429 
 430   _parallel_workers(NULL),
 431 
 432   _count_card_bitmaps(NULL),
 433   _count_marked_bytes(NULL),
 434   _completed_initialization(false) {
 435   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 436   if (verbose_level < no_verbose) {
 437     verbose_level = no_verbose;
 438   }
 439   if (verbose_level > high_verbose) {
 440     verbose_level = high_verbose;
 441   }
 442   _verbose_level = verbose_level;
 443 
 444   if (verbose_low()) {
 445     gclog_or_tty->print_cr("[global] init, heap start = " PTR_FORMAT ", "
 446                            "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
 447   }
 448 
 449   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 450   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 451 
 452   // Create & start a ConcurrentMark thread.
 453   _cmThread = new ConcurrentMarkThread(this);
 454   assert(cmThread() != NULL, "CM Thread should have been created");
 455   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 456   if (_cmThread->osthread() == NULL) {
 457       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 458   }
 459 
 460   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 461   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 462   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 463 
 464   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 465   satb_qs.set_buffer_size(G1SATBBufferSize);
 466 
 467   _root_regions.init(_g1h, this);
 468 
 469   if (ConcGCThreads > ParallelGCThreads) {
 470     warning("Can't have more ConcGCThreads (%u) "
 471             "than ParallelGCThreads (%u).",
 472             ConcGCThreads, ParallelGCThreads);
 473     return;
 474   }
 475   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 476     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 477     // if both are set
 478     _sleep_factor             = 0.0;
 479     _marking_task_overhead    = 1.0;
 480   } else if (G1MarkingOverheadPercent > 0) {
 481     // We will calculate the number of parallel marking threads based
 482     // on a target overhead with respect to the soft real-time goal
 483     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 484     double overall_cm_overhead =
 485       (double) MaxGCPauseMillis * marking_overhead /
 486       (double) GCPauseIntervalMillis;
 487     double cpu_ratio = 1.0 / (double) os::processor_count();
 488     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 489     double marking_task_overhead =
 490       overall_cm_overhead / marking_thread_num *
 491                                               (double) os::processor_count();
 492     double sleep_factor =
 493                        (1.0 - marking_task_overhead) / marking_task_overhead;
 494 
 495     FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
 496     _sleep_factor             = sleep_factor;
 497     _marking_task_overhead    = marking_task_overhead;
 498   } else {
 499     // Calculate the number of parallel marking threads by scaling
 500     // the number of parallel GC threads.
 501     uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
 502     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 503     _sleep_factor             = 0.0;
 504     _marking_task_overhead    = 1.0;
 505   }
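
       // Worked example for the G1MarkingOverheadPercent case above, with purely
       // illustrative values: G1MarkingOverheadPercent=10, MaxGCPauseMillis=200,
       // GCPauseIntervalMillis=1000 and 8 processors give
       //   overall_cm_overhead = 200 * 0.10 / 1000 = 0.02,  cpu_ratio = 1/8 = 0.125,
       //   marking_thread_num  = ceil(0.02 / 0.125) = 1,
       //   marking_task_overhead = 0.02 * 8 / 1 = 0.16,
       //   sleep_factor = (1 - 0.16) / 0.16 = 5.25,
       // i.e. a single marking thread that sleeps about 5.25x as long as it works.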
 506 
 507   assert(ConcGCThreads > 0, "Should have been set");
 508   _parallel_marking_threads = ConcGCThreads;
 509   _max_parallel_marking_threads = _parallel_marking_threads;
 510 
 511   if (parallel_marking_threads() > 1) {
 512     _cleanup_task_overhead = 1.0;
 513   } else {
 514     _cleanup_task_overhead = marking_task_overhead();
 515   }
 516   _cleanup_sleep_factor =
 517                    (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 518 
 519 #if 0
 520   gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 521   gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 522   gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 523   gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 524   gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 525 #endif
 526 
 527   _parallel_workers = new WorkGang("G1 Marker",
 528        _max_parallel_marking_threads, false, true);
 529   if (_parallel_workers == NULL) {
 530     vm_exit_during_initialization("Failed necessary allocation.");
 531   } else {
 532     _parallel_workers->initialize_workers();
 533   }
 534 
 535   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 536     size_t mark_stack_size =
 537       MIN2(MarkStackSizeMax,
 538           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 539     // Verify that the calculated value for MarkStackSize is in range.
 540     // It would be nice to use the private utility routine from Arguments.
 541     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 542       warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 543               "must be between 1 and " SIZE_FORMAT,
 544               mark_stack_size, MarkStackSizeMax);
 545       return;
 546     }
 547     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 548   } else {
 549     // Verify MarkStackSize is in range.
 550     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 551       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 552         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 553           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 554                   "must be between 1 and " SIZE_FORMAT,
 555                   MarkStackSize, MarkStackSizeMax);
 556           return;
 557         }
 558       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 559         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 560           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 561                   " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 562                   MarkStackSize, MarkStackSizeMax);
 563           return;
 564         }
 565       }
 566     }
 567   }
 568 
 569   if (!_markStack.allocate(MarkStackSize)) {
 570     warning("Failed to allocate CM marking stack");
 571     return;
 572   }
 573 
 574   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 575   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 576 
 577   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 578   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 579 
 580   BitMap::idx_t card_bm_size = _card_bm.size();
 581 
 582   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 583   _active_tasks = _max_worker_id;
 584 
 585   uint max_regions = _g1h->max_regions();
 586   for (uint i = 0; i < _max_worker_id; ++i) {
 587     CMTaskQueue* task_queue = new CMTaskQueue();
 588     task_queue->initialize();
 589     _task_queues->register_queue(i, task_queue);
 590 
 591     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 592     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 593 
 594     _tasks[i] = new CMTask(i, this,
 595                            _count_marked_bytes[i],
 596                            &_count_card_bitmaps[i],
 597                            task_queue, _task_queues);
 598 
 599     _accum_task_vtime[i] = 0.0;
 600   }
 601 
 602   // Calculate the card number for the bottom of the heap. Used
 603   // in biasing indexes into the accounting card bitmaps.
 604   _heap_bottom_card_num =
 605     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 606                                 CardTableModRefBS::card_shift);
 607 
 608   // Clear all the liveness counting data
 609   clear_all_count_data();
 610 
 611   // so that the call below can read a sensible value
 612   _heap_start = g1h->reserved_region().start();
 613   set_non_marking_state();
 614   _completed_initialization = true;
 615 }
 616 
 617 void ConcurrentMark::reset() {
 618   // Starting values for these two. This should be called in a STW
 619   // phase.
 620   MemRegion reserved = _g1h->g1_reserved();
 621   _heap_start = reserved.start();
 622   _heap_end   = reserved.end();
 623 
 624   // Separated the asserts so that we know which one fires.
 625   assert(_heap_start != NULL, "heap bounds should look ok");
 626   assert(_heap_end != NULL, "heap bounds should look ok");
 627   assert(_heap_start < _heap_end, "heap bounds should look ok");
 628 
 629   // Reset all the marking data structures and any necessary flags
 630   reset_marking_state();
 631 
 632   if (verbose_low()) {
 633     gclog_or_tty->print_cr("[global] resetting");
 634   }
 635 
  636   // We do reset all of them, since different phases will use a
 637   // different number of active threads. So, it's easiest to have all
 638   // of them ready.
 639   for (uint i = 0; i < _max_worker_id; ++i) {
 640     _tasks[i]->reset(_nextMarkBitMap);
 641   }
 642 
  643   // We need this to make sure that the flag is on during the evacuation
  644   // pause that has the initial mark piggy-backed on it.
 645   set_concurrent_marking_in_progress();
 646 }
 647 
 648 
 649 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 650   _markStack.set_should_expand();
 651   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 652   if (clear_overflow) {
 653     clear_has_overflown();
 654   } else {
 655     assert(has_overflown(), "pre-condition");
 656   }
 657   _finger = _heap_start;
 658 
 659   for (uint i = 0; i < _max_worker_id; ++i) {
 660     CMTaskQueue* queue = _task_queues->queue(i);
 661     queue->set_empty();
 662   }
 663 }
 664 
 665 void ConcurrentMark::set_concurrency(uint active_tasks) {
 666   assert(active_tasks <= _max_worker_id, "we should not have more");
 667 
 668   _active_tasks = active_tasks;
 669   // Need to update the three data structures below according to the
 670   // number of active threads for this phase.
 671   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 672   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 673   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 674 }
 675 
 676 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 677   set_concurrency(active_tasks);
 678 
 679   _concurrent = concurrent;
 680   // We propagate this to all tasks, not just the active ones.
 681   for (uint i = 0; i < _max_worker_id; ++i)
 682     _tasks[i]->set_concurrent(concurrent);
 683 
 684   if (concurrent) {
 685     set_concurrent_marking_in_progress();
 686   } else {
 687     // We currently assume that the concurrent flag has been set to
 688     // false before we start remark. At this point we should also be
 689     // in a STW phase.
 690     assert(!concurrent_marking_in_progress(), "invariant");
 691     assert(out_of_regions(),
 692            err_msg("only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 693                    p2i(_finger), p2i(_heap_end)));
 694   }
 695 }
 696 
 697 void ConcurrentMark::set_non_marking_state() {
 698   // We set the global marking state to some default values when we're
 699   // not doing marking.
 700   reset_marking_state();
 701   _active_tasks = 0;
 702   clear_concurrent_marking_in_progress();
 703 }
 704 
 705 ConcurrentMark::~ConcurrentMark() {
 706   // The ConcurrentMark instance is never freed.
 707   ShouldNotReachHere();
 708 }
 709 
 710 void ConcurrentMark::clearNextBitmap() {
 711   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 712 
  713   // Make sure that the concurrent mark thread still appears to be in
  714   // the current cycle.
 715   guarantee(cmThread()->during_cycle(), "invariant");
 716 
 717   // We are finishing up the current cycle by clearing the next
 718   // marking bitmap and getting it ready for the next cycle. During
 719   // this time no other cycle can start. So, let's make sure that this
 720   // is the case.
 721   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 722 
 723   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
 724   ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
 725   _parallel_workers->run_task(&task);
 726 
 727   // Clear the liveness counting data. If the marking has been aborted, the abort()
 728   // call already did that.
 729   if (cl.complete()) {
 730     clear_all_count_data();
 731   }
 732 
 733   // Repeat the asserts from above.
 734   guarantee(cmThread()->during_cycle(), "invariant");
 735   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 736 }
 737 
 738 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 739   CMBitMap* _bitmap;
 740   bool _error;
 741  public:
 742   CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
 743   }
 744 
 745   virtual bool doHeapRegion(HeapRegion* r) {
  746     // This closure can be called concurrently with the mutator, so we must make sure
  747     // that the result of the getNextMarkedWordAddress() call is compared to the same
  748     // value that was passed to it as the limit in order to detect any found bits.
  749     // We can use the region's orig_end() both as the limit and as the comparison value,
  750     // since it always contains the "real" end of the region, never changes, and
  751     // has no side effects.
  752     // Because of that, there is also no problem with the compiler generating
  753     // reloads of the orig_end() call.
 754     HeapWord* end = r->orig_end();
 755     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 756   }
 757 };
 758 
 759 bool ConcurrentMark::nextMarkBitmapIsClear() {
 760   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 761   _g1h->heap_region_iterate(&cl);
 762   return cl.complete();
 763 }
 764 
 765 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 766 public:
 767   bool doHeapRegion(HeapRegion* r) {
 768     if (!r->is_continues_humongous()) {
 769       r->note_start_of_marking();
 770     }
 771     return false;
 772   }
 773 };
 774 
 775 void ConcurrentMark::checkpointRootsInitialPre() {
 776   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 777   G1CollectorPolicy* g1p = g1h->g1_policy();
 778 
 779   _has_aborted = false;
 780 
 781   // Initialize marking structures. This has to be done in a STW phase.
 782   reset();
 783 
 784   // For each region note start of marking.
 785   NoteStartOfMarkHRClosure startcl;
 786   g1h->heap_region_iterate(&startcl);
 787 }
 788 
 789 
 790 void ConcurrentMark::checkpointRootsInitialPost() {
 791   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 792 
 793   // If we force an overflow during remark, the remark operation will
 794   // actually abort and we'll restart concurrent marking. If we always
 795   // force an overflow during remark we'll never actually complete the
 796   // marking phase. So, we initialize this here, at the start of the
  797   // cycle, so that the number of remaining forced overflows decreases at
  798   // every remark and we'll eventually not need to force one.
 799   force_overflow_stw()->init();
 800 
 801   // Start Concurrent Marking weak-reference discovery.
 802   ReferenceProcessor* rp = g1h->ref_processor_cm();
 803   // enable ("weak") refs discovery
 804   rp->enable_discovery();
 805   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 806 
 807   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  808   // This is the start of the marking cycle; we expect all
 809   // threads to have SATB queues with active set to false.
 810   satb_mq_set.set_active_all_threads(true, /* new active value */
 811                                      false /* expected_active */);
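
     // The set_active_all_threads(true, false) call above flips every thread's SATB
     // queue from inactive to active for the duration of the marking cycle; the
     // matching set_active_all_threads(false, true) call is made in
     // checkpointRootsFinal() once remark has completed without overflow.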
 812 
 813   _root_regions.prepare_for_scan();
 814 
 815   // update_g1_committed() will be called at the end of an evac pause
 816   // when marking is on. So, it's also called at the end of the
 817   // initial-mark pause to update the heap end, if the heap expands
 818   // during it. No need to call it here.
 819 }
 820 
 821 /*
 822  * Notice that in the next two methods, we actually leave the STS
 823  * during the barrier sync and join it immediately afterwards. If we
 824  * do not do this, the following deadlock can occur: one thread could
 825  * be in the barrier sync code, waiting for the other thread to also
 826  * sync up, whereas another one could be trying to yield, while also
 827  * waiting for the other threads to sync up too.
 828  *
 829  * Note, however, that this code is also used during remark and in
 830  * this case we should not attempt to leave / enter the STS, otherwise
 831  * we'll either hit an assert (debug / fastdebug) or deadlock
 832  * (product). So we should only leave / enter the STS if we are
 833  * operating concurrently.
 834  *
 835  * Because the thread that does the sync barrier has left the STS, it
  836  * is possible for it to be suspended for a Full GC, or for an evacuation
  837  * pause to occur. This is actually safe, since entering the sync
 838  * barrier is one of the last things do_marking_step() does, and it
 839  * doesn't manipulate any data structures afterwards.
 840  */
 841 
 842 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 843   bool barrier_aborted;
 844 
 845   if (verbose_low()) {
 846     gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
 847   }
 848 
 849   {
 850     SuspendibleThreadSetLeaver sts_leave(concurrent());
 851     barrier_aborted = !_first_overflow_barrier_sync.enter();
 852   }
 853 
 854   // at this point everyone should have synced up and not be doing any
 855   // more work
 856 
 857   if (verbose_low()) {
 858     if (barrier_aborted) {
 859       gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
 860     } else {
 861       gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
 862     }
 863   }
 864 
 865   if (barrier_aborted) {
 866     // If the barrier aborted we ignore the overflow condition and
 867     // just abort the whole marking phase as quickly as possible.
 868     return;
 869   }
 870 
 871   // If we're executing the concurrent phase of marking, reset the marking
 872   // state; otherwise the marking state is reset after reference processing,
 873   // during the remark pause.
 874   // If we reset here as a result of an overflow during the remark we will
 875   // see assertion failures from any subsequent set_concurrency_and_phase()
 876   // calls.
 877   if (concurrent()) {
  878     // Let the task associated with worker 0 do this.
 879     if (worker_id == 0) {
 880       // task 0 is responsible for clearing the global data structures
 881       // We should be here because of an overflow. During STW we should
 882       // not clear the overflow flag since we rely on it being true when
 883       // we exit this method to abort the pause and restart concurrent
 884       // marking.
 885       reset_marking_state(true /* clear_overflow */);
 886       force_overflow()->update();
 887 
 888       if (G1Log::fine()) {
 889         gclog_or_tty->gclog_stamp(concurrent_gc_id());
 890         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 891       }
 892     }
 893   }
 894 
  895   // After this, each task should reset its own data structures and
  896   // then go into the second barrier.
 897 }
 898 
 899 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 900   bool barrier_aborted;
 901 
 902   if (verbose_low()) {
 903     gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
 904   }
 905 
 906   {
 907     SuspendibleThreadSetLeaver sts_leave(concurrent());
 908     barrier_aborted = !_second_overflow_barrier_sync.enter();
 909   }
 910 
 911   // at this point everything should be re-initialized and ready to go
 912 
 913   if (verbose_low()) {
 914     if (barrier_aborted) {
 915       gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
 916     } else {
 917       gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
 918     }
 919   }
 920 }
 921 
 922 #ifndef PRODUCT
 923 void ForceOverflowSettings::init() {
 924   _num_remaining = G1ConcMarkForceOverflow;
 925   _force = false;
 926   update();
 927 }
 928 
 929 void ForceOverflowSettings::update() {
 930   if (_num_remaining > 0) {
 931     _num_remaining -= 1;
 932     _force = true;
 933   } else {
 934     _force = false;
 935   }
 936 }
 937 
 938 bool ForceOverflowSettings::should_force() {
 939   if (_force) {
 940     _force = false;
 941     return true;
 942   } else {
 943     return false;
 944   }
 945 }
 946 #endif // !PRODUCT
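
     // ForceOverflowSettings is a non-product testing aid: G1ConcMarkForceOverflow
     // gives the number of artificial mark-stack overflows to force, which exercises
     // the overflow/restart machinery without needing a workload that genuinely
     // overflows the global mark stack.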
 947 
 948 class CMConcurrentMarkingTask: public AbstractGangTask {
 949 private:
 950   ConcurrentMark*       _cm;
 951   ConcurrentMarkThread* _cmt;
 952 
 953 public:
 954   void work(uint worker_id) {
 955     assert(Thread::current()->is_ConcurrentGC_thread(),
 956            "this should only be done by a conc GC thread");
 957     ResourceMark rm;
 958 
 959     double start_vtime = os::elapsedVTime();
 960 
 961     {
 962       SuspendibleThreadSetJoiner sts_join;
 963 
 964       assert(worker_id < _cm->active_tasks(), "invariant");
 965       CMTask* the_task = _cm->task(worker_id);
 966       the_task->record_start_time();
 967       if (!_cm->has_aborted()) {
 968         do {
 969           double start_vtime_sec = os::elapsedVTime();
 970           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 971 
 972           the_task->do_marking_step(mark_step_duration_ms,
 973                                     true  /* do_termination */,
 974                                     false /* is_serial*/);
 975 
 976           double end_vtime_sec = os::elapsedVTime();
 977           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
 978           _cm->clear_has_overflown();
 979 
 980           _cm->do_yield_check(worker_id);
 981 
 982           jlong sleep_time_ms;
 983           if (!_cm->has_aborted() && the_task->has_aborted()) {
 984             sleep_time_ms =
 985               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
 986             {
 987               SuspendibleThreadSetLeaver sts_leave;
 988               os::sleep(Thread::current(), sleep_time_ms, false);
 989             }
 990           }
 991         } while (!_cm->has_aborted() && the_task->has_aborted());
 992       }
 993       the_task->record_end_time();
 994       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
 995     }
 996 
 997     double end_vtime = os::elapsedVTime();
 998     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 999   }
1000 
1001   CMConcurrentMarkingTask(ConcurrentMark* cm,
1002                           ConcurrentMarkThread* cmt) :
1003       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1004 
1005   ~CMConcurrentMarkingTask() { }
1006 };
1007 
1008 // Calculates the number of active workers for a concurrent
1009 // phase.
1010 uint ConcurrentMark::calc_parallel_marking_threads() {
1011   uint n_conc_workers = 0;
1012   if (!UseDynamicNumberOfGCThreads ||
1013       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1014        !ForceDynamicNumberOfGCThreads)) {
1015     n_conc_workers = max_parallel_marking_threads();
1016   } else {
1017     n_conc_workers =
1018       AdaptiveSizePolicy::calc_default_active_workers(
1019                                    max_parallel_marking_threads(),
1020                                    1, /* Minimum workers */
1021                                    parallel_marking_threads(),
1022                                    Threads::number_of_non_daemon_threads());
1023     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1024     // that scaling has already gone into "_max_parallel_marking_threads".
1025   }
1026   assert(n_conc_workers > 0, "Always need at least 1");
1027   return n_conc_workers;
1028 }
1029 
1030 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1031   // Currently, only survivors can be root regions.
1032   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1033   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1034 
1035   const uintx interval = PrefetchScanIntervalInBytes;
1036   HeapWord* curr = hr->bottom();
1037   const HeapWord* end = hr->top();
1038   while (curr < end) {
1039     Prefetch::read(curr, interval);
1040     oop obj = oop(curr);
1041     int size = obj->oop_iterate_size(&cl);
1042     assert(size == obj->size(), "sanity");
1043     curr += size;
1044   }
1045 }
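
     // Root region scanning walks all objects below top() in the claimed region and
     // marks everything they reference. It is expected to finish before the next
     // evacuation pause (hence CMRootRegions::wait_until_scan_finished()), because
     // the survivor regions being scanned are candidates for evacuation in that pause.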
1046 
1047 class CMRootRegionScanTask : public AbstractGangTask {
1048 private:
1049   ConcurrentMark* _cm;
1050 
1051 public:
1052   CMRootRegionScanTask(ConcurrentMark* cm) :
1053     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1054 
1055   void work(uint worker_id) {
1056     assert(Thread::current()->is_ConcurrentGC_thread(),
1057            "this should only be done by a conc GC thread");
1058 
1059     CMRootRegions* root_regions = _cm->root_regions();
1060     HeapRegion* hr = root_regions->claim_next();
1061     while (hr != NULL) {
1062       _cm->scanRootRegion(hr, worker_id);
1063       hr = root_regions->claim_next();
1064     }
1065   }
1066 };
1067 
1068 void ConcurrentMark::scanRootRegions() {
1069   double scan_start = os::elapsedTime();
1070 
1071   // Start of concurrent marking.
1072   ClassLoaderDataGraph::clear_claimed_marks();
1073 
1074   // scan_in_progress() will have been set to true only if there was
1075   // at least one root region to scan. So, if it's false, we
1076   // should not attempt to do any further work.
1077   if (root_regions()->scan_in_progress()) {
1078     if (G1Log::fine()) {
1079       gclog_or_tty->gclog_stamp(concurrent_gc_id());
1080       gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
1081     }
1082 
1083     _parallel_marking_threads = calc_parallel_marking_threads();
1084     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1085            "Maximum number of marking threads exceeded");
1086     uint active_workers = MAX2(1U, parallel_marking_threads());
1087 
1088     CMRootRegionScanTask task(this);
1089     _parallel_workers->set_active_workers(active_workers);
1090     _parallel_workers->run_task(&task);
1091 
1092     if (G1Log::fine()) {
1093       gclog_or_tty->gclog_stamp(concurrent_gc_id());
1094       gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf secs]", os::elapsedTime() - scan_start);
1095     }
1096 
1097     // It's possible that has_aborted() is true here without actually
1098     // aborting the survivor scan earlier. This is OK as it's
1099     // mainly used for sanity checking.
1100     root_regions()->scan_finished();
1101   }
1102 }
1103 
1104 void ConcurrentMark::markFromRoots() {
1105   // we might be tempted to assert that:
1106   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1107   //        "inconsistent argument?");
1108   // However that wouldn't be right, because it's possible that
1109   // a safepoint is indeed in progress as a younger generation
1110   // stop-the-world GC happens even as we mark in this generation.
1111 
1112   _restart_for_overflow = false;
1113   force_overflow_conc()->init();
1114 
1115   // _g1h has _n_par_threads
1116   _parallel_marking_threads = calc_parallel_marking_threads();
1117   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1118     "Maximum number of marking threads exceeded");
1119 
1120   uint active_workers = MAX2(1U, parallel_marking_threads());
1121   assert(active_workers > 0, "Should have been set");
1122 
1123   // Parallel task terminator is set in "set_concurrency_and_phase()"
1124   set_concurrency_and_phase(active_workers, true /* concurrent */);
1125 
1126   CMConcurrentMarkingTask markingTask(this, cmThread());
1127   _parallel_workers->set_active_workers(active_workers);
1128   _parallel_workers->run_task(&markingTask);
1129   print_stats();
1130 }
1131 
1132 // Helper class to get rid of some boilerplate code.
1133 class G1CMTraceTime : public GCTraceTime {
1134   static bool doit_and_prepend(bool doit) {
1135     if (doit) {
1136       gclog_or_tty->put(' ');
1137     }
1138     return doit;
1139   }
1140 
1141  public:
1142   G1CMTraceTime(const char* title, bool doit)
1143     : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
1144         G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
1145   }
1146 };
1147 
1148 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1149   // world is stopped at this checkpoint
1150   assert(SafepointSynchronize::is_at_safepoint(),
1151          "world should be stopped");
1152 
1153   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1154 
1155   // If a full collection has happened, we shouldn't do this.
1156   if (has_aborted()) {
1157     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1158     return;
1159   }
1160 
1161   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1162 
1163   if (VerifyDuringGC) {
1164     HandleMark hm;  // handle scope
1165     g1h->prepare_for_verify();
1166     Universe::verify(VerifyOption_G1UsePrevMarking,
1167                      " VerifyDuringGC:(before)");
1168   }
1169   g1h->check_bitmaps("Remark Start");
1170 
1171   G1CollectorPolicy* g1p = g1h->g1_policy();
1172   g1p->record_concurrent_mark_remark_start();
1173 
1174   double start = os::elapsedTime();
1175 
1176   checkpointRootsFinalWork();
1177 
1178   double mark_work_end = os::elapsedTime();
1179 
1180   weakRefsWork(clear_all_soft_refs);
1181 
1182   if (has_overflown()) {
1183     // Oops.  We overflowed.  Restart concurrent marking.
1184     _restart_for_overflow = true;
1185     if (G1TraceMarkStackOverflow) {
1186       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1187     }
1188 
1189     // Verify the heap w.r.t. the previous marking bitmap.
1190     if (VerifyDuringGC) {
1191       HandleMark hm;  // handle scope
1192       g1h->prepare_for_verify();
1193       Universe::verify(VerifyOption_G1UsePrevMarking,
1194                        " VerifyDuringGC:(overflow)");
1195     }
1196 
1197     // Clear the marking state because we will be restarting
1198     // marking due to overflowing the global mark stack.
1199     reset_marking_state();
1200   } else {
1201     {
1202       G1CMTraceTime trace("GC aggregate-data", G1Log::finer());
1203 
1204       // Aggregate the per-task counting data that we have accumulated
1205       // while marking.
1206       aggregate_count_data();
1207     }
1208 
1209     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1210     // We're done with marking.
 1211     // This is the end of the marking cycle; we expect all
1212     // threads to have SATB queues with active set to true.
1213     satb_mq_set.set_active_all_threads(false, /* new active value */
1214                                        true /* expected_active */);
1215 
1216     if (VerifyDuringGC) {
1217       HandleMark hm;  // handle scope
1218       g1h->prepare_for_verify();
1219       Universe::verify(VerifyOption_G1UseNextMarking,
1220                        " VerifyDuringGC:(after)");
1221     }
1222     g1h->check_bitmaps("Remark End");
1223     assert(!restart_for_overflow(), "sanity");
1224     // Completely reset the marking state since marking completed
1225     set_non_marking_state();
1226   }
1227 
1228   // Expand the marking stack, if we have to and if we can.
1229   if (_markStack.should_expand()) {
1230     _markStack.expand();
1231   }
1232 
1233   // Statistics
1234   double now = os::elapsedTime();
1235   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1236   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1237   _remark_times.add((now - start) * 1000.0);
1238 
1239   g1p->record_concurrent_mark_remark_end();
1240 
1241   G1CMIsAliveClosure is_alive(g1h);
1242   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1243 }
1244 
1245 // Base class of the closures that finalize and verify the
1246 // liveness counting data.
1247 class CMCountDataClosureBase: public HeapRegionClosure {
1248 protected:
1249   G1CollectedHeap* _g1h;
1250   ConcurrentMark* _cm;
1251   CardTableModRefBS* _ct_bs;
1252 
1253   BitMap* _region_bm;
1254   BitMap* _card_bm;
1255 
1256   // Takes a region that's not empty (i.e., it has at least one
 1257   // live object in it) and sets its corresponding bit on the region
1258   // bitmap to 1. If the region is "starts humongous" it will also set
1259   // to 1 the bits on the region bitmap that correspond to its
1260   // associated "continues humongous" regions.
1261   void set_bit_for_region(HeapRegion* hr) {
1262     assert(!hr->is_continues_humongous(), "should have filtered those out");
1263 
1264     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1265     if (!hr->is_starts_humongous()) {
1266       // Normal (non-humongous) case: just set the bit.
1267       _region_bm->par_at_put(index, true);
1268     } else {
1269       // Starts humongous case: calculate how many regions are part of
1270       // this humongous region and then set the bit range.
1271       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1272       _region_bm->par_at_put_range(index, end_index, true);
1273     }
1274   }
1275 
1276 public:
1277   CMCountDataClosureBase(G1CollectedHeap* g1h,
1278                          BitMap* region_bm, BitMap* card_bm):
1279     _g1h(g1h), _cm(g1h->concurrent_mark()),
1280     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
1281     _region_bm(region_bm), _card_bm(card_bm) { }
1282 };
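
     // The two counting bitmaps have different granularities: _region_bm holds one
     // bit per heap region (set if the region contains any live data), while
     // _card_bm holds one bit per card (see the sizing in the ConcurrentMark
     // constructor), set for every card spanned by a live object.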
1283 
1284 // Closure that calculates the # live objects per region. Used
1285 // for verification purposes during the cleanup pause.
1286 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1287   CMBitMapRO* _bm;
1288   size_t _region_marked_bytes;
1289 
1290 public:
1291   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1292                          BitMap* region_bm, BitMap* card_bm) :
1293     CMCountDataClosureBase(g1h, region_bm, card_bm),
1294     _bm(bm), _region_marked_bytes(0) { }
1295 
1296   bool doHeapRegion(HeapRegion* hr) {
1297 
1298     if (hr->is_continues_humongous()) {
1299       // We will ignore these here and process them when their
1300       // associated "starts humongous" region is processed (see
 1301       // set_bit_for_region()). Note that we cannot rely on their
 1302       // associated "starts humongous" region to have its bit set to
1303       // 1 since, due to the region chunking in the parallel region
1304       // iteration, a "continues humongous" region might be visited
1305       // before its associated "starts humongous".
1306       return false;
1307     }
1308 
1309     HeapWord* ntams = hr->next_top_at_mark_start();
1310     HeapWord* start = hr->bottom();
1311 
1312     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1313            err_msg("Preconditions not met - "
1314                    "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
1315                    p2i(start), p2i(ntams), p2i(hr->end())));
1316 
1317     // Find the first marked object at or after "start".
1318     start = _bm->getNextMarkedWordAddress(start, ntams);
1319 
1320     size_t marked_bytes = 0;
1321 
1322     while (start < ntams) {
1323       oop obj = oop(start);
1324       int obj_sz = obj->size();
1325       HeapWord* obj_end = start + obj_sz;
1326 
1327       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1328       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1329 
 1330       // Note: if we're looking at the last region in the heap, obj_end
 1331       // could actually be just beyond the end of the heap; end_idx
1332       // will then correspond to a (non-existent) card that is also
1333       // just beyond the heap.
1334       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1335         // end of object is not card aligned - increment to cover
1336         // all the cards spanned by the object
1337         end_idx += 1;
1338       }
1339 
1340       // Set the bits in the card BM for the cards spanned by this object.
1341       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1342 
1343       // Add the size of this object to the number of marked bytes.
1344       marked_bytes += (size_t)obj_sz * HeapWordSize;
1345 
1346       // Find the next marked object after this one.
1347       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1348     }
1349 
1350     // Mark the allocated-since-marking portion...
1351     HeapWord* top = hr->top();
1352     if (ntams < top) {
1353       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1354       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1355 
 1356       // Note: if we're looking at the last region in the heap, top
 1357       // could actually be just beyond the end of the heap; end_idx
1358       // will then correspond to a (non-existent) card that is also
1359       // just beyond the heap.
1360       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
 1361         // top is not card aligned - increment to cover
 1362         // all the cards spanned by the allocated-since-marking portion
1363         end_idx += 1;
1364       }
1365       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1366 
1367       // This definitely means the region has live objects.
1368       set_bit_for_region(hr);
1369     }
1370 
1371     // Update the live region bitmap.
1372     if (marked_bytes > 0) {
1373       set_bit_for_region(hr);
1374     }
1375 
1376     // Set the marked bytes for the current region so that
1377     // it can be queried by a calling verification routine
1378     _region_marked_bytes = marked_bytes;
1379 
1380     return false;
1381   }
1382 
1383   size_t region_marked_bytes() const { return _region_marked_bytes; }
1384 };
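
     // Illustrative card arithmetic for the doHeapRegion() loop above, assuming the
     // usual 512 byte cards: an object that starts on a card boundary and is 600
     // bytes long spans two cards; end_idx initially refers to the second card and,
     // because the object's end is not card aligned, it is incremented so that the
     // half-covered last card is included in the set_card_bitmap_range() call.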
1385 
1386 // Heap region closure used for verifying the counting data
1387 // that was accumulated concurrently and aggregated during
1388 // the remark pause. This closure is applied to the heap
1389 // regions during the STW cleanup pause.
1390 
1391 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1392   G1CollectedHeap* _g1h;
1393   ConcurrentMark* _cm;
1394   CalcLiveObjectsClosure _calc_cl;
1395   BitMap* _region_bm;   // Region BM to be verified
1396   BitMap* _card_bm;     // Card BM to be verified
1397   bool _verbose;        // verbose output?
1398 
1399   BitMap* _exp_region_bm; // Expected Region BM values
1400   BitMap* _exp_card_bm;   // Expected card BM values
1401 
1402   int _failures;
1403 
1404 public:
1405   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1406                                 BitMap* region_bm,
1407                                 BitMap* card_bm,
1408                                 BitMap* exp_region_bm,
1409                                 BitMap* exp_card_bm,
1410                                 bool verbose) :
1411     _g1h(g1h), _cm(g1h->concurrent_mark()),
1412     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1413     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1414     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1415     _failures(0) { }
1416 
1417   int failures() const { return _failures; }
1418 
1419   bool doHeapRegion(HeapRegion* hr) {
1420     if (hr->is_continues_humongous()) {
1421       // We will ignore these here and process them when their
1422       // associated "starts humongous" region is processed (see
1423       // set_bit_for_heap_region()). Note that we cannot rely on their
1424       // associated "starts humongous" region to have its bit set to
1425       // 1 since, due to the region chunking in the parallel region
1426       // iteration, a "continues humongous" region might be visited
1427       // before its associated "starts humongous".
1428       return false;
1429     }
1430 
1431     int failures = 0;
1432 
1433     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1434     // this region and set the corresponding bits in the expected region
1435     // and card bitmaps.
1436     bool res = _calc_cl.doHeapRegion(hr);
1437     assert(res == false, "should be continuing");
1438 
1439     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1440                     Mutex::_no_safepoint_check_flag);
1441 
1442     // Verify the marked bytes for this region.
1443     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1444     size_t act_marked_bytes = hr->next_marked_bytes();
1445 
1446     // We're not OK if expected marked bytes > actual marked bytes. It means
1447     // we have missed accounting for some objects during the actual marking.
1448     if (exp_marked_bytes > act_marked_bytes) {
1449       if (_verbose) {
1450         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1451                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1452                                hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
1453       }
1454       failures += 1;
1455     }
1456 
1457     // Verify the bit for this region in the actual region bitmap
1458     // and in the expected region bitmap that was just calculated.
1459     // We're not OK if the bit in the calculated expected region
1460     // bitmap is set and the bit in the actual region bitmap is not.
1461     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1462 
1463     bool expected = _exp_region_bm->at(index);
1464     bool actual = _region_bm->at(index);
1465     if (expected && !actual) {
1466       if (_verbose) {
1467         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1468                                "expected: %s, actual: %s",
1469                                hr->hrm_index(),
1470                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1471       }
1472       failures += 1;
1473     }
1474 
1475     // Verify that the card bit maps for the cards spanned by the current
1476     // region match. We have an error if we have a set bit in the expected
1477     // bit map and the corresponding bit in the actual bitmap is not set.
1478 
1479     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1480     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1481 
1482     for (BitMap::idx_t i = start_idx; i < end_idx; i += 1) {
1483       expected = _exp_card_bm->at(i);
1484       actual = _card_bm->at(i);
1485 
1486       if (expected && !actual) {
1487         if (_verbose) {
1488           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1489                                  "expected: %s, actual: %s",
1490                                  hr->hrm_index(), i,
1491                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1492         }
1493         failures += 1;
1494       }
1495     }
1496 
1497     if (failures > 0 && _verbose)  {
1498       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1499                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1500                              HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
1501                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1502     }
1503 
1504     _failures += failures;
1505 
1506     // We could stop iteration over the heap when we
1507     // find the first violating region by returning true.
1508     return false;
1509   }
1510 };
1511 
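     // Parallel task that applies VerifyLiveObjectDataHRClosure to the heap
     // regions and accumulates the total number of verification failures.
     // Only used when VerifyDuringGC is set (see the assert in the constructor).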
1512 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1513 protected:
1514   G1CollectedHeap* _g1h;
1515   ConcurrentMark* _cm;
1516   BitMap* _actual_region_bm;
1517   BitMap* _actual_card_bm;
1518 
1519   uint    _n_workers;
1520 
1521   BitMap* _expected_region_bm;
1522   BitMap* _expected_card_bm;
1523 
1524   int  _failures;
1525   bool _verbose;
1526 
1527   HeapRegionClaimer _hrclaimer;
1528 
1529 public:
1530   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1531                             BitMap* region_bm, BitMap* card_bm,
1532                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1533     : AbstractGangTask("G1 verify final counting"),
1534       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1535       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1536       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1537       _failures(0), _verbose(false),
1538       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1539     assert(VerifyDuringGC, "don't call this otherwise");
1540     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1541     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1542 
1543     _verbose = _cm->verbose_medium();
1544   }
1545 
1546   void work(uint worker_id) {
1547     assert(worker_id < _n_workers, "invariant");
1548 
1549     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1550                                             _actual_region_bm, _actual_card_bm,
1551                                             _expected_region_bm,
1552                                             _expected_card_bm,
1553                                             _verbose);
1554 
1555     _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
1556 
1557     Atomic::add(verify_cl.failures(), &_failures);
1558   }
1559 
1560   int failures() const { return _failures; }
1561 };
1562 
1563 // Closure that finalizes the liveness counting data.
1564 // Used during the cleanup pause.
1565 // Sets the bits corresponding to the interval [NTAMS, top)
1566 // (which contains the implicitly live objects) in the
1567 // card liveness bitmap. Also sets the bit in the region
1568 // liveness bitmap for each region that contains live data.
1569 
1570 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1571  public:
1572   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1573                               BitMap* region_bm,
1574                               BitMap* card_bm) :
1575     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1576 
1577   bool doHeapRegion(HeapRegion* hr) {
1578 
1579     if (hr->is_continues_humongous()) {
1580       // We will ignore these here and process them when their
1581       // associated "starts humongous" region is processed (see
1582       // set_bit_for_heap_region()). Note that we cannot rely on their
1583       // associated "starts humongous" region to have its bit set to
1584       // 1 since, due to the region chunking in the parallel region
1585       // iteration, a "continues humongous" region might be visited
1586       // before its associated "starts humongous".
1587       return false;
1588     }
1589 
1590     HeapWord* ntams = hr->next_top_at_mark_start();
1591     HeapWord* top   = hr->top();
1592 
1593     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1594 
1595     // Mark the allocated-since-marking portion...
1596     if (ntams < top) {
1597       // This definitely means the region has live objects.
1598       set_bit_for_region(hr);
1599 
1600       // Now set the bits in the card bitmap for [ntams, top)
1601       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1602       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1603 
1604       // Note: if we're looking at the last region in the heap, top
1605       // could actually be just beyond the end of the heap; end_idx
1606       // will then correspond to a (non-existent) card that is also
1607       // just beyond the heap.
1608       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1609         // top is not card aligned - increment end_idx to cover
1610         // all the cards spanned up to top
1611         end_idx += 1;
1612       }
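           // For example: if ntams and top fall within the same card and top is
           // not card aligned, start_idx == end_idx before the increment, so the
           // half-open range [start_idx, end_idx) would be empty; bumping end_idx
           // by one makes it cover that single, partially filled card.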
1613 
1614       assert(end_idx <= _card_bm->size(),
1615              err_msg("oob: end_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1616                      end_idx, _card_bm->size()));
1617       assert(start_idx < _card_bm->size(),
1618              err_msg("oob: start_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1619                      start_idx, _card_bm->size()));
1620 
1621       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1622     }
1623 
1624     // Set the bit for the region if it contains live data
1625     if (hr->next_marked_bytes() > 0) {
1626       set_bit_for_region(hr);
1627     }
1628 
1629     return false;
1630   }
1631 };
1632 
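     // Parallel task that applies FinalCountDataUpdateClosure to the heap
     // regions. Run with the world stopped from ConcurrentMark::cleanup(),
     // roughly as follows:
     //
     //   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
     //   g1h->workers()->run_task(&g1_par_count_task);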
1633 class G1ParFinalCountTask: public AbstractGangTask {
1634 protected:
1635   G1CollectedHeap* _g1h;
1636   ConcurrentMark* _cm;
1637   BitMap* _actual_region_bm;
1638   BitMap* _actual_card_bm;
1639 
1640   uint    _n_workers;
1641   HeapRegionClaimer _hrclaimer;
1642 
1643 public:
1644   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1645     : AbstractGangTask("G1 final counting"),
1646       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1647       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1648       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1649   }
1650 
1651   void work(uint worker_id) {
1652     assert(worker_id < _n_workers, "invariant");
1653 
1654     FinalCountDataUpdateClosure final_update_cl(_g1h,
1655                                                 _actual_region_bm,
1656                                                 _actual_card_bm);
1657 
1658     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1659   }
1660 };
1661 
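     // Per-region closure applied at the end of marking. It notes the end of
     // marking in each region and eagerly reclaims regions that turned out to
     // contain no live data (excluding young and archive regions), adding them
     // to a worker-local cleanup list.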
1662 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1663   G1CollectedHeap* _g1;
1664   size_t _freed_bytes;
1665   FreeRegionList* _local_cleanup_list;
1666   HeapRegionSetCount _old_regions_removed;
1667   HeapRegionSetCount _humongous_regions_removed;
1668   HRRSCleanupTask* _hrrs_cleanup_task;
1669 
1670 public:
1671   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1672                              FreeRegionList* local_cleanup_list,
1673                              HRRSCleanupTask* hrrs_cleanup_task) :
1674     _g1(g1),
1675     _freed_bytes(0),
1676     _local_cleanup_list(local_cleanup_list),
1677     _old_regions_removed(),
1678     _humongous_regions_removed(),
1679     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1680 
1681   size_t freed_bytes() { return _freed_bytes; }
1682   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1683   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1684 
1685   bool doHeapRegion(HeapRegion *hr) {
1686     if (hr->is_continues_humongous() || hr->is_archive()) {
1687       return false;
1688     }
1689     // We use a claim value of zero here because all regions
1690     // were claimed with value 1 in the FinalCount task.
1691     _g1->reset_gc_time_stamps(hr);
1692     hr->note_end_of_marking();
1693 
1694     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1695       _freed_bytes += hr->used();
1696       hr->set_containing_set(NULL);
1697       if (hr->is_humongous()) {
1698         assert(hr->is_starts_humongous(), "we should only see starts humongous");
1699         _humongous_regions_removed.increment(1u, hr->capacity());
1700         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1701       } else {
1702         _old_regions_removed.increment(1u, hr->capacity());
1703         _g1->free_region(hr, _local_cleanup_list, true);
1704       }
1705     } else {
1706       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1707     }
1708 
1709     return false;
1710   }
1711 };
1712 
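     // Parallel task wrapping G1NoteEndOfConcMarkClosure. Each worker collects
     // reclaimed regions on a local free list and then appends that list to the
     // global cleanup list under the ParGCRareEvent_lock.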
1713 class G1ParNoteEndTask: public AbstractGangTask {
1714   friend class G1NoteEndOfConcMarkClosure;
1715 
1716 protected:
1717   G1CollectedHeap* _g1h;
1718   FreeRegionList* _cleanup_list;
1719   HeapRegionClaimer _hrclaimer;
1720 
1721 public:
1722   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1723       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1724   }
1725 
1726   void work(uint worker_id) {
1727     FreeRegionList local_cleanup_list("Local Cleanup List");
1728     HRRSCleanupTask hrrs_cleanup_task;
1729     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1730                                            &hrrs_cleanup_task);
1731     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1732     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1733 
1734     // Now update the lists
1735     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1736     {
1737       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1738       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1739 
1740       // If we iterate over the global cleanup list at the end of
1741       // cleanup to do this printing we will not guarantee to only
1742       // generate output for the newly-reclaimed regions (the list
1743       // might not be empty at the beginning of cleanup; we might
1744       // still be working on its previous contents). So we do the
1745       // printing here, before we append the new regions to the global
1746       // cleanup list.
1747 
1748       G1HRPrinter* hr_printer = _g1h->hr_printer();
1749       if (hr_printer->is_active()) {
1750         FreeRegionListIterator iter(&local_cleanup_list);
1751         while (iter.more_available()) {
1752           HeapRegion* hr = iter.get_next();
1753           hr_printer->cleanup(hr);
1754         }
1755       }
1756 
1757       _cleanup_list->add_ordered(&local_cleanup_list);
1758       assert(local_cleanup_list.is_empty(), "post-condition");
1759 
1760       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1761     }
1762   }
1763 };
1764 
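     // Parallel task that asks G1RemSet::scrub() to scrub the remembered sets,
     // using the region and card liveness bitmaps produced by marking.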
1765 class G1ParScrubRemSetTask: public AbstractGangTask {
1766 protected:
1767   G1RemSet* _g1rs;
1768   BitMap* _region_bm;
1769   BitMap* _card_bm;
1770   HeapRegionClaimer _hrclaimer;
1771 
1772 public:
1773   G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
1774       AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) {
1775   }
1776 
1777   void work(uint worker_id) {
1778     _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
1779   }
1780 
1781 };
1782 
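     // The STW cleanup pause: finalizes the liveness counting data, optionally
     // verifies it, swaps the marking bitmaps, notes the end of marking in all
     // regions (reclaiming any that are completely empty), and optionally
     // scrubs the remembered sets. Regions queued on the cleanup list are freed
     // later, concurrently, by completeCleanup().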
1783 void ConcurrentMark::cleanup() {
1784   // world is stopped at this checkpoint
1785   assert(SafepointSynchronize::is_at_safepoint(),
1786          "world should be stopped");
1787   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1788 
1789   // If a full collection has happened, we shouldn't do this.
1790   if (has_aborted()) {
1791     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1792     return;
1793   }
1794 
1795   g1h->verify_region_sets_optional();
1796 
1797   if (VerifyDuringGC) {
1798     HandleMark hm;  // handle scope
1799     g1h->prepare_for_verify();
1800     Universe::verify(VerifyOption_G1UsePrevMarking,
1801                      " VerifyDuringGC:(before)");
1802   }
1803   g1h->check_bitmaps("Cleanup Start");
1804 
1805   G1CollectorPolicy* g1p = g1h->g1_policy();
1806   g1p->record_concurrent_mark_cleanup_start();
1807 
1808   double start = os::elapsedTime();
1809 
1810   HeapRegionRemSet::reset_for_cleanup_tasks();
1811 
1812   // Do counting once more with the world stopped for good measure.
1813   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1814 
1815   g1h->workers()->run_task(&g1_par_count_task);
1816 
1817   if (VerifyDuringGC) {
1818     // Verify that the counting data accumulated during marking matches
1819     // that calculated by walking the marking bitmap.
1820 
1821     // Bitmaps to hold expected values
1822     BitMap expected_region_bm(_region_bm.size(), true);
1823     BitMap expected_card_bm(_card_bm.size(), true);
1824 
1825     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1826                                                  &_region_bm,
1827                                                  &_card_bm,
1828                                                  &expected_region_bm,
1829                                                  &expected_card_bm);
1830 
1831     g1h->workers()->run_task(&g1_par_verify_task);
1832 
1833     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1834   }
1835 
1836   size_t start_used_bytes = g1h->used();
1837   g1h->collector_state()->set_mark_in_progress(false);
1838 
1839   double count_end = os::elapsedTime();
1840   double this_final_counting_time = (count_end - start);
1841   _total_counting_time += this_final_counting_time;
1842 
1843   if (G1PrintRegionLivenessInfo) {
1844     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1845     _g1h->heap_region_iterate(&cl);
1846   }
1847 
1848   // Install newly created mark bitMap as "prev".
1849   swapMarkBitMaps();
1850 
1851   g1h->reset_gc_time_stamp();
1852 
1853   uint n_workers = _g1h->workers()->active_workers();
1854 
1855   // Note end of marking in all heap regions.
1856   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1857   g1h->workers()->run_task(&g1_par_note_end_task);
1858   g1h->check_gc_time_stamps();
1859 
1860   if (!cleanup_list_is_empty()) {
1861     // The cleanup list is not empty, so we'll have to process it
1862     // concurrently. Notify anyone else that might be wanting free
1863     // regions that there will be more free regions coming soon.
1864     g1h->set_free_regions_coming();
1865   }
1866 
1867   // Do the remembered set scrubbing before the record_concurrent_mark_cleanup_end()
1868   // call below, since it affects the metric by which we sort the heap regions.
1869   if (G1ScrubRemSets) {
1870     double rs_scrub_start = os::elapsedTime();
1871     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
1872     g1h->workers()->run_task(&g1_par_scrub_rs_task);
1873 
1874     double rs_scrub_end = os::elapsedTime();
1875     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1876     _total_rs_scrub_time += this_rs_scrub_time;
1877   }
1878 
1879   // this will also free any regions totally full of garbage objects,
1880   // and sort the regions.
1881   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1882 
1883   // Statistics.
1884   double end = os::elapsedTime();
1885   _cleanup_times.add((end - start) * 1000.0);
1886 
1887   if (G1Log::fine()) {
1888     g1h->g1_policy()->print_heap_transition(start_used_bytes);
1889   }
1890 
1891   // Clean up will have freed any regions completely full of garbage.
1892   // Update the soft reference policy with the new heap occupancy.
1893   Universe::update_heap_info_at_gc();
1894 
1895   if (VerifyDuringGC) {
1896     HandleMark hm;  // handle scope
1897     g1h->prepare_for_verify();
1898     Universe::verify(VerifyOption_G1UsePrevMarking,
1899                      " VerifyDuringGC:(after)");
1900   }
1901 
1902   g1h->check_bitmaps("Cleanup End");
1903 
1904   g1h->verify_region_sets_optional();
1905 
1906   // We need to make this be a "collection" so any collection pause that
1907   // races with it goes around and waits for completeCleanup to finish.
1908   g1h->increment_total_collections();
1909 
1910   // Clean out dead classes and update Metaspace sizes.
1911   if (ClassUnloadingWithConcurrentMark) {
1912     ClassLoaderDataGraph::purge();
1913   }
1914   MetaspaceGC::compute_new_size();
1915 
1916   // We reclaimed old regions so we should calculate the sizes to make
1917   // sure we update the old gen/space data.
1918   g1h->g1mm()->update_sizes();
1919   g1h->allocation_context_stats().update_after_mark();
1920 
1921   g1h->trace_heap_after_concurrent_cycle();
1922 }
1923 
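     // Concurrent part of cleanup: clears and frees the regions queued on the
     // cleanup list, moving them to the secondary free list in batches of
     // G1SecondaryFreeListAppendLength to limit lock traffic and notify_all()
     // calls.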
1924 void ConcurrentMark::completeCleanup() {
1925   if (has_aborted()) return;
1926 
1927   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1928 
1929   _cleanup_list.verify_optional();
1930   FreeRegionList tmp_free_list("Tmp Free List");
1931 
1932   if (G1ConcRegionFreeingVerbose) {
1933     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1934                            "cleanup list has %u entries",
1935                            _cleanup_list.length());
1936   }
1937 
1938   // No one else should be accessing the _cleanup_list at this point,
1939   // so it is not necessary to take any locks
1940   while (!_cleanup_list.is_empty()) {
1941     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1942     assert(hr != NULL, "Got NULL from a non-empty list");
1943     hr->par_clear();
1944     tmp_free_list.add_ordered(hr);
1945 
1946     // Instead of adding one region at a time to the secondary_free_list,
1947     // we accumulate them in the local list and move them a few at a
1948     // time. This also cuts down on the number of notify_all() calls
1949     // we do during this process. We'll also append the local list when
1950     // _cleanup_list is empty (which means we just removed the last
1951     // region from the _cleanup_list).
1952     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1953         _cleanup_list.is_empty()) {
1954       if (G1ConcRegionFreeingVerbose) {
1955         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1956                                "appending %u entries to the secondary_free_list, "
1957                                "cleanup list still has %u entries",
1958                                tmp_free_list.length(),
1959                                _cleanup_list.length());
1960       }
1961 
1962       {
1963         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1964         g1h->secondary_free_list_add(&tmp_free_list);
1965         SecondaryFreeList_lock->notify_all();
1966       }
1967 #ifndef PRODUCT
1968       if (G1StressConcRegionFreeing) {
1969         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1970           os::sleep(Thread::current(), (jlong) 1, false);
1971         }
1972       }
1973 #endif
1974     }
1975   }
1976   assert(tmp_free_list.is_empty(), "post-condition");
1977 }
1978 
1979 // Supporting Object and Oop closures for reference discovery
1980 // and processing during marking
1981 
1982 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1983   HeapWord* addr = (HeapWord*)obj;
1984   return addr != NULL &&
1985          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1986 }
1987 
1988 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1989 // Uses the CMTask associated with a worker thread (for serial reference
1990 // processing the CMTask for worker 0 is used) to preserve (mark) and
1991 // trace referent objects.
1992 //
1993 // Using the CMTask and embedded local queues avoids having the worker
1994 // threads operating on the global mark stack. This reduces the risk
1995 // of overflowing the stack - which we would rather avoid at this late
1996 // stage. Also using the tasks' local queues removes the potential
1997 // for the workers to interfere with each other, which could occur if
1998 // they were operating on the global stack.
1999 
2000 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2001   ConcurrentMark* _cm;
2002   CMTask*         _task;
2003   int             _ref_counter_limit;
2004   int             _ref_counter;
2005   bool            _is_serial;
2006  public:
2007   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2008     _cm(cm), _task(task), _is_serial(is_serial),
2009     _ref_counter_limit(G1RefProcDrainInterval) {
2010     assert(_ref_counter_limit > 0, "sanity");
2011     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2012     _ref_counter = _ref_counter_limit;
2013   }
2014 
2015   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2016   virtual void do_oop(      oop* p) { do_oop_work(p); }
2017 
2018   template <class T> void do_oop_work(T* p) {
2019     if (!_cm->has_overflown()) {
2020       oop obj = oopDesc::load_decode_heap_oop(p);
2021       if (_cm->verbose_high()) {
2022         gclog_or_tty->print_cr("\t[%u] we're looking at location "
2023                                "*" PTR_FORMAT " = " PTR_FORMAT,
2024                                _task->worker_id(), p2i(p), p2i((void*) obj));
2025       }
2026 
2027       _task->deal_with_reference(obj);
2028       _ref_counter--;
2029 
2030       if (_ref_counter == 0) {
2031         // We have dealt with _ref_counter_limit references, pushing them
2032         // and objects reachable from them on to the local stack (and
2033         // possibly the global stack). Call CMTask::do_marking_step() to
2034         // process these entries.
2035         //
2036         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2037         // there's nothing more to do (i.e. we're done with the entries that
2038         // were pushed as a result of the CMTask::deal_with_reference() calls
2039         // above) or we overflow.
2040         //
2041         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2042         // flag while there may still be some work to do. (See the comment at
2043         // the beginning of CMTask::do_marking_step() for those conditions -
2044         // one of which is reaching the specified time target.) It is only
2045         // when CMTask::do_marking_step() returns without setting the
2046         // has_aborted() flag that the marking step has completed.
2047         do {
2048           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2049           _task->do_marking_step(mark_step_duration_ms,
2050                                  false      /* do_termination */,
2051                                  _is_serial);
2052         } while (_task->has_aborted() && !_cm->has_overflown());
2053         _ref_counter = _ref_counter_limit;
2054       }
2055     } else {
2056       if (_cm->verbose_high()) {
2057          gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2058       }
2059     }
2060   }
2061 };
2062 
2063 // 'Drain' oop closure used by both serial and parallel reference processing.
2064 // Uses the CMTask associated with a given worker thread (for serial
2065 // reference processing the CMTask for worker 0 is used). Calls the
2066 // do_marking_step routine, with an unbelievably large timeout value,
2067 // to drain the marking data structures of the remaining entries
2068 // added by the 'keep alive' oop closure above.
2069 
2070 class G1CMDrainMarkingStackClosure: public VoidClosure {
2071   ConcurrentMark* _cm;
2072   CMTask*         _task;
2073   bool            _is_serial;
2074  public:
2075   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2076     _cm(cm), _task(task), _is_serial(is_serial) {
2077     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2078   }
2079 
2080   void do_void() {
2081     do {
2082       if (_cm->verbose_high()) {
2083         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2084                                _task->worker_id(), BOOL_TO_STR(_is_serial));
2085       }
2086 
2087       // We call CMTask::do_marking_step() to completely drain the local
2088       // and global marking stacks of entries pushed by the 'keep alive'
2089       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2090       //
2091       // CMTask::do_marking_step() is called in a loop, which we'll exit
2092       // if there's nothing more to do (i.e. we've completely drained the
2093       // entries that were pushed as a result of applying the 'keep alive'
2094       // closure to the entries on the discovered ref lists) or we overflow
2095       // the global marking stack.
2096       //
2097       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2098       // flag while there may still be some work to do. (See the comment at
2099       // the beginning of CMTask::do_marking_step() for those conditions -
2100       // one of which is reaching the specified time target.) It is only
2101       // when CMTask::do_marking_step() returns without setting the
2102       // has_aborted() flag that the marking step has completed.
2103 
2104       _task->do_marking_step(1000000000.0 /* something very large */,
2105                              true         /* do_termination */,
2106                              _is_serial);
2107     } while (_task->has_aborted() && !_cm->has_overflown());
2108   }
2109 };
2110 
2111 // Implementation of AbstractRefProcTaskExecutor for parallel
2112 // reference processing at the end of G1 concurrent marking
2113 
2114 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2115 private:
2116   G1CollectedHeap* _g1h;
2117   ConcurrentMark*  _cm;
2118   WorkGang*        _workers;
2119   uint             _active_workers;
2120 
2121 public:
2122   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2123                           ConcurrentMark* cm,
2124                           WorkGang* workers,
2125                           uint n_workers) :
2126     _g1h(g1h), _cm(cm),
2127     _workers(workers), _active_workers(n_workers) { }
2128 
2129   // Executes the given task using concurrent marking worker threads.
2130   virtual void execute(ProcessTask& task);
2131   virtual void execute(EnqueueTask& task);
2132 };
2133 
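     // Gang task proxy that runs a reference processing ProcessTask in parallel.
     // Each worker uses its own CMTask together with per-worker 'keep alive' and
     // 'drain' closures (see above).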
2134 class G1CMRefProcTaskProxy: public AbstractGangTask {
2135   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2136   ProcessTask&     _proc_task;
2137   G1CollectedHeap* _g1h;
2138   ConcurrentMark*  _cm;
2139 
2140 public:
2141   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2142                      G1CollectedHeap* g1h,
2143                      ConcurrentMark* cm) :
2144     AbstractGangTask("Process reference objects in parallel"),
2145     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2146     ReferenceProcessor* rp = _g1h->ref_processor_cm();
2147     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2148   }
2149 
2150   virtual void work(uint worker_id) {
2151     ResourceMark rm;
2152     HandleMark hm;
2153     CMTask* task = _cm->task(worker_id);
2154     G1CMIsAliveClosure g1_is_alive(_g1h);
2155     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2156     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2157 
2158     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2159   }
2160 };
2161 
2162 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2163   assert(_workers != NULL, "Need parallel worker threads.");
2164   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2165 
2166   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2167 
2168   // We need to reset the concurrency level before each
2169   // proxy task execution, so that the termination protocol
2170   // and overflow handling in CMTask::do_marking_step() know
2171   // how many workers to wait for.
2172   _cm->set_concurrency(_active_workers);
2173   _workers->run_task(&proc_task_proxy);
2174 }
2175 
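     // Gang task proxy for the parallel enqueue phase of reference processing.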
2176 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2177   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2178   EnqueueTask& _enq_task;
2179 
2180 public:
2181   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2182     AbstractGangTask("Enqueue reference objects in parallel"),
2183     _enq_task(enq_task) { }
2184 
2185   virtual void work(uint worker_id) {
2186     _enq_task.work(worker_id);
2187   }
2188 };
2189 
2190 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2191   assert(_workers != NULL, "Need parallel worker threads.");
2192   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2193 
2194   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2195 
2196   // Not strictly necessary but...
2197   //
2198   // We need to reset the concurrency level before each
2199   // proxy task execution, so that the termination protocol
2200   // and overflow handling in CMTask::do_marking_step() know
2201   // how many workers to wait for.
2202   _cm->set_concurrency(_active_workers);
2203   _workers->run_task(&enq_task_proxy);
2204 }
2205 
2206 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2207   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2208 }
2209 
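     // Process the references discovered during concurrent marking (skipped if
     // the global mark stack overflowed, since marking will be restarted), then
     // unload classes, clean the string/symbol tables and unlink deduplicated
     // strings as configured.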
2210 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2211   if (has_overflown()) {
2212     // Skip processing the discovered references if we have
2213     // overflown the global marking stack. Reference objects
2214     // only get discovered once so it is OK to not
2215     // de-populate the discovered reference lists. We could have,
2216     // but the only benefit would be that, when marking restarts,
2217     // less reference objects are discovered.
2218     return;
2219   }
2220 
2221   ResourceMark rm;
2222   HandleMark   hm;
2223 
2224   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2225 
2226   // Is alive closure.
2227   G1CMIsAliveClosure g1_is_alive(g1h);
2228 
2229   // Inner scope to exclude the cleaning of the string and symbol
2230   // tables from the displayed time.
2231   {
2232     G1CMTraceTime t("GC ref-proc", G1Log::finer());
2233 
2234     ReferenceProcessor* rp = g1h->ref_processor_cm();
2235 
2236     // See the comment in G1CollectedHeap::ref_processing_init()
2237     // about how reference processing currently works in G1.
2238 
2239     // Set the soft reference policy
2240     rp->setup_policy(clear_all_soft_refs);
2241     assert(_markStack.isEmpty(), "mark stack should be empty");
2242 
2243     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2244     // in serial reference processing. Note these closures are also
2245     // used for serially processing (by the current thread) the
2246     // JNI references during parallel reference processing.
2247     //
2248     // These closures do not need to synchronize with the worker
2249     // threads involved in parallel reference processing as these
2250     // instances are executed serially by the current thread (e.g.
2251     // reference processing is not multi-threaded and is thus
2252     // performed by the current thread instead of a gang worker).
2253     //
2254     // The gang tasks involved in parallel reference processing create
2255     // their own instances of these closures, which do their own
2256     // synchronization among themselves.
2257     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2258     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2259 
2260     // We need at least one active thread. If reference processing
2261     // is not multi-threaded we use the current (VMThread) thread,
2262     // otherwise we use the work gang from the G1CollectedHeap and
2263     // we utilize all the worker threads we can.
2264     bool processing_is_mt = rp->processing_is_mt();
2265     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2266     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2267 
2268     // Parallel processing task executor.
2269     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2270                                               g1h->workers(), active_workers);
2271     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2272 
2273     // Set the concurrency level. The phase was already set prior to
2274     // executing the remark task.
2275     set_concurrency(active_workers);
2276 
2277     // Set the degree of MT processing here.  If the discovery was done MT,
2278     // the number of threads involved during discovery could differ from
2279     // the number of active workers.  This is OK as long as the discovered
2280     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2281     rp->set_active_mt_degree(active_workers);
2282 
2283     // Process the weak references.
2284     const ReferenceProcessorStats& stats =
2285         rp->process_discovered_references(&g1_is_alive,
2286                                           &g1_keep_alive,
2287                                           &g1_drain_mark_stack,
2288                                           executor,
2289                                           g1h->gc_timer_cm(),
2290                                           concurrent_gc_id());
2291     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2292 
2293     // The do_oop work routines of the keep_alive and drain_marking_stack
2294     // oop closures will set the has_overflown flag if we overflow the
2295     // global marking stack.
2296 
2297     assert(_markStack.overflow() || _markStack.isEmpty(),
2298             "mark stack should be empty (unless it overflowed)");
2299 
2300     if (_markStack.overflow()) {
2301       // This should have been done already when we tried to push an
2302       // entry on to the global mark stack. But let's do it again.
2303       set_has_overflown();
2304     }
2305 
2306     assert(rp->num_q() == active_workers, "Number of reference queues should match the number of active workers");
2307 
2308     rp->enqueue_discovered_references(executor);
2309 
2310     rp->verify_no_references_recorded();
2311     assert(!rp->discovery_enabled(), "Post condition");
2312   }
2313 
2314   if (has_overflown()) {
2315     // We can not trust g1_is_alive if the marking stack overflowed
2316     return;
2317   }
2318 
2319   assert(_markStack.isEmpty(), "Marking should have completed");
2320 
2321   // Unload Klasses, String, Symbols, Code Cache, etc.
2322   {
2323     G1CMTraceTime trace("Unloading", G1Log::finer());
2324 
2325     if (ClassUnloadingWithConcurrentMark) {
2326       bool purged_classes;
2327 
2328       {
2329         G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest());
2330         purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2331       }
2332 
2333       {
2334         G1CMTraceTime trace("Parallel Unloading", G1Log::finest());
2335         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2336       }
2337     }
2338 
2339     if (G1StringDedup::is_enabled()) {
2340       G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest());
2341       G1StringDedup::unlink(&g1_is_alive);
2342     }
2343   }
2344 }
2345 
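     // Exchange the roles of the previous and next marking bitmaps: the bitmap
     // that was just completed becomes the "prev" bitmap, and the old "prev"
     // bitmap becomes the "next" bitmap for the following cycle (it is expected
     // to be cleared before the next marking starts).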
2346 void ConcurrentMark::swapMarkBitMaps() {
2347   CMBitMapRO* temp = _prevMarkBitMap;
2348   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2349   _nextMarkBitMap  = (CMBitMap*)  temp;
2350 }
2351 
2352 // Closure for marking entries in SATB buffers.
2353 class CMSATBBufferClosure : public SATBBufferClosure {
2354 private:
2355   CMTask* _task;
2356   G1CollectedHeap* _g1h;
2357 
2358   // This is very similar to CMTask::deal_with_reference, but with
2359   // more relaxed requirements for the argument, so this must be more
2360   // circumspect about treating the argument as an object.
2361   void do_entry(void* entry) const {
2362     _task->increment_refs_reached();
2363     HeapRegion* hr = _g1h->heap_region_containing_raw(entry);
2364     if (entry < hr->next_top_at_mark_start()) {
2365       // Until we get here, we don't know whether entry refers to a valid
2366       // object; it could instead have been a stale reference.
2367       oop obj = static_cast<oop>(entry);
2368       assert(obj->is_oop(true /* ignore mark word */),
2369              err_msg("Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj)));
2370       _task->make_reference_grey(obj, hr);
2371     }
2372   }
2373 
2374 public:
2375   CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2376     : _task(task), _g1h(g1h) { }
2377 
2378   virtual void do_buffer(void** buffer, size_t size) {
2379     for (size_t i = 0; i < size; ++i) {
2380       do_entry(buffer[i]);
2381     }
2382   }
2383 };
2384 
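     // Thread closure used during remark. Each thread is claimed exactly once;
     // for Java threads we walk the nmethods on the stack and drain the
     // thread-local SATB buffer, and for the VM thread we drain the shared SATB
     // queue.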
2385 class G1RemarkThreadsClosure : public ThreadClosure {
2386   CMSATBBufferClosure _cm_satb_cl;
2387   G1CMOopClosure _cm_cl;
2388   MarkingCodeBlobClosure _code_cl;
2389   int _thread_parity;
2390 
2391  public:
2392   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2393     _cm_satb_cl(task, g1h),
2394     _cm_cl(g1h, g1h->concurrent_mark(), task),
2395     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2396     _thread_parity(Threads::thread_claim_parity()) {}
2397 
2398   void do_thread(Thread* thread) {
2399     if (thread->is_Java_thread()) {
2400       if (thread->claim_oops_do(true, _thread_parity)) {
2401         JavaThread* jt = (JavaThread*)thread;
2402 
2403         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2404         // however, oops reachable from nmethods have very complex lifecycles:
2405         // * Alive if on the stack of an executing method
2406         // * Weakly reachable otherwise
2407         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2408         // kept live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2409         jt->nmethods_do(&_code_cl);
2410 
2411         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2412       }
2413     } else if (thread->is_VM_thread()) {
2414       if (thread->claim_oops_do(true, _thread_parity)) {
2415         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2416       }
2417     }
2418   }
2419 };
2420 
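     // Parallel remark task. Each active worker first scans the threads (SATB
     // buffers and nmethods) via G1RemarkThreadsClosure and then calls
     // do_marking_step() in a loop until marking completes or the global mark
     // stack overflows.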
2421 class CMRemarkTask: public AbstractGangTask {
2422 private:
2423   ConcurrentMark* _cm;
2424 public:
2425   void work(uint worker_id) {
2426     // Since all available tasks are actually started, we should
2427     // only proceed if we're supposed to be active.
2428     if (worker_id < _cm->active_tasks()) {
2429       CMTask* task = _cm->task(worker_id);
2430       task->record_start_time();
2431       {
2432         ResourceMark rm;
2433         HandleMark hm;
2434 
2435         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2436         Threads::threads_do(&threads_f);
2437       }
2438 
2439       do {
2440         task->do_marking_step(1000000000.0 /* something very large */,
2441                               true         /* do_termination       */,
2442                               false        /* is_serial            */);
2443       } while (task->has_aborted() && !_cm->has_overflown());
2444       // If we overflow, then we do not want to restart. We instead
2445       // want to abort remark and do concurrent marking again.
2446       task->record_end_time();
2447     }
2448   }
2449 
2450   CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2451     AbstractGangTask("Par Remark"), _cm(cm) {
2452     _cm->terminator()->reset_for_reuse(active_workers);
2453   }
2454 };
2455 
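     // The work done inside the remark (final marking) pause: runs the parallel
     // CMRemarkTask with all active workers and then checks that all SATB
     // buffers have been processed (unless marking overflowed).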
2456 void ConcurrentMark::checkpointRootsFinalWork() {
2457   ResourceMark rm;
2458   HandleMark   hm;
2459   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2460 
2461   G1CMTraceTime trace("Finalize Marking", G1Log::finer());
2462 
2463   g1h->ensure_parsability(false);
2464 
2465   // this is remark, so we'll use up all active threads
2466   uint active_workers = g1h->workers()->active_workers();
2467   set_concurrency_and_phase(active_workers, false /* concurrent */);
2468   // Leave _parallel_marking_threads at its
2469   // value originally calculated in the ConcurrentMark
2470   // constructor and pass values of the active workers
2471   // through the gang in the task.
2472 
2473   {
2474     StrongRootsScope srs(active_workers);
2475 
2476     CMRemarkTask remarkTask(this, active_workers);
2477     // We will start all available threads, even if we decide that the
2478     // active_workers will be fewer. The extra ones will just bail out
2479     // immediately.
2480     g1h->workers()->run_task(&remarkTask);
2481   }
2482 
2483   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2484   guarantee(has_overflown() ||
2485             satb_mq_set.completed_buffers_num() == 0,
2486             err_msg("Invariant: has_overflown = %s, num buffers = %d",
2487                     BOOL_TO_STR(has_overflown()),
2488                     satb_mq_set.completed_buffers_num()));
2489 
2490   print_stats();
2491 }
2492 
2493 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2494   // Note we are overriding the read-only view of the prev map here, via
2495   // the cast.
2496   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2497 }
2498 
2499 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2500   _nextMarkBitMap->clearRange(mr);
2501 }
2502 
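     // Claims the next region to be scanned by a marking task by advancing the
     // global finger past it with a CAS (retrying internally if another thread
     // moved the finger first). Returns NULL if the claimed region turned out
     // to be empty or if the finger has reached the end of the heap.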
2503 HeapRegion*
2504 ConcurrentMark::claim_region(uint worker_id) {
2505   // "checkpoint" the finger
2506   HeapWord* finger = _finger;
2507 
2508   // _heap_end will not change underneath our feet; it only changes at
2509   // yield points.
2510   while (finger < _heap_end) {
2511     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2512 
2513     // Note on how this code handles humongous regions. In the
2514     // normal case the finger will reach the start of a "starts
2515     // humongous" (SH) region. Its end will either be the end of the
2516     // last "continues humongous" (CH) region in the sequence, or the
2517     // standard end of the SH region (if the SH is the only region in
2518     // the sequence). That way claim_region() will skip over the CH
2519     // regions. However, there is a subtle race between a CM thread
2520     // executing this method and a mutator thread doing a humongous
2521     // object allocation. The two are not mutually exclusive as the CM
2522     // thread does not need to hold the Heap_lock when it gets
2523     // here. So there is a chance that claim_region() will come across
2524     // a free region that's in the process of becoming a SH or a CH
2525     // region. In the former case, it will either
2526     //   a) Miss the update to the region's end, in which case it will
2527     //      visit every subsequent CH region, will find their bitmaps
2528     //      empty, and do nothing, or
2529     //   b) Will observe the update of the region's end (in which case
2530     //      it will skip the subsequent CH regions).
2531     // If it comes across a region that suddenly becomes CH, the
2532     // scenario will be similar to b). So, the race between
2533     // claim_region() and a humongous object allocation might force us
2534     // to do a bit of unnecessary work (due to some unnecessary bitmap
2535     // iterations) but it should not introduce any correctness issues.
2536     HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2537 
2538     // The heap_region_containing_raw() call above may return NULL, as we always
2539     // scan and claim up to the end of the heap. In this case, just jump to the next region.
2540     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2541 
2542     // Is the gap between reading the finger and doing the CAS too long?
2543     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2544     if (res == finger && curr_region != NULL) {
2545       // we succeeded
2546       HeapWord*   bottom        = curr_region->bottom();
2547       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2548 
2549       if (verbose_low()) {
2550         gclog_or_tty->print_cr("[%u] curr_region = " PTR_FORMAT " "
2551                                "[" PTR_FORMAT ", " PTR_FORMAT "), "
2552                                "limit = " PTR_FORMAT,
2553                                worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2554       }
2555 
2556       // Notice that _finger == end cannot be guaranteed here since
2557       // someone else might have moved the finger even further.
2558       assert(_finger >= end, "the finger should have moved forward");
2559 
2560       if (verbose_low()) {
2561         gclog_or_tty->print_cr("[%u] we were successful with region = "
2562                                PTR_FORMAT, worker_id, p2i(curr_region));
2563       }
2564 
2565       if (limit > bottom) {
2566         if (verbose_low()) {
2567           gclog_or_tty->print_cr("[%u] region " PTR_FORMAT " is not empty, "
2568                                  "returning it ", worker_id, p2i(curr_region));
2569         }
2570         return curr_region;
2571       } else {
2572         assert(limit == bottom,
2573                "the region limit should be at bottom");
2574         if (verbose_low()) {
2575           gclog_or_tty->print_cr("[%u] region " PTR_FORMAT " is empty, "
2576                                  "returning NULL", worker_id, p2i(curr_region));
2577         }
2578         // we return NULL and the caller should try calling
2579         // claim_region() again.
2580         return NULL;
2581       }
2582     } else {
2583       assert(_finger > finger, "the finger should have moved forward");
2584       if (verbose_low()) {
2585         if (curr_region == NULL) {
2586           gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, "
2587                                  "global finger = " PTR_FORMAT ", "
2588                                  "our finger = " PTR_FORMAT,
2589                                  worker_id, p2i(_finger), p2i(finger));
2590         } else {
2591           gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
2592                                  "global finger = " PTR_FORMAT ", "
2593                                  "our finger = " PTR_FORMAT,
2594                                  worker_id, p2i(_finger), p2i(finger));
2595         }
2596       }
2597 
2598       // read it again
2599       finger = _finger;
2600     }
2601   }
2602 
2603   return NULL;
2604 }
2605 
2606 #ifndef PRODUCT
2607 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
2608 private:
2609   G1CollectedHeap* _g1h;
2610   const char* _phase;
2611   int _info;
2612 
2613 public:
2614   VerifyNoCSetOops(const char* phase, int info = -1) :
2615     _g1h(G1CollectedHeap::heap()),
2616     _phase(phase),
2617     _info(info)
2618   { }
2619 
2620   void operator()(oop obj) const {
2621     guarantee(obj->is_oop(),
2622               err_msg("Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2623                       p2i(obj), _phase, _info));
2624     guarantee(!_g1h->obj_in_cs(obj),
2625               err_msg("obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2626                       p2i(obj), _phase, _info));
2627   }
2628 };
2629 
2630 void ConcurrentMark::verify_no_cset_oops() {
2631   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2632   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2633     return;
2634   }
2635 
2636   // Verify entries on the global mark stack
2637   _markStack.iterate(VerifyNoCSetOops("Stack"));
2638 
2639   // Verify entries on the task queues
2640   for (uint i = 0; i < _max_worker_id; ++i) {
2641     CMTaskQueue* queue = _task_queues->queue(i);
2642     queue->iterate(VerifyNoCSetOops("Queue", i));
2643   }
2644 
2645   // Verify the global finger
2646   HeapWord* global_finger = finger();
2647   if (global_finger != NULL && global_finger < _heap_end) {
2648     // The global finger always points to a heap region boundary. We
2649     // use heap_region_containing_raw() to get the containing region
2650     // given that the global finger could be pointing to a free region
2651     // which subsequently becomes continues humongous. If that
2652     // happens, heap_region_containing() will return the bottom of the
2653     // corresponding starts humongous region and the check below will
2654     // not hold any more.
2655     // Since we always iterate over all regions, we might get a NULL HeapRegion
2656     // here.
2657     HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2658     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2659               err_msg("global finger: " PTR_FORMAT " region: " HR_FORMAT,
2660                       p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
2661   }
2662 
2663   // Verify the task fingers
2664   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2665   for (uint i = 0; i < parallel_marking_threads(); ++i) {
2666     CMTask* task = _tasks[i];
2667     HeapWord* task_finger = task->finger();
2668     if (task_finger != NULL && task_finger < _heap_end) {
2669       // See above note on the global finger verification.
2670       HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2671       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2672                 !task_hr->in_collection_set(),
2673                 err_msg("task finger: " PTR_FORMAT " region: " HR_FORMAT,
2674                         p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
2675     }
2676   }
2677 }
2678 #endif // PRODUCT
2679 
2680 // Aggregate the counting data that was constructed concurrently
2681 // with marking.
2682 class AggregateCountDataHRClosure: public HeapRegionClosure {
2683   G1CollectedHeap* _g1h;
2684   ConcurrentMark* _cm;
2685   CardTableModRefBS* _ct_bs;
2686   BitMap* _cm_card_bm;
2687   uint _max_worker_id;
2688 
2689  public:
2690   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2691                               BitMap* cm_card_bm,
2692                               uint max_worker_id) :
2693     _g1h(g1h), _cm(g1h->concurrent_mark()),
2694     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2695     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2696 
2697   bool doHeapRegion(HeapRegion* hr) {
2698     if (hr->is_continues_humongous()) {
2699       // We will ignore these here and process them when their
2700       // associated "starts humongous" region is processed.
2701       // Note that we cannot rely on their associated
2702       // "starts humongous" region to have its bit set to 1
2703       // since, due to the region chunking in the parallel region
2704       // iteration, a "continues humongous" region might be visited
2705       // before its associated "starts humongous".
2706       return false;
2707     }
2708 
2709     HeapWord* start = hr->bottom();
2710     HeapWord* limit = hr->next_top_at_mark_start();
2711     HeapWord* end = hr->end();
2712 
2713     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2714            err_msg("Preconditions not met - "
2715                    "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2716                    "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2717                    p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));
2718 
2719     assert(hr->next_marked_bytes() == 0, "Precondition");
2720 
2721     if (start == limit) {
2722       // NTAMS of this region has not been set so nothing to do.
2723       return false;
2724     }
2725 
2726     // 'start' should be in the heap.
2727     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2728     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2729     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2730 
2731     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2732     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2733     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2734 
2735     // If ntams is not card aligned then we bump card bitmap index
2736     // for limit so that we get the all the cards spanned by
2737     // the object ending at ntams.
2738     // Note: if this is the last region in the heap then ntams
2739     // could actually be just beyond the end of the heap;
2740     // limit_idx will then correspond to a (non-existent) card
2741     // that is also outside the heap.
2742     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2743       limit_idx += 1;
2744     }
2745 
2746     assert(limit_idx <= end_idx, "or else use atomics");
2747 
2748     // Aggregate the "stripe" in the count data associated with hr.
2749     uint hrm_index = hr->hrm_index();
2750     size_t marked_bytes = 0;
2751 
2752     for (uint i = 0; i < _max_worker_id; i += 1) {
2753       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2754       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2755 
2756       // Fetch the marked_bytes in this region for task i and
2757       // add it to the running total for this region.
2758       marked_bytes += marked_bytes_array[hrm_index];
2759 
2760       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2761       // into the global card bitmap.
2762       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2763 
2764       while (scan_idx < limit_idx) {
2765         assert(task_card_bm->at(scan_idx) == true, "should be");
2766         _cm_card_bm->set_bit(scan_idx);
2767         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2768 
2769         // BitMap::get_next_one_offset() can handle the case when
2770         // its left_offset parameter is greater than its right_offset
2771         // parameter. It does, however, have an early exit if
2772         // left_offset == right_offset. So let's limit the value
2773         // passed in for left offset here.
2774         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2775         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2776       }
2777     }
2778 
2779     // Update the marked bytes for this region.
2780     hr->add_to_marked_bytes(marked_bytes);
2781 
2782     // Next heap region
2783     return false;
2784   }
2785 };
2786 
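     // Gang task that runs AggregateCountDataHRClosure over the heap in
     // parallel; each worker claims heap regions through the shared
     // HeapRegionClaimer.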
2787 class G1AggregateCountDataTask: public AbstractGangTask {
2788 protected:
2789   G1CollectedHeap* _g1h;
2790   ConcurrentMark* _cm;
2791   BitMap* _cm_card_bm;
2792   uint _max_worker_id;
2793   uint _active_workers;
2794   HeapRegionClaimer _hrclaimer;
2795 
2796 public:
2797   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2798                            ConcurrentMark* cm,
2799                            BitMap* cm_card_bm,
2800                            uint max_worker_id,
2801                            uint n_workers) :
2802       AbstractGangTask("Count Aggregation"),
2803       _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2804       _max_worker_id(max_worker_id),
2805       _active_workers(n_workers),
2806       _hrclaimer(_active_workers) {
2807   }
2808 
2809   void work(uint worker_id) {
2810     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
2811 
2812     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
2813   }
2814 };
2815 
2816 
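     // Aggregate the per-worker counting data into the global structures
     // using all currently active GC workers.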
2817 void ConcurrentMark::aggregate_count_data() {
2818   uint n_workers = _g1h->workers()->active_workers();
2819 
2820   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
2821                                            _max_worker_id, n_workers);
2822 
2823   _g1h->workers()->run_task(&g1_par_agg_task);
2824 }
2825 
2826 // Clear the per-worker arrays used to store the per-region counting data
2827 void ConcurrentMark::clear_all_count_data() {
2828   // Clear the global card bitmap - it will be filled during
2829   // liveness count aggregation (during remark) and the
2830   // final counting task.
2831   _card_bm.clear();
2832 
2833   // Clear the global region bitmap - it will be filled as part
2834   // of the final counting task.
2835   _region_bm.clear();
2836 
2837   uint max_regions = _g1h->max_regions();
2838   assert(_max_worker_id > 0, "uninitialized");
2839 
2840   for (uint i = 0; i < _max_worker_id; i += 1) {
2841     BitMap* task_card_bm = count_card_bitmap_for(i);
2842     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
2843 
2844     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
2845     assert(marked_bytes_array != NULL, "uninitialized");
2846 
2847     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
2848     task_card_bm->clear();
2849   }
2850 }
2851 
2852 void ConcurrentMark::print_stats() {
2853   if (verbose_stats()) {
2854     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2855     for (size_t i = 0; i < _active_tasks; ++i) {
2856       _tasks[i]->print_stats();
2857       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2858     }
2859   }
2860 }
2861 
2862 // abandon current marking iteration due to a Full GC
2863 void ConcurrentMark::abort() {
2864   if (!cmThread()->during_cycle() || _has_aborted) {
2865     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2866     return;
2867   }
2868 
2869   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2870   // concurrent bitmap clearing.
2871   _nextMarkBitMap->clearAll();
2872 
2873   // Note we cannot clear the previous marking bitmap here
2874   // since VerifyDuringGC verifies the objects marked during
2875   // a full GC against the previous bitmap.
2876 
2877   // Clear the liveness counting data
2878   clear_all_count_data();
2879   // Empty mark stack
2880   reset_marking_state();
2881   for (uint i = 0; i < _max_worker_id; ++i) {
2882     _tasks[i]->clear_region_fields();
2883   }
2884   _first_overflow_barrier_sync.abort();
2885   _second_overflow_barrier_sync.abort();
2886   _aborted_gc_id = _g1h->gc_tracer_cm()->gc_id();
2887   assert(!_aborted_gc_id.is_undefined(), "ConcurrentMark::abort() executed more than once?");
2888   _has_aborted = true;
2889 
2890   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2891   satb_mq_set.abandon_partial_marking();
2892   // This can be called either during or outside marking; we'll read
2893   // the expected_active value from the SATB queue set.
2894   satb_mq_set.set_active_all_threads(
2895                                  false, /* new active value */
2896                                  satb_mq_set.is_active() /* expected_active */);
2897 
2898   _g1h->trace_heap_after_concurrent_cycle();
2899   _g1h->register_concurrent_cycle_end();
2900 }
2901 
2902 const GCId& ConcurrentMark::concurrent_gc_id() {
2903   if (has_aborted()) {
2904     return _aborted_gc_id;
2905   }
2906   return _g1h->gc_tracer_cm()->gc_id();
2907 }
2908 
2909 static void print_ms_time_info(const char* prefix, const char* name,
2910                                NumberSeq& ns) {
2911   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2912                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2913   if (ns.num() > 0) {
2914     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2915                            prefix, ns.sd(), ns.maximum());
2916   }
2917 }
2918 
2919 void ConcurrentMark::print_summary_info() {
2920   gclog_or_tty->print_cr(" Concurrent marking:");
2921   print_ms_time_info("  ", "init marks", _init_times);
2922   print_ms_time_info("  ", "remarks", _remark_times);
2923   {
2924     print_ms_time_info("     ", "final marks", _remark_mark_times);
2925     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2926 
2927   }
2928   print_ms_time_info("  ", "cleanups", _cleanup_times);
2929   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
2930                          _total_counting_time,
2931                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2932                           (double)_cleanup_times.num()
2933                          : 0.0));
2934   if (G1ScrubRemSets) {
2935     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2936                            _total_rs_scrub_time,
2937                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2938                             (double)_cleanup_times.num()
2939                            : 0.0));
2940   }
2941   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
2942                          (_init_times.sum() + _remark_times.sum() +
2943                           _cleanup_times.sum())/1000.0);
2944   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
2945                 "(%8.2f s marking).",
2946                 cmThread()->vtime_accum(),
2947                 cmThread()->vtime_mark_accum());
2948 }
2949 
2950 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2951   _parallel_workers->print_worker_threads_on(st);
2952 }
2953 
2954 void ConcurrentMark::print_on_error(outputStream* st) const {
2955   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2956       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
2957   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
2958   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
2959 }
2960 
2961 // We take a break if someone is trying to stop the world.
2962 bool ConcurrentMark::do_yield_check(uint worker_id) {
2963   if (SuspendibleThreadSet::should_yield()) {
2964     if (worker_id == 0) {
2965       _g1h->g1_policy()->record_concurrent_pause();
2966     }
2967     SuspendibleThreadSet::yield();
2968     return true;
2969   } else {
2970     return false;
2971   }
2972 }
2973 
2974 #ifndef PRODUCT
2975 // for debugging purposes
2976 void ConcurrentMark::print_finger() {
2977   gclog_or_tty->print_cr("heap [" PTR_FORMAT ", " PTR_FORMAT "), global finger = " PTR_FORMAT,
2978                          p2i(_heap_start), p2i(_heap_end), p2i(_finger));
2979   for (uint i = 0; i < _max_worker_id; ++i) {
2980     gclog_or_tty->print("   %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
2981   }
2982   gclog_or_tty->cr();
2983 }
2984 #endif
2985 
2986 // Closure for iteration over bitmaps
2987 class CMBitMapClosure : public BitMapClosure {
2988 private:
2989   // the bitmap that is being iterated over
2990   CMBitMap*                   _nextMarkBitMap;
2991   ConcurrentMark*             _cm;
2992   CMTask*                     _task;
2993 
2994 public:
2995   CMBitMapClosure(CMTask* task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
2996     _nextMarkBitMap(nextMarkBitMap), _cm(cm), _task(task) { }
2997 
2998   bool do_bit(size_t offset) {
2999     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3000     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3001     assert(addr < _cm->finger(), "invariant");
3002 
3003     statsOnly( _task->increase_objs_found_on_bitmap() );
3004     assert(addr >= _task->finger(), "invariant");
3005 
3006     // We move that task's local finger along.
3007     _task->move_finger_to(addr);
3008 
3009     _task->scan_object(oop(addr));
3010     // we only partially drain the local queue and global stack
3011     _task->drain_local_queue(true);
3012     _task->drain_global_stack(true);
3013 
3014     // if the has_aborted flag has been raised, we need to bail out of
3015     // the iteration
3016     return !_task->has_aborted();
3017   }
3018 };
3019 
3020 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3021                                ConcurrentMark* cm,
3022                                CMTask* task)
3023   : _g1h(g1h), _cm(cm), _task(task) {
3024   assert(_ref_processor == NULL, "should be initialized to NULL");
3025 
3026   if (G1UseConcMarkReferenceProcessing) {
3027     _ref_processor = g1h->ref_processor_cm();
3028     assert(_ref_processor != NULL, "should not be NULL");
3029   }
3030 }
3031 
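     // Make hr the task's current region: remember it, move the local
     // finger to its bottom and recompute the region limit.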
3032 void CMTask::setup_for_region(HeapRegion* hr) {
3033   assert(hr != NULL,
3034         "claim_region() should have filtered out NULL regions");
3035   assert(!hr->is_continues_humongous(),
3036         "claim_region() should have filtered out continues humongous regions");
3037 
3038   if (_cm->verbose_low()) {
3039     gclog_or_tty->print_cr("[%u] setting up for region " PTR_FORMAT,
3040                            _worker_id, p2i(hr));
3041   }
3042 
3043   _curr_region  = hr;
3044   _finger       = hr->bottom();
3045   update_region_limit();
3046 }
3047 
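     // Recompute the limit (NTAMS) of the current region, adjusting the
     // local finger for the special cases described below.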
3048 void CMTask::update_region_limit() {
3049   HeapRegion* hr            = _curr_region;
3050   HeapWord* bottom          = hr->bottom();
3051   HeapWord* limit           = hr->next_top_at_mark_start();
3052 
3053   if (limit == bottom) {
3054     if (_cm->verbose_low()) {
3055       gclog_or_tty->print_cr("[%u] found an empty region "
3056                              "[" PTR_FORMAT ", " PTR_FORMAT ")",
3057                              _worker_id, p2i(bottom), p2i(limit));
3058     }
3059     // The region was collected underneath our feet.
3060     // We set the finger to bottom to ensure that the bitmap
3061     // iteration that will follow this will not do anything.
3062     // (this is not a condition that holds when we set the region up,
3063     // as the region is not supposed to be empty in the first place)
3064     _finger = bottom;
3065   } else if (limit >= _region_limit) {
3066     assert(limit >= _finger, "peace of mind");
3067   } else {
3068     assert(limit < _region_limit, "only way to get here");
3069     // This can happen under some pretty unusual circumstances.  An
3070     // evacuation pause empties the region underneath our feet (NTAMS
3071     // at bottom). We then do some allocation in the region (NTAMS
3072     // stays at bottom), followed by the region being used as a GC
3073     // alloc region (NTAMS will move to top() and the objects
3074     // originally below it will be grayed). All objects now marked in
3075     // the region are explicitly grayed, if below the global finger,
3076     // and in fact we do not need to scan anything else. So, we simply
3077     // set _finger to be limit to ensure that the bitmap iteration
3078     // doesn't do anything.
3079     _finger = limit;
3080   }
3081 
3082   _region_limit = limit;
3083 }
3084 
3085 void CMTask::giveup_current_region() {
3086   assert(_curr_region != NULL, "invariant");
3087   if (_cm->verbose_low()) {
3088     gclog_or_tty->print_cr("[%u] giving up region " PTR_FORMAT,
3089                            _worker_id, p2i(_curr_region));
3090   }
3091   clear_region_fields();
3092 }
3093 
3094 void CMTask::clear_region_fields() {
3095   // Values for these three fields that indicate that we're not
3096   // holding on to a region.
3097   _curr_region   = NULL;
3098   _finger        = NULL;
3099   _region_limit  = NULL;
3100 }
3101 
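     // Install (or, when passed NULL, clear) the oop closure used while
     // scanning objects; installing and clearing are expected to alternate.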
3102 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3103   if (cm_oop_closure == NULL) {
3104     assert(_cm_oop_closure != NULL, "invariant");
3105   } else {
3106     assert(_cm_oop_closure == NULL, "invariant");
3107   }
3108   _cm_oop_closure = cm_oop_closure;
3109 }
3110 
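     // Re-initialize the per-task state: the next mark bitmap, the region
     // fields, the timing fields and (when enabled) the marking statistics.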
3111 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3112   guarantee(nextMarkBitMap != NULL, "invariant");
3113 
3114   if (_cm->verbose_low()) {
3115     gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3116   }
3117 
3118   _nextMarkBitMap                = nextMarkBitMap;
3119   clear_region_fields();
3120 
3121   _calls                         = 0;
3122   _elapsed_time_ms               = 0.0;
3123   _termination_time_ms           = 0.0;
3124   _termination_start_time_ms     = 0.0;
3125 
3126 #if _MARKING_STATS_
3127   _aborted                       = 0;
3128   _aborted_overflow              = 0;
3129   _aborted_cm_aborted            = 0;
3130   _aborted_yield                 = 0;
3131   _aborted_timed_out             = 0;
3132   _aborted_satb                  = 0;
3133   _aborted_termination           = 0;
3134   _steal_attempts                = 0;
3135   _steals                        = 0;
3136   _local_pushes                  = 0;
3137   _local_pops                    = 0;
3138   _local_max_size                = 0;
3139   _objs_scanned                  = 0;
3140   _global_pushes                 = 0;
3141   _global_pops                   = 0;
3142   _global_max_size               = 0;
3143   _global_transfers_to           = 0;
3144   _global_transfers_from         = 0;
3145   _regions_claimed               = 0;
3146   _objs_found_on_bitmap          = 0;
3147   _satb_buffers_processed        = 0;
3148 #endif // _MARKING_STATS_
3149 }
3150 
3151 bool CMTask::should_exit_termination() {
3152   regular_clock_call();
3153   // This is called when we are in the termination protocol. We should
3154   // quit if, for some reason, this task wants to abort or the global
3155   // stack is not empty (this means that we can get work from it).
3156   return !_cm->mark_stack_empty() || has_aborted();
3157 }
3158 
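     // Called once the words-scanned or refs-reached limit has been hit;
     // it simply triggers the regular clock.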
3159 void CMTask::reached_limit() {
3160   assert(_words_scanned >= _words_scanned_limit ||
3161          _refs_reached >= _refs_reached_limit,
3162          "shouldn't have been called otherwise");
3163   regular_clock_call();
3164 }
3165 
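     // The work-based "clock": recalculates the scanning limits and checks
     // the abort conditions (global overflow, marking aborted by a Full GC,
     // yield requests, the time quota and pending SATB buffers), aborting
     // the task if any of them fires.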
3166 void CMTask::regular_clock_call() {
3167   if (has_aborted()) return;
3168 
3169   // First, we need to recalculate the words scanned and refs reached
3170   // limits for the next clock call.
3171   recalculate_limits();
3172 
3173   // During the regular clock call we do the following
3174 
3175   // (1) If an overflow has been flagged, then we abort.
3176   if (_cm->has_overflown()) {
3177     set_has_aborted();
3178     return;
3179   }
3180 
3181   // If we are not concurrent (i.e. we're doing remark) we don't need
3182   // to check anything else. The other steps are only needed during
3183   // the concurrent marking phase.
3184   if (!concurrent()) return;
3185 
3186   // (2) If marking has been aborted for Full GC, then we also abort.
3187   if (_cm->has_aborted()) {
3188     set_has_aborted();
3189     statsOnly( ++_aborted_cm_aborted );
3190     return;
3191   }
3192 
3193   double curr_time_ms = os::elapsedVTime() * 1000.0;
3194 
3195   // (3) If marking stats are enabled, then we update the step history.
3196 #if _MARKING_STATS_
3197   if (_words_scanned >= _words_scanned_limit) {
3198     ++_clock_due_to_scanning;
3199   }
3200   if (_refs_reached >= _refs_reached_limit) {
3201     ++_clock_due_to_marking;
3202   }
3203 
3204   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3205   _interval_start_time_ms = curr_time_ms;
3206   _all_clock_intervals_ms.add(last_interval_ms);
3207 
3208   if (_cm->verbose_medium()) {
3209       gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3210                         "scanned = " SIZE_FORMAT "%s, refs reached = " SIZE_FORMAT "%s",
3211                         _worker_id, last_interval_ms,
3212                         _words_scanned,
3213                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3214                         _refs_reached,
3215                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3216   }
3217 #endif // _MARKING_STATS_
3218 
3219   // (4) We check whether we should yield. If we have to, then we abort.
3220   if (SuspendibleThreadSet::should_yield()) {
3221     // We should yield. To do this we abort the task. The caller is
3222     // responsible for yielding.
3223     set_has_aborted();
3224     statsOnly( ++_aborted_yield );
3225     return;
3226   }
3227 
3228   // (5) We check whether we've reached our time quota. If we have,
3229   // then we abort.
3230   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3231   if (elapsed_time_ms > _time_target_ms) {
3232     set_has_aborted();
3233     _has_timed_out = true;
3234     statsOnly( ++_aborted_timed_out );
3235     return;
3236   }
3237 
3238   // (6) Finally, we check whether there are enough completed SATB
3239   // buffers available for processing. If there are, we abort.
3240   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3241   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3242     if (_cm->verbose_low()) {
3243       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3244                              _worker_id);
3245     }
3246     // we do need to process SATB buffers, we'll abort and restart
3247     // the marking task to do so
3248     set_has_aborted();
3249     statsOnly( ++_aborted_satb );
3250     return;
3251   }
3252 }
3253 
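     // Move the words-scanned and refs-reached limits forward by one period
     // so that the next regular clock call happens after roughly the same
     // amount of work.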
3254 void CMTask::recalculate_limits() {
3255   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3256   _words_scanned_limit      = _real_words_scanned_limit;
3257 
3258   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3259   _refs_reached_limit       = _real_refs_reached_limit;
3260 }
3261 
3262 void CMTask::decrease_limits() {
3263   // This is called when we believe that we're going to do an infrequent
3264   // operation which will increase the per byte scanned cost (i.e. move
3265   // entries to/from the global stack). It basically tries to decrease the
3266   // scanning limit so that the clock is called earlier.
3267 
3268   if (_cm->verbose_medium()) {
3269     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3270   }
3271 
3272   _words_scanned_limit = _real_words_scanned_limit -
3273     3 * words_scanned_period / 4;
3274   _refs_reached_limit  = _real_refs_reached_limit -
3275     3 * refs_reached_period / 4;
3276 }
3277 
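     // Transfer up to global_stack_transfer_size entries from the local
     // queue to the global mark stack, aborting the task if the global
     // stack overflows.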
3278 void CMTask::move_entries_to_global_stack() {
3279   // local array where we'll store the entries that will be popped
3280   // from the local queue
3281   oop buffer[global_stack_transfer_size];
3282 
3283   int n = 0;
3284   oop obj;
3285   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3286     buffer[n] = obj;
3287     ++n;
3288   }
3289 
3290   if (n > 0) {
3291     // we popped at least one entry from the local queue
3292 
3293     statsOnly( ++_global_transfers_to; _local_pops += n );
3294 
3295     if (!_cm->mark_stack_push(buffer, n)) {
3296       if (_cm->verbose_low()) {
3297         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3298                                _worker_id);
3299       }
3300       set_has_aborted();
3301     } else {
3302       // the transfer was successful
3303 
3304       if (_cm->verbose_medium()) {
3305         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3306                                _worker_id, n);
3307       }
3308       statsOnly( size_t tmp_size = _cm->mark_stack_size();
3309                  if (tmp_size > _global_max_size) {
3310                    _global_max_size = tmp_size;
3311                  }
3312                  _global_pushes += n );
3313     }
3314   }
3315 
3316   // this operation was quite expensive, so decrease the limits
3317   decrease_limits();
3318 }
3319 
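     // Pop up to global_stack_transfer_size entries from the global mark
     // stack and push them onto the local queue.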
3320 void CMTask::get_entries_from_global_stack() {
3321   // local array where we'll store the entries that will be popped
3322   // from the global stack.
3323   oop buffer[global_stack_transfer_size];
3324   int n;
3325   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3326   assert(n <= global_stack_transfer_size,
3327          "we should not pop more than the given limit");
3328   if (n > 0) {
3329     // yes, we did actually pop at least one entry
3330 
3331     statsOnly( ++_global_transfers_from; _global_pops += n );
3332     if (_cm->verbose_medium()) {
3333       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3334                              _worker_id, n);
3335     }
3336     for (int i = 0; i < n; ++i) {
3337       bool success = _task_queue->push(buffer[i]);
3338       // We only call this when the local queue is empty or under a
3339       // given target limit. So, we do not expect this push to fail.
3340       assert(success, "invariant");
3341     }
3342 
3343     statsOnly( size_t tmp_size = (size_t)_task_queue->size();
3344                if (tmp_size > _local_max_size) {
3345                  _local_max_size = tmp_size;
3346                }
3347                _local_pushes += n );
3348   }
3349 
3350   // this operation was quite expensive, so decrease the limits
3351   decrease_limits();
3352 }
3353 
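     // Pop and scan objects from the local queue until its size drops to
     // the target: roughly a third of the queue's capacity (capped at
     // GCDrainStackTargetSize) when draining partially, zero otherwise.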
3354 void CMTask::drain_local_queue(bool partially) {
3355   if (has_aborted()) return;
3356 
3357   // Decide what the target size is, depending whether we're going to
3358   // drain it partially (so that other tasks can steal if they run out
3359   // of things to do) or totally (at the very end).
3360   size_t target_size;
3361   if (partially) {
3362     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3363   } else {
3364     target_size = 0;
3365   }
3366 
3367   if (_task_queue->size() > target_size) {
3368     if (_cm->verbose_high()) {
3369       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3370                              _worker_id, target_size);
3371     }
3372 
3373     oop obj;
3374     bool ret = _task_queue->pop_local(obj);
3375     while (ret) {
3376       statsOnly( ++_local_pops );
3377 
3378       if (_cm->verbose_high()) {
3379         gclog_or_tty->print_cr("[%u] popped " PTR_FORMAT, _worker_id,
3380                                p2i((void*) obj));
3381       }
3382 
3383       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3384       assert(!_g1h->is_on_master_free_list(
3385                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3386 
3387       scan_object(obj);
3388 
3389       if (_task_queue->size() <= target_size || has_aborted()) {
3390         ret = false;
3391       } else {
3392         ret = _task_queue->pop_local(obj);
3393       }
3394     }
3395 
3396     if (_cm->verbose_high()) {
3397       gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3398                              _worker_id, _task_queue->size());
3399     }
3400   }
3401 }
3402 
3403 void CMTask::drain_global_stack(bool partially) {
3404   if (has_aborted()) return;
3405 
3406   // We have a policy to drain the local queue before we attempt to
3407   // drain the global stack.
3408   assert(partially || _task_queue->size() == 0, "invariant");
3409 
3410   // Decide what the target size is, depending whether we're going to
3411   // drain it partially (so that other tasks can steal if they run out
3412   // of things to do) or totally (at the very end).  Notice that,
3413   // because we move entries from the global stack in chunks or
3414   // because another task might be doing the same, we might in fact
3415   // drop below the target. But this is not a problem.
3416   size_t target_size;
3417   if (partially) {
3418     target_size = _cm->partial_mark_stack_size_target();
3419   } else {
3420     target_size = 0;
3421   }
3422 
3423   if (_cm->mark_stack_size() > target_size) {
3424     if (_cm->verbose_low()) {
3425       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3426                              _worker_id, target_size);
3427     }
3428 
3429     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3430       get_entries_from_global_stack();
3431       drain_local_queue(partially);
3432     }
3433 
3434     if (_cm->verbose_low()) {
3435       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3436                              _worker_id, _cm->mark_stack_size());
3437     }
3438   }
3439 }
3440 
3441 // The SATB queue has several assumptions on whether to call the par or
3442 // non-par versions of the methods. This is why some of the code is
3443 // replicated. We should really get rid of the single-threaded version
3444 // of the code to simplify things.
3445 void CMTask::drain_satb_buffers() {
3446   if (has_aborted()) return;
3447 
3448   // We set this so that the regular clock knows that we're in the
3449   // middle of draining buffers and doesn't set the abort flag when it
3450   // notices that SATB buffers are available for draining. It'd be
3451   // very counterproductive if it did that. :-)
3452   _draining_satb_buffers = true;
3453 
3454   CMSATBBufferClosure satb_cl(this, _g1h);
3455   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3456 
3457   // This keeps claiming and applying the closure to completed buffers
3458   // until we run out of buffers or we need to abort.
3459   while (!has_aborted() &&
3460          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3461     if (_cm->verbose_medium()) {
3462       gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3463     }
3464     statsOnly( ++_satb_buffers_processed );
3465     regular_clock_call();
3466   }
3467 
3468   _draining_satb_buffers = false;
3469 
3470   assert(has_aborted() ||
3471          concurrent() ||
3472          satb_mq_set.completed_buffers_num() == 0, "invariant");
3473 
3474   // again, this was a potentially expensive operation, decrease the
3475   // limits to get the regular clock call early
3476   decrease_limits();
3477 }
3478 
3479 void CMTask::print_stats() {
3480   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3481                          _worker_id, _calls);
3482   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3483                          _elapsed_time_ms, _termination_time_ms);
3484   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3485                          _step_times_ms.num(), _step_times_ms.avg(),
3486                          _step_times_ms.sd());
3487   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3488                          _step_times_ms.maximum(), _step_times_ms.sum());
3489 
3490 #if _MARKING_STATS_
3491   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3492                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3493                          _all_clock_intervals_ms.sd());
3494   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3495                          _all_clock_intervals_ms.maximum(),
3496                          _all_clock_intervals_ms.sum());
3497   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = " SIZE_FORMAT ", marking = " SIZE_FORMAT,
3498                          _clock_due_to_scanning, _clock_due_to_marking);
3499   gclog_or_tty->print_cr("  Objects: scanned = " SIZE_FORMAT ", found on the bitmap = " SIZE_FORMAT,
3500                          _objs_scanned, _objs_found_on_bitmap);
3501   gclog_or_tty->print_cr("  Local Queue:  pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT,
3502                          _local_pushes, _local_pops, _local_max_size);
3503   gclog_or_tty->print_cr("  Global Stack: pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT,
3504                          _global_pushes, _global_pops, _global_max_size);
3505   gclog_or_tty->print_cr("                transfers to = " SIZE_FORMAT ", transfers from = " SIZE_FORMAT,
3506                          _global_transfers_to,_global_transfers_from);
3507   gclog_or_tty->print_cr("  Regions: claimed = " SIZE_FORMAT, _regions_claimed);
3508   gclog_or_tty->print_cr("  SATB buffers: processed = " SIZE_FORMAT, _satb_buffers_processed);
3509   gclog_or_tty->print_cr("  Steals: attempts = " SIZE_FORMAT ", successes = " SIZE_FORMAT,
3510                          _steal_attempts, _steals);
3511   gclog_or_tty->print_cr("  Aborted: " SIZE_FORMAT ", due to", _aborted);
3512   gclog_or_tty->print_cr("    overflow: " SIZE_FORMAT ", global abort: " SIZE_FORMAT ", yield: " SIZE_FORMAT,
3513                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3514   gclog_or_tty->print_cr("    time out: " SIZE_FORMAT ", SATB: " SIZE_FORMAT ", termination: " SIZE_FORMAT,
3515                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3516 #endif // _MARKING_STATS_
3517 }
3518 
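     // Try to steal an entry from another task's queue; returns true and
     // sets obj on success.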
3519 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3520   return _task_queues->steal(worker_id, hash_seed, obj);
3521 }
3522 
3523 /*****************************************************************************
3524 
3525     The do_marking_step(time_target_ms, ...) method is the building
3526     block of the parallel marking framework. It can be called in parallel
3527     with other invocations of do_marking_step() on different tasks
3528     (but only one per task, obviously) and concurrently with the
3529     mutator threads, or during remark, hence it eliminates the need
3530     for two versions of the code. When called during remark, it will
3531     pick up from where the task left off during the concurrent marking
3532     phase. Interestingly, tasks are also claimable during evacuation
3533     pauses, since do_marking_step() ensures that it aborts before
3534     it needs to yield.
3535 
3536     The data structures that it uses to do marking work are the
3537     following:
3538 
3539       (1) Marking Bitmap. If there are gray objects that appear only
3540       on the bitmap (this happens either when dealing with an overflow
3541       or when the initial marking phase has simply marked the roots
3542       and didn't push them on the stack), then tasks claim heap
3543       regions whose bitmap they then scan to find gray objects. A
3544       global finger indicates where the end of the last claimed region
3545       is. A local finger indicates how far into the region a task has
3546       scanned. The two fingers are used to determine how to gray an
3547       object (i.e. whether simply marking it is OK, as it will be
3548       visited by a task in the future, or whether it needs to be also
3549       pushed on a stack).
3550 
3551       (2) Local Queue. The local queue of the task which is accessed
3552       reasonably efficiently by the task. Other tasks can steal from
3553       it when they run out of work. Throughout the marking phase, a
3554       task attempts to keep its local queue short but not totally
3555       empty, so that entries are available for stealing by other
3556       tasks. Only when there is no more work, a task will totally
3557       drain its local queue.
3558 
3559       (3) Global Mark Stack. This handles local queue overflow. During
3560       marking only sets of entries are moved between it and the local
3561       queues, as access to it requires a mutex and more fine-grain
3562       interaction with it which might cause contention. If it
3563       overflows, then the marking phase should restart and iterate
3564       over the bitmap to identify gray objects. Throughout the marking
3565       phase, tasks attempt to keep the global mark stack at a small
3566       length but not totally empty, so that entries are available for
3567       popping by other tasks. Only when there is no more work, tasks
3568       will totally drain the global mark stack.
3569 
3570       (4) SATB Buffer Queue. This is where completed SATB buffers are
3571       made available. Buffers are regularly removed from this queue
3572       and scanned for roots, so that the queue doesn't get too
3573       long. During remark, all completed buffers are processed, as
3574       well as the filled in parts of any uncompleted buffers.
3575 
3576     The do_marking_step() method tries to abort when the time target
3577     has been reached. There are a few other cases when the
3578     do_marking_step() method also aborts:
3579 
3580       (1) When the marking phase has been aborted (after a Full GC).
3581 
3582       (2) When a global overflow (on the global stack) has been
3583       triggered. Before the task aborts, it will actually sync up with
3584       the other tasks to ensure that all the marking data structures
3585       (local queues, stacks, fingers etc.)  are re-initialized so that
3586       when do_marking_step() completes, the marking phase can
3587       immediately restart.
3588 
3589       (3) When enough completed SATB buffers are available. The
3590       do_marking_step() method only tries to drain SATB buffers right
3591       at the beginning. So, if enough buffers are available, the
3592       marking step aborts and the SATB buffers are processed at
3593       the beginning of the next invocation.
3594 
3595       (4) To yield. When we have to yield, we abort and yield
3596       right at the end of do_marking_step(). This saves us from a lot
3597       of hassle as, by yielding, we might allow a Full GC. If this
3598       happens then objects will be compacted underneath our feet, the
3599       heap might shrink, etc. We save checking for this by just
3600       aborting and doing the yield right at the end.
3601 
3602     From the above it follows that the do_marking_step() method should
3603     be called in a loop (or, otherwise, regularly) until it completes.
3604 
3605     If a marking step completes without its has_aborted() flag being
3606     true, it means it has completed the current marking phase (and
3607     also all other marking tasks have done so and have all synced up).
3608 
3609     A method called regular_clock_call() is invoked "regularly" (in
3610     sub-ms intervals) throughout marking. It is this clock method that
3611     checks all the abort conditions which were mentioned above and
3612     decides when the task should abort. A work-based scheme is used to
3613     trigger this clock method: when the number of object words the
3614     marking phase has scanned or the number of references the marking
3615     phase has visited reach a given limit. Additional invocations of
3616     the clock method have been planted in a few other strategic places
3617     too. The initial reason for the clock method was to avoid calling
3618     vtime too regularly, as it is quite expensive. So, once it was in
3619     place, it was natural to piggy-back all the other conditions on it
3620     too and not constantly check them throughout the code.
3621 
3622     If do_termination is true then do_marking_step will enter its
3623     termination protocol.
3624 
3625     The value of is_serial must be true when do_marking_step is being
3626     called serially (i.e. by the VMThread) and do_marking_step should
3627     skip any synchronization in the termination and overflow code.
3628     Examples include the serial remark code and the serial reference
3629     processing closures.
3630 
3631     The value of is_serial must be false when do_marking_step is
3632     being called by any of the worker threads in a work gang.
3633     Examples include the concurrent marking code (CMMarkingTask),
3634     the MT remark code, and the MT reference processing closures.
3635 
3636  *****************************************************************************/
3637 
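     // Illustrative sketch (not lifted from the actual callers) of how a
     // parallel driver might repeatedly invoke a task, assuming a 'task'
     // pointer and a 'target_ms' time budget:
     //
     //   do {
     //     task->do_marking_step(target_ms,
     //                           true  /* do_termination */,
     //                           false /* is_serial */);
     //     // yield and/or handle overflow between steps as needed
     //   } while (task->has_aborted());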
3638 void CMTask::do_marking_step(double time_target_ms,
3639                              bool do_termination,
3640                              bool is_serial) {
3641   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3642   assert(concurrent() == _cm->concurrent(), "they should be the same");
3643 
3644   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3645   assert(_task_queues != NULL, "invariant");
3646   assert(_task_queue != NULL, "invariant");
3647   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3648 
3649   assert(!_claimed,
3650          "only one thread should claim this task at any one time");
3651 
3652   // OK, this doesn't safeguard against all possible scenarios, as it is
3653   // possible for two threads to set the _claimed flag at the same
3654   // time. But it is only for debugging purposes anyway and it will
3655   // catch most problems.
3656   _claimed = true;
3657 
3658   _start_time_ms = os::elapsedVTime() * 1000.0;
3659   statsOnly( _interval_start_time_ms = _start_time_ms );
3660 
3661   // If do_stealing is true then do_marking_step will attempt to
3662   // steal work from the other CMTasks. It only makes sense to
3663   // enable stealing when the termination protocol is enabled
3664   // and do_marking_step() is not being called serially.
3665   bool do_stealing = do_termination && !is_serial;
3666 
3667   double diff_prediction_ms =
3668     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3669   _time_target_ms = time_target_ms - diff_prediction_ms;
3670 
3671   // set up the variables that are used in the work-based scheme to
3672   // call the regular clock method
3673   _words_scanned = 0;
3674   _refs_reached  = 0;
3675   recalculate_limits();
3676 
3677   // clear all flags
3678   clear_has_aborted();
3679   _has_timed_out = false;
3680   _draining_satb_buffers = false;
3681 
3682   ++_calls;
3683 
3684   if (_cm->verbose_low()) {
3685     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
3686                            "target = %1.2lfms >>>>>>>>>>",
3687                            _worker_id, _calls, _time_target_ms);
3688   }
3689 
3690   // Set up the bitmap and oop closures. Anything that uses them is
3691   // eventually called from this method, so it is OK to allocate these
3692   // on the stack.
3693   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3694   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3695   set_cm_oop_closure(&cm_oop_closure);
3696 
3697   if (_cm->has_overflown()) {
3698     // This can happen if the mark stack overflows during a GC pause
3699     // and this task, after a yield point, restarts. We have to abort
3700     // as we need to get into the overflow protocol which happens
3701     // right at the end of this task.
3702     set_has_aborted();
3703   }
3704 
3705   // First drain any available SATB buffers. After this, we will not
3706   // look at SATB buffers before the next invocation of this method.
3707   // If enough completed SATB buffers are queued up, the regular clock
3708   // will abort this task so that it restarts.
3709   drain_satb_buffers();
3710   // ...then partially drain the local queue and the global stack
3711   drain_local_queue(true);
3712   drain_global_stack(true);
3713 
3714   do {
3715     if (!has_aborted() && _curr_region != NULL) {
3716       // This means that we're already holding on to a region.
3717       assert(_finger != NULL, "if region is not NULL, then the finger "
3718              "should not be NULL either");
3719 
3720       // We might have restarted this task after an evacuation pause
3721       // which might have evacuated the region we're holding on to
3722       // underneath our feet. Let's read its limit again to make sure
3723       // that we do not iterate over a region of the heap that
3724       // contains garbage (update_region_limit() will also move
3725       // _finger to the start of the region if it is found empty).
3726       update_region_limit();
3727       // We will start from _finger not from the start of the region,
3728       // as we might be restarting this task after aborting half-way
3729       // through scanning this region. In this case, _finger points to
3730       // the address where we last found a marked object. If this is a
3731       // fresh region, _finger points to start().
3732       MemRegion mr = MemRegion(_finger, _region_limit);
3733 
3734       if (_cm->verbose_low()) {
3735         gclog_or_tty->print_cr("[%u] we're scanning part "
3736                                "[" PTR_FORMAT ", " PTR_FORMAT ") "
3737                                "of region " HR_FORMAT,
3738                                _worker_id, p2i(_finger), p2i(_region_limit),
3739                                HR_FORMAT_PARAMS(_curr_region));
3740       }
3741 
3742       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
3743              "humongous regions should go around loop once only");
3744 
3745       // Some special cases:
3746       // If the memory region is empty, we can just give up the region.
3747       // If the current region is humongous then we only need to check
3748       // the bitmap for the bit associated with the start of the object,
3749       // scan the object if it's live, and give up the region.
3750       // Otherwise, let's iterate over the bitmap of the part of the region
3751       // that is left.
3752       // If the iteration is successful, give up the region.
3753       if (mr.is_empty()) {
3754         giveup_current_region();
3755         regular_clock_call();
3756       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
3757         if (_nextMarkBitMap->isMarked(mr.start())) {
3758           // The object is marked - apply the closure
3759           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
3760           bitmap_closure.do_bit(offset);
3761         }
3762         // Even if this task aborted while scanning the humongous object
3763         // we can (and should) give up the current region.
3764         giveup_current_region();
3765         regular_clock_call();
3766       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3767         giveup_current_region();
3768         regular_clock_call();
3769       } else {
3770         assert(has_aborted(), "currently the only way to do so");
3771         // The only way to abort the bitmap iteration is to return
3772         // false from the do_bit() method. However, inside the
3773         // do_bit() method we move the _finger to point to the
3774         // object currently being looked at. So, if we bail out, we
3775         // have definitely set _finger to something non-null.
3776         assert(_finger != NULL, "invariant");
3777 
3778         // Region iteration was actually aborted. So now _finger
3779         // points to the address of the object we last scanned. If we
3780         // leave it there, when we restart this task, we will rescan
3781         // the object. It is easy to avoid this. We move the finger by
3782         // enough to point to the next possible object header (the
3783         // bitmap knows by how much we need to move it as it knows its
3784         // granularity).
3785         assert(_finger < _region_limit, "invariant");
3786         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
3787         // Check if bitmap iteration was aborted while scanning the last object
3788         if (new_finger >= _region_limit) {
3789           giveup_current_region();
3790         } else {
3791           move_finger_to(new_finger);
3792         }
3793       }
3794     }
3795     // At this point we have either completed iterating over the
3796     // region we were holding on to, or we have aborted.
3797 
3798     // We then partially drain the local queue and the global stack.
3799     // (Do we really need this?)
3800     drain_local_queue(true);
3801     drain_global_stack(true);
3802 
3803     // Read the note on the claim_region() method on why it might
3804     // return NULL with potentially more regions available for
3805     // claiming and why we have to check out_of_regions() to determine
3806     // whether we're done or not.
3807     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3808       // We are going to try to claim a new region. We should have
3809       // given up on the previous one.
3810       // Separated the asserts so that we know which one fires.
3811       assert(_curr_region  == NULL, "invariant");
3812       assert(_finger       == NULL, "invariant");
3813       assert(_region_limit == NULL, "invariant");
3814       if (_cm->verbose_low()) {
3815         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
3816       }
3817       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3818       if (claimed_region != NULL) {
3819         // Yes, we managed to claim one
3820         statsOnly( ++_regions_claimed );
3821 
3822         if (_cm->verbose_low()) {
3823           gclog_or_tty->print_cr("[%u] we successfully claimed "
3824                                  "region " PTR_FORMAT,
3825                                  _worker_id, p2i(claimed_region));
3826         }
3827 
3828         setup_for_region(claimed_region);
3829         assert(_curr_region == claimed_region, "invariant");
3830       }
3831       // It is important to call the regular clock here. It might take
3832       // a while to claim a region if, for example, we hit a large
3833       // block of empty regions. So we need to call the regular clock
3834       // method once round the loop to make sure it's called
3835       // frequently enough.
3836       regular_clock_call();
3837     }
3838 
3839     if (!has_aborted() && _curr_region == NULL) {
3840       assert(_cm->out_of_regions(),
3841              "at this point we should be out of regions");
3842     }
3843   } while (_curr_region != NULL && !has_aborted());
3844 
3845   if (!has_aborted()) {
3846     // We cannot check whether the global stack is empty, since other
3847     // tasks might be pushing objects to it concurrently.
3848     assert(_cm->out_of_regions(),
3849            "at this point we should be out of regions");
3850 
3851     if (_cm->verbose_low()) {
3852       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
3853     }
3854 
3855     // Try to reduce the number of available SATB buffers so that
3856     // remark has less work to do.
3857     drain_satb_buffers();
3858   }
3859 
3860   // Since we've done everything else, we can now totally drain the
3861   // local queue and global stack.
3862   drain_local_queue(false);
3863   drain_global_stack(false);
3864 
3865   // Attempt at work stealing from other tasks' queues.
3866   if (do_stealing && !has_aborted()) {
3867     // We have not aborted. This means that we have finished all that
3868     // we could. Let's try to do some stealing...
3869 
3870     // We cannot check whether the global stack is empty, since other
3871     // tasks might be pushing objects to it concurrently.
3872     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3873            "only way to reach here");
3874 
3875     if (_cm->verbose_low()) {
3876       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
3877     }
3878 
3879     while (!has_aborted()) {
3880       oop obj;
3881       statsOnly( ++_steal_attempts );
3882 
3883       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
3884         if (_cm->verbose_medium()) {
3885           gclog_or_tty->print_cr("[%u] stolen " PTR_FORMAT " successfully",
3886                                  _worker_id, p2i((void*) obj));
3887         }
3888 
3889         statsOnly( ++_steals );
3890 
3891         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3892                "any stolen object should be marked");
3893         scan_object(obj);
3894 
3895         // And since we're towards the end, let's totally drain the
3896         // local queue and global stack.
3897         drain_local_queue(false);
3898         drain_global_stack(false);
3899       } else {
3900         break;
3901       }
3902     }
3903   }
3904 
3905   // If we are about to wrap up and go into termination, check if we
3906   // should raise the overflow flag.
3907   if (do_termination && !has_aborted()) {
3908     if (_cm->force_overflow()->should_force()) {
3909       _cm->set_has_overflown();
3910       regular_clock_call();
3911     }
3912   }
3913 
3914   // We still haven't aborted. Now, let's try to get into the
3915   // termination protocol.
3916   if (do_termination && !has_aborted()) {
3917     // We cannot check whether the global stack is empty, since other
3918     // tasks might be concurrently pushing objects on it.
3919     // Separated the asserts so that we know which one fires.
3920     assert(_cm->out_of_regions(), "only way to reach here");
3921     assert(_task_queue->size() == 0, "only way to reach here");
3922 
3923     if (_cm->verbose_low()) {
3924       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
3925     }
3926 
3927     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3928 
3929     // The CMTask class also extends the TerminatorTerminator class,
3930     // hence its should_exit_termination() method will also decide
3931     // whether to exit the termination protocol or not.
3932     bool finished = (is_serial ||
3933                      _cm->terminator()->offer_termination(this));
3934     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3935     _termination_time_ms +=
3936       termination_end_time_ms - _termination_start_time_ms;
3937 
3938     if (finished) {
3939       // We're all done.
3940 
3941       if (_worker_id == 0) {
3942         // let's allow task 0 to do this
3943         if (concurrent()) {
3944           assert(_cm->concurrent_marking_in_progress(), "invariant");
3945           // we need to set this to false before the next
3946           // safepoint. This way we ensure that the marking phase
3947           // doesn't observe any more heap expansions.
3948           _cm->clear_concurrent_marking_in_progress();
3949         }
3950       }
3951 
3952       // We can now guarantee that the global stack is empty, since
3953       // all other tasks have finished. We separated the guarantees so
3954       // that, if a condition is false, we can immediately find out
3955       // which one.
3956       guarantee(_cm->out_of_regions(), "only way to reach here");
3957       guarantee(_cm->mark_stack_empty(), "only way to reach here");
3958       guarantee(_task_queue->size() == 0, "only way to reach here");
3959       guarantee(!_cm->has_overflown(), "only way to reach here");
3960       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3961 
3962       if (_cm->verbose_low()) {
3963         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
3964       }
3965     } else {
3966       // Apparently there's more work to do. Let's abort this task. It
3967       // will restart it and we can hopefully find more things to do.
3968 
3969       if (_cm->verbose_low()) {
3970         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
3971                                _worker_id);
3972       }
3973 
3974       set_has_aborted();
3975       statsOnly( ++_aborted_termination );
3976     }
3977   }
3978 
3979   // Mainly for debugging purposes to make sure that a pointer to the
3980   // closure which was stack-allocated in this frame doesn't
3981   // escape it by accident.
3982   set_cm_oop_closure(NULL);
3983   double end_time_ms = os::elapsedVTime() * 1000.0;
3984   double elapsed_time_ms = end_time_ms - _start_time_ms;
3985   // Update the step history.
3986   _step_times_ms.add(elapsed_time_ms);
3987 
3988   if (has_aborted()) {
3989     // The task was aborted for some reason.
3990 
3991     statsOnly( ++_aborted );
3992 
3993     if (_has_timed_out) {
3994       double diff_ms = elapsed_time_ms - _time_target_ms;
3995       // Keep statistics of how well we did with respect to hitting
3996       // our target only if we actually timed out (if we aborted for
3997       // other reasons, then the results might get skewed).
3998       _marking_step_diffs_ms.add(diff_ms);
3999     }
4000 
4001     if (_cm->has_overflown()) {
4002       // This is the interesting one. We aborted because a global
4003       // overflow was raised. This means we have to restart the
4004       // marking phase and start iterating over regions. However, in
4005       // order to do this we have to make sure that all tasks stop
4006       // what they are doing and re-initialize in a safe manner. We
4007       // will achieve this with the use of two barrier sync points.
4008 
4009       if (_cm->verbose_low()) {
4010         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4011       }
4012 
4013       if (!is_serial) {
4014         // We only need to enter the sync barrier if being called
4015         // from a parallel context
4016         _cm->enter_first_sync_barrier(_worker_id);
4017 
4018         // When we exit this sync barrier we know that all tasks have
4019         // stopped doing marking work. So, it's now safe to
4020         // re-initialize our data structures. At the end of this method,
4021         // task 0 will clear the global data structures.
4022       }
4023 
4024       statsOnly( ++_aborted_overflow );
4025 
4026       // We clear the local state of this task...
4027       clear_region_fields();
4028 
4029       if (!is_serial) {
4030         // ...and enter the second barrier.
4031         _cm->enter_second_sync_barrier(_worker_id);
4032       }
4033       // At this point, if we're in the concurrent phase of
4034       // marking, everything has been re-initialized and we're
4035       // ready to restart.
4036     }
4037 
4038     if (_cm->verbose_low()) {
4039       gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4040                              "elapsed = %1.2lfms <<<<<<<<<<",
4041                              _worker_id, _time_target_ms, elapsed_time_ms);
4042       if (_cm->has_aborted()) {
4043         gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4044                                _worker_id);
4045       }
4046     }
4047   } else {
4048     if (_cm->verbose_low()) {
4049       gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4050                              "elapsed = %1.2lfms <<<<<<<<<<",
4051                              _worker_id, _time_target_ms, elapsed_time_ms);
4052     }
4053   }
4054 
4055   _claimed = false;
4056 }
4057 
4058 CMTask::CMTask(uint worker_id,
4059                ConcurrentMark* cm,
4060                size_t* marked_bytes,
4061                BitMap* card_bm,
4062                CMTaskQueue* task_queue,
4063                CMTaskQueueSet* task_queues)
4064   : _g1h(G1CollectedHeap::heap()),
4065     _worker_id(worker_id), _cm(cm),
4066     _claimed(false),
4067     _nextMarkBitMap(NULL), _hash_seed(17),
4068     _task_queue(task_queue),
4069     _task_queues(task_queues),
4070     _cm_oop_closure(NULL),
4071     _marked_bytes_array(marked_bytes),
4072     _card_bm(card_bm) {
4073   guarantee(task_queue != NULL, "invariant");
4074   guarantee(task_queues != NULL, "invariant");
4075 
4076   statsOnly( _clock_due_to_scanning = 0;
4077              _clock_due_to_marking  = 0 );
4078 
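  // Presumably this seeds the history of (elapsed - target) step times
  // with a small non-zero sample, so that the very first time-target
  // prediction is not drawn from an empty sequence; the 0.5 ms value
  // looks like an arbitrary starting guess rather than a measurement.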
4079   _marking_step_diffs_ms.add(0.5);
4080 }
4081 
4082 // These are formatting macros that are used below to ensure
4083 // consistent output. The *_H_* versions format the header for a
4084 // particular value and should be kept in sync with the corresponding
4085 // value macro. Also note that most of the macros add the necessary
4086 // white space (as a prefix), which makes them a bit easier to
4087 // compose. An illustrative composed line follows the definitions.
4088 
4089 // All the output lines are prefixed with this string to be able to
4090 // identify them easily in a large log file.
4091 #define G1PPRL_LINE_PREFIX            "###"
4092 
4093 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
4094 #ifdef _LP64
4095 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4096 #else // _LP64
4097 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4098 #endif // _LP64
4099 
4100 // For per-region info
4101 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4102 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4103 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
4104 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4105 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4106 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4107 
4108 // For summary info
4109 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
4110 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
4111 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
4112 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
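
// For illustration only (the values and spacing are made up), a header
// line and a matching per-region line composed from the macros above
// look roughly like this:
//
//   ###  type          address-range        used  prev-live  next-live ...
//   ###   OLD 0xf0000000-0xf0100000      1048576     524288     524288 ...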
4113 
4114 G1PrintRegionLivenessInfoClosure::
4115 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4116   : _out(out),
4117     _total_used_bytes(0), _total_capacity_bytes(0),
4118     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4119     _hum_used_bytes(0), _hum_capacity_bytes(0),
4120     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
4121     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
4122   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4123   MemRegion g1_reserved = g1h->g1_reserved();
4124   double now = os::elapsedTime();
4125 
4126   // Print the header of the output.
4127   _out->cr();
4128   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4129   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4130                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4131                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4132                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4133                  HeapRegion::GrainBytes);
4134   _out->print_cr(G1PPRL_LINE_PREFIX);
4135   _out->print_cr(G1PPRL_LINE_PREFIX
4136                 G1PPRL_TYPE_H_FORMAT
4137                 G1PPRL_ADDR_BASE_H_FORMAT
4138                 G1PPRL_BYTE_H_FORMAT
4139                 G1PPRL_BYTE_H_FORMAT
4140                 G1PPRL_BYTE_H_FORMAT
4141                 G1PPRL_DOUBLE_H_FORMAT
4142                 G1PPRL_BYTE_H_FORMAT
4143                 G1PPRL_BYTE_H_FORMAT,
4144                 "type", "address-range",
4145                 "used", "prev-live", "next-live", "gc-eff",
4146                 "remset", "code-roots");
4147   _out->print_cr(G1PPRL_LINE_PREFIX
4148                 G1PPRL_TYPE_H_FORMAT
4149                 G1PPRL_ADDR_BASE_H_FORMAT
4150                 G1PPRL_BYTE_H_FORMAT
4151                 G1PPRL_BYTE_H_FORMAT
4152                 G1PPRL_BYTE_H_FORMAT
4153                 G1PPRL_DOUBLE_H_FORMAT
4154                 G1PPRL_BYTE_H_FORMAT
4155                 G1PPRL_BYTE_H_FORMAT,
4156                 "", "",
4157                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4158                 "(bytes)", "(bytes)");
4159 }
4160 
4161 // It takes a pointer to one of the _hum_* fields, deduces the
4162 // corresponding value for a region in a humongous region series
4163 // (either the region size, or whatever is left if the field is
4164 // smaller), and updates the field. A worked example follows the function.
4165 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4166   size_t bytes = 0;
4167   // The > 0 check is to deal with the prev and next live bytes which
4168   // could be 0.
4169   if (*hum_bytes > 0) {
4170     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4171     *hum_bytes -= bytes;
4172   }
4173   return bytes;
4174 }
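
// A worked example with illustrative numbers: if HeapRegion::GrainBytes
// were 1M and the "starts humongous" region had recorded 2.5M of used
// bytes for the whole series in _hum_used_bytes, successive calls on
// that field would return 1M, 1M and 0.5M for the three regions of the
// series, leaving the field at zero (any further call would return 0).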
4175 
4176 // It deduces the values for a region in a humongous region series
4177 // from the _hum_* fields and updates those accordingly. It assumes
4178 // that the _hum_* fields have already been set up from the "starts
4179 // humongous" region and that we visit the regions in address order.
4180 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4181                                                      size_t* capacity_bytes,
4182                                                      size_t* prev_live_bytes,
4183                                                      size_t* next_live_bytes) {
4184   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4185   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4186   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4187   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4188   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4189 }
4190 
4191 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4192   const char* type       = r->get_type_str();
4193   HeapWord* bottom       = r->bottom();
4194   HeapWord* end          = r->end();
4195   size_t capacity_bytes  = r->capacity();
4196   size_t used_bytes      = r->used();
4197   size_t prev_live_bytes = r->live_bytes();
4198   size_t next_live_bytes = r->next_live_bytes();
4199   double gc_eff          = r->gc_efficiency();
4200   size_t remset_bytes    = r->rem_set()->mem_size();
4201   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4202 
4203   if (r->is_starts_humongous()) {
4204     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4205            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4206            "they should have been zeroed after the last time we used them");
4207     // Set up the _hum_* fields.
4208     _hum_capacity_bytes  = capacity_bytes;
4209     _hum_used_bytes      = used_bytes;
4210     _hum_prev_live_bytes = prev_live_bytes;
4211     _hum_next_live_bytes = next_live_bytes;
4212     get_hum_bytes(&used_bytes, &capacity_bytes,
4213                   &prev_live_bytes, &next_live_bytes);
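    // Clamp the printed address range to a single region: presumably
    // end() of a "starts humongous" region covers the whole series in
    // this code, whereas the continues-humongous branch below asserts
    // the normal single-region end.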
4214     end = bottom + HeapRegion::GrainWords;
4215   } else if (r->is_continues_humongous()) {
4216     get_hum_bytes(&used_bytes, &capacity_bytes,
4217                   &prev_live_bytes, &next_live_bytes);
4218     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4219   }
4220 
4221   _total_used_bytes      += used_bytes;
4222   _total_capacity_bytes  += capacity_bytes;
4223   _total_prev_live_bytes += prev_live_bytes;
4224   _total_next_live_bytes += next_live_bytes;
4225   _total_remset_bytes    += remset_bytes;
4226   _total_strong_code_roots_bytes += strong_code_roots_bytes;
4227 
4228   // Print a line for this particular region.
4229   _out->print_cr(G1PPRL_LINE_PREFIX
4230                  G1PPRL_TYPE_FORMAT
4231                  G1PPRL_ADDR_BASE_FORMAT
4232                  G1PPRL_BYTE_FORMAT
4233                  G1PPRL_BYTE_FORMAT
4234                  G1PPRL_BYTE_FORMAT
4235                  G1PPRL_DOUBLE_FORMAT
4236                  G1PPRL_BYTE_FORMAT
4237                  G1PPRL_BYTE_FORMAT,
4238                  type, p2i(bottom), p2i(end),
4239                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4240                  remset_bytes, strong_code_roots_bytes);
4241 
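  // Returning false tells the heap region iteration to continue with
  // the next region.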
4242   return false;
4243 }
4244 
4245 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4246   // Add the static memory usages to the remembered set sizes.
4247   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
4248   // Print the footer of the output.
4249   _out->print_cr(G1PPRL_LINE_PREFIX);
4250   _out->print_cr(G1PPRL_LINE_PREFIX
4251                  " SUMMARY"
4252                  G1PPRL_SUM_MB_FORMAT("capacity")
4253                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4254                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4255                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
4256                  G1PPRL_SUM_MB_FORMAT("remset")
4257                  G1PPRL_SUM_MB_FORMAT("code-roots"),
4258                  bytes_to_mb(_total_capacity_bytes),
4259                  bytes_to_mb(_total_used_bytes),
4260                  perc(_total_used_bytes, _total_capacity_bytes),
4261                  bytes_to_mb(_total_prev_live_bytes),
4262                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4263                  bytes_to_mb(_total_next_live_bytes),
4264                  perc(_total_next_live_bytes, _total_capacity_bytes),
4265                  bytes_to_mb(_total_remset_bytes),
4266                  bytes_to_mb(_total_strong_code_roots_bytes));
4267   _out->cr();
4268 }
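
// A minimal usage sketch, based on how this closure is driven elsewhere
// in this file (the phase name below is illustrative):
//
//   G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);
//   // When cl goes out of scope the destructor prints the SUMMARY footer.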