1 /*
   2  * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMarkThread.inline.hpp"
  30 #include "gc/g1/g1CollectedHeap.inline.hpp"
  31 #include "gc/g1/g1CollectorPolicy.hpp"
  32 #include "gc/g1/g1CollectorState.hpp"
  33 #include "gc/g1/g1ConcurrentMark.inline.hpp"
  34 #include "gc/g1/g1HeapVerifier.hpp"
  35 #include "gc/g1/g1OopClosures.inline.hpp"
  36 #include "gc/g1/g1StringDedup.hpp"
  37 #include "gc/g1/heapRegion.inline.hpp"
  38 #include "gc/g1/heapRegionRemSet.hpp"
  39 #include "gc/g1/heapRegionSet.inline.hpp"
  40 #include "gc/g1/suspendibleThreadSet.hpp"
  41 #include "gc/shared/gcId.hpp"
  42 #include "gc/shared/gcTimer.hpp"
  43 #include "gc/shared/gcTrace.hpp"
  44 #include "gc/shared/gcTraceTime.inline.hpp"
  45 #include "gc/shared/genOopClosures.inline.hpp"
  46 #include "gc/shared/referencePolicy.hpp"
  47 #include "gc/shared/strongRootsScope.hpp"
  48 #include "gc/shared/taskqueue.inline.hpp"
  49 #include "gc/shared/vmGCOperations.hpp"
  50 #include "logging/log.hpp"
  51 #include "memory/allocation.hpp"
  52 #include "memory/resourceArea.hpp"
  53 #include "oops/oop.inline.hpp"
  54 #include "runtime/atomic.inline.hpp"
  55 #include "runtime/handles.inline.hpp"
  56 #include "runtime/java.hpp"
  57 #include "runtime/prefetch.inline.hpp"
  58 #include "services/memTracker.hpp"
  59 
  60 // Concurrent marking bit map wrapper
  61 
  62 G1CMBitMapRO::G1CMBitMapRO(int shifter) :
  63   _bm(),
  64   _shifter(shifter) {
  65   _bmStartWord = 0;
  66   _bmWordSize = 0;
  67 }
  68 
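// Returns the address of the first marked word at or after addr (rounded up
// to a possible object boundary), or limit if no marked word is found below limit.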
  69 HeapWord* G1CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  70                                                  const HeapWord* limit) const {
  71   // First we must round addr *up* to a possible object boundary.
  72   addr = (HeapWord*)align_size_up((intptr_t)addr,
  73                                   HeapWordSize << _shifter);
  74   size_t addrOffset = heapWordToOffset(addr);
  75   assert(limit != NULL, "limit must not be NULL");
  76   size_t limitOffset = heapWordToOffset(limit);
  77   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  78   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  79   assert(nextAddr >= addr, "get_next_one postcondition");
  80   assert(nextAddr == limit || isMarked(nextAddr),
  81          "get_next_one postcondition");
  82   return nextAddr;
  83 }
  84 
  85 #ifndef PRODUCT
  86 bool G1CMBitMapRO::covers(MemRegion heap_rs) const {
  87   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  88   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
  89          "size inconsistency");
  90   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
  91          _bmWordSize  == heap_rs.word_size();
  92 }
  93 #endif
  94 
  95 void G1CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  96   _bm.print_on_error(st, prefix);
  97 }
  98 
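// Returns the size, in bytes, of a bitmap covering heap_size bytes of heap:
// one bitmap byte covers mark_distance() bytes of heap. The result is rounded
// up to the space-reservation alignment.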
  99 size_t G1CMBitMap::compute_size(size_t heap_size) {
 100   return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
 101 }
 102 
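// Number of heap bytes covered by a single byte of the mark bitmap;
// each bitmap bit covers MinObjAlignmentInBytes bytes of heap.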
 103 size_t G1CMBitMap::mark_distance() {
 104   return MinObjAlignmentInBytes * BitsPerByte;
 105 }
 106 
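// Hooks the bitmap up to its pre-reserved backing storage. The registered
// listener clears newly committed parts of the bitmap unless they are
// already zero-filled.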
 107 void G1CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 108   _bmStartWord = heap.start();
 109   _bmWordSize = heap.word_size();
 110 
 111   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
 112   _bm.set_size(_bmWordSize >> _shifter);
 113 
 114   storage->set_mapping_changed_listener(&_listener);
 115 }
 116 
 117 void G1CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 118   if (zero_filled) {
 119     return;
 120   }
 121   // We need to clear the bitmap on commit, removing any existing information.
 122   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 123   _bm->clearRange(mr);
 124 }
 125 
 126 // Closure used for clearing the given mark bitmap.
 127 class ClearBitmapHRClosure : public HeapRegionClosure {
 128  private:
 129   G1ConcurrentMark* _cm;
 130   G1CMBitMap* _bitmap;
 131   bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 132  public:
 133   ClearBitmapHRClosure(G1ConcurrentMark* cm, G1CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
 134     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
 135   }
 136 
 137   virtual bool doHeapRegion(HeapRegion* r) {
 138     size_t const chunk_size_in_words = M / HeapWordSize;
 139 
 140     HeapWord* cur = r->bottom();
 141     HeapWord* const end = r->end();
 142 
 143     while (cur < end) {
 144       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 145       _bitmap->clearRange(mr);
 146 
 147       cur += chunk_size_in_words;
 148 
 149       // Abort iteration if after yielding the marking has been aborted.
 150       if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
 151         return true;
 152       }
 153       // Repeat the asserts from before the start of the closure. We will do them
 154       // as asserts here to minimize their overhead on the product. However, we
 155       // will have them as guarantees at the beginning / end of the bitmap
 156       // clearing to get some checking in the product.
 157       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
 158       assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
 159     }
 160 
 161     return false;
 162   }
 163 };
 164 
 165 class ParClearNextMarkBitmapTask : public AbstractGangTask {
 166   ClearBitmapHRClosure* _cl;
 167   HeapRegionClaimer     _hrclaimer;
 168   bool                  _suspendible; // If the task is suspendible, workers must join the STS.
 169 
 170 public:
  ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
      AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}
 173 
 174   void work(uint worker_id) {
 175     SuspendibleThreadSetJoiner sts_join(_suspendible);
 176     G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
 177   }
 178 };
 179 
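// Clears the entire bitmap in parallel using the heap's worker gang. The
// closure does not yield (may_yield is false) and the workers do not join
// the suspendible thread set.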
 180 void G1CMBitMap::clearAll() {
 181   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 182   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
 183   uint n_workers = g1h->workers()->active_workers();
 184   ParClearNextMarkBitmapTask task(&cl, n_workers, false);
 185   g1h->workers()->run_task(&task);
 186   guarantee(cl.complete(), "Must have completed iteration.");
 187   return;
 188 }
 189 
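// Clears the bitmap bits corresponding to the given (non-empty) heap word range.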
 190 void G1CMBitMap::clearRange(MemRegion mr) {
 191   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 192   assert(!mr.is_empty(), "unexpected empty region");
 193   // convert address range into offset range
 194   _bm.at_put_range(heapWordToOffset(mr.start()),
 195                    heapWordToOffset(mr.end()), false);
 196 }
 197 
 198 G1CMMarkStack::G1CMMarkStack(G1ConcurrentMark* cm) :
 199   _base(NULL), _cm(cm)
 200 {}
 201 
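// Reserves and commits backing store for a mark stack of 'capacity' oops.
// Returns false, after logging a warning, if either step fails.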
 202 bool G1CMMarkStack::allocate(size_t capacity) {
 203   // allocate a stack of the requisite depth
 204   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 205   if (!rs.is_reserved()) {
 206     log_warning(gc)("ConcurrentMark MarkStack allocation failure");
 207     return false;
 208   }
 209   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 210   if (!_virtual_space.initialize(rs, rs.size())) {
 211     log_warning(gc)("ConcurrentMark MarkStack backing store failure");
 212     // Release the virtual memory reserved for the marking stack
 213     rs.release();
 214     return false;
 215   }
 216   assert(_virtual_space.committed_size() == rs.size(),
 217          "Didn't reserve backing store for all of G1ConcurrentMark stack?");
 218   _base = (oop*) _virtual_space.low();
 219   setEmpty();
 220   _capacity = (jint) capacity;
 221   _saved_index = -1;
 222   _should_expand = false;
 223   return true;
 224 }
 225 
 226 void G1CMMarkStack::expand() {
  // Called during remark if we've overflowed the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
 229   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 230   // Clear expansion flag
 231   _should_expand = false;
 232   if (_capacity == (jint) MarkStackSizeMax) {
 233     log_trace(gc)("(benign) Can't expand marking stack capacity, at max size limit");
 234     return;
 235   }
 236   // Double capacity if possible
 237   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up the existing stack until we have managed to
  // reserve the doubled capacity that we want.
 240   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 241                                                            sizeof(oop)));
 242   if (rs.is_reserved()) {
 243     // Release the backing store associated with old stack
 244     _virtual_space.release();
 245     // Reinitialize virtual space for new stack
 246     if (!_virtual_space.initialize(rs, rs.size())) {
 247       fatal("Not enough swap for expanded marking stack capacity");
 248     }
 249     _base = (oop*)(_virtual_space.low());
 250     _index = 0;
 251     _capacity = new_capacity;
 252   } else {
    // Failed to double the capacity; continue with the existing stack.
 254     log_trace(gc)("(benign) Failed to expand marking stack capacity from " SIZE_FORMAT "K to " SIZE_FORMAT "K",
 255                   _capacity / K, new_capacity / K);
 256   }
 257 }
 258 
 259 void G1CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
 263   _should_expand = _cm->has_overflown();
 264 }
 265 
 266 G1CMMarkStack::~G1CMMarkStack() {
 267   if (_base != NULL) {
 268     _base = NULL;
 269     _virtual_space.release();
 270   }
 271 }
 272 
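// Pushes the n entries of ptr_arr onto the global stack under
// ParGCRareEvent_lock. If there is not enough room, nothing is pushed and
// the overflow flag is set instead.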
 273 void G1CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 274   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 275   jint start = _index;
 276   jint next_index = start + n;
 277   if (next_index > _capacity) {
 278     _overflow = true;
 279     return;
 280   }
 281   // Otherwise.
 282   _index = next_index;
 283   for (int i = 0; i < n; i++) {
 284     int ind = start + i;
 285     assert(ind < _capacity, "By overflow test above.");
 286     _base[ind] = ptr_arr[i];
 287   }
 288 }
 289 
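// Pops up to 'max' entries into ptr_arr under ParGCRareEvent_lock. Returns
// false if the stack is empty, otherwise stores the number of entries popped
// in *n and returns true.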
 290 bool G1CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 291   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 292   jint index = _index;
 293   if (index == 0) {
 294     *n = 0;
 295     return false;
 296   } else {
 297     int k = MIN2(max, index);
 298     jint  new_ind = index - k;
 299     for (int j = 0; j < k; j++) {
 300       ptr_arr[j] = _base[new_ind + j];
 301     }
 302     _index = new_ind;
 303     *n = k;
 304     return true;
 305   }
 306 }
 307 
 308 void G1CMMarkStack::note_start_of_gc() {
 309   assert(_saved_index == -1,
 310          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 311   _saved_index = _index;
 312 }
 313 
 314 void G1CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue, so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
 320   guarantee(_saved_index == _index,
 321             "saved index: %d index: %d", _saved_index, _index);
 322   _saved_index = -1;
 323 }
 324 
 325 G1CMRootRegions::G1CMRootRegions() :
 326   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 327   _should_abort(false),  _next_survivor(NULL) { }
 328 
 329 void G1CMRootRegions::init(G1CollectedHeap* g1h, G1ConcurrentMark* cm) {
 330   _young_list = g1h->young_list();
 331   _cm = cm;
 332 }
 333 
 334 void G1CMRootRegions::prepare_for_scan() {
 335   assert(!scan_in_progress(), "pre-condition");
 336 
 337   // Currently, only survivors can be root regions.
 338   assert(_next_survivor == NULL, "pre-condition");
 339   _next_survivor = _young_list->first_survivor_region();
 340   _scan_in_progress = (_next_survivor != NULL);
 341   _should_abort = false;
 342 }
 343 
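// Claims the next root region to scan, or returns NULL if there are no more
// root regions or the scan should be aborted.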
 344 HeapRegion* G1CMRootRegions::claim_next() {
 345   if (_should_abort) {
 346     // If someone has set the should_abort flag, we return NULL to
 347     // force the caller to bail out of their loop.
 348     return NULL;
 349   }
 350 
 351   // Currently, only survivors can be root regions.
 352   HeapRegion* res = _next_survivor;
 353   if (res != NULL) {
 354     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 355     // Read it again in case it changed while we were waiting for the lock.
 356     res = _next_survivor;
 357     if (res != NULL) {
 358       if (res == _young_list->last_survivor_region()) {
 359         // We just claimed the last survivor so store NULL to indicate
 360         // that we're done.
 361         _next_survivor = NULL;
 362       } else {
 363         _next_survivor = res->get_next_young_region();
 364       }
 365     } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock, so there is nothing else to do.
 368     }
 369   }
 370   assert(res == NULL || res->is_survivor(), "post-condition");
 371 
 372   return res;
 373 }
 374 
 375 void G1CMRootRegions::notify_scan_done() {
 376   MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 377   _scan_in_progress = false;
 378   RootRegionScan_lock->notify_all();
 379 }
 380 
 381 void G1CMRootRegions::cancel_scan() {
 382   notify_scan_done();
 383 }
 384 
 385 void G1CMRootRegions::scan_finished() {
 386   assert(scan_in_progress(), "pre-condition");
 387 
 388   // Currently, only survivors can be root regions.
 389   if (!_should_abort) {
 390     assert(_next_survivor == NULL, "we should have claimed all survivors");
 391   }
 392   _next_survivor = NULL;
 393 
 394   notify_scan_done();
 395 }
 396 
 397 bool G1CMRootRegions::wait_until_scan_finished() {
 398   if (!scan_in_progress()) return false;
 399 
 400   {
 401     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 402     while (scan_in_progress()) {
 403       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 404     }
 405   }
 406   return true;
 407 }
 408 
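// Returns roughly a quarter of the given number of parallel GC threads,
// but always at least one.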
 409 uint G1ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 410   return MAX2((n_par_threads + 2) / 4, 1U);
 411 }
 412 
 413 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 414   _g1h(g1h),
 415   _markBitMap1(),
 416   _markBitMap2(),
 417   _parallel_marking_threads(0),
 418   _max_parallel_marking_threads(0),
 419   _sleep_factor(0.0),
 420   _marking_task_overhead(1.0),
 421   _cleanup_list("Cleanup List"),
 422   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 423   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
 424             CardTableModRefBS::card_shift,
 425             false /* in_resource_area*/),
 426 
 427   _prevMarkBitMap(&_markBitMap1),
 428   _nextMarkBitMap(&_markBitMap2),
 429 
 430   _markStack(this),
 431   // _finger set in set_non_marking_state
 432 
 433   _max_worker_id(ParallelGCThreads),
 434   // _active_tasks set in set_non_marking_state
 435   // _tasks set inside the constructor
 436   _task_queues(new G1CMTaskQueueSet((int) _max_worker_id)),
 437   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 438 
 439   _has_overflown(false),
 440   _concurrent(false),
 441   _has_aborted(false),
 442   _restart_for_overflow(false),
 443   _concurrent_marking_in_progress(false),
 444   _concurrent_phase_status(ConcPhaseNotStarted),
 445 
 446   // _verbose_level set below
 447 
 448   _init_times(),
 449   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 450   _cleanup_times(),
 451   _total_counting_time(0.0),
 452   _total_rs_scrub_time(0.0),
 453 
 454   _parallel_workers(NULL),
 455 
 456   _count_card_bitmaps(NULL),
 457   _count_marked_bytes(NULL),
 458   _completed_initialization(false) {
 459 
 460   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 461   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 462 
 463   // Create & start a ConcurrentMark thread.
 464   _cmThread = new ConcurrentMarkThread(this);
 465   assert(cmThread() != NULL, "CM Thread should have been created");
 466   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 467   if (_cmThread->osthread() == NULL) {
 468       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 469   }
 470 
 471   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 472   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 473   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 474 
 475   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 476   satb_qs.set_buffer_size(G1SATBBufferSize);
 477 
 478   _root_regions.init(_g1h, this);
 479 
 480   if (ConcGCThreads > ParallelGCThreads) {
 481     log_warning(gc)("Can't have more ConcGCThreads (%u) than ParallelGCThreads (%u).",
 482                     ConcGCThreads, ParallelGCThreads);
 483     return;
 484   }
 485   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 486     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 487     // if both are set
 488     _sleep_factor             = 0.0;
 489     _marking_task_overhead    = 1.0;
 490   } else if (G1MarkingOverheadPercent > 0) {
 491     // We will calculate the number of parallel marking threads based
 492     // on a target overhead with respect to the soft real-time goal
 493     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 494     double overall_cm_overhead =
 495       (double) MaxGCPauseMillis * marking_overhead /
 496       (double) GCPauseIntervalMillis;
 497     double cpu_ratio = 1.0 / (double) os::processor_count();
 498     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
    double marking_task_overhead =
      overall_cm_overhead / marking_thread_num * (double) os::processor_count();
    double sleep_factor =
      (1.0 - marking_task_overhead) / marking_task_overhead;
 504 
 505     FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
 506     _sleep_factor             = sleep_factor;
 507     _marking_task_overhead    = marking_task_overhead;
 508   } else {
 509     // Calculate the number of parallel marking threads by scaling
 510     // the number of parallel GC threads.
 511     uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
 512     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 513     _sleep_factor             = 0.0;
 514     _marking_task_overhead    = 1.0;
 515   }
 516 
 517   assert(ConcGCThreads > 0, "Should have been set");
 518   _parallel_marking_threads = ConcGCThreads;
 519   _max_parallel_marking_threads = _parallel_marking_threads;
 520 
 521   _parallel_workers = new WorkGang("G1 Marker",
 522        _max_parallel_marking_threads, false, true);
 523   if (_parallel_workers == NULL) {
 524     vm_exit_during_initialization("Failed necessary allocation.");
 525   } else {
 526     _parallel_workers->initialize_workers();
 527   }
 528 
 529   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 530     size_t mark_stack_size =
 531       MIN2(MarkStackSizeMax,
 532           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 533     // Verify that the calculated value for MarkStackSize is in range.
 534     // It would be nice to use the private utility routine from Arguments.
 535     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 536       log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 537                       "must be between 1 and " SIZE_FORMAT,
 538                       mark_stack_size, MarkStackSizeMax);
 539       return;
 540     }
 541     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 542   } else {
 543     // Verify MarkStackSize is in range.
 544     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 545       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 546         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 547           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 548                           "must be between 1 and " SIZE_FORMAT,
 549                           MarkStackSize, MarkStackSizeMax);
 550           return;
 551         }
 552       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 553         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 554           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 555                           " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 556                           MarkStackSize, MarkStackSizeMax);
 557           return;
 558         }
 559       }
 560     }
 561   }
 562 
 563   if (!_markStack.allocate(MarkStackSize)) {
 564     log_warning(gc)("Failed to allocate CM marking stack");
 565     return;
 566   }
 567 
 568   _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC);
 569   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 570 
 571   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 572   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 573 
 574   BitMap::idx_t card_bm_size = _card_bm.size();
 575 
 576   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 577   _active_tasks = _max_worker_id;
 578 
 579   uint max_regions = _g1h->max_regions();
 580   for (uint i = 0; i < _max_worker_id; ++i) {
 581     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
 582     task_queue->initialize();
 583     _task_queues->register_queue(i, task_queue);
 584 
 585     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 586     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 587 
 588     _tasks[i] = new G1CMTask(i, this,
 589                              _count_marked_bytes[i],
 590                              &_count_card_bitmaps[i],
 591                              task_queue, _task_queues);
 592 
 593     _accum_task_vtime[i] = 0.0;
 594   }
 595 
 596   // Calculate the card number for the bottom of the heap. Used
 597   // in biasing indexes into the accounting card bitmaps.
 598   _heap_bottom_card_num =
 599     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 600                                 CardTableModRefBS::card_shift);
 601 
 602   // Clear all the liveness counting data
 603   clear_all_count_data();
 604 
 605   // so that the call below can read a sensible value
 606   _heap_start = g1h->reserved_region().start();
 607   set_non_marking_state();
 608   _completed_initialization = true;
 609 }
 610 
 611 void G1ConcurrentMark::reset() {
 612   // Starting values for these two. This should be called in a STW
 613   // phase.
 614   MemRegion reserved = _g1h->g1_reserved();
 615   _heap_start = reserved.start();
 616   _heap_end   = reserved.end();
 617 
 618   // Separated the asserts so that we know which one fires.
 619   assert(_heap_start != NULL, "heap bounds should look ok");
 620   assert(_heap_end != NULL, "heap bounds should look ok");
 621   assert(_heap_start < _heap_end, "heap bounds should look ok");
 622 
 623   // Reset all the marking data structures and any necessary flags
 624   reset_marking_state();
 625 
 626   // We do reset all of them, since different phases will use
 627   // different number of active threads. So, it's easiest to have all
 628   // of them ready.
 629   for (uint i = 0; i < _max_worker_id; ++i) {
 630     _tasks[i]->reset(_nextMarkBitMap);
 631   }
 632 
 633   // we need this to make sure that the flag is on during the evac
 634   // pause with initial mark piggy-backed
 635   set_concurrent_marking_in_progress();
 636 }
 637 
 638 
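// Empties the global mark stack and all the task queues and resets the
// global finger to the bottom of the heap. The overflow flag is either
// cleared or, if 'clear_overflow' is false, asserted to still be set.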
 639 void G1ConcurrentMark::reset_marking_state(bool clear_overflow) {
 640   _markStack.set_should_expand();
 641   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 642   if (clear_overflow) {
 643     clear_has_overflown();
 644   } else {
 645     assert(has_overflown(), "pre-condition");
 646   }
 647   _finger = _heap_start;
 648 
 649   for (uint i = 0; i < _max_worker_id; ++i) {
 650     G1CMTaskQueue* queue = _task_queues->queue(i);
 651     queue->set_empty();
 652   }
 653 }
 654 
 655 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
 656   assert(active_tasks <= _max_worker_id, "we should not have more");
 657 
 658   _active_tasks = active_tasks;
 659   // Need to update the three data structures below according to the
 660   // number of active threads for this phase.
 661   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 662   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 663   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 664 }
 665 
 666 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 667   set_concurrency(active_tasks);
 668 
 669   _concurrent = concurrent;
 670   // We propagate this to all tasks, not just the active ones.
 671   for (uint i = 0; i < _max_worker_id; ++i)
 672     _tasks[i]->set_concurrent(concurrent);
 673 
 674   if (concurrent) {
 675     set_concurrent_marking_in_progress();
 676   } else {
 677     // We currently assume that the concurrent flag has been set to
 678     // false before we start remark. At this point we should also be
 679     // in a STW phase.
 680     assert(!concurrent_marking_in_progress(), "invariant");
 681     assert(out_of_regions(),
 682            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 683            p2i(_finger), p2i(_heap_end));
 684   }
 685 }
 686 
 687 void G1ConcurrentMark::set_non_marking_state() {
 688   // We set the global marking state to some default values when we're
 689   // not doing marking.
 690   reset_marking_state();
 691   _active_tasks = 0;
 692   clear_concurrent_marking_in_progress();
 693 }
 694 
 695 G1ConcurrentMark::~G1ConcurrentMark() {
 696   // The G1ConcurrentMark instance is never freed.
 697   ShouldNotReachHere();
 698 }
 699 
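// Clears the next marking bitmap concurrently, yielding to safepoint
// requests, in preparation for the next marking cycle.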
 700 void G1ConcurrentMark::clearNextBitmap() {
 701   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 702 
  // Make sure that the concurrent mark thread still appears to be in
  // the current cycle.
 705   guarantee(cmThread()->during_cycle(), "invariant");
 706 
 707   // We are finishing up the current cycle by clearing the next
 708   // marking bitmap and getting it ready for the next cycle. During
 709   // this time no other cycle can start. So, let's make sure that this
 710   // is the case.
 711   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 712 
 713   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
 714   ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
 715   _parallel_workers->run_task(&task);
 716 
 717   // Clear the liveness counting data. If the marking has been aborted, the abort()
 718   // call already did that.
 719   if (cl.complete()) {
 720     clear_all_count_data();
 721   }
 722 
 723   // Repeat the asserts from above.
 724   guarantee(cmThread()->during_cycle(), "invariant");
 725   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 726 }
 727 
 728 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 729   G1CMBitMap* _bitmap;
 730   bool _error;
 731  public:
 732   CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
 733   }
 734 
 735   virtual bool doHeapRegion(HeapRegion* r) {
    // This closure can be called concurrently with the mutator, so we must make sure
    // that the result of the getNextMarkedWordAddress() call is compared to the
    // limit value passed to it in order to detect any marked bits.
    // end never changes in G1.
 740     HeapWord* end = r->end();
 741     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 742   }
 743 };
 744 
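// Returns true if no bits are set in the next marking bitmap. Used for
// verification purposes.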
 745 bool G1ConcurrentMark::nextMarkBitmapIsClear() {
 746   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 747   _g1h->heap_region_iterate(&cl);
 748   return cl.complete();
 749 }
 750 
 751 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 752 public:
 753   bool doHeapRegion(HeapRegion* r) {
 754     r->note_start_of_marking();
 755     return false;
 756   }
 757 };
 758 
 759 void G1ConcurrentMark::checkpointRootsInitialPre() {
 760   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 761   G1CollectorPolicy* g1p = g1h->g1_policy();
 762 
 763   _has_aborted = false;
 764 
 765   // Initialize marking structures. This has to be done in a STW phase.
 766   reset();
 767 
 768   // For each region note start of marking.
 769   NoteStartOfMarkHRClosure startcl;
 770   g1h->heap_region_iterate(&startcl);
 771 }
 772 
 773 
 774 void G1ConcurrentMark::checkpointRootsInitialPost() {
 775   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 776 
 777   // Start Concurrent Marking weak-reference discovery.
 778   ReferenceProcessor* rp = g1h->ref_processor_cm();
 779   // enable ("weak") refs discovery
 780   rp->enable_discovery();
 781   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 782 
 783   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
 786   satb_mq_set.set_active_all_threads(true, /* new active value */
 787                                      false /* expected_active */);
 788 
 789   _root_regions.prepare_for_scan();
 790 
 791   // update_g1_committed() will be called at the end of an evac pause
 792   // when marking is on. So, it's also called at the end of the
 793   // initial-mark pause to update the heap end, if the heap expands
 794   // during it. No need to call it here.
 795 }
 796 
 797 /*
 798  * Notice that in the next two methods, we actually leave the STS
 799  * during the barrier sync and join it immediately afterwards. If we
 800  * do not do this, the following deadlock can occur: one thread could
 801  * be in the barrier sync code, waiting for the other thread to also
 802  * sync up, whereas another one could be trying to yield, while also
 803  * waiting for the other threads to sync up too.
 804  *
 805  * Note, however, that this code is also used during remark and in
 806  * this case we should not attempt to leave / enter the STS, otherwise
 807  * we'll either hit an assert (debug / fastdebug) or deadlock
 808  * (product). So we should only leave / enter the STS if we are
 809  * operating concurrently.
 810  *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended while a Full GC or an evacuation
 * pause occurs. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 816  */
 817 
 818 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 819   bool barrier_aborted;
 820   {
 821     SuspendibleThreadSetLeaver sts_leave(concurrent());
 822     barrier_aborted = !_first_overflow_barrier_sync.enter();
 823   }
 824 
 825   // at this point everyone should have synced up and not be doing any
 826   // more work
 827 
 828   if (barrier_aborted) {
 829     // If the barrier aborted we ignore the overflow condition and
 830     // just abort the whole marking phase as quickly as possible.
 831     return;
 832   }
 833 
 834   // If we're executing the concurrent phase of marking, reset the marking
 835   // state; otherwise the marking state is reset after reference processing,
 836   // during the remark pause.
 837   // If we reset here as a result of an overflow during the remark we will
 838   // see assertion failures from any subsequent set_concurrency_and_phase()
 839   // calls.
 840   if (concurrent()) {
    // let the task associated with worker 0 do this
 842     if (worker_id == 0) {
 843       // task 0 is responsible for clearing the global data structures
 844       // We should be here because of an overflow. During STW we should
 845       // not clear the overflow flag since we rely on it being true when
 846       // we exit this method to abort the pause and restart concurrent
 847       // marking.
 848       reset_marking_state(true /* clear_overflow */);
 849 
 850       log_info(gc)("Concurrent Mark reset for overflow");
 851     }
 852   }
 853 
  // after this, each task should reset its own data structures and
  // then go into the second barrier
 856 }
 857 
 858 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 859   SuspendibleThreadSetLeaver sts_leave(concurrent());
 860   _second_overflow_barrier_sync.enter();
 861 
 862   // at this point everything should be re-initialized and ready to go
 863 }
 864 
 865 class G1CMConcurrentMarkingTask: public AbstractGangTask {
 866 private:
 867   G1ConcurrentMark*     _cm;
 868   ConcurrentMarkThread* _cmt;
 869 
 870 public:
 871   void work(uint worker_id) {
 872     assert(Thread::current()->is_ConcurrentGC_thread(),
 873            "this should only be done by a conc GC thread");
 874     ResourceMark rm;
 875 
 876     double start_vtime = os::elapsedVTime();
 877 
 878     {
 879       SuspendibleThreadSetJoiner sts_join;
 880 
 881       assert(worker_id < _cm->active_tasks(), "invariant");
 882       G1CMTask* the_task = _cm->task(worker_id);
 883       the_task->record_start_time();
 884       if (!_cm->has_aborted()) {
 885         do {
 886           double start_vtime_sec = os::elapsedVTime();
 887           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 888 
 889           the_task->do_marking_step(mark_step_duration_ms,
 890                                     true  /* do_termination */,
 891                                     false /* is_serial*/);
 892 
 893           double end_vtime_sec = os::elapsedVTime();
 894           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
 895           _cm->clear_has_overflown();
 896 
 897           _cm->do_yield_check(worker_id);
 898 
 899           jlong sleep_time_ms;
 900           if (!_cm->has_aborted() && the_task->has_aborted()) {
 901             sleep_time_ms =
 902               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
 903             {
 904               SuspendibleThreadSetLeaver sts_leave;
 905               os::sleep(Thread::current(), sleep_time_ms, false);
 906             }
 907           }
 908         } while (!_cm->has_aborted() && the_task->has_aborted());
 909       }
 910       the_task->record_end_time();
 911       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
 912     }
 913 
 914     double end_vtime = os::elapsedVTime();
 915     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 916   }
 917 
 918   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm,
 919                             ConcurrentMarkThread* cmt) :
 920       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
 921 
 922   ~G1CMConcurrentMarkingTask() { }
 923 };
 924 
 925 // Calculates the number of active workers for a concurrent
 926 // phase.
 927 uint G1ConcurrentMark::calc_parallel_marking_threads() {
 928   uint n_conc_workers = 0;
 929   if (!UseDynamicNumberOfGCThreads ||
 930       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
 931        !ForceDynamicNumberOfGCThreads)) {
 932     n_conc_workers = max_parallel_marking_threads();
 933   } else {
 934     n_conc_workers =
 935       AdaptiveSizePolicy::calc_default_active_workers(
 936                                    max_parallel_marking_threads(),
 937                                    1, /* Minimum workers */
 938                                    parallel_marking_threads(),
 939                                    Threads::number_of_non_daemon_threads());
 940     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
 941     // that scaling has already gone into "_max_parallel_marking_threads".
 942   }
 943   assert(n_conc_workers > 0, "Always need at least 1");
 944   return n_conc_workers;
 945 }
 946 
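// Iterates over the objects in the given root region and applies the root
// region scan closure to each of their reference fields.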
 947 void G1ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
 948   // Currently, only survivors can be root regions.
 949   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
 950   G1RootRegionScanClosure cl(_g1h, this, worker_id);
 951 
 952   const uintx interval = PrefetchScanIntervalInBytes;
 953   HeapWord* curr = hr->bottom();
 954   const HeapWord* end = hr->top();
 955   while (curr < end) {
 956     Prefetch::read(curr, interval);
 957     oop obj = oop(curr);
 958     int size = obj->oop_iterate_size(&cl);
 959     assert(size == obj->size(), "sanity");
 960     curr += size;
 961   }
 962 }
 963 
 964 class G1CMRootRegionScanTask : public AbstractGangTask {
 965 private:
 966   G1ConcurrentMark* _cm;
 967 
 968 public:
 969   G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
 970     AbstractGangTask("Root Region Scan"), _cm(cm) { }
 971 
 972   void work(uint worker_id) {
 973     assert(Thread::current()->is_ConcurrentGC_thread(),
 974            "this should only be done by a conc GC thread");
 975 
 976     G1CMRootRegions* root_regions = _cm->root_regions();
 977     HeapRegion* hr = root_regions->claim_next();
 978     while (hr != NULL) {
 979       _cm->scanRootRegion(hr, worker_id);
 980       hr = root_regions->claim_next();
 981     }
 982   }
 983 };
 984 
 985 void G1ConcurrentMark::scanRootRegions() {
 986   // scan_in_progress() will have been set to true only if there was
 987   // at least one root region to scan. So, if it's false, we
 988   // should not attempt to do any further work.
 989   if (root_regions()->scan_in_progress()) {
 990     assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");
 991     GCTraceConcTime(Info, gc) tt("Concurrent Root Region Scan");
 992 
 993     _parallel_marking_threads = calc_parallel_marking_threads();
 994     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
 995            "Maximum number of marking threads exceeded");
 996     uint active_workers = MAX2(1U, parallel_marking_threads());
 997 
 998     G1CMRootRegionScanTask task(this);
 999     _parallel_workers->set_active_workers(active_workers);
1000     _parallel_workers->run_task(&task);
1001 
1002     // It's possible that has_aborted() is true here without actually
1003     // aborting the survivor scan earlier. This is OK as it's
1004     // mainly used for sanity checking.
1005     root_regions()->scan_finished();
1006   }
1007 }
1008 
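// Spins until the concurrent phase status can be moved from 'not started' to
// 'started' (i.e. until any phase stop in progress has completed), then
// registers the start of the concurrent phase with the concurrent GC timer.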
1009 void G1ConcurrentMark::register_concurrent_phase_start(const char* title) {
1010   uint old_val = 0;
1011   do {
1012     old_val = Atomic::cmpxchg(ConcPhaseStarted, &_concurrent_phase_status, ConcPhaseNotStarted);
1013   } while (old_val != ConcPhaseNotStarted);
1014   _g1h->gc_timer_cm()->register_gc_concurrent_start(title);
1015 }
1016 
1017 void G1ConcurrentMark::register_concurrent_phase_end_common(bool end_timer) {
1018   if (_concurrent_phase_status == ConcPhaseNotStarted) {
1019     return;
1020   }
1021 
1022   uint old_val = Atomic::cmpxchg(ConcPhaseStopping, &_concurrent_phase_status, ConcPhaseStarted);
1023   if (old_val == ConcPhaseStarted) {
1024     _g1h->gc_timer_cm()->register_gc_concurrent_end();
    // If 'end_timer' is true, we came here to end the timer, which requires the
    // concurrent phase to have ended. We need to end it before changing the status
    // to 'ConcPhaseNotStarted' to prevent 'ConcurrentMarkThread' from starting a
    // new concurrent phase.
1028     if (end_timer) {
1029       _g1h->gc_timer_cm()->register_gc_end();
1030     }
1031     old_val = Atomic::cmpxchg(ConcPhaseNotStarted, &_concurrent_phase_status, ConcPhaseStopping);
1032     assert(old_val == ConcPhaseStopping, "Should not have changed since we entered this scope.");
1033   } else {
1034     do {
1035       // Let other thread finish changing '_concurrent_phase_status' to 'ConcPhaseNotStarted'.
1036       os::naked_short_sleep(1);
1037     } while (_concurrent_phase_status != ConcPhaseNotStarted);
1038   }
1039 }
1040 
1041 void G1ConcurrentMark::register_concurrent_phase_end() {
1042   register_concurrent_phase_end_common(false);
1043 }
1044 
1045 void G1ConcurrentMark::register_concurrent_gc_end_and_stop_timer() {
1046   register_concurrent_phase_end_common(true);
1047 }
1048 
1049 void G1ConcurrentMark::markFromRoots() {
1050   // we might be tempted to assert that:
1051   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1052   //        "inconsistent argument?");
1053   // However that wouldn't be right, because it's possible that
1054   // a safepoint is indeed in progress as a younger generation
1055   // stop-the-world GC happens even as we mark in this generation.
1056 
1057   _restart_for_overflow = false;
1058 
1059   // _g1h has _n_par_threads
1060   _parallel_marking_threads = calc_parallel_marking_threads();
1061   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1062     "Maximum number of marking threads exceeded");
1063 
1064   uint active_workers = MAX2(1U, parallel_marking_threads());
1065   assert(active_workers > 0, "Should have been set");
1066 
1067   // Parallel task terminator is set in "set_concurrency_and_phase()"
1068   set_concurrency_and_phase(active_workers, true /* concurrent */);
1069 
1070   G1CMConcurrentMarkingTask markingTask(this, cmThread());
1071   _parallel_workers->set_active_workers(active_workers);
1072   _parallel_workers->run_task(&markingTask);
1073   print_stats();
1074 }
1075 
1076 void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1077   // world is stopped at this checkpoint
1078   assert(SafepointSynchronize::is_at_safepoint(),
1079          "world should be stopped");
1080 
1081   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1082 
1083   // If a full collection has happened, we shouldn't do this.
1084   if (has_aborted()) {
1085     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1086     return;
1087   }
1088 
1089   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1090 
1091   if (VerifyDuringGC) {
1092     HandleMark hm;  // handle scope
1093     g1h->prepare_for_verify();
1094     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1095   }
1096   g1h->verifier()->check_bitmaps("Remark Start");
1097 
1098   G1CollectorPolicy* g1p = g1h->g1_policy();
1099   g1p->record_concurrent_mark_remark_start();
1100 
1101   double start = os::elapsedTime();
1102 
1103   checkpointRootsFinalWork();
1104 
1105   double mark_work_end = os::elapsedTime();
1106 
1107   weakRefsWork(clear_all_soft_refs);
1108 
1109   if (has_overflown()) {
1110     // Oops.  We overflowed.  Restart concurrent marking.
1111     _restart_for_overflow = true;
1112     log_develop_trace(gc)("Remark led to restart for overflow.");
1113 
1114     // Verify the heap w.r.t. the previous marking bitmap.
1115     if (VerifyDuringGC) {
1116       HandleMark hm;  // handle scope
1117       g1h->prepare_for_verify();
1118       Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
1119     }
1120 
1121     // Clear the marking state because we will be restarting
1122     // marking due to overflowing the global mark stack.
1123     reset_marking_state();
1124   } else {
1125     {
1126       GCTraceTime(Debug, gc, phases) trace("Aggregate Data", g1h->gc_timer_cm());
1127 
1128       // Aggregate the per-task counting data that we have accumulated
1129       // while marking.
1130       aggregate_count_data();
1131     }
1132 
1133     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1134     // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
1137     satb_mq_set.set_active_all_threads(false, /* new active value */
1138                                        true /* expected_active */);
1139 
1140     if (VerifyDuringGC) {
1141       HandleMark hm;  // handle scope
1142       g1h->prepare_for_verify();
1143       Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
1144     }
1145     g1h->verifier()->check_bitmaps("Remark End");
1146     assert(!restart_for_overflow(), "sanity");
1147     // Completely reset the marking state since marking completed
1148     set_non_marking_state();
1149   }
1150 
1151   // Expand the marking stack, if we have to and if we can.
1152   if (_markStack.should_expand()) {
1153     _markStack.expand();
1154   }
1155 
1156   // Statistics
1157   double now = os::elapsedTime();
1158   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1159   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1160   _remark_times.add((now - start) * 1000.0);
1161 
1162   g1p->record_concurrent_mark_remark_end();
1163 
1164   G1CMIsAliveClosure is_alive(g1h);
1165   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1166 }
1167 
1168 // Base class of the closures that finalize and verify the
1169 // liveness counting data.
1170 class G1CMCountDataClosureBase: public HeapRegionClosure {
1171 protected:
1172   G1CollectedHeap* _g1h;
1173   G1ConcurrentMark* _cm;
1174   CardTableModRefBS* _ct_bs;
1175 
1176   BitMap* _region_bm;
1177   BitMap* _card_bm;
1178 
  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit in the region
  // bitmap to 1.
1182   void set_bit_for_region(HeapRegion* hr) {
1183     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1184     _region_bm->par_at_put(index, true);
1185   }
1186 
1187 public:
1188   G1CMCountDataClosureBase(G1CollectedHeap* g1h,
1189                            BitMap* region_bm, BitMap* card_bm):
1190     _g1h(g1h), _cm(g1h->concurrent_mark()),
1191     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
1192     _region_bm(region_bm), _card_bm(card_bm) { }
1193 };
1194 
1195 // Closure that calculates the # live objects per region. Used
1196 // for verification purposes during the cleanup pause.
1197 class CalcLiveObjectsClosure: public G1CMCountDataClosureBase {
1198   G1CMBitMapRO* _bm;
1199   size_t _region_marked_bytes;
1200 
1201 public:
1202   CalcLiveObjectsClosure(G1CMBitMapRO *bm, G1CollectedHeap* g1h,
1203                          BitMap* region_bm, BitMap* card_bm) :
1204     G1CMCountDataClosureBase(g1h, region_bm, card_bm),
1205     _bm(bm), _region_marked_bytes(0) { }
1206 
1207   bool doHeapRegion(HeapRegion* hr) {
1208     HeapWord* ntams = hr->next_top_at_mark_start();
1209     HeapWord* start = hr->bottom();
1210 
1211     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1212            "Preconditions not met - "
1213            "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
1214            p2i(start), p2i(ntams), p2i(hr->end()));
1215 
1216     // Find the first marked object at or after "start".
1217     start = _bm->getNextMarkedWordAddress(start, ntams);
1218 
1219     size_t marked_bytes = 0;
1220 
1221     while (start < ntams) {
1222       oop obj = oop(start);
1223       int obj_sz = obj->size();
1224       HeapWord* obj_end = start + obj_sz;
1225 
1226       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1227       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1228 
      // Note: if we're looking at the last region in the heap, obj_end
      // could actually be just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
1233       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1234         // end of object is not card aligned - increment to cover
1235         // all the cards spanned by the object
1236         end_idx += 1;
1237       }
1238 
1239       // Set the bits in the card BM for the cards spanned by this object.
1240       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1241 
1242       // Add the size of this object to the number of marked bytes.
1243       marked_bytes += (size_t)obj_sz * HeapWordSize;
1244 
1245       // This will happen if we are handling a humongous object that spans
1246       // several heap regions.
1247       if (obj_end > hr->end()) {
1248         break;
1249       }
1250       // Find the next marked object after this one.
1251       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1252     }
1253 
1254     // Mark the allocated-since-marking portion...
1255     HeapWord* top = hr->top();
1256     if (ntams < top) {
1257       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1258       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1259 
      // Note: if we're looking at the last region in the heap, top
      // could actually be just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
1264       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1265         // end of object is not card aligned - increment to cover
1266         // all the cards spanned by the object
1267         end_idx += 1;
1268       }
1269       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1270 
1271       // This definitely means the region has live objects.
1272       set_bit_for_region(hr);
1273     }
1274 
1275     // Update the live region bitmap.
1276     if (marked_bytes > 0) {
1277       set_bit_for_region(hr);
1278     }
1279 
1280     // Set the marked bytes for the current region so that
1281     // it can be queried by a calling verification routine
1282     _region_marked_bytes = marked_bytes;
1283 
1284     return false;
1285   }
1286 
1287   size_t region_marked_bytes() const { return _region_marked_bytes; }
1288 };
1289 
1290 // Heap region closure used for verifying the counting data
1291 // that was accumulated concurrently and aggregated during
1292 // the remark pause. This closure is applied to the heap
1293 // regions during the STW cleanup pause.
1294 
1295 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1296   G1CollectedHeap* _g1h;
1297   G1ConcurrentMark* _cm;
1298   CalcLiveObjectsClosure _calc_cl;
1299   BitMap* _region_bm;   // Region BM to be verified
1300   BitMap* _card_bm;     // Card BM to be verified
1301 
1302   BitMap* _exp_region_bm; // Expected Region BM values
1303   BitMap* _exp_card_bm;   // Expected card BM values
1304 
1305   int _failures;
1306 
1307 public:
1308   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1309                                 BitMap* region_bm,
1310                                 BitMap* card_bm,
1311                                 BitMap* exp_region_bm,
1312                                 BitMap* exp_card_bm) :
1313     _g1h(g1h), _cm(g1h->concurrent_mark()),
1314     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1315     _region_bm(region_bm), _card_bm(card_bm),
1316     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1317     _failures(0) { }
1318 
1319   int failures() const { return _failures; }
1320 
1321   bool doHeapRegion(HeapRegion* hr) {
1322     int failures = 0;
1323 
1324     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1325     // this region and set the corresponding bits in the expected region
1326     // and card bitmaps.
1327     bool res = _calc_cl.doHeapRegion(hr);
1328     assert(res == false, "should be continuing");
1329 
1330     // Verify the marked bytes for this region.
1331     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1332     size_t act_marked_bytes = hr->next_marked_bytes();
1333 
1334     if (exp_marked_bytes > act_marked_bytes) {
1335       if (hr->is_starts_humongous()) {
1336         // For start_humongous regions, the size of the whole object will be
1337         // in exp_marked_bytes.
1338         HeapRegion* region = hr;
1339         int num_regions;
1340         for (num_regions = 0; region != NULL; num_regions++) {
1341           region = _g1h->next_region_in_humongous(region);
1342         }
1343         if ((num_regions-1) * HeapRegion::GrainBytes >= exp_marked_bytes) {
1344           failures += 1;
1345         } else if (num_regions * HeapRegion::GrainBytes < exp_marked_bytes) {
1346           failures += 1;
1347         }
1348       } else {
1349         // We're not OK if expected marked bytes > actual marked bytes. It means
1350         // we have missed accounting some objects during the actual marking.
1351         failures += 1;
1352       }
1353     }
1354 
1355     // Verify the bit, for this region, in the actual and expected
1356     // (which was just calculated) region bit maps.
1357     // We're not OK if the bit in the calculated expected region
1358     // bitmap is set and the bit in the actual region bitmap is not.
1359     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1360 
1361     bool expected = _exp_region_bm->at(index);
1362     bool actual = _region_bm->at(index);
1363     if (expected && !actual) {
1364       failures += 1;
1365     }
1366 
1367     // Verify that the card bit maps for the cards spanned by the current
1368     // region match. We have an error if we have a set bit in the expected
1369     // bit map and the corresponding bit in the actual bitmap is not set.
1370 
1371     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1372     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1373 
1374     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1375       expected = _exp_card_bm->at(i);
1376       actual = _card_bm->at(i);
1377 
1378       if (expected && !actual) {
1379         failures += 1;
1380       }
1381     }
1382 
1383     _failures += failures;
1384 
1385     // We could stop iteration over the heap when we
1386     // find the first violating region by returning true.
1387     return false;
1388   }
1389 };
1390 
1391 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1392 protected:
1393   G1CollectedHeap* _g1h;
1394   G1ConcurrentMark* _cm;
1395   BitMap* _actual_region_bm;
1396   BitMap* _actual_card_bm;
1397 
1398   uint    _n_workers;
1399 
1400   BitMap* _expected_region_bm;
1401   BitMap* _expected_card_bm;
1402 
1403   int  _failures;
1404 
1405   HeapRegionClaimer _hrclaimer;
1406 
1407 public:
1408   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1409                             BitMap* region_bm, BitMap* card_bm,
1410                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1411     : AbstractGangTask("G1 verify final counting"),
1412       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1413       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1414       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1415       _failures(0),
1416       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1417     assert(VerifyDuringGC, "don't call this otherwise");
1418     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1419     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1420   }
1421 
1422   void work(uint worker_id) {
1423     assert(worker_id < _n_workers, "invariant");
1424 
1425     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1426                                             _actual_region_bm, _actual_card_bm,
1427                                             _expected_region_bm,
1428                                             _expected_card_bm);
1429 
1430     _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
1431 
1432     Atomic::add(verify_cl.failures(), &_failures);
1433   }
1434 
1435   int failures() const { return _failures; }
1436 };
1437 
1438 // Closure that finalizes the liveness counting data.
1439 // Used during the cleanup pause.
1440 // Sets the bits corresponding to the interval [NTAMS, top]
1441 // (which contains the implicitly live objects) in the
1442 // card liveness bitmap. Also sets the bit for each region,
1443 // containing live data, in the region liveness bitmap.
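     // When top is not card aligned, the end index of the card range is
     // bumped by one so that the partial last card is also covered.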
1444 
1445 class FinalCountDataUpdateClosure: public G1CMCountDataClosureBase {
1446  public:
1447   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1448                               BitMap* region_bm,
1449                               BitMap* card_bm) :
1450     G1CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1451 
1452   bool doHeapRegion(HeapRegion* hr) {
1453     HeapWord* ntams = hr->next_top_at_mark_start();
1454     HeapWord* top   = hr->top();
1455 
1456     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1457 
1458     // Mark the allocated-since-marking portion...
1459     if (ntams < top) {
1460       // This definitely means the region has live objects.
1461       set_bit_for_region(hr);
1462 
1463       // Now set the bits in the card bitmap for [ntams, top)
1464       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1465       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1466 
1467       // Note: if we're looking at the last region in heap - top
1468       // could be actually just beyond the end of the heap; end_idx
1469       // will then correspond to a (non-existent) card that is also
1470       // just beyond the heap.
1471       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1472         // end of object is not card aligned - increment to cover
1473         // all the cards spanned by the object
1474         end_idx += 1;
1475       }
1476 
1477       assert(end_idx <= _card_bm->size(),
1478              "oob: end_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1479              end_idx, _card_bm->size());
1480       assert(start_idx < _card_bm->size(),
1481              "oob: start_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1482              start_idx, _card_bm->size());
1483 
1484       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1485     }
1486 
1487     // Set the bit for the region if it contains live data
1488     if (hr->next_marked_bytes() > 0) {
1489       set_bit_for_region(hr);
1490     }
1491 
1492     return false;
1493   }
1494 };
1495 
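     // Parallel task that applies FinalCountDataUpdateClosure to all heap
     // regions during the cleanup pause.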
1496 class G1ParFinalCountTask: public AbstractGangTask {
1497 protected:
1498   G1CollectedHeap* _g1h;
1499   G1ConcurrentMark* _cm;
1500   BitMap* _actual_region_bm;
1501   BitMap* _actual_card_bm;
1502 
1503   uint    _n_workers;
1504   HeapRegionClaimer _hrclaimer;
1505 
1506 public:
1507   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1508     : AbstractGangTask("G1 final counting"),
1509       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1510       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1511       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1512   }
1513 
1514   void work(uint worker_id) {
1515     assert(worker_id < _n_workers, "invariant");
1516 
1517     FinalCountDataUpdateClosure final_update_cl(_g1h,
1518                                                 _actual_region_bm,
1519                                                 _actual_card_bm);
1520 
1521     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1522   }
1523 };
1524 
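     // Closure applied to each heap region at the end of marking. It notes
     // the end of marking for the region and, if the region contains no live
     // data (and is neither young nor archive), frees it into the local
     // cleanup list; otherwise it queues the region's remembered set for
     // cleanup.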
1525 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1526   G1CollectedHeap* _g1;
1527   size_t _freed_bytes;
1528   FreeRegionList* _local_cleanup_list;
1529   uint _old_regions_removed;
1530   uint _humongous_regions_removed;
1531   HRRSCleanupTask* _hrrs_cleanup_task;
1532 
1533 public:
1534   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1535                              FreeRegionList* local_cleanup_list,
1536                              HRRSCleanupTask* hrrs_cleanup_task) :
1537     _g1(g1),
1538     _freed_bytes(0),
1539     _local_cleanup_list(local_cleanup_list),
1540     _old_regions_removed(0),
1541     _humongous_regions_removed(0),
1542     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1543 
1544   size_t freed_bytes() { return _freed_bytes; }
1545   uint old_regions_removed() const { return _old_regions_removed; }
1546   uint humongous_regions_removed() const { return _humongous_regions_removed; }
1547 
1548   bool doHeapRegion(HeapRegion *hr) {
1549     if (hr->is_archive()) {
1550       return false;
1551     }
1552     // Regions are claimed via the HeapRegionClaimer in G1ParNoteEndTask,
1553     // so each region is processed by exactly one worker.
1554     _g1->reset_gc_time_stamps(hr);
1555     hr->note_end_of_marking();
1556 
1557     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1558       _freed_bytes += hr->used();
1559       hr->set_containing_set(NULL);
1560       if (hr->is_humongous()) {
1561         _humongous_regions_removed++;
1562         _g1->free_humongous_region(hr, _local_cleanup_list, true /* par */);
1563       } else {
1564         _old_regions_removed++;
1565         _g1->free_region(hr, _local_cleanup_list, true /* par */);
1566       }
1567     } else {
1568       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1569     }
1570 
1571     return false;
1572   }
1573 };
1574 
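     // Parallel task wrapping G1NoteEndOfConcMarkClosure. Freed regions are
     // collected in per-worker local lists and then appended to the global
     // cleanup list under the ParGCRareEvent_lock.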
1575 class G1ParNoteEndTask: public AbstractGangTask {
1576   friend class G1NoteEndOfConcMarkClosure;
1577 
1578 protected:
1579   G1CollectedHeap* _g1h;
1580   FreeRegionList* _cleanup_list;
1581   HeapRegionClaimer _hrclaimer;
1582 
1583 public:
1584   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1585       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1586   }
1587 
1588   void work(uint worker_id) {
1589     FreeRegionList local_cleanup_list("Local Cleanup List");
1590     HRRSCleanupTask hrrs_cleanup_task;
1591     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1592                                            &hrrs_cleanup_task);
1593     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1594     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1595 
1596     // Now update the lists
1597     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1598     {
1599       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1600       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1601 
1602       // If we iterate over the global cleanup list at the end of
1603       // cleanup to do this printing we will not guarantee to only
1604       // generate output for the newly-reclaimed regions (the list
1605       // might not be empty at the beginning of cleanup; we might
1606       // still be working on its previous contents). So we do the
1607       // printing here, before we append the new regions to the global
1608       // cleanup list.
1609 
1610       G1HRPrinter* hr_printer = _g1h->hr_printer();
1611       if (hr_printer->is_active()) {
1612         FreeRegionListIterator iter(&local_cleanup_list);
1613         while (iter.more_available()) {
1614           HeapRegion* hr = iter.get_next();
1615           hr_printer->cleanup(hr);
1616         }
1617       }
1618 
1619       _cleanup_list->add_ordered(&local_cleanup_list);
1620       assert(local_cleanup_list.is_empty(), "post-condition");
1621 
1622       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1623     }
1624   }
1625 };
1626 
1627 void G1ConcurrentMark::cleanup() {
1628   // world is stopped at this checkpoint
1629   assert(SafepointSynchronize::is_at_safepoint(),
1630          "world should be stopped");
1631   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1632 
1633   // If a full collection has happened, we shouldn't do this.
1634   if (has_aborted()) {
1635     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1636     return;
1637   }
1638 
1639   g1h->verifier()->verify_region_sets_optional();
1640 
1641   if (VerifyDuringGC) {
1642     HandleMark hm;  // handle scope
1643     g1h->prepare_for_verify();
1644     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1645   }
1646   g1h->verifier()->check_bitmaps("Cleanup Start");
1647 
1648   G1CollectorPolicy* g1p = g1h->g1_policy();
1649   g1p->record_concurrent_mark_cleanup_start();
1650 
1651   double start = os::elapsedTime();
1652 
1653   HeapRegionRemSet::reset_for_cleanup_tasks();
1654 
1655   // Do counting once more with the world stopped for good measure.
1656   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1657 
1658   g1h->workers()->run_task(&g1_par_count_task);
1659 
1660   if (VerifyDuringGC) {
1661     // Verify that the counting data accumulated during marking matches
1662     // that calculated by walking the marking bitmap.
1663 
1664     // Bitmaps to hold expected values
1665     BitMap expected_region_bm(_region_bm.size(), true);
1666     BitMap expected_card_bm(_card_bm.size(), true);
1667 
1668     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1669                                                  &_region_bm,
1670                                                  &_card_bm,
1671                                                  &expected_region_bm,
1672                                                  &expected_card_bm);
1673 
1674     g1h->workers()->run_task(&g1_par_verify_task);
1675 
1676     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1677   }
1678 
1679   size_t start_used_bytes = g1h->used();
1680   g1h->collector_state()->set_mark_in_progress(false);
1681 
1682   double count_end = os::elapsedTime();
1683   double this_final_counting_time = (count_end - start);
1684   _total_counting_time += this_final_counting_time;
1685 
1686   if (log_is_enabled(Trace, gc, liveness)) {
1687     G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1688     _g1h->heap_region_iterate(&cl);
1689   }
1690 
1691   // Install newly created mark bitMap as "prev".
1692   swapMarkBitMaps();
1693 
1694   g1h->reset_gc_time_stamp();
1695 
1696   uint n_workers = _g1h->workers()->active_workers();
1697 
1698   // Note end of marking in all heap regions.
1699   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1700   g1h->workers()->run_task(&g1_par_note_end_task);
1701   g1h->check_gc_time_stamps();
1702 
1703   if (!cleanup_list_is_empty()) {
1704     // The cleanup list is not empty, so we'll have to process it
1705     // concurrently. Notify anyone else that might be wanting free
1706     // regions that there will be more free regions coming soon.
1707     g1h->set_free_regions_coming();
1708   }
1709 
1710   // Do this before the record_concurrent_mark_cleanup_end() call below,
1711   // since it affects the metric by which we sort the heap regions.
1712   if (G1ScrubRemSets) {
1713     double rs_scrub_start = os::elapsedTime();
1714     g1h->scrub_rem_set(&_region_bm, &_card_bm);
1715     _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
1716   }
1717 
1718   // this will also free any regions totally full of garbage objects,
1719   // and sort the regions.
1720   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1721 
1722   // Statistics.
1723   double end = os::elapsedTime();
1724   _cleanup_times.add((end - start) * 1000.0);
1725 
1726   // Clean up will have freed any regions completely full of garbage.
1727   // Update the soft reference policy with the new heap occupancy.
1728   Universe::update_heap_info_at_gc();
1729 
1730   if (VerifyDuringGC) {
1731     HandleMark hm;  // handle scope
1732     g1h->prepare_for_verify();
1733     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
1734   }
1735 
1736   g1h->verifier()->check_bitmaps("Cleanup End");
1737 
1738   g1h->verifier()->verify_region_sets_optional();
1739 
1740   // We need to make this be a "collection" so any collection pause that
1741   // races with it goes around and waits for completeCleanup to finish.
1742   g1h->increment_total_collections();
1743 
1744   // Clean out dead classes and update Metaspace sizes.
1745   if (ClassUnloadingWithConcurrentMark) {
1746     ClassLoaderDataGraph::purge();
1747   }
1748   MetaspaceGC::compute_new_size();
1749 
1750   // We reclaimed old regions so we should calculate the sizes to make
1751   // sure we update the old gen/space data.
1752   g1h->g1mm()->update_sizes();
1753   g1h->allocation_context_stats().update_after_mark();
1754 
1755   g1h->trace_heap_after_concurrent_cycle();
1756 }
1757 
1758 void G1ConcurrentMark::completeCleanup() {
1759   if (has_aborted()) return;
1760 
1761   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1762 
1763   _cleanup_list.verify_optional();
1764   FreeRegionList tmp_free_list("Tmp Free List");
1765 
1766   log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1767                                   "cleanup list has %u entries",
1768                                   _cleanup_list.length());
1769 
1770   // No one else should be accessing the _cleanup_list at this point,
1771   // so it is not necessary to take any locks
1772   while (!_cleanup_list.is_empty()) {
1773     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1774     assert(hr != NULL, "Got NULL from a non-empty list");
1775     hr->par_clear();
1776     tmp_free_list.add_ordered(hr);
1777 
1778     // Instead of adding one region at a time to the secondary_free_list,
1779     // we accumulate them in the local list and move them a few at a
1780     // time. This also cuts down on the number of notify_all() calls
1781     // we do during this process. We'll also append the local list when
1782     // _cleanup_list is empty (which means we just removed the last
1783     // region from the _cleanup_list).
1784     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1785         _cleanup_list.is_empty()) {
1786       log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1787                                       "appending %u entries to the secondary_free_list, "
1788                                       "cleanup list still has %u entries",
1789                                       tmp_free_list.length(),
1790                                       _cleanup_list.length());
1791 
1792       {
1793         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1794         g1h->secondary_free_list_add(&tmp_free_list);
1795         SecondaryFreeList_lock->notify_all();
1796       }
1797 #ifndef PRODUCT
1798       if (G1StressConcRegionFreeing) {
1799         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1800           os::sleep(Thread::current(), (jlong) 1, false);
1801         }
1802       }
1803 #endif
1804     }
1805   }
1806   assert(tmp_free_list.is_empty(), "post-condition");
1807 }
1808 
1809 // Supporting Object and Oop closures for reference discovery
1810 // and processing during marking
1811 
1812 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1813   HeapWord* addr = (HeapWord*)obj;
1814   return addr != NULL &&
1815          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1816 }
1817 
1818 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1819 // Uses the G1CMTask associated with a worker thread (for serial reference
1820 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1821 // trace referent objects.
1822 //
1823 // Using the G1CMTask and embedded local queues avoids having the worker
1824 // threads operating on the global mark stack. This reduces the risk
1825 // of overflowing the stack - which we would rather avoid at this late
1826 // stage. Also, using the tasks' local queues removes the potential for
1827 // the workers to interfere with each other, which could occur if they
1828 // operated on the global stack.
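     // The closure processes references in batches of G1RefProcDrainInterval;
     // after each batch it calls G1CMTask::do_marking_step() to drain the
     // entries pushed so far.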
1829 
1830 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1831   G1ConcurrentMark* _cm;
1832   G1CMTask*         _task;
1833   int               _ref_counter_limit;
1834   int               _ref_counter;
1835   bool              _is_serial;
1836  public:
1837   G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1838     _cm(cm), _task(task), _is_serial(is_serial),
1839     _ref_counter_limit(G1RefProcDrainInterval) {
1840     assert(_ref_counter_limit > 0, "sanity");
1841     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1842     _ref_counter = _ref_counter_limit;
1843   }
1844 
1845   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1846   virtual void do_oop(      oop* p) { do_oop_work(p); }
1847 
1848   template <class T> void do_oop_work(T* p) {
1849     if (!_cm->has_overflown()) {
1850       oop obj = oopDesc::load_decode_heap_oop(p);
1851       _task->deal_with_reference(obj);
1852       _ref_counter--;
1853 
1854       if (_ref_counter == 0) {
1855         // We have dealt with _ref_counter_limit references, pushing them
1856         // and objects reachable from them on to the local stack (and
1857         // possibly the global stack). Call G1CMTask::do_marking_step() to
1858         // process these entries.
1859         //
1860         // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1861         // there's nothing more to do (i.e. we're done with the entries that
1862         // were pushed as a result of the G1CMTask::deal_with_reference() calls
1863         // above) or we overflow.
1864         //
1865         // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1866         // flag while there may still be some work to do. (See the comment at
1867         // the beginning of G1CMTask::do_marking_step() for those conditions -
1868         // one of which is reaching the specified time target.) It is only
1869         // when G1CMTask::do_marking_step() returns without setting the
1870         // has_aborted() flag that the marking step has completed.
1871         do {
1872           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1873           _task->do_marking_step(mark_step_duration_ms,
1874                                  false      /* do_termination */,
1875                                  _is_serial);
1876         } while (_task->has_aborted() && !_cm->has_overflown());
1877         _ref_counter = _ref_counter_limit;
1878       }
1879     }
1880   }
1881 };
1882 
1883 // 'Drain' oop closure used by both serial and parallel reference processing.
1884 // Uses the G1CMTask associated with a given worker thread (for serial
1885 // reference processing the G1CMtask for worker 0 is used). Calls the
1886 // do_marking_step routine, with an unbelievably large timeout value,
1887 // to drain the marking data structures of the remaining entries
1888 // added by the 'keep alive' oop closure above.
1889 
1890 class G1CMDrainMarkingStackClosure: public VoidClosure {
1891   G1ConcurrentMark* _cm;
1892   G1CMTask*         _task;
1893   bool              _is_serial;
1894  public:
1895   G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1896     _cm(cm), _task(task), _is_serial(is_serial) {
1897     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1898   }
1899 
1900   void do_void() {
1901     do {
1902       // We call G1CMTask::do_marking_step() to completely drain the local
1903       // and global marking stacks of entries pushed by the 'keep alive'
1904       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1905       //
1906       // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1907       // if there's nothing more to do (i.e. we've completely drained the
1908       // entries that were pushed as a result of applying the 'keep alive'
1909       // closure to the entries on the discovered ref lists) or we overflow
1910       // the global marking stack.
1911       //
1912       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1913       // flag while there may still be some work to do. (See the comment at
1914       // the beginning of G1CMTask::do_marking_step() for those conditions -
1915       // one of which is reaching the specified time target.) It is only
1916       // when G1CMTask::do_marking_step() returns without setting the
1917       // has_aborted() flag that the marking step has completed.
1918 
1919       _task->do_marking_step(1000000000.0 /* something very large */,
1920                              true         /* do_termination */,
1921                              _is_serial);
1922     } while (_task->has_aborted() && !_cm->has_overflown());
1923   }
1924 };
1925 
1926 // Implementation of AbstractRefProcTaskExecutor for parallel
1927 // reference processing at the end of G1 concurrent marking
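     // In weakRefsWork() below, an instance of this executor is passed to the
     // reference processor's process_discovered_references() and
     // enqueue_discovered_references() calls when processing is multi-threaded.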
1928 
1929 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
1930 private:
1931   G1CollectedHeap*  _g1h;
1932   G1ConcurrentMark* _cm;
1933   WorkGang*         _workers;
1934   uint              _active_workers;
1935 
1936 public:
1937   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1938                           G1ConcurrentMark* cm,
1939                           WorkGang* workers,
1940                           uint n_workers) :
1941     _g1h(g1h), _cm(cm),
1942     _workers(workers), _active_workers(n_workers) { }
1943 
1944   // Executes the given task using concurrent marking worker threads.
1945   virtual void execute(ProcessTask& task);
1946   virtual void execute(EnqueueTask& task);
1947 };
1948 
1949 class G1CMRefProcTaskProxy: public AbstractGangTask {
1950   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1951   ProcessTask&      _proc_task;
1952   G1CollectedHeap*  _g1h;
1953   G1ConcurrentMark* _cm;
1954 
1955 public:
1956   G1CMRefProcTaskProxy(ProcessTask& proc_task,
1957                        G1CollectedHeap* g1h,
1958                        G1ConcurrentMark* cm) :
1959     AbstractGangTask("Process reference objects in parallel"),
1960     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1961     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1962     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1963   }
1964 
1965   virtual void work(uint worker_id) {
1966     ResourceMark rm;
1967     HandleMark hm;
1968     G1CMTask* task = _cm->task(worker_id);
1969     G1CMIsAliveClosure g1_is_alive(_g1h);
1970     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1971     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1972 
1973     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1974   }
1975 };
1976 
1977 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
1978   assert(_workers != NULL, "Need parallel worker threads.");
1979   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1980 
1981   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1982 
1983   // We need to reset the concurrency level before each
1984   // proxy task execution, so that the termination protocol
1985   // and overflow handling in G1CMTask::do_marking_step() knows
1986   // how many workers to wait for.
1987   _cm->set_concurrency(_active_workers);
1988   _workers->run_task(&proc_task_proxy);
1989 }
1990 
1991 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
1992   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
1993   EnqueueTask& _enq_task;
1994 
1995 public:
1996   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
1997     AbstractGangTask("Enqueue reference objects in parallel"),
1998     _enq_task(enq_task) { }
1999 
2000   virtual void work(uint worker_id) {
2001     _enq_task.work(worker_id);
2002   }
2003 };
2004 
2005 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2006   assert(_workers != NULL, "Need parallel worker threads.");
2007   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2008 
2009   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2010 
2011   // Not strictly necessary but...
2012   //
2013   // We need to reset the concurrency level before each
2014   // proxy task execution, so that the termination protocol
2015   // and overflow handling in G1CMTask::do_marking_step() knows
2016   // how many workers to wait for.
2017   _cm->set_concurrency(_active_workers);
2018   _workers->run_task(&enq_task_proxy);
2019 }
2020 
2021 void G1ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2022   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2023 }
2024 
2025 void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2026   if (has_overflown()) {
2027     // Skip processing the discovered references if we have
2028     // overflown the global marking stack. Reference objects
2029     // only get discovered once so it is OK to not
2030     // de-populate the discovered reference lists. We could have,
2031     // but the only benefit would be that, when marking restarts,
2032     // less reference objects are discovered.
2033     return;
2034   }
2035 
2036   ResourceMark rm;
2037   HandleMark   hm;
2038 
2039   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2040 
2041   // Is alive closure.
2042   G1CMIsAliveClosure g1_is_alive(g1h);
2043 
2044   // Inner scope to exclude the cleaning of the string and symbol
2045   // tables from the displayed time.
2046   {
2047     GCTraceTime(Debug, gc, phases) trace("Reference Processing", g1h->gc_timer_cm());
2048 
2049     ReferenceProcessor* rp = g1h->ref_processor_cm();
2050 
2051     // See the comment in G1CollectedHeap::ref_processing_init()
2052     // about how reference processing currently works in G1.
2053 
2054     // Set the soft reference policy
2055     rp->setup_policy(clear_all_soft_refs);
2056     assert(_markStack.isEmpty(), "mark stack should be empty");
2057 
2058     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2059     // in serial reference processing. Note these closures are also
2060     // used for serially processing (by the current thread) the
2061     // JNI references during parallel reference processing.
2062     //
2063     // These closures do not need to synchronize with the worker
2064     // threads involved in parallel reference processing as these
2065     // instances are executed serially by the current thread (e.g.
2066     // reference processing is not multi-threaded and is thus
2067     // performed by the current thread instead of a gang worker).
2068     //
2069     // The gang tasks involved in parallel reference processing create
2070     // their own instances of these closures, which do their own
2071     // synchronization among themselves.
2072     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2073     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2074 
2075     // We need at least one active thread. If reference processing
2076     // is not multi-threaded we use the current (VMThread) thread,
2077     // otherwise we use the work gang from the G1CollectedHeap and
2078     // we utilize all the worker threads we can.
2079     bool processing_is_mt = rp->processing_is_mt();
2080     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2081     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2082 
2083     // Parallel processing task executor.
2084     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2085                                               g1h->workers(), active_workers);
2086     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2087 
2088     // Set the concurrency level. The phase was already set prior to
2089     // executing the remark task.
2090     set_concurrency(active_workers);
2091 
2092     // Set the degree of MT processing here.  If the discovery was done MT,
2093     // the number of threads involved during discovery could differ from
2094     // the number of active workers.  This is OK as long as the discovered
2095     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2096     rp->set_active_mt_degree(active_workers);
2097 
2098     // Process the weak references.
2099     const ReferenceProcessorStats& stats =
2100         rp->process_discovered_references(&g1_is_alive,
2101                                           &g1_keep_alive,
2102                                           &g1_drain_mark_stack,
2103                                           executor,
2104                                           g1h->gc_timer_cm());
2105     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2106 
2107     // The do_oop work routines of the keep_alive and drain_marking_stack
2108     // oop closures will set the has_overflown flag if we overflow the
2109     // global marking stack.
2110 
2111     assert(_markStack.overflow() || _markStack.isEmpty(),
2112             "mark stack should be empty (unless it overflowed)");
2113 
2114     if (_markStack.overflow()) {
2115       // This should have been done already when we tried to push an
2116       // entry on to the global mark stack. But let's do it again.
2117       set_has_overflown();
2118     }
2119 
2120     assert(rp->num_q() == active_workers, "why not");
2121 
2122     rp->enqueue_discovered_references(executor);
2123 
2124     rp->verify_no_references_recorded();
2125     assert(!rp->discovery_enabled(), "Post condition");
2126   }
2127 
2128   if (has_overflown()) {
2129     // We cannot trust g1_is_alive if the marking stack overflowed.
2130     return;
2131   }
2132 
2133   assert(_markStack.isEmpty(), "Marking should have completed");
2134 
2135   // Unload Klasses, String, Symbols, Code Cache, etc.
2136   if (ClassUnloadingWithConcurrentMark) {
2137     bool purged_classes;
2138 
2139     {
2140       GCTraceTime(Debug, gc, phases) trace("System Dictionary Unloading", g1h->gc_timer_cm());
2141       purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2142     }
2143 
2144     {
2145       GCTraceTime(Debug, gc, phases) trace("Parallel Unloading", g1h->gc_timer_cm());
2146       weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2147     }
2148   }
2149 
2150   if (G1StringDedup::is_enabled()) {
2151     GCTraceTime(Debug, gc, phases) trace("String Deduplication Unlink", g1h->gc_timer_cm());
2152     G1StringDedup::unlink(&g1_is_alive);
2153   }
2154 }
2155 
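     // Swap the prev and next mark bitmaps: the bitmap populated by the
     // marking cycle that just finished becomes the prev bitmap for the next
     // cycle. The casts are needed because the prev bitmap is held through a
     // read-only type.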
2156 void G1ConcurrentMark::swapMarkBitMaps() {
2157   G1CMBitMapRO* temp = _prevMarkBitMap;
2158   _prevMarkBitMap    = (G1CMBitMapRO*)_nextMarkBitMap;
2159   _nextMarkBitMap    = (G1CMBitMap*)  temp;
2160 }
2161 
2162 // Closure for marking entries in SATB buffers.
2163 class G1CMSATBBufferClosure : public SATBBufferClosure {
2164 private:
2165   G1CMTask* _task;
2166   G1CollectedHeap* _g1h;
2167 
2168   // This is very similar to G1CMTask::deal_with_reference, but with
2169   // more relaxed requirements for the argument, so this must be more
2170   // circumspect about treating the argument as an object.
2171   void do_entry(void* entry) const {
2172     _task->increment_refs_reached();
2173     HeapRegion* hr = _g1h->heap_region_containing(entry);
2174     if (entry < hr->next_top_at_mark_start()) {
2175       // Until we get here, we don't know whether entry refers to a valid
2176       // object; it could instead have been a stale reference.
2177       oop obj = static_cast<oop>(entry);
2178       assert(obj->is_oop(true /* ignore mark word */),
2179              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2180       _task->make_reference_grey(obj, hr);
2181     }
2182   }
2183 
2184 public:
2185   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
2186     : _task(task), _g1h(g1h) { }
2187 
2188   virtual void do_buffer(void** buffer, size_t size) {
2189     for (size_t i = 0; i < size; ++i) {
2190       do_entry(buffer[i]);
2191     }
2192   }
2193 };
2194 
2195 class G1RemarkThreadsClosure : public ThreadClosure {
2196   G1CMSATBBufferClosure _cm_satb_cl;
2197   G1CMOopClosure _cm_cl;
2198   MarkingCodeBlobClosure _code_cl;
2199   int _thread_parity;
2200 
2201  public:
2202   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
2203     _cm_satb_cl(task, g1h),
2204     _cm_cl(g1h, g1h->concurrent_mark(), task),
2205     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2206     _thread_parity(Threads::thread_claim_parity()) {}
2207 
2208   void do_thread(Thread* thread) {
2209     if (thread->is_Java_thread()) {
2210       if (thread->claim_oops_do(true, _thread_parity)) {
2211         JavaThread* jt = (JavaThread*)thread;
2212 
2213         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2214         // however, oops reachable from nmethods have very complex lifecycles:
2215         // * Alive if on the stack of an executing method
2216         // * Weakly reachable otherwise
2217         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2218         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2219         jt->nmethods_do(&_code_cl);
2220 
2221         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2222       }
2223     } else if (thread->is_VM_thread()) {
2224       if (thread->claim_oops_do(true, _thread_parity)) {
2225         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2226       }
2227     }
2228   }
2229 };
2230 
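     // Parallel remark task. Each worker first processes the thread roots and
     // SATB buffers via G1RemarkThreadsClosure and then calls
     // do_marking_step() repeatedly until marking completes or the global
     // mark stack overflows.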
2231 class G1CMRemarkTask: public AbstractGangTask {
2232 private:
2233   G1ConcurrentMark* _cm;
2234 public:
2235   void work(uint worker_id) {
2236     // Since all available tasks are actually started, we should
2237     // only proceed if we're supposed to be active.
2238     if (worker_id < _cm->active_tasks()) {
2239       G1CMTask* task = _cm->task(worker_id);
2240       task->record_start_time();
2241       {
2242         ResourceMark rm;
2243         HandleMark hm;
2244 
2245         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2246         Threads::threads_do(&threads_f);
2247       }
2248 
2249       do {
2250         task->do_marking_step(1000000000.0 /* something very large */,
2251                               true         /* do_termination       */,
2252                               false        /* is_serial            */);
2253       } while (task->has_aborted() && !_cm->has_overflown());
2254       // If we overflow, then we do not want to restart. We instead
2255       // want to abort remark and do concurrent marking again.
2256       task->record_end_time();
2257     }
2258   }
2259 
2260   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
2261     AbstractGangTask("Par Remark"), _cm(cm) {
2262     _cm->terminator()->reset_for_reuse(active_workers);
2263   }
2264 };
2265 
2266 void G1ConcurrentMark::checkpointRootsFinalWork() {
2267   ResourceMark rm;
2268   HandleMark   hm;
2269   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2270 
2271   GCTraceTime(Debug, gc, phases) trace("Finalize Marking", g1h->gc_timer_cm());
2272 
2273   g1h->ensure_parsability(false);
2274 
2275   // this is remark, so we'll use up all active threads
2276   uint active_workers = g1h->workers()->active_workers();
2277   set_concurrency_and_phase(active_workers, false /* concurrent */);
2278   // Leave _parallel_marking_threads at its
2279   // value originally calculated in the G1ConcurrentMark
2280   // constructor and pass values of the active workers
2281   // through the gang in the task.
2282 
2283   {
2284     StrongRootsScope srs(active_workers);
2285 
2286     G1CMRemarkTask remarkTask(this, active_workers);
2287     // We will start all available threads, even if we decide that the
2288     // active_workers will be fewer. The extra ones will just bail out
2289     // immediately.
2290     g1h->workers()->run_task(&remarkTask);
2291   }
2292 
2293   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2294   guarantee(has_overflown() ||
2295             satb_mq_set.completed_buffers_num() == 0,
2296             "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
2297             BOOL_TO_STR(has_overflown()),
2298             satb_mq_set.completed_buffers_num());
2299 
2300   print_stats();
2301 }
2302 
2303 void G1ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2304   // Note we are overriding the read-only view of the prev map here, via
2305   // the cast.
2306   ((G1CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2307 }
2308 
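     // Claims the next region to be scanned by atomically moving the global
     // finger past it. Returns NULL when the claimed region turns out to be
     // empty (NTAMS at bottom) or when the finger has reached the end of the
     // heap; on a lost CAS the loop simply retries with the updated finger.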
2309 HeapRegion*
2310 G1ConcurrentMark::claim_region(uint worker_id) {
2311   // "checkpoint" the finger
2312   HeapWord* finger = _finger;
2313 
2314   // _heap_end will not change underneath our feet; it only changes at
2315   // yield points.
2316   while (finger < _heap_end) {
2317     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2318 
2319     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2320 
2321     // heap_region_containing() above may return NULL as we always claim
2322     // up to the end of the heap. In this case, just jump to the next region.
2323     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2324 
2325     // Is the gap between reading the finger and doing the CAS too long?
2326     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2327     if (res == finger && curr_region != NULL) {
2328       // we succeeded
2329       HeapWord*   bottom        = curr_region->bottom();
2330       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2331 
2332       // Notice that _finger == end cannot be guaranteed here since
2333       // someone else might have moved the finger even further.
2334       assert(_finger >= end, "the finger should have moved forward");
2335 
2336       if (limit > bottom) {
2337         return curr_region;
2338       } else {
2339         assert(limit == bottom,
2340                "the region limit should be at bottom");
2341         // we return NULL and the caller should try calling
2342         // claim_region() again.
2343         return NULL;
2344       }
2345     } else {
2346       assert(_finger > finger, "the finger should have moved forward");
2347       // read it again
2348       finger = _finger;
2349     }
2350   }
2351 
2352   return NULL;
2353 }
2354 
2355 #ifndef PRODUCT
2356 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
2357 private:
2358   G1CollectedHeap* _g1h;
2359   const char* _phase;
2360   int _info;
2361 
2362 public:
2363   VerifyNoCSetOops(const char* phase, int info = -1) :
2364     _g1h(G1CollectedHeap::heap()),
2365     _phase(phase),
2366     _info(info)
2367   { }
2368 
2369   void operator()(oop obj) const {
2370     guarantee(obj->is_oop(),
2371               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2372               p2i(obj), _phase, _info);
2373     guarantee(!_g1h->obj_in_cs(obj),
2374               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2375               p2i(obj), _phase, _info);
2376   }
2377 };
2378 
2379 void G1ConcurrentMark::verify_no_cset_oops() {
2380   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2381   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2382     return;
2383   }
2384 
2385   // Verify entries on the global mark stack
2386   _markStack.iterate(VerifyNoCSetOops("Stack"));
2387 
2388   // Verify entries on the task queues
2389   for (uint i = 0; i < _max_worker_id; ++i) {
2390     G1CMTaskQueue* queue = _task_queues->queue(i);
2391     queue->iterate(VerifyNoCSetOops("Queue", i));
2392   }
2393 
2394   // Verify the global finger
2395   HeapWord* global_finger = finger();
2396   if (global_finger != NULL && global_finger < _heap_end) {
2397     // Since we always iterate over all regions, we might get a NULL HeapRegion
2398     // here.
2399     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
2400     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2401               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
2402               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
2403   }
2404 
2405   // Verify the task fingers
2406   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2407   for (uint i = 0; i < parallel_marking_threads(); ++i) {
2408     G1CMTask* task = _tasks[i];
2409     HeapWord* task_finger = task->finger();
2410     if (task_finger != NULL && task_finger < _heap_end) {
2411       // See above note on the global finger verification.
2412       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
2413       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2414                 !task_hr->in_collection_set(),
2415                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2416                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
2417     }
2418   }
2419 }
2420 #endif // PRODUCT
2421 
2422 // Aggregate the counting data that was constructed concurrently
2423 // with marking.
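     // Each worker has its own marked-bytes array and card bitmap ("stripe");
     // this closure sums the per-worker marked bytes for a region and ORs the
     // per-worker card bits into the global card bitmap.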
2424 class AggregateCountDataHRClosure: public HeapRegionClosure {
2425   G1CollectedHeap* _g1h;
2426   G1ConcurrentMark* _cm;
2427   CardTableModRefBS* _ct_bs;
2428   BitMap* _cm_card_bm;
2429   uint _max_worker_id;
2430 
2431  public:
2432   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2433                               BitMap* cm_card_bm,
2434                               uint max_worker_id) :
2435     _g1h(g1h), _cm(g1h->concurrent_mark()),
2436     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2437     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2438 
2439   bool doHeapRegion(HeapRegion* hr) {
2440     HeapWord* start = hr->bottom();
2441     HeapWord* limit = hr->next_top_at_mark_start();
2442     HeapWord* end = hr->end();
2443 
2444     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2445            "Preconditions not met - "
2446            "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2447            "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2448            p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2449 
2450     assert(hr->next_marked_bytes() == 0, "Precondition");
2451 
2452     if (start == limit) {
2453       // NTAMS of this region has not been set so nothing to do.
2454       return false;
2455     }
2456 
2457     // 'start' should be in the heap.
2458     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2459     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2460     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2461 
2462     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2463     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2464     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2465 
2466     // If ntams is not card aligned then we bump the card bitmap index
2467     // for limit so that we get all the cards spanned by
2468     // the object ending at ntams.
2469     // Note: if this is the last region in the heap then ntams
2470     // could be actually just beyond the end of the heap;
2471     // limit_idx will then correspond to a (non-existent) card
2472     // that is also outside the heap.
2473     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2474       limit_idx += 1;
2475     }
2476 
2477     assert(limit_idx <= end_idx, "or else use atomics");
2478 
2479     // Aggregate the "stripe" in the count data associated with hr.
2480     uint hrm_index = hr->hrm_index();
2481     size_t marked_bytes = 0;
2482 
2483     for (uint i = 0; i < _max_worker_id; i += 1) {
2484       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2485       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2486 
2487       // Fetch the marked_bytes in this region for task i and
2488       // add it to the running total for this region.
2489       marked_bytes += marked_bytes_array[hrm_index];
2490 
2491       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2492       // into the global card bitmap.
2493       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2494 
2495       while (scan_idx < limit_idx) {
2496         assert(task_card_bm->at(scan_idx) == true, "should be");
2497         _cm_card_bm->set_bit(scan_idx);
2498         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2499 
2500         // BitMap::get_next_one_offset() can handle the case when
2501         // its left_offset parameter is greater than its right_offset
2502         // parameter. It does, however, have an early exit if
2503         // left_offset == right_offset. So let's limit the value
2504         // passed in for left offset here.
2505         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2506         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2507       }
2508     }
2509 
2510     // Update the marked bytes for this region.
2511     hr->add_to_marked_bytes(marked_bytes);
2512 
2513     // Next heap region
2514     return false;
2515   }
2516 };
2517 
2518 class G1AggregateCountDataTask: public AbstractGangTask {
2519 protected:
2520   G1CollectedHeap* _g1h;
2521   G1ConcurrentMark* _cm;
2522   BitMap* _cm_card_bm;
2523   uint _max_worker_id;
2524   uint _active_workers;
2525   HeapRegionClaimer _hrclaimer;
2526 
2527 public:
2528   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2529                            G1ConcurrentMark* cm,
2530                            BitMap* cm_card_bm,
2531                            uint max_worker_id,
2532                            uint n_workers) :
2533       AbstractGangTask("Count Aggregation"),
2534       _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2535       _max_worker_id(max_worker_id),
2536       _active_workers(n_workers),
2537       _hrclaimer(_active_workers) {
2538   }
2539 
2540   void work(uint worker_id) {
2541     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
2542 
2543     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
2544   }
2545 };
2546 
2547 
2548 void G1ConcurrentMark::aggregate_count_data() {
2549   uint n_workers = _g1h->workers()->active_workers();
2550 
2551   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
2552                                            _max_worker_id, n_workers);
2553 
2554   _g1h->workers()->run_task(&g1_par_agg_task);
2555 }
2556 
2557 // Clear the per-worker arrays used to store the per-region counting data
2558 void G1ConcurrentMark::clear_all_count_data() {
2559   // Clear the global card bitmap - it will be filled during
2560   // liveness count aggregation (during remark) and the
2561   // final counting task.
2562   _card_bm.clear();
2563 
2564   // Clear the global region bitmap - it will be filled as part
2565   // of the final counting task.
2566   _region_bm.clear();
2567 
2568   uint max_regions = _g1h->max_regions();
2569   assert(_max_worker_id > 0, "uninitialized");
2570 
2571   for (uint i = 0; i < _max_worker_id; i += 1) {
2572     BitMap* task_card_bm = count_card_bitmap_for(i);
2573     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
2574 
2575     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
2576     assert(marked_bytes_array != NULL, "uninitialized");
2577 
2578     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
2579     task_card_bm->clear();
2580   }
2581 }
2582 
2583 void G1ConcurrentMark::print_stats() {
2584   if (!log_is_enabled(Debug, gc, stats)) {
2585     return;
2586   }
2587   log_debug(gc, stats)("---------------------------------------------------------------------");
2588   for (size_t i = 0; i < _active_tasks; ++i) {
2589     _tasks[i]->print_stats();
2590     log_debug(gc, stats)("---------------------------------------------------------------------");
2591   }
2592 }
2593 
2594 // abandon current marking iteration due to a Full GC
2595 void G1ConcurrentMark::abort() {
2596   if (!cmThread()->during_cycle() || _has_aborted) {
2597     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2598     return;
2599   }
2600 
2601   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2602   // concurrent bitmap clearing.
2603   _nextMarkBitMap->clearAll();
2604 
2605   // Note we cannot clear the previous marking bitmap here
2606   // since VerifyDuringGC verifies the objects marked during
2607   // a full GC against the previous bitmap.
2608 
2609   // Clear the liveness counting data
2610   clear_all_count_data();
2611   // Empty mark stack
2612   reset_marking_state();
2613   for (uint i = 0; i < _max_worker_id; ++i) {
2614     _tasks[i]->clear_region_fields();
2615   }
2616   _first_overflow_barrier_sync.abort();
2617   _second_overflow_barrier_sync.abort();
2618   _has_aborted = true;
2619 
2620   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2621   satb_mq_set.abandon_partial_marking();
2622   // This can be called either during or outside marking, we'll read
2623   // the expected_active value from the SATB queue set.
2624   satb_mq_set.set_active_all_threads(
2625                                  false, /* new active value */
2626                                  satb_mq_set.is_active() /* expected_active */);
2627 
2628   _g1h->trace_heap_after_concurrent_cycle();
2629 
2630   _g1h->register_concurrent_cycle_end();
2631 }
2632 
2633 static void print_ms_time_info(const char* prefix, const char* name,
2634                                NumberSeq& ns) {
2635   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2636                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2637   if (ns.num() > 0) {
2638     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2639                            prefix, ns.sd(), ns.maximum());
2640   }
2641 }
2642 
2643 void G1ConcurrentMark::print_summary_info() {
2644   LogHandle(gc, marking) log;
2645   if (!log.is_trace()) {
2646     return;
2647   }
2648 
2649   log.trace(" Concurrent marking:");
2650   print_ms_time_info("  ", "init marks", _init_times);
2651   print_ms_time_info("  ", "remarks", _remark_times);
2652   {
2653     print_ms_time_info("     ", "final marks", _remark_mark_times);
2654     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2655 
2656   }
2657   print_ms_time_info("  ", "cleanups", _cleanup_times);
2658   log.trace("    Final counting total time = %8.2f s (avg = %8.2f ms).",
2659             _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2660   if (G1ScrubRemSets) {
2661     log.trace("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2662               _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2663   }
2664   log.trace("  Total stop_world time = %8.2f s.",
2665             (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
2666   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2667             cmThread()->vtime_accum(), cmThread()->vtime_mark_accum());
2668 }
2669 
2670 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2671   _parallel_workers->print_worker_threads_on(st);
2672 }
2673 
2674 void G1ConcurrentMark::print_on_error(outputStream* st) const {
2675   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2676       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
2677   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
2678   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
2679 }
2680 
2681 // We take a break if someone is trying to stop the world.
2682 bool G1ConcurrentMark::do_yield_check(uint worker_id) {
2683   if (SuspendibleThreadSet::should_yield()) {
2684     SuspendibleThreadSet::yield();
2685     return true;
2686   } else {
2687     return false;
2688   }
2689 }
2690 
2691 // Closure for iteration over bitmaps
2692 class G1CMBitMapClosure : public BitMapClosure {
2693 private:
2694   // the bitmap that is being iterated over
2695   G1CMBitMap*                 _nextMarkBitMap;
2696   G1ConcurrentMark*           _cm;
2697   G1CMTask*                   _task;
2698 
2699 public:
2700   G1CMBitMapClosure(G1CMTask *task, G1ConcurrentMark* cm, G1CMBitMap* nextMarkBitMap) :
2701     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2702 
2703   bool do_bit(size_t offset) {
2704     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2705     assert(_nextMarkBitMap->isMarked(addr), "invariant");
2706     assert( addr < _cm->finger(), "invariant");
2707     assert(addr >= _task->finger(), "invariant");
2708 
2709     // We move that task's local finger along.
2710     _task->move_finger_to(addr);
2711 
2712     _task->scan_object(oop(addr));
2713     // we only partially drain the local queue and global stack
2714     _task->drain_local_queue(true);
2715     _task->drain_global_stack(true);
2716 
2717     // if the has_aborted flag has been raised, we need to bail out of
2718     // the iteration
2719     return !_task->has_aborted();
2720   }
2721 };
2722 
2723 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2724   ReferenceProcessor* result = g1h->ref_processor_cm();
2725   assert(result != NULL, "CM reference processor should not be NULL");
2726   return result;
2727 }
2728 
2729 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2730                                G1ConcurrentMark* cm,
2731                                G1CMTask* task)
2732   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2733     _g1h(g1h), _cm(cm), _task(task)
2734 { }
2735 
2736 void G1CMTask::setup_for_region(HeapRegion* hr) {
2737   assert(hr != NULL,
2738         "claim_region() should have filtered out NULL regions");
2739   _curr_region  = hr;
2740   _finger       = hr->bottom();
2741   update_region_limit();
2742 }
2743 
2744 void G1CMTask::update_region_limit() {
2745   HeapRegion* hr            = _curr_region;
2746   HeapWord* bottom          = hr->bottom();
2747   HeapWord* limit           = hr->next_top_at_mark_start();
2748 
2749   if (limit == bottom) {
2750     // The region was collected underneath our feet.
2751     // We set the finger to bottom to ensure that the bitmap
2752     // iteration that will follow this will not do anything.
2753     // (this is not a condition that holds when we set the region up,
2754     // as the region is not supposed to be empty in the first place)
2755     _finger = bottom;
2756   } else if (limit >= _region_limit) {
2757     assert(limit >= _finger, "peace of mind");
2758   } else {
2759     assert(limit < _region_limit, "only way to get here");
2760     // This can happen under some pretty unusual circumstances.  An
2761     // evacuation pause empties the region underneath our feet (NTAMS
2762     // at bottom). We then do some allocation in the region (NTAMS
2763     // stays at bottom), followed by the region being used as a GC
2764     // alloc region (NTAMS will move to top() and the objects
2765     // originally below it will be grayed). All objects now marked in
2766     // the region are explicitly grayed, if below the global finger,
2767     // and in fact we do not need to scan anything else. So, we simply
2768     // set _finger to be limit to ensure that the bitmap iteration
2769     // doesn't do anything.
2770     _finger = limit;
2771   }
2772 
2773   _region_limit = limit;
2774 }
2775 
2776 void G1CMTask::giveup_current_region() {
2777   assert(_curr_region != NULL, "invariant");
2778   clear_region_fields();
2779 }
2780 
2781 void G1CMTask::clear_region_fields() {
2782   // Set these three fields to the values that indicate that we're not
2783   // holding on to a region.
2784   _curr_region   = NULL;
2785   _finger        = NULL;
2786   _region_limit  = NULL;
2787 }
2788 
2789 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2790   if (cm_oop_closure == NULL) {
2791     assert(_cm_oop_closure != NULL, "invariant");
2792   } else {
2793     assert(_cm_oop_closure == NULL, "invariant");
2794   }
2795   _cm_oop_closure = cm_oop_closure;
2796 }
2797 
2798 void G1CMTask::reset(G1CMBitMap* nextMarkBitMap) {
2799   guarantee(nextMarkBitMap != NULL, "invariant");
2800   _nextMarkBitMap                = nextMarkBitMap;
2801   clear_region_fields();
2802 
2803   _calls                         = 0;
2804   _elapsed_time_ms               = 0.0;
2805   _termination_time_ms           = 0.0;
2806   _termination_start_time_ms     = 0.0;
2807 }
2808 
2809 bool G1CMTask::should_exit_termination() {
2810   regular_clock_call();
2811   // This is called when we are in the termination protocol. We should
2812   // quit if, for some reason, this task wants to abort or the global
2813   // stack is not empty (this means that we can get work from it).
2814   return !_cm->mark_stack_empty() || has_aborted();
2815 }
2816 
2817 void G1CMTask::reached_limit() {
2818   assert(_words_scanned >= _words_scanned_limit ||
2819          _refs_reached >= _refs_reached_limit,
2820          "shouldn't have been called otherwise");
2821   regular_clock_call();
2822 }
2823 
2824 void G1CMTask::regular_clock_call() {
2825   if (has_aborted()) return;
2826 
2827   // First, we need to recalculate the words scanned and refs reached
2828   // limits for the next clock call.
2829   recalculate_limits();
2830 
2831   // During the regular clock call we do the following
2832 
2833   // (1) If an overflow has been flagged, then we abort.
2834   if (_cm->has_overflown()) {
2835     set_has_aborted();
2836     return;
2837   }
2838 
2839   // If we are not concurrent (i.e. we're doing remark) we don't need
2840   // to check anything else. The other steps are only needed during
2841   // the concurrent marking phase.
2842   if (!concurrent()) return;
2843 
2844   // (2) If marking has been aborted for Full GC, then we also abort.
2845   if (_cm->has_aborted()) {
2846     set_has_aborted();
2847     return;
2848   }
2849 
2850   double curr_time_ms = os::elapsedVTime() * 1000.0;
2851 
2852   // (3) We check whether we should yield. If we have to, then we abort.
2853   if (SuspendibleThreadSet::should_yield()) {
2854     // We should yield. To do this we abort the task. The caller is
2855     // responsible for yielding.
2856     set_has_aborted();
2857     return;
2858   }
2859 
2860   // (4) We check whether we've reached our time quota. If we have,
2861   // then we abort.
2862   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2863   if (elapsed_time_ms > _time_target_ms) {
2864     set_has_aborted();
2865     _has_timed_out = true;
2866     return;
2867   }
2868 
2869   // (5) Finally, we check whether there are enough completed SATB
2870   // buffers available for processing. If there are, we abort.
2871   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2872   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2873     // We do need to process SATB buffers, so we'll abort and restart
2874     // the marking task to do so.
2875     set_has_aborted();
2876     return;
2877   }
2878 }
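// A minimal sketch, not the actual scanning code, of how the work-based
// scheme above is driven: the scanning path bumps _words_scanned and
// _refs_reached and, once either counter passes its limit, calls
// reached_limit(), which in turn invokes regular_clock_call(). The helper
// name below is purely illustrative.
//
//   void G1CMTask::sketch_note_words_scanned(size_t words) {
//     _words_scanned += words;
//     if (_words_scanned >= _words_scanned_limit ||
//         _refs_reached  >= _refs_reached_limit) {
//       reached_limit();   // asserts a limit was hit, then calls the clock
//     }
//   }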
2879 
2880 void G1CMTask::recalculate_limits() {
2881   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2882   _words_scanned_limit      = _real_words_scanned_limit;
2883 
2884   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2885   _refs_reached_limit       = _real_refs_reached_limit;
2886 }
2887 
2888 void G1CMTask::decrease_limits() {
2889   // This is called when we believe that we're going to do an infrequent
2890   // operation which will increase the per-byte scanned cost (i.e. move
2891   // entries to/from the global stack). It basically tries to decrease the
2892   // scanning limit so that the clock is called earlier.
2893 
2894   _words_scanned_limit = _real_words_scanned_limit -
2895     3 * words_scanned_period / 4;
2896   _refs_reached_limit  = _real_refs_reached_limit -
2897     3 * refs_reached_period / 4;
2898 }
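// Worked example of the effect of decrease_limits() (using a hypothetical
// words_scanned_period of 12K words): recalculate_limits() would normally
// allow 12K more words to be scanned before the next clock call, but after
// decrease_limits() only words_scanned_period / 4 = 3K further words (and,
// analogously, refs_reached_period / 4 further references) are scanned
// before regular_clock_call() is invoked again.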
2899 
2900 void G1CMTask::move_entries_to_global_stack() {
2901   // local array where we'll store the entries that will be popped
2902   // from the local queue
2903   oop buffer[global_stack_transfer_size];
2904 
2905   int n = 0;
2906   oop obj;
2907   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
2908     buffer[n] = obj;
2909     ++n;
2910   }
2911 
2912   if (n > 0) {
2913     // we popped at least one entry from the local queue
2914 
2915     if (!_cm->mark_stack_push(buffer, n)) {
2916       set_has_aborted();
2917     }
2918   }
2919 
2920   // this operation was quite expensive, so decrease the limits
2921   decrease_limits();
2922 }
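// A hedged sketch of the kind of caller the helper above serves: when a
// push onto the local task queue fails because the queue is full, entries
// are shipped to the global stack and the push is retried. The real push
// path is implemented elsewhere; the snippet below is illustrative only.
//
//   if (!_task_queue->push(obj)) {
//     move_entries_to_global_stack();  // makes room in the local queue
//     bool success = _task_queue->push(obj);
//     assert(success, "the local queue was just partially drained");
//   }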
2923 
2924 void G1CMTask::get_entries_from_global_stack() {
2925   // local array where we'll store the entries that will be popped
2926   // from the global stack.
2927   oop buffer[global_stack_transfer_size];
2928   int n;
2929   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
2930   assert(n <= global_stack_transfer_size,
2931          "we should not pop more than the given limit");
2932   if (n > 0) {
2933     // yes, we did actually pop at least one entry
2934     for (int i = 0; i < n; ++i) {
2935       bool success = _task_queue->push(buffer[i]);
2936       // We only call this when the local queue is empty or under a
2937       // given target limit. So, we do not expect this push to fail.
2938       assert(success, "invariant");
2939     }
2940   }
2941 
2942   // this operation was quite expensive, so decrease the limits
2943   decrease_limits();
2944 }
2945 
2946 void G1CMTask::drain_local_queue(bool partially) {
2947   if (has_aborted()) return;
2948 
2949   // Decide what the target size is, depending on whether we're going to
2950   // drain it partially (so that other tasks can steal if they run out
2951   // of things to do) or totally (at the very end).
2952   size_t target_size;
2953   if (partially) {
2954     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2955   } else {
2956     target_size = 0;
2957   }
2958 
2959   if (_task_queue->size() > target_size) {
2960     oop obj;
2961     bool ret = _task_queue->pop_local(obj);
2962     while (ret) {
2963       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
2964       assert(!_g1h->is_on_master_free_list(
2965                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
2966 
2967       scan_object(obj);
2968 
2969       if (_task_queue->size() <= target_size || has_aborted()) {
2970         ret = false;
2971       } else {
2972         ret = _task_queue->pop_local(obj);
2973       }
2974     }
2975   }
2976 }
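// Worked example of the partial-drain target above: if the task queue's
// max_elems() were 3072 and GCDrainStackTargetSize were 64, a partial
// drain stops once the queue is down to MIN2(3072 / 3, 64) = 64 entries,
// deliberately leaving entries behind for other tasks to steal.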
2977 
2978 void G1CMTask::drain_global_stack(bool partially) {
2979   if (has_aborted()) return;
2980 
2981   // We have a policy to drain the local queue before we attempt to
2982   // drain the global stack.
2983   assert(partially || _task_queue->size() == 0, "invariant");
2984 
2985   // Decide what the target size is, depending on whether we're going to
2986   // drain it partially (so that other tasks can steal if they run out
2987   // of things to do) or totally (at the very end).  Notice that,
2988   // because we move entries from the global stack in chunks or
2989   // because another task might be doing the same, we might in fact
2990   // drop below the target. But, this is not a problem.
2991   size_t target_size;
2992   if (partially) {
2993     target_size = _cm->partial_mark_stack_size_target();
2994   } else {
2995     target_size = 0;
2996   }
2997 
2998   if (_cm->mark_stack_size() > target_size) {
2999     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3000       get_entries_from_global_stack();
3001       drain_local_queue(partially);
3002     }
3003   }
3004 }
3005 
3006 // The SATB queue code makes several assumptions about whether the par or
3007 // non-par versions of its methods are called. This is why some of the code
3008 // is replicated. We should really get rid of the single-threaded version
3009 // of the code to simplify things.
3010 void G1CMTask::drain_satb_buffers() {
3011   if (has_aborted()) return;
3012 
3013   // We set this so that the regular clock knows that we're in the
3014   // middle of draining buffers and doesn't set the abort flag when it
3015   // notices that SATB buffers are available for draining. It'd be
3016   // very counterproductive if it did that. :-)
3017   _draining_satb_buffers = true;
3018 
3019   G1CMSATBBufferClosure satb_cl(this, _g1h);
3020   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3021 
3022   // This keeps claiming and applying the closure to completed buffers
3023   // until we run out of buffers or we need to abort.
3024   while (!has_aborted() &&
3025          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3026     regular_clock_call();
3027   }
3028 
3029   _draining_satb_buffers = false;
3030 
3031   assert(has_aborted() ||
3032          concurrent() ||
3033          satb_mq_set.completed_buffers_num() == 0, "invariant");
3034 
3035   // Again, this was a potentially expensive operation, so decrease the
3036   // limits to get the regular clock call earlier.
3037   decrease_limits();
3038 }
3039 
3040 void G1CMTask::print_stats() {
3041   log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
3042                        _worker_id, _calls);
3043   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3044                        _elapsed_time_ms, _termination_time_ms);
3045   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3046                        _step_times_ms.num(), _step_times_ms.avg(),
3047                        _step_times_ms.sd());
3048   log_debug(gc, stats)("                    max = %1.2lfms, total = %1.2lfms",
3049                        _step_times_ms.maximum(), _step_times_ms.sum());
3050 }
3051 
3052 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3053   return _task_queues->steal(worker_id, hash_seed, obj);
3054 }
3055 
3056 /*****************************************************************************
3057 
3058     The do_marking_step(time_target_ms, ...) method is the building
3059     block of the parallel marking framework. It can be called in parallel
3060     with other invocations of do_marking_step() on different tasks
3061     (but only one per task, obviously) and concurrently with the
3062     mutator threads, or during remark, hence it eliminates the need
3063     for two versions of the code. When called during remark, it will
3064     pick up from where the task left off during the concurrent marking
3065     phase. Interestingly, tasks are also claimable during evacuation
3066     pauses, since do_marking_step() ensures that it aborts before
3067     it needs to yield.
3068 
3069     The data structures that it uses to do marking work are the
3070     following:
3071 
3072       (1) Marking Bitmap. If there are gray objects that appear only
3073       on the bitmap (this happens either when dealing with an overflow
3074       or when the initial marking phase has simply marked the roots
3075       and didn't push them on the stack), then tasks claim heap
3076       regions whose bitmap they then scan to find gray objects. A
3077       global finger indicates where the end of the last claimed region
3078       is. A local finger indicates how far into the region a task has
3079       scanned. The two fingers are used to determine how to gray an
3080       object (i.e. whether simply marking it is OK, as it will be
3081       visited by a task in the future, or whether it needs to be also
3082       pushed on a stack).
3083 
3084       (2) Local Queue. The local queue of the task, which the task can
3085       access reasonably efficiently. Other tasks can steal from
3086       it when they run out of work. Throughout the marking phase, a
3087       task attempts to keep its local queue short but not totally
3088       empty, so that entries are available for stealing by other
3089       tasks. Only when there is no more work does a task totally
3090       drain its local queue.
3091 
3092       (3) Global Mark Stack. This handles local queue overflow. During
3093       marking only sets of entries are moved between it and the local
3094       queues, as access to it requires a mutex, and more fine-grained
3095       interaction with it might cause contention. If it
3096       overflows, then the marking phase should restart and iterate
3097       over the bitmap to identify gray objects. Throughout the marking
3098       phase, tasks attempt to keep the global mark stack at a small
3099       length but not totally empty, so that entries are available for
3100       popping by other tasks. Only when there is no more work do tasks
3101       totally drain the global mark stack.
3102 
3103       (4) SATB Buffer Queue. This is where completed SATB buffers are
3104       made available. Buffers are regularly removed from this queue
3105       and scanned for roots, so that the queue doesn't get too
3106       long. During remark, all completed buffers are processed, as
3107       well as the filled in parts of any uncompleted buffers.
3108 
3109     The do_marking_step() method tries to abort when the time target
3110     has been reached. There are a few other cases when the
3111     do_marking_step() method also aborts:
3112 
3113       (1) When the marking phase has been aborted (after a Full GC).
3114 
3115       (2) When a global overflow (on the global stack) has been
3116       triggered. Before the task aborts, it will actually sync up with
3117       the other tasks to ensure that all the marking data structures
3118       (local queues, stacks, fingers etc.)  are re-initialized so that
3119       when do_marking_step() completes, the marking phase can
3120       immediately restart.
3121 
3122       (3) When enough completed SATB buffers are available. The
3123       do_marking_step() method only tries to drain SATB buffers right
3124       at the beginning. So, if enough buffers are available, the
3125       marking step aborts and the SATB buffers are processed at
3126       the beginning of the next invocation.
3127 
3128       (4) To yield. When we have to yield, we abort and do the yield
3129       right at the end of do_marking_step(). This saves us a lot of
3130       hassle: by yielding we might allow a Full GC, in which case
3131       objects will be compacted underneath our feet, the heap might
3132       shrink, and so on. We avoid having to check for all of this by
3133       just aborting and doing the yield right at the end.
3134 
3135     From the above it follows that do_marking_step() should be called in a
3136     loop, or otherwise regularly, until it completes (see the sketch below).
3137 
3138     If a marking step completes without its has_aborted() flag being
3139     true, it means it has completed the current marking phase (and
3140     also all other marking tasks have done so and have all synced up).
3141 
3142     A method called regular_clock_call() is invoked "regularly" (in
3143     sub-ms intervals) throughout marking. It is this clock method that
3144     checks all the abort conditions which were mentioned above and
3145     decides when the task should abort. A work-based scheme is used to
3146     trigger this clock method: when the number of object words the
3147     marking phase has scanned or the number of references the marking
3148     phase has visited reaches a given limit. Additional invocations of
3149     the clock method have been planted in a few other strategic places
3150     too. The initial reason for the clock method was to avoid calling
3151     vtime too regularly, as it is quite expensive. So, once it was in
3152     place, it was natural to piggy-back all the other conditions on it
3153     too and not constantly check them throughout the code.
3154 
3155     If do_termination is true then do_marking_step will enter its
3156     termination protocol.
3157 
3158     The value of is_serial must be true when do_marking_step is being
3159     called serially (i.e. by the VMThread) and do_marking_step should
3160     skip any synchronization in the termination and overflow code.
3161     Examples include the serial remark code and the serial reference
3162     processing closures.
3163 
3164     The value of is_serial must be false when do_marking_step is
3165     being called by any of the worker threads in a work gang.
3166     Examples include the concurrent marking code (CMMarkingTask),
3167     the MT remark code, and the MT reference processing closures.
3168 
3169  *****************************************************************************/
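// A minimal sketch, per the comment above, of how a parallel worker might
// drive do_marking_step() (the actual drivers add timing, yield handling
// and back-off logic around the loop; "task", "cm" and the 10 ms time
// target are illustrative only):
//
//   do {
//     task->do_marking_step(10.0   /* time_target_ms */,
//                           true   /* do_termination */,
//                           false  /* is_serial */);
//   } while (!cm->has_aborted() && task->has_aborted());
//
// i.e. keep re-invoking the step until it completes without aborting, or
// until marking as a whole has been aborted (e.g. by a Full GC).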
3170 
3171 void G1CMTask::do_marking_step(double time_target_ms,
3172                                bool do_termination,
3173                                bool is_serial) {
3174   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3175   assert(concurrent() == _cm->concurrent(), "they should be the same");
3176 
3177   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3178   assert(_task_queues != NULL, "invariant");
3179   assert(_task_queue != NULL, "invariant");
3180   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3181 
3182   assert(!_claimed,
3183          "only one thread should claim this task at any one time");
3184 
3185   // OK, this doesn't safeguard against all possible scenarios, as it is
3186   // possible for two threads to set the _claimed flag at the same
3187   // time. But it is only for debugging purposes anyway and it will
3188   // catch most problems.
3189   _claimed = true;
3190 
3191   _start_time_ms = os::elapsedVTime() * 1000.0;
3192 
3193   // If do_stealing is true then do_marking_step will attempt to
3194   // steal work from the other G1CMTasks. It only makes sense to
3195   // enable stealing when the termination protocol is enabled
3196   // and do_marking_step() is not being called serially.
3197   bool do_stealing = do_termination && !is_serial;
3198 
3199   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
3200   _time_target_ms = time_target_ms - diff_prediction_ms;
3201 
3202   // set up the variables that are used in the work-based scheme to
3203   // call the regular clock method
3204   _words_scanned = 0;
3205   _refs_reached  = 0;
3206   recalculate_limits();
3207 
3208   // clear all flags
3209   clear_has_aborted();
3210   _has_timed_out = false;
3211   _draining_satb_buffers = false;
3212 
3213   ++_calls;
3214 
3215   // Set up the bitmap and oop closures. Anything that uses them is
3216   // eventually called from this method, so it is OK to allocate these
3217   // locally, on this method's stack.
3218   G1CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3219   G1CMOopClosure    cm_oop_closure(_g1h, _cm, this);
3220   set_cm_oop_closure(&cm_oop_closure);
3221 
3222   if (_cm->has_overflown()) {
3223     // This can happen if the mark stack overflows during a GC pause
3224     // and this task, after a yield point, restarts. We have to abort
3225     // as we need to get into the overflow protocol which happens
3226     // right at the end of this task.
3227     set_has_aborted();
3228   }
3229 
3230   // First drain any available SATB buffers. After this, we will not
3231   // look at SATB buffers before the next invocation of this method.
3232   // If enough completed SATB buffers are queued up, the regular clock
3233   // will abort this task so that it restarts.
3234   drain_satb_buffers();
3235   // ...then partially drain the local queue and the global stack
3236   drain_local_queue(true);
3237   drain_global_stack(true);
3238 
3239   do {
3240     if (!has_aborted() && _curr_region != NULL) {
3241       // This means that we're already holding on to a region.
3242       assert(_finger != NULL, "if region is not NULL, then the finger "
3243              "should not be NULL either");
3244 
3245       // We might have restarted this task after an evacuation pause
3246       // which might have evacuated the region we're holding on to
3247       // underneath our feet. Let's read its limit again to make sure
3248       // that we do not iterate over a region of the heap that
3249       // contains garbage (update_region_limit() will also move
3250       // _finger to the start of the region if it is found empty).
3251       update_region_limit();
3252       // We will start from _finger not from the start of the region,
3253       // as we might be restarting this task after aborting half-way
3254       // through scanning this region. In this case, _finger points to
3255       // the address where we last found a marked object. If this is a
3256       // fresh region, _finger points to start().
3257       MemRegion mr = MemRegion(_finger, _region_limit);
3258 
3259       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
3260              "humongous regions should go around loop once only");
3261 
3262       // Some special cases:
3263       // If the memory region is empty, we can just give up the region.
3264       // If the current region is humongous then we only need to check
3265       // the bitmap for the bit associated with the start of the object,
3266       // scan the object if it's live, and give up the region.
3267       // Otherwise, let's iterate over the bitmap of the part of the region
3268       // that is left.
3269       // If the iteration is successful, give up the region.
3270       if (mr.is_empty()) {
3271         giveup_current_region();
3272         regular_clock_call();
3273       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
3274         if (_nextMarkBitMap->isMarked(mr.start())) {
3275           // The object is marked - apply the closure
3276           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
3277           bitmap_closure.do_bit(offset);
3278         }
3279         // Even if this task aborted while scanning the humongous object
3280         // we can (and should) give up the current region.
3281         giveup_current_region();
3282         regular_clock_call();
3283       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3284         giveup_current_region();
3285         regular_clock_call();
3286       } else {
3287         assert(has_aborted(), "currently the only way to do so");
3288         // The only way to abort the bitmap iteration is to return
3289         // false from the do_bit() method. However, inside the
3290         // do_bit() method we move the _finger to point to the
3291         // object currently being looked at. So, if we bail out, we
3292         // have definitely set _finger to something non-null.
3293         assert(_finger != NULL, "invariant");
3294 
3295         // Region iteration was actually aborted. So now _finger
3296         // points to the address of the object we last scanned. If we
3297         // leave it there, when we restart this task, we will rescan
3298         // the object. It is easy to avoid this. We move the finger by
3299         // enough to point to the next possible object header (the
3300         // bitmap knows by how much we need to move it as it knows its
3301         // granularity).
3302         assert(_finger < _region_limit, "invariant");
3303         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
3304         // Check if bitmap iteration was aborted while scanning the last object
3305         if (new_finger >= _region_limit) {
3306           giveup_current_region();
3307         } else {
3308           move_finger_to(new_finger);
3309         }
3310       }
3311     }
3312     // At this point we have either completed iterating over the
3313     // region we were holding on to, or we have aborted.
3314 
3315     // We then partially drain the local queue and the global stack.
3316     // (Do we really need this?)
3317     drain_local_queue(true);
3318     drain_global_stack(true);
3319 
3320     // Read the note on the claim_region() method on why it might
3321     // return NULL with potentially more regions available for
3322     // claiming and why we have to check out_of_regions() to determine
3323     // whether we're done or not.
3324     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3325       // We are going to try to claim a new region. We should have
3326       // given up on the previous one.
3327       // Separated the asserts so that we know which one fires.
3328       assert(_curr_region  == NULL, "invariant");
3329       assert(_finger       == NULL, "invariant");
3330       assert(_region_limit == NULL, "invariant");
3331       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3332       if (claimed_region != NULL) {
3333         // Yes, we managed to claim one
3334         setup_for_region(claimed_region);
3335         assert(_curr_region == claimed_region, "invariant");
3336       }
3337       // It is important to call the regular clock here. It might take
3338       // a while to claim a region if, for example, we hit a large
3339       // block of empty regions. So we need to call the regular clock
3340       // method once round the loop to make sure it's called
3341       // frequently enough.
3342       regular_clock_call();
3343     }
3344 
3345     if (!has_aborted() && _curr_region == NULL) {
3346       assert(_cm->out_of_regions(),
3347              "at this point we should be out of regions");
3348     }
3349   } while ( _curr_region != NULL && !has_aborted());
3350 
3351   if (!has_aborted()) {
3352     // We cannot check whether the global stack is empty, since other
3353     // tasks might be pushing objects to it concurrently.
3354     assert(_cm->out_of_regions(),
3355            "at this point we should be out of regions");
3356     // Try to reduce the number of available SATB buffers so that
3357     // remark has less work to do.
3358     drain_satb_buffers();
3359   }
3360 
3361   // Since we've done everything else, we can now totally drain the
3362   // local queue and global stack.
3363   drain_local_queue(false);
3364   drain_global_stack(false);
3365 
3366   // Attempt to steal work from other tasks' queues.
3367   if (do_stealing && !has_aborted()) {
3368     // We have not aborted. This means that we have finished all that
3369     // we could. Let's try to do some stealing...
3370 
3371     // We cannot check whether the global stack is empty, since other
3372     // tasks might be pushing objects to it concurrently.
3373     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3374            "only way to reach here");
3375     while (!has_aborted()) {
3376       oop obj;
3377       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
3378         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3379                "any stolen object should be marked");
3380         scan_object(obj);
3381 
3382         // And since we're towards the end, let's totally drain the
3383         // local queue and global stack.
3384         drain_local_queue(false);
3385         drain_global_stack(false);
3386       } else {
3387         break;
3388       }
3389     }
3390   }
3391 
3392   // We still haven't aborted. Now, let's try to get into the
3393   // termination protocol.
3394   if (do_termination && !has_aborted()) {
3395     // We cannot check whether the global stack is empty, since other
3396     // tasks might be concurrently pushing objects on it.
3397     // Separated the asserts so that we know which one fires.
3398     assert(_cm->out_of_regions(), "only way to reach here");
3399     assert(_task_queue->size() == 0, "only way to reach here");
3400     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3401 
3402     // The G1CMTask class also extends the TerminatorTerminator class,
3403     // hence its should_exit_termination() method will also decide
3404     // whether to exit the termination protocol or not.
3405     bool finished = (is_serial ||
3406                      _cm->terminator()->offer_termination(this));
3407     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3408     _termination_time_ms +=
3409       termination_end_time_ms - _termination_start_time_ms;
3410 
3411     if (finished) {
3412       // We're all done.
3413 
3414       if (_worker_id == 0) {
3415         // let's allow task 0 to do this
3416         if (concurrent()) {
3417           assert(_cm->concurrent_marking_in_progress(), "invariant");
3418           // we need to set this to false before the next
3419           // safepoint. This way we ensure that the marking phase
3420           // doesn't observe any more heap expansions.
3421           _cm->clear_concurrent_marking_in_progress();
3422         }
3423       }
3424 
3425       // We can now guarantee that the global stack is empty, since
3426       // all other tasks have finished. We separated the guarantees so
3427       // that, if a condition is false, we can immediately find out
3428       // which one.
3429       guarantee(_cm->out_of_regions(), "only way to reach here");
3430       guarantee(_cm->mark_stack_empty(), "only way to reach here");
3431       guarantee(_task_queue->size() == 0, "only way to reach here");
3432       guarantee(!_cm->has_overflown(), "only way to reach here");
3433       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3434     } else {
3435       // Apparently there's more work to do. Let's abort this task. The
3436       // caller will restart it and we can hopefully find more things to do.
3437       set_has_aborted();
3438     }
3439   }
3440 
3441   // Mainly for debugging purposes to make sure that a pointer to the
3442   // closure which was stack-allocated in this frame doesn't
3443   // escape it by accident.
3444   set_cm_oop_closure(NULL);
3445   double end_time_ms = os::elapsedVTime() * 1000.0;
3446   double elapsed_time_ms = end_time_ms - _start_time_ms;
3447   // Update the step history.
3448   _step_times_ms.add(elapsed_time_ms);
3449 
3450   if (has_aborted()) {
3451     // The task was aborted for some reason.
3452     if (_has_timed_out) {
3453       double diff_ms = elapsed_time_ms - _time_target_ms;
3454       // Keep statistics of how well we did with respect to hitting
3455       // our target only if we actually timed out (if we aborted for
3456       // other reasons, then the results might get skewed).
3457       _marking_step_diffs_ms.add(diff_ms);
3458     }
3459 
3460     if (_cm->has_overflown()) {
3461       // This is the interesting one. We aborted because a global
3462       // overflow was raised. This means we have to restart the
3463       // marking phase and start iterating over regions. However, in
3464       // order to do this we have to make sure that all tasks stop
3465       // what they are doing and re-initialize in a safe manner. We
3466       // will achieve this with the use of two barrier sync points.
3467 
3468       if (!is_serial) {
3469         // We only need to enter the sync barrier if being called
3470         // from a parallel context
3471         _cm->enter_first_sync_barrier(_worker_id);
3472 
3473         // When we exit this sync barrier we know that all tasks have
3474         // stopped doing marking work. So, it's now safe to
3475         // re-initialize our data structures. At the end of this method,
3476         // task 0 will clear the global data structures.
3477       }
3478 
3479       // We clear the local state of this task...
3480       clear_region_fields();
3481 
3482       if (!is_serial) {
3483         // ...and enter the second barrier.
3484         _cm->enter_second_sync_barrier(_worker_id);
3485       }
3486       // At this point, if we're during the concurrent phase of
3487       // marking, everything has been re-initialized and we're
3488       // ready to restart.
3489     }
3490   }
3491 
3492   _claimed = false;
3493 }
3494 
3495 G1CMTask::G1CMTask(uint worker_id,
3496                    G1ConcurrentMark* cm,
3497                    size_t* marked_bytes,
3498                    BitMap* card_bm,
3499                    G1CMTaskQueue* task_queue,
3500                    G1CMTaskQueueSet* task_queues)
3501   : _g1h(G1CollectedHeap::heap()),
3502     _worker_id(worker_id), _cm(cm),
3503     _claimed(false),
3504     _nextMarkBitMap(NULL), _hash_seed(17),
3505     _task_queue(task_queue),
3506     _task_queues(task_queues),
3507     _cm_oop_closure(NULL),
3508     _marked_bytes_array(marked_bytes),
3509     _card_bm(card_bm) {
3510   guarantee(task_queue != NULL, "invariant");
3511   guarantee(task_queues != NULL, "invariant");
3512 
3513   _marking_step_diffs_ms.add(0.5);
3514 }
3515 
3516 // These are formatting macros that are used below to ensure
3517 // consistent formatting. The *_H_* versions are used to format the
3518 // header for a particular value and they should be kept consistent
3519 // with the corresponding macro. Also note that most of the macros add
3520 // the necessary whitespace (as a prefix), which makes them a bit
3521 // easier to compose; see the composition example after these definitions.
3522 
3523 // All the output lines are prefixed with this string to be able to
3524 // identify them easily in a large log file.
3525 #define G1PPRL_LINE_PREFIX            "###"
3526 
3527 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
3528 #ifdef _LP64
3529 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
3530 #else // _LP64
3531 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
3532 #endif // _LP64
3533 
3534 // For per-region info
3535 #define G1PPRL_TYPE_FORMAT            "   %-4s"
3536 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
3537 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
3538 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
3539 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
3540 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
3541 
3542 // For summary info
3543 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
3544 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
3545 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
3546 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
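// For illustration only: the macros are meant to be concatenated into a
// single format string. For example (the type string and value are made
// up), a call such as
//
//   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
//                           G1PPRL_TYPE_FORMAT
//                           G1PPRL_BYTE_FORMAT,
//                           "OLD", (size_t)1048576);
//
// expands to the format "###   %-4s  " SIZE_FORMAT_W(9) and prints a line
// roughly like "###   OLD     1048576", each macro contributing its own
// leading whitespace as noted above.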
3547 
3548 G1PrintRegionLivenessInfoClosure::
3549 G1PrintRegionLivenessInfoClosure(const char* phase_name)
3550   : _total_used_bytes(0), _total_capacity_bytes(0),
3551     _total_prev_live_bytes(0), _total_next_live_bytes(0),
3552     _hum_used_bytes(0), _hum_capacity_bytes(0),
3553     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
3554     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
3555   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3556   MemRegion g1_reserved = g1h->g1_reserved();
3557   double now = os::elapsedTime();
3558 
3559   // Print the header of the output.
3560   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3561   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
3562                           G1PPRL_SUM_ADDR_FORMAT("reserved")
3563                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
3564                           p2i(g1_reserved.start()), p2i(g1_reserved.end()),
3565                           HeapRegion::GrainBytes);
3566   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3567   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3568                           G1PPRL_TYPE_H_FORMAT
3569                           G1PPRL_ADDR_BASE_H_FORMAT
3570                           G1PPRL_BYTE_H_FORMAT
3571                           G1PPRL_BYTE_H_FORMAT
3572                           G1PPRL_BYTE_H_FORMAT
3573                           G1PPRL_DOUBLE_H_FORMAT
3574                           G1PPRL_BYTE_H_FORMAT
3575                           G1PPRL_BYTE_H_FORMAT,
3576                           "type", "address-range",
3577                           "used", "prev-live", "next-live", "gc-eff",
3578                           "remset", "code-roots");
3579   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3580                           G1PPRL_TYPE_H_FORMAT
3581                           G1PPRL_ADDR_BASE_H_FORMAT
3582                           G1PPRL_BYTE_H_FORMAT
3583                           G1PPRL_BYTE_H_FORMAT
3584                           G1PPRL_BYTE_H_FORMAT
3585                           G1PPRL_DOUBLE_H_FORMAT
3586                           G1PPRL_BYTE_H_FORMAT
3587                           G1PPRL_BYTE_H_FORMAT,
3588                           "", "",
3589                           "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
3590                           "(bytes)", "(bytes)");
3591 }
3592 
3593 // It takes as a parameter a reference to one of the _hum_* fields,
3594 // deduces the corresponding value for a region in a humongous region
3595 // series (either the region size, or what's left if the _hum_* field
3596 // is < the region size), and updates the _hum_* field accordingly.
3597 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
3598   size_t bytes = 0;
3599   // The > 0 check is to deal with the prev and next live bytes which
3600   // could be 0.
3601   if (*hum_bytes > 0) {
3602     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
3603     *hum_bytes -= bytes;
3604   }
3605   return bytes;
3606 }
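// Worked example of the helper above (the region size is hypothetical):
// with GrainBytes == 1M and *hum_bytes starting at 2.5M for a humongous
// series, three successive calls return 1M, 1M and 0.5M respectively and
// leave *hum_bytes at zero, consistent with the "should have been zeroed"
// assert in doHeapRegion() below.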
3607 
3608 // It deduces the values for a region in a humongous region series
3609 // from the _hum_* fields and updates those accordingly. It assumes
3610 // that the _hum_* fields have already been set up from the "starts
3611 // humongous" region and we visit the regions in address order.
3612 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
3613                                                      size_t* capacity_bytes,
3614                                                      size_t* prev_live_bytes,
3615                                                      size_t* next_live_bytes) {
3616   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
3617   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
3618   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
3619   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
3620   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
3621 }
3622 
3623 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
3624   const char* type       = r->get_type_str();
3625   HeapWord* bottom       = r->bottom();
3626   HeapWord* end          = r->end();
3627   size_t capacity_bytes  = r->capacity();
3628   size_t used_bytes      = r->used();
3629   size_t prev_live_bytes = r->live_bytes();
3630   size_t next_live_bytes = r->next_live_bytes();
3631   double gc_eff          = r->gc_efficiency();
3632   size_t remset_bytes    = r->rem_set()->mem_size();
3633   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
3634 
3635   if (r->is_starts_humongous()) {
3636     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
3637            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
3638            "they should have been zeroed after the last time we used them");
3639     // Set up the _hum_* fields.
3640     _hum_capacity_bytes  = capacity_bytes;
3641     _hum_used_bytes      = used_bytes;
3642     _hum_prev_live_bytes = prev_live_bytes;
3643     _hum_next_live_bytes = next_live_bytes;
3644     get_hum_bytes(&used_bytes, &capacity_bytes,
3645                   &prev_live_bytes, &next_live_bytes);
3646     end = bottom + HeapRegion::GrainWords;
3647   } else if (r->is_continues_humongous()) {
3648     get_hum_bytes(&used_bytes, &capacity_bytes,
3649                   &prev_live_bytes, &next_live_bytes);
3650     assert(end == bottom + HeapRegion::GrainWords, "invariant");
3651   }
3652 
3653   _total_used_bytes      += used_bytes;
3654   _total_capacity_bytes  += capacity_bytes;
3655   _total_prev_live_bytes += prev_live_bytes;
3656   _total_next_live_bytes += next_live_bytes;
3657   _total_remset_bytes    += remset_bytes;
3658   _total_strong_code_roots_bytes += strong_code_roots_bytes;
3659 
3660   // Print a line for this particular region.
3661   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3662                           G1PPRL_TYPE_FORMAT
3663                           G1PPRL_ADDR_BASE_FORMAT
3664                           G1PPRL_BYTE_FORMAT
3665                           G1PPRL_BYTE_FORMAT
3666                           G1PPRL_BYTE_FORMAT
3667                           G1PPRL_DOUBLE_FORMAT
3668                           G1PPRL_BYTE_FORMAT
3669                           G1PPRL_BYTE_FORMAT,
3670                           type, p2i(bottom), p2i(end),
3671                           used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
3672                           remset_bytes, strong_code_roots_bytes);
3673 
3674   return false;
3675 }
3676 
3677 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3678   // Add static memory usage to the remembered set sizes.
3679   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
3680   // Print the footer of the output.
3681   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3682   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3683                          " SUMMARY"
3684                          G1PPRL_SUM_MB_FORMAT("capacity")
3685                          G1PPRL_SUM_MB_PERC_FORMAT("used")
3686                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
3687                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
3688                          G1PPRL_SUM_MB_FORMAT("remset")
3689                          G1PPRL_SUM_MB_FORMAT("code-roots"),
3690                          bytes_to_mb(_total_capacity_bytes),
3691                          bytes_to_mb(_total_used_bytes),
3692                          perc(_total_used_bytes, _total_capacity_bytes),
3693                          bytes_to_mb(_total_prev_live_bytes),
3694                          perc(_total_prev_live_bytes, _total_capacity_bytes),
3695                          bytes_to_mb(_total_next_live_bytes),
3696                          perc(_total_next_live_bytes, _total_capacity_bytes),
3697                          bytes_to_mb(_total_remset_bytes),
3698                          bytes_to_mb(_total_strong_code_roots_bytes));
3699 }