1 /*
   2  * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMarkThread.inline.hpp"
  30 #include "gc/g1/g1CollectedHeap.inline.hpp"
  31 #include "gc/g1/g1CollectorPolicy.hpp"
  32 #include "gc/g1/g1CollectorState.hpp"
  33 #include "gc/g1/g1ConcurrentMark.inline.hpp"
  34 #include "gc/g1/g1HeapVerifier.hpp"
  35 #include "gc/g1/g1OopClosures.inline.hpp"
  36 #include "gc/g1/g1StringDedup.hpp"
  37 #include "gc/g1/heapRegion.inline.hpp"
  38 #include "gc/g1/heapRegionRemSet.hpp"
  39 #include "gc/g1/heapRegionSet.inline.hpp"
  40 #include "gc/g1/suspendibleThreadSet.hpp"
  41 #include "gc/shared/gcId.hpp"
  42 #include "gc/shared/gcTimer.hpp"
  43 #include "gc/shared/gcTrace.hpp"
  44 #include "gc/shared/gcTraceTime.inline.hpp"
  45 #include "gc/shared/genOopClosures.inline.hpp"
  46 #include "gc/shared/referencePolicy.hpp"
  47 #include "gc/shared/strongRootsScope.hpp"
  48 #include "gc/shared/taskqueue.inline.hpp"
  49 #include "gc/shared/vmGCOperations.hpp"
  50 #include "logging/log.hpp"
  51 #include "memory/allocation.hpp"
  52 #include "memory/resourceArea.hpp"
  53 #include "oops/oop.inline.hpp"
  54 #include "runtime/atomic.inline.hpp"
  55 #include "runtime/handles.inline.hpp"
  56 #include "runtime/java.hpp"
  57 #include "runtime/prefetch.inline.hpp"
  58 #include "services/memTracker.hpp"
  59 
  60 // Concurrent marking bit map wrapper
  61 
  62 G1CMBitMapRO::G1CMBitMapRO(int shifter) :
  63   _bm(),
  64   _shifter(shifter) {
  65   _bmStartWord = 0;
  66   _bmWordSize = 0;
  67 }
  68 
  69 HeapWord* G1CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  70                                                  const HeapWord* limit) const {
  71   // First we must round addr *up* to a possible object boundary.
  72   addr = (HeapWord*)align_size_up((intptr_t)addr,
  73                                   HeapWordSize << _shifter);
  74   size_t addrOffset = heapWordToOffset(addr);
  75   assert(limit != NULL, "limit must not be NULL");
  76   size_t limitOffset = heapWordToOffset(limit);
  77   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  78   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  79   assert(nextAddr >= addr, "get_next_one postcondition");
  80   assert(nextAddr == limit || isMarked(nextAddr),
  81          "get_next_one postcondition");
  82   return nextAddr;
  83 }
  84 
  85 #ifndef PRODUCT
  86 bool G1CMBitMapRO::covers(MemRegion heap_rs) const {
  87   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  88   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
  89          "size inconsistency");
  90   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
  91          _bmWordSize  == heap_rs.word_size();
  92 }
  93 #endif
  94 
  95 void G1CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  96   _bm.print_on_error(st, prefix);
  97 }
  98 
  99 size_t G1CMBitMap::compute_size(size_t heap_size) {
 100   return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
 101 }
 102 
 103 size_t G1CMBitMap::mark_distance() {
 104   return MinObjAlignmentInBytes * BitsPerByte;
 105 }
 106 
 107 void G1CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 108   _bmStartWord = heap.start();
 109   _bmWordSize = heap.word_size();
 110 
 111   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
 112   _bm.set_size(_bmWordSize >> _shifter);
 113 
 114   storage->set_mapping_changed_listener(&_listener);
 115 }
 116 
 117 void G1CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 118   if (zero_filled) {
 119     return;
 120   }
 121   // We need to clear the bitmap on commit, removing any existing information.
 122   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 123   _bm->clearRange(mr);
 124 }
 125 
 126 // Closure used for clearing the given mark bitmap.
 127 class ClearBitmapHRClosure : public AbortableHeapRegionClosure {
 128  private:
 129   G1ConcurrentMark* _cm;
 130   G1CMBitMap* _bitmap;
 131   bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 132  public:
 133   ClearBitmapHRClosure(G1ConcurrentMark* cm, G1CMBitMap* bitmap, bool may_yield) : AbortableHeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
 134     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
 135   }
 136 
 137   virtual bool doHeapRegionAbortable(HeapRegion* r) {
 138     size_t const chunk_size_in_words = M / HeapWordSize;
 139 
 140     HeapWord* cur = r->bottom();
 141     HeapWord* const end = r->end();
 142 
 143     while (cur < end) {
 144       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 145       _bitmap->clearRange(mr);
 146 
 147       cur += chunk_size_in_words;
 148 
 149       // Abort iteration if after yielding the marking has been aborted.
 150       if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
 151         return true;
 152       }
 153       // Repeat the asserts from before the start of the closure. We will do them
 154       // as asserts here to minimize their overhead on the product. However, we
 155       // will have them as guarantees at the beginning / end of the bitmap
 156       // clearing to get some checking in the product.
 157       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
 158       assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
 159     }
 160 
 161     return false;
 162   }
 163 };
 164 
 165 class ParClearNextMarkBitmapTask : public AbstractGangTask {
 166   ClearBitmapHRClosure* _cl;
 167   HeapRegionClaimer     _hrclaimer;
 168   bool                  _suspendible; // If the task is suspendible, workers must join the STS.
 169 
 170 public:
 171   ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
 172       _cl(cl), _suspendible(suspendible), AbstractGangTask("Parallel Clear Bitmap Task"), _hrclaimer(n_workers) {}
 173 
 174   void work(uint worker_id) {
 175     SuspendibleThreadSetJoiner sts_join(_suspendible);
 176     G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
 177   }
 178 };
 179 
 180 void G1CMBitMap::clearAll() {
 181   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 182   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
 183   uint n_workers = g1h->workers()->active_workers();
 184   ParClearNextMarkBitmapTask task(&cl, n_workers, false);
 185   g1h->workers()->run_task(&task);
 186   guarantee(cl.complete(), "Must have completed iteration.");
 187   return;
 188 }
 189 
 190 void G1CMBitMap::clearRange(MemRegion mr) {
 191   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 192   assert(!mr.is_empty(), "unexpected empty region");
 193   // convert address range into offset range
 194   _bm.at_put_range(heapWordToOffset(mr.start()),
 195                    heapWordToOffset(mr.end()), false);
 196 }
 197 
 198 G1CMMarkStack::G1CMMarkStack(G1ConcurrentMark* cm) :
 199   _base(NULL), _cm(cm)
 200 {}
 201 
 202 bool G1CMMarkStack::allocate(size_t capacity) {
 203   // allocate a stack of the requisite depth
 204   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 205   if (!rs.is_reserved()) {
 206     warning("ConcurrentMark MarkStack allocation failure");
 207     return false;
 208   }
 209   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 210   if (!_virtual_space.initialize(rs, rs.size())) {
 211     warning("ConcurrentMark MarkStack backing store failure");
 212     // Release the virtual memory reserved for the marking stack
 213     rs.release();
 214     return false;
 215   }
 216   assert(_virtual_space.committed_size() == rs.size(),
 217          "Didn't reserve backing store for all of G1ConcurrentMark stack?");
 218   _base = (oop*) _virtual_space.low();
 219   setEmpty();
 220   _capacity = (jint) capacity;
 221   _saved_index = -1;
 222   _should_expand = false;
 223   return true;
 224 }
 225 
 226 void G1CMMarkStack::expand() {
 227   // Called, during remark, if we've overflown the marking stack during marking.
 228   assert(isEmpty(), "stack should been emptied while handling overflow");
 229   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 230   // Clear expansion flag
 231   _should_expand = false;
 232   if (_capacity == (jint) MarkStackSizeMax) {
 233     log_trace(gc)("(benign) Can't expand marking stack capacity, at max size limit");
 234     return;
 235   }
 236   // Double capacity if possible
 237   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
 238   // Do not give up existing stack until we have managed to
 239   // get the double capacity that we desired.
 240   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 241                                                            sizeof(oop)));
 242   if (rs.is_reserved()) {
 243     // Release the backing store associated with old stack
 244     _virtual_space.release();
 245     // Reinitialize virtual space for new stack
 246     if (!_virtual_space.initialize(rs, rs.size())) {
 247       fatal("Not enough swap for expanded marking stack capacity");
 248     }
 249     _base = (oop*)(_virtual_space.low());
 250     _index = 0;
 251     _capacity = new_capacity;
 252   } else {
 253     // Failed to double capacity, continue;
 254     log_trace(gc)("(benign) Failed to expand marking stack capacity from " SIZE_FORMAT "K to " SIZE_FORMAT "K",
 255                   _capacity / K, new_capacity / K);
 256   }
 257 }
 258 
 259 void G1CMMarkStack::set_should_expand() {
 260   // If we're resetting the marking state because of an
 261   // marking stack overflow, record that we should, if
 262   // possible, expand the stack.
 263   _should_expand = _cm->has_overflown();
 264 }
 265 
 266 G1CMMarkStack::~G1CMMarkStack() {
 267   if (_base != NULL) {
 268     _base = NULL;
 269     _virtual_space.release();
 270   }
 271 }
 272 
 273 void G1CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 274   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 275   jint start = _index;
 276   jint next_index = start + n;
 277   if (next_index > _capacity) {
 278     _overflow = true;
 279     return;
 280   }
 281   // Otherwise.
 282   _index = next_index;
 283   for (int i = 0; i < n; i++) {
 284     int ind = start + i;
 285     assert(ind < _capacity, "By overflow test above.");
 286     _base[ind] = ptr_arr[i];
 287   }
 288 }
 289 
 290 bool G1CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 291   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 292   jint index = _index;
 293   if (index == 0) {
 294     *n = 0;
 295     return false;
 296   } else {
 297     int k = MIN2(max, index);
 298     jint  new_ind = index - k;
 299     for (int j = 0; j < k; j++) {
 300       ptr_arr[j] = _base[new_ind + j];
 301     }
 302     _index = new_ind;
 303     *n = k;
 304     return true;
 305   }
 306 }
 307 
 308 void G1CMMarkStack::note_start_of_gc() {
 309   assert(_saved_index == -1,
 310          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 311   _saved_index = _index;
 312 }
 313 
 314 void G1CMMarkStack::note_end_of_gc() {
 315   // This is intentionally a guarantee, instead of an assert. If we
 316   // accidentally add something to the mark stack during GC, it
 317   // will be a correctness issue so it's better if we crash. we'll
 318   // only check this once per GC anyway, so it won't be a performance
 319   // issue in any way.
 320   guarantee(_saved_index == _index,
 321             "saved index: %d index: %d", _saved_index, _index);
 322   _saved_index = -1;
 323 }
 324 
 325 G1CMRootRegions::G1CMRootRegions() :
 326   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 327   _should_abort(false),  _next_survivor(NULL) { }
 328 
 329 void G1CMRootRegions::init(G1CollectedHeap* g1h, G1ConcurrentMark* cm) {
 330   _young_list = g1h->young_list();
 331   _cm = cm;
 332 }
 333 
 334 void G1CMRootRegions::prepare_for_scan() {
 335   assert(!scan_in_progress(), "pre-condition");
 336 
 337   // Currently, only survivors can be root regions.
 338   assert(_next_survivor == NULL, "pre-condition");
 339   _next_survivor = _young_list->first_survivor_region();
 340   _scan_in_progress = (_next_survivor != NULL);
 341   _should_abort = false;
 342 }
 343 
 344 HeapRegion* G1CMRootRegions::claim_next() {
 345   if (_should_abort) {
 346     // If someone has set the should_abort flag, we return NULL to
 347     // force the caller to bail out of their loop.
 348     return NULL;
 349   }
 350 
 351   // Currently, only survivors can be root regions.
 352   HeapRegion* res = _next_survivor;
 353   if (res != NULL) {
 354     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 355     // Read it again in case it changed while we were waiting for the lock.
 356     res = _next_survivor;
 357     if (res != NULL) {
 358       if (res == _young_list->last_survivor_region()) {
 359         // We just claimed the last survivor so store NULL to indicate
 360         // that we're done.
 361         _next_survivor = NULL;
 362       } else {
 363         _next_survivor = res->get_next_young_region();
 364       }
 365     } else {
 366       // Someone else claimed the last survivor while we were trying
 367       // to take the lock so nothing else to do.
 368     }
 369   }
 370   assert(res == NULL || res->is_survivor(), "post-condition");
 371 
 372   return res;
 373 }
 374 
 375 void G1CMRootRegions::notify_scan_done() {
 376   MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 377   _scan_in_progress = false;
 378   RootRegionScan_lock->notify_all();
 379 }
 380 
 381 void G1CMRootRegions::cancel_scan() {
 382   notify_scan_done();
 383 }
 384 
 385 void G1CMRootRegions::scan_finished() {
 386   assert(scan_in_progress(), "pre-condition");
 387 
 388   // Currently, only survivors can be root regions.
 389   if (!_should_abort) {
 390     assert(_next_survivor == NULL, "we should have claimed all survivors");
 391   }
 392   _next_survivor = NULL;
 393 
 394   notify_scan_done();
 395 }
 396 
 397 bool G1CMRootRegions::wait_until_scan_finished() {
 398   if (!scan_in_progress()) return false;
 399 
 400   {
 401     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 402     while (scan_in_progress()) {
 403       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 404     }
 405   }
 406   return true;
 407 }
 408 
 409 uint G1ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 410   return MAX2((n_par_threads + 2) / 4, 1U);
 411 }
 412 
 413 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 414   _g1h(g1h),
 415   _markBitMap1(),
 416   _markBitMap2(),
 417   _parallel_marking_threads(0),
 418   _max_parallel_marking_threads(0),
 419   _sleep_factor(0.0),
 420   _marking_task_overhead(1.0),
 421   _cleanup_list("Cleanup List"),
 422   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 423   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
 424             CardTableModRefBS::card_shift,
 425             false /* in_resource_area*/),
 426 
 427   _prevMarkBitMap(&_markBitMap1),
 428   _nextMarkBitMap(&_markBitMap2),
 429 
 430   _markStack(this),
 431   // _finger set in set_non_marking_state
 432 
 433   _max_worker_id(ParallelGCThreads),
 434   // _active_tasks set in set_non_marking_state
 435   // _tasks set inside the constructor
 436   _task_queues(new G1CMTaskQueueSet((int) _max_worker_id)),
 437   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 438 
 439   _has_overflown(false),
 440   _concurrent(false),
 441   _has_aborted(false),
 442   _restart_for_overflow(false),
 443   _concurrent_marking_in_progress(false),
 444   _concurrent_phase_status(ConcPhaseNotStarted),
 445 
 446   // _verbose_level set below
 447 
 448   _init_times(),
 449   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 450   _cleanup_times(),
 451   _total_counting_time(0.0),
 452   _total_rs_scrub_time(0.0),
 453 
 454   _parallel_workers(NULL),
 455 
 456   _count_card_bitmaps(NULL),
 457   _count_marked_bytes(NULL),
 458   _completed_initialization(false) {
 459 
 460   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 461   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 462 
 463   // Create & start a ConcurrentMark thread.
 464   _cmThread = new ConcurrentMarkThread(this);
 465   assert(cmThread() != NULL, "CM Thread should have been created");
 466   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 467   if (_cmThread->osthread() == NULL) {
 468       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 469   }
 470 
 471   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 472   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 473   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 474 
 475   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 476   satb_qs.set_buffer_size(G1SATBBufferSize);
 477 
 478   _root_regions.init(_g1h, this);
 479 
 480   if (ConcGCThreads > ParallelGCThreads) {
 481     warning("Can't have more ConcGCThreads (%u) "
 482             "than ParallelGCThreads (%u).",
 483             ConcGCThreads, ParallelGCThreads);
 484     return;
 485   }
 486   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 487     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 488     // if both are set
 489     _sleep_factor             = 0.0;
 490     _marking_task_overhead    = 1.0;
 491   } else if (G1MarkingOverheadPercent > 0) {
 492     // We will calculate the number of parallel marking threads based
 493     // on a target overhead with respect to the soft real-time goal
 494     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 495     double overall_cm_overhead =
 496       (double) MaxGCPauseMillis * marking_overhead /
 497       (double) GCPauseIntervalMillis;
 498     double cpu_ratio = 1.0 / (double) os::processor_count();
 499     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 500     double marking_task_overhead =
 501       overall_cm_overhead / marking_thread_num *
 502                                               (double) os::processor_count();
 503     double sleep_factor =
 504                        (1.0 - marking_task_overhead) / marking_task_overhead;
 505 
 506     FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
 507     _sleep_factor             = sleep_factor;
 508     _marking_task_overhead    = marking_task_overhead;
 509   } else {
 510     // Calculate the number of parallel marking threads by scaling
 511     // the number of parallel GC threads.
 512     uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
 513     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 514     _sleep_factor             = 0.0;
 515     _marking_task_overhead    = 1.0;
 516   }
 517 
 518   assert(ConcGCThreads > 0, "Should have been set");
 519   _parallel_marking_threads = ConcGCThreads;
 520   _max_parallel_marking_threads = _parallel_marking_threads;
 521 
 522   _parallel_workers = new WorkGang("G1 Marker",
 523        _max_parallel_marking_threads, false, true);
 524   if (_parallel_workers == NULL) {
 525     vm_exit_during_initialization("Failed necessary allocation.");
 526   } else {
 527     _parallel_workers->initialize_workers();
 528   }
 529 
 530   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 531     size_t mark_stack_size =
 532       MIN2(MarkStackSizeMax,
 533           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 534     // Verify that the calculated value for MarkStackSize is in range.
 535     // It would be nice to use the private utility routine from Arguments.
 536     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 537       warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 538               "must be between 1 and " SIZE_FORMAT,
 539               mark_stack_size, MarkStackSizeMax);
 540       return;
 541     }
 542     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 543   } else {
 544     // Verify MarkStackSize is in range.
 545     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 546       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 547         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 548           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 549                   "must be between 1 and " SIZE_FORMAT,
 550                   MarkStackSize, MarkStackSizeMax);
 551           return;
 552         }
 553       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 554         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 555           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 556                   " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 557                   MarkStackSize, MarkStackSizeMax);
 558           return;
 559         }
 560       }
 561     }
 562   }
 563 
 564   if (!_markStack.allocate(MarkStackSize)) {
 565     warning("Failed to allocate CM marking stack");
 566     return;
 567   }
 568 
 569   _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC);
 570   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 571 
 572   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 573   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 574 
 575   BitMap::idx_t card_bm_size = _card_bm.size();
 576 
 577   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 578   _active_tasks = _max_worker_id;
 579 
 580   uint max_regions = _g1h->max_regions();
 581   for (uint i = 0; i < _max_worker_id; ++i) {
 582     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
 583     task_queue->initialize();
 584     _task_queues->register_queue(i, task_queue);
 585 
 586     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 587     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 588 
 589     _tasks[i] = new G1CMTask(i, this,
 590                              _count_marked_bytes[i],
 591                              &_count_card_bitmaps[i],
 592                              task_queue, _task_queues);
 593 
 594     _accum_task_vtime[i] = 0.0;
 595   }
 596 
 597   // Calculate the card number for the bottom of the heap. Used
 598   // in biasing indexes into the accounting card bitmaps.
 599   _heap_bottom_card_num =
 600     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 601                                 CardTableModRefBS::card_shift);
 602 
 603   // Clear all the liveness counting data
 604   clear_all_count_data();
 605 
 606   // so that the call below can read a sensible value
 607   _heap_start = g1h->reserved_region().start();
 608   set_non_marking_state();
 609   _completed_initialization = true;
 610 }
 611 
 612 void G1ConcurrentMark::reset() {
 613   // Starting values for these two. This should be called in a STW
 614   // phase.
 615   MemRegion reserved = _g1h->g1_reserved();
 616   _heap_start = reserved.start();
 617   _heap_end   = reserved.end();
 618 
 619   // Separated the asserts so that we know which one fires.
 620   assert(_heap_start != NULL, "heap bounds should look ok");
 621   assert(_heap_end != NULL, "heap bounds should look ok");
 622   assert(_heap_start < _heap_end, "heap bounds should look ok");
 623 
 624   // Reset all the marking data structures and any necessary flags
 625   reset_marking_state();
 626 
 627   // We do reset all of them, since different phases will use
 628   // different number of active threads. So, it's easiest to have all
 629   // of them ready.
 630   for (uint i = 0; i < _max_worker_id; ++i) {
 631     _tasks[i]->reset(_nextMarkBitMap);
 632   }
 633 
 634   // we need this to make sure that the flag is on during the evac
 635   // pause with initial mark piggy-backed
 636   set_concurrent_marking_in_progress();
 637 }
 638 
 639 
 640 void G1ConcurrentMark::reset_marking_state(bool clear_overflow) {
 641   _markStack.set_should_expand();
 642   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 643   if (clear_overflow) {
 644     clear_has_overflown();
 645   } else {
 646     assert(has_overflown(), "pre-condition");
 647   }
 648   _finger = _heap_start;
 649 
 650   for (uint i = 0; i < _max_worker_id; ++i) {
 651     G1CMTaskQueue* queue = _task_queues->queue(i);
 652     queue->set_empty();
 653   }
 654 }
 655 
 656 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
 657   assert(active_tasks <= _max_worker_id, "we should not have more");
 658 
 659   _active_tasks = active_tasks;
 660   // Need to update the three data structures below according to the
 661   // number of active threads for this phase.
 662   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 663   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 664   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 665 }
 666 
 667 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 668   set_concurrency(active_tasks);
 669 
 670   _concurrent = concurrent;
 671   // We propagate this to all tasks, not just the active ones.
 672   for (uint i = 0; i < _max_worker_id; ++i)
 673     _tasks[i]->set_concurrent(concurrent);
 674 
 675   if (concurrent) {
 676     set_concurrent_marking_in_progress();
 677   } else {
 678     // We currently assume that the concurrent flag has been set to
 679     // false before we start remark. At this point we should also be
 680     // in a STW phase.
 681     assert(!concurrent_marking_in_progress(), "invariant");
 682     assert(out_of_regions(),
 683            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 684            p2i(_finger), p2i(_heap_end));
 685   }
 686 }
 687 
 688 void G1ConcurrentMark::set_non_marking_state() {
 689   // We set the global marking state to some default values when we're
 690   // not doing marking.
 691   reset_marking_state();
 692   _active_tasks = 0;
 693   clear_concurrent_marking_in_progress();
 694 }
 695 
 696 G1ConcurrentMark::~G1ConcurrentMark() {
 697   // The G1ConcurrentMark instance is never freed.
 698   ShouldNotReachHere();
 699 }
 700 
 701 void G1ConcurrentMark::clearNextBitmap() {
 702   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 703 
 704   // Make sure that the concurrent mark thread looks to still be in
 705   // the current cycle.
 706   guarantee(cmThread()->during_cycle(), "invariant");
 707 
 708   // We are finishing up the current cycle by clearing the next
 709   // marking bitmap and getting it ready for the next cycle. During
 710   // this time no other cycle can start. So, let's make sure that this
 711   // is the case.
 712   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 713 
 714   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
 715   ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
 716   _parallel_workers->run_task(&task);
 717 
 718   // Clear the liveness counting data. If the marking has been aborted, the abort()
 719   // call already did that.
 720   if (cl.complete()) {
 721     clear_all_count_data();
 722   }
 723 
 724   // Repeat the asserts from above.
 725   guarantee(cmThread()->during_cycle(), "invariant");
 726   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 727 }
 728 
 729 class CheckBitmapClearHRClosure : public AbortableHeapRegionClosure {
 730   G1CMBitMap* _bitmap;
 731   bool _error;
 732  public:
 733   CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
 734   }
 735 
 736   virtual bool doHeapRegionAbortable(HeapRegion* r) {
 737     // This closure can be called concurrently to the mutator, so we must make sure
 738     // that the result of the getNextMarkedWordAddress() call is compared to the
 739     // value passed to it as limit to detect any found bits.
 740     // end never changes in G1.
 741     HeapWord* end = r->end();
 742     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 743   }
 744 };
 745 
 746 bool G1ConcurrentMark::nextMarkBitmapIsClear() {
 747   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 748   _g1h->heap_region_iterate(&cl);
 749   return cl.complete();
 750 }
 751 
 752 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 753 public:
 754   void doHeapRegion(HeapRegion* r) {
 755     r->note_start_of_marking();
 756   }
 757 };
 758 
 759 void G1ConcurrentMark::checkpointRootsInitialPre() {
 760   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 761   G1CollectorPolicy* g1p = g1h->g1_policy();
 762 
 763   _has_aborted = false;
 764 
 765   // Initialize marking structures. This has to be done in a STW phase.
 766   reset();
 767 
 768   // For each region note start of marking.
 769   NoteStartOfMarkHRClosure startcl;
 770   g1h->heap_region_iterate(&startcl);
 771 }
 772 
 773 
 774 void G1ConcurrentMark::checkpointRootsInitialPost() {
 775   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 776 
 777   // Start Concurrent Marking weak-reference discovery.
 778   ReferenceProcessor* rp = g1h->ref_processor_cm();
 779   // enable ("weak") refs discovery
 780   rp->enable_discovery();
 781   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 782 
 783   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 784   // This is the start of  the marking cycle, we're expected all
 785   // threads to have SATB queues with active set to false.
 786   satb_mq_set.set_active_all_threads(true, /* new active value */
 787                                      false /* expected_active */);
 788 
 789   _root_regions.prepare_for_scan();
 790 
 791   // update_g1_committed() will be called at the end of an evac pause
 792   // when marking is on. So, it's also called at the end of the
 793   // initial-mark pause to update the heap end, if the heap expands
 794   // during it. No need to call it here.
 795 }
 796 
 797 /*
 798  * Notice that in the next two methods, we actually leave the STS
 799  * during the barrier sync and join it immediately afterwards. If we
 800  * do not do this, the following deadlock can occur: one thread could
 801  * be in the barrier sync code, waiting for the other thread to also
 802  * sync up, whereas another one could be trying to yield, while also
 803  * waiting for the other threads to sync up too.
 804  *
 805  * Note, however, that this code is also used during remark and in
 806  * this case we should not attempt to leave / enter the STS, otherwise
 807  * we'll either hit an assert (debug / fastdebug) or deadlock
 808  * (product). So we should only leave / enter the STS if we are
 809  * operating concurrently.
 810  *
 811  * Because the thread that does the sync barrier has left the STS, it
 812  * is possible to be suspended for a Full GC or an evacuation pause
 813  * could occur. This is actually safe, since the entering the sync
 814  * barrier is one of the last things do_marking_step() does, and it
 815  * doesn't manipulate any data structures afterwards.
 816  */
 817 
 818 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 819   bool barrier_aborted;
 820   {
 821     SuspendibleThreadSetLeaver sts_leave(concurrent());
 822     barrier_aborted = !_first_overflow_barrier_sync.enter();
 823   }
 824 
 825   // at this point everyone should have synced up and not be doing any
 826   // more work
 827 
 828   if (barrier_aborted) {
 829     // If the barrier aborted we ignore the overflow condition and
 830     // just abort the whole marking phase as quickly as possible.
 831     return;
 832   }
 833 
 834   // If we're executing the concurrent phase of marking, reset the marking
 835   // state; otherwise the marking state is reset after reference processing,
 836   // during the remark pause.
 837   // If we reset here as a result of an overflow during the remark we will
 838   // see assertion failures from any subsequent set_concurrency_and_phase()
 839   // calls.
 840   if (concurrent()) {
 841     // let the task associated with with worker 0 do this
 842     if (worker_id == 0) {
 843       // task 0 is responsible for clearing the global data structures
 844       // We should be here because of an overflow. During STW we should
 845       // not clear the overflow flag since we rely on it being true when
 846       // we exit this method to abort the pause and restart concurrent
 847       // marking.
 848       reset_marking_state(true /* clear_overflow */);
 849 
 850       log_info(gc)("Concurrent Mark reset for overflow");
 851     }
 852   }
 853 
 854   // after this, each task should reset its own data structures then
 855   // then go into the second barrier
 856 }
 857 
 858 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 859   SuspendibleThreadSetLeaver sts_leave(concurrent());
 860   _second_overflow_barrier_sync.enter();
 861 
 862   // at this point everything should be re-initialized and ready to go
 863 }
 864 
 865 class G1CMConcurrentMarkingTask: public AbstractGangTask {
 866 private:
 867   G1ConcurrentMark*     _cm;
 868   ConcurrentMarkThread* _cmt;
 869 
 870 public:
 871   void work(uint worker_id) {
 872     assert(Thread::current()->is_ConcurrentGC_thread(),
 873            "this should only be done by a conc GC thread");
 874     ResourceMark rm;
 875 
 876     double start_vtime = os::elapsedVTime();
 877 
 878     {
 879       SuspendibleThreadSetJoiner sts_join;
 880 
 881       assert(worker_id < _cm->active_tasks(), "invariant");
 882       G1CMTask* the_task = _cm->task(worker_id);
 883       the_task->record_start_time();
 884       if (!_cm->has_aborted()) {
 885         do {
 886           double start_vtime_sec = os::elapsedVTime();
 887           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 888 
 889           the_task->do_marking_step(mark_step_duration_ms,
 890                                     true  /* do_termination */,
 891                                     false /* is_serial*/);
 892 
 893           double end_vtime_sec = os::elapsedVTime();
 894           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
 895           _cm->clear_has_overflown();
 896 
 897           _cm->do_yield_check(worker_id);
 898 
 899           jlong sleep_time_ms;
 900           if (!_cm->has_aborted() && the_task->has_aborted()) {
 901             sleep_time_ms =
 902               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
 903             {
 904               SuspendibleThreadSetLeaver sts_leave;
 905               os::sleep(Thread::current(), sleep_time_ms, false);
 906             }
 907           }
 908         } while (!_cm->has_aborted() && the_task->has_aborted());
 909       }
 910       the_task->record_end_time();
 911       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
 912     }
 913 
 914     double end_vtime = os::elapsedVTime();
 915     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 916   }
 917 
 918   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm,
 919                             ConcurrentMarkThread* cmt) :
 920       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
 921 
 922   ~G1CMConcurrentMarkingTask() { }
 923 };
 924 
 925 // Calculates the number of active workers for a concurrent
 926 // phase.
 927 uint G1ConcurrentMark::calc_parallel_marking_threads() {
 928   uint n_conc_workers = 0;
 929   if (!UseDynamicNumberOfGCThreads ||
 930       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
 931        !ForceDynamicNumberOfGCThreads)) {
 932     n_conc_workers = max_parallel_marking_threads();
 933   } else {
 934     n_conc_workers =
 935       AdaptiveSizePolicy::calc_default_active_workers(
 936                                    max_parallel_marking_threads(),
 937                                    1, /* Minimum workers */
 938                                    parallel_marking_threads(),
 939                                    Threads::number_of_non_daemon_threads());
 940     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
 941     // that scaling has already gone into "_max_parallel_marking_threads".
 942   }
 943   assert(n_conc_workers > 0, "Always need at least 1");
 944   return n_conc_workers;
 945 }
 946 
 947 void G1ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
 948   // Currently, only survivors can be root regions.
 949   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
 950   G1RootRegionScanClosure cl(_g1h, this, worker_id);
 951 
 952   const uintx interval = PrefetchScanIntervalInBytes;
 953   HeapWord* curr = hr->bottom();
 954   const HeapWord* end = hr->top();
 955   while (curr < end) {
 956     Prefetch::read(curr, interval);
 957     oop obj = oop(curr);
 958     int size = obj->oop_iterate_size(&cl);
 959     assert(size == obj->size(), "sanity");
 960     curr += size;
 961   }
 962 }
 963 
 964 class G1CMRootRegionScanTask : public AbstractGangTask {
 965 private:
 966   G1ConcurrentMark* _cm;
 967 
 968 public:
 969   G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
 970     AbstractGangTask("Root Region Scan"), _cm(cm) { }
 971 
 972   void work(uint worker_id) {
 973     assert(Thread::current()->is_ConcurrentGC_thread(),
 974            "this should only be done by a conc GC thread");
 975 
 976     G1CMRootRegions* root_regions = _cm->root_regions();
 977     HeapRegion* hr = root_regions->claim_next();
 978     while (hr != NULL) {
 979       _cm->scanRootRegion(hr, worker_id);
 980       hr = root_regions->claim_next();
 981     }
 982   }
 983 };
 984 
 985 void G1ConcurrentMark::scanRootRegions() {
 986   // scan_in_progress() will have been set to true only if there was
 987   // at least one root region to scan. So, if it's false, we
 988   // should not attempt to do any further work.
 989   if (root_regions()->scan_in_progress()) {
 990     assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");
 991     GCTraceConcTime(Info, gc) tt("Concurrent Root Region Scan");
 992 
 993     _parallel_marking_threads = calc_parallel_marking_threads();
 994     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
 995            "Maximum number of marking threads exceeded");
 996     uint active_workers = MAX2(1U, parallel_marking_threads());
 997 
 998     G1CMRootRegionScanTask task(this);
 999     _parallel_workers->set_active_workers(active_workers);
1000     _parallel_workers->run_task(&task);
1001 
1002     // It's possible that has_aborted() is true here without actually
1003     // aborting the survivor scan earlier. This is OK as it's
1004     // mainly used for sanity checking.
1005     root_regions()->scan_finished();
1006   }
1007 }
1008 
1009 void G1ConcurrentMark::register_concurrent_phase_start(const char* title) {
1010   uint old_val = 0;
1011   do {
1012     old_val = Atomic::cmpxchg(ConcPhaseStarted, &_concurrent_phase_status, ConcPhaseNotStarted);
1013   } while (old_val != ConcPhaseNotStarted);
1014   _g1h->gc_timer_cm()->register_gc_concurrent_start(title);
1015 }
1016 
1017 void G1ConcurrentMark::register_concurrent_phase_end_common(bool end_timer) {
1018   if (_concurrent_phase_status == ConcPhaseNotStarted) {
1019     return;
1020   }
1021 
1022   uint old_val = Atomic::cmpxchg(ConcPhaseStopping, &_concurrent_phase_status, ConcPhaseStarted);
1023   if (old_val == ConcPhaseStarted) {
1024     _g1h->gc_timer_cm()->register_gc_concurrent_end();
1025     // If 'end_timer' is true, we came here to end timer which needs concurrent phase ended.
1026     // We need to end it before changing the status to 'ConcPhaseNotStarted' to prevent
1027     // starting a new concurrent phase by 'ConcurrentMarkThread'.
1028     if (end_timer) {
1029       _g1h->gc_timer_cm()->register_gc_end();
1030     }
1031     old_val = Atomic::cmpxchg(ConcPhaseNotStarted, &_concurrent_phase_status, ConcPhaseStopping);
1032     assert(old_val == ConcPhaseStopping, "Should not have changed since we entered this scope.");
1033   } else {
1034     do {
1035       // Let other thread finish changing '_concurrent_phase_status' to 'ConcPhaseNotStarted'.
1036       os::naked_short_sleep(1);
1037     } while (_concurrent_phase_status != ConcPhaseNotStarted);
1038   }
1039 }
1040 
1041 void G1ConcurrentMark::register_concurrent_phase_end() {
1042   register_concurrent_phase_end_common(false);
1043 }
1044 
1045 void G1ConcurrentMark::register_concurrent_gc_end_and_stop_timer() {
1046   register_concurrent_phase_end_common(true);
1047 }
1048 
1049 void G1ConcurrentMark::markFromRoots() {
1050   // we might be tempted to assert that:
1051   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1052   //        "inconsistent argument?");
1053   // However that wouldn't be right, because it's possible that
1054   // a safepoint is indeed in progress as a younger generation
1055   // stop-the-world GC happens even as we mark in this generation.
1056 
1057   _restart_for_overflow = false;
1058 
1059   // _g1h has _n_par_threads
1060   _parallel_marking_threads = calc_parallel_marking_threads();
1061   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1062     "Maximum number of marking threads exceeded");
1063 
1064   uint active_workers = MAX2(1U, parallel_marking_threads());
1065   assert(active_workers > 0, "Should have been set");
1066 
1067   // Parallel task terminator is set in "set_concurrency_and_phase()"
1068   set_concurrency_and_phase(active_workers, true /* concurrent */);
1069 
1070   G1CMConcurrentMarkingTask markingTask(this, cmThread());
1071   _parallel_workers->set_active_workers(active_workers);
1072   _parallel_workers->run_task(&markingTask);
1073   print_stats();
1074 }
1075 
1076 void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1077   // world is stopped at this checkpoint
1078   assert(SafepointSynchronize::is_at_safepoint(),
1079          "world should be stopped");
1080 
1081   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1082 
1083   // If a full collection has happened, we shouldn't do this.
1084   if (has_aborted()) {
1085     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1086     return;
1087   }
1088 
1089   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1090 
1091   if (VerifyDuringGC) {
1092     HandleMark hm;  // handle scope
1093     g1h->prepare_for_verify();
1094     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1095   }
1096   g1h->verifier()->check_bitmaps("Remark Start");
1097 
1098   G1CollectorPolicy* g1p = g1h->g1_policy();
1099   g1p->record_concurrent_mark_remark_start();
1100 
1101   double start = os::elapsedTime();
1102 
1103   checkpointRootsFinalWork();
1104 
1105   double mark_work_end = os::elapsedTime();
1106 
1107   weakRefsWork(clear_all_soft_refs);
1108 
1109   if (has_overflown()) {
1110     // Oops.  We overflowed.  Restart concurrent marking.
1111     _restart_for_overflow = true;
1112     log_develop_trace(gc)("Remark led to restart for overflow.");
1113 
1114     // Verify the heap w.r.t. the previous marking bitmap.
1115     if (VerifyDuringGC) {
1116       HandleMark hm;  // handle scope
1117       g1h->prepare_for_verify();
1118       Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
1119     }
1120 
1121     // Clear the marking state because we will be restarting
1122     // marking due to overflowing the global mark stack.
1123     reset_marking_state();
1124   } else {
1125     {
1126       GCTraceTime(Debug, gc) trace("Aggregate Data", g1h->gc_timer_cm());
1127 
1128       // Aggregate the per-task counting data that we have accumulated
1129       // while marking.
1130       aggregate_count_data();
1131     }
1132 
1133     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1134     // We're done with marking.
1135     // This is the end of  the marking cycle, we're expected all
1136     // threads to have SATB queues with active set to true.
1137     satb_mq_set.set_active_all_threads(false, /* new active value */
1138                                        true /* expected_active */);
1139 
1140     if (VerifyDuringGC) {
1141       HandleMark hm;  // handle scope
1142       g1h->prepare_for_verify();
1143       Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
1144     }
1145     g1h->verifier()->check_bitmaps("Remark End");
1146     assert(!restart_for_overflow(), "sanity");
1147     // Completely reset the marking state since marking completed
1148     set_non_marking_state();
1149   }
1150 
1151   // Expand the marking stack, if we have to and if we can.
1152   if (_markStack.should_expand()) {
1153     _markStack.expand();
1154   }
1155 
1156   // Statistics
1157   double now = os::elapsedTime();
1158   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1159   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1160   _remark_times.add((now - start) * 1000.0);
1161 
1162   g1p->record_concurrent_mark_remark_end();
1163 
1164   G1CMIsAliveClosure is_alive(g1h);
1165   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1166 }
1167 
1168 // Base class of the closures that finalize and verify the
1169 // liveness counting data.
1170 class G1CMCountDataClosureBase: public HeapRegionClosure {
1171 protected:
1172   G1CollectedHeap* _g1h;
1173   G1ConcurrentMark* _cm;
1174   CardTableModRefBS* _ct_bs;
1175 
1176   BitMap* _region_bm;
1177   BitMap* _card_bm;
1178 
1179   // Takes a region that's not empty (i.e., it has at least one
1180   // live object in it and sets its corresponding bit on the region
1181   // bitmap to 1.
1182   void set_bit_for_region(HeapRegion* hr) {
1183     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1184     _region_bm->par_at_put(index, true);
1185   }
1186 
1187 public:
1188   G1CMCountDataClosureBase(G1CollectedHeap* g1h,
1189                            BitMap* region_bm, BitMap* card_bm):
1190     _g1h(g1h), _cm(g1h->concurrent_mark()),
1191     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
1192     _region_bm(region_bm), _card_bm(card_bm) { }
1193 };
1194 
1195 // Closure that calculates the # live objects per region. Used
1196 // for verification purposes during the cleanup pause.
1197 class CalcLiveObjectsClosure: public G1CMCountDataClosureBase {
1198   G1CMBitMapRO* _bm;
1199   size_t _region_marked_bytes;
1200 
1201 public:
1202   CalcLiveObjectsClosure(G1CMBitMapRO *bm, G1CollectedHeap* g1h,
1203                          BitMap* region_bm, BitMap* card_bm) :
1204     G1CMCountDataClosureBase(g1h, region_bm, card_bm),
1205     _bm(bm), _region_marked_bytes(0) { }
1206 
1207   void doHeapRegion(HeapRegion* hr) {
1208     HeapWord* ntams = hr->next_top_at_mark_start();
1209     HeapWord* start = hr->bottom();
1210 
1211     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1212            "Preconditions not met - "
1213            "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
1214            p2i(start), p2i(ntams), p2i(hr->end()));
1215 
1216     // Find the first marked object at or after "start".
1217     start = _bm->getNextMarkedWordAddress(start, ntams);
1218 
1219     size_t marked_bytes = 0;
1220 
1221     while (start < ntams) {
1222       oop obj = oop(start);
1223       int obj_sz = obj->size();
1224       HeapWord* obj_end = start + obj_sz;
1225 
1226       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1227       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1228 
1229       // Note: if we're looking at the last region in heap - obj_end
1230       // could be actually just beyond the end of the heap; end_idx
1231       // will then correspond to a (non-existent) card that is also
1232       // just beyond the heap.
1233       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1234         // end of object is not card aligned - increment to cover
1235         // all the cards spanned by the object
1236         end_idx += 1;
1237       }
1238 
1239       // Set the bits in the card BM for the cards spanned by this object.
1240       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1241 
1242       // Add the size of this object to the number of marked bytes.
1243       marked_bytes += (size_t)obj_sz * HeapWordSize;
1244 
1245       // This will happen if we are handling a humongous object that spans
1246       // several heap regions.
1247       if (obj_end > hr->end()) {
1248         break;
1249       }
1250       // Find the next marked object after this one.
1251       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1252     }
1253 
1254     // Mark the allocated-since-marking portion...
1255     HeapWord* top = hr->top();
1256     if (ntams < top) {
1257       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1258       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1259 
1260       // Note: if we're looking at the last region in heap - top
1261       // could be actually just beyond the end of the heap; end_idx
1262       // will then correspond to a (non-existent) card that is also
1263       // just beyond the heap.
1264       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1265         // end of object is not card aligned - increment to cover
1266         // all the cards spanned by the object
1267         end_idx += 1;
1268       }
1269       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1270 
1271       // This definitely means the region has live objects.
1272       set_bit_for_region(hr);
1273     }
1274 
1275     // Update the live region bitmap.
1276     if (marked_bytes > 0) {
1277       set_bit_for_region(hr);
1278     }
1279 
1280     // Set the marked bytes for the current region so that
1281     // it can be queried by a calling verification routine
1282     _region_marked_bytes = marked_bytes;
1283   }
1284 
1285   size_t region_marked_bytes() const { return _region_marked_bytes; }
1286 };
1287 
1288 // Heap region closure used for verifying the counting data
1289 // that was accumulated concurrently and aggregated during
1290 // the remark pause. This closure is applied to the heap
1291 // regions during the STW cleanup pause.
1292 
1293 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1294   G1CollectedHeap* _g1h;
1295   G1ConcurrentMark* _cm;
1296   CalcLiveObjectsClosure _calc_cl;
1297   BitMap* _region_bm;   // Region BM to be verified
1298   BitMap* _card_bm;     // Card BM to be verified
1299 
1300   BitMap* _exp_region_bm; // Expected Region BM values
1301   BitMap* _exp_card_bm;   // Expected card BM values
1302 
1303   int _failures;
1304 
1305 public:
1306   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1307                                 BitMap* region_bm,
1308                                 BitMap* card_bm,
1309                                 BitMap* exp_region_bm,
1310                                 BitMap* exp_card_bm) :
1311     _g1h(g1h), _cm(g1h->concurrent_mark()),
1312     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1313     _region_bm(region_bm), _card_bm(card_bm),
1314     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1315     _failures(0) { }
1316 
1317   int failures() const { return _failures; }
1318 
1319   void doHeapRegion(HeapRegion* hr) {
1320     int failures = 0;
1321 
1322     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1323     // this region and set the corresponding bits in the expected region
1324     // and card bitmaps.
1325     _calc_cl.doHeapRegion(hr);
1326 
1327     // Verify the marked bytes for this region.
1328     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1329     size_t act_marked_bytes = hr->next_marked_bytes();
1330 
1331     if (exp_marked_bytes > act_marked_bytes) {
1332       if (hr->is_starts_humongous()) {
1333         // For start_humongous regions, the size of the whole object will be
1334         // in exp_marked_bytes.
1335         HeapRegion* region = hr;
1336         int num_regions;
1337         for (num_regions = 0; region != NULL; num_regions++) {
1338           region = _g1h->next_region_in_humongous(region);
1339         }
1340         if ((num_regions-1) * HeapRegion::GrainBytes >= exp_marked_bytes) {
1341           failures += 1;
1342         } else if (num_regions * HeapRegion::GrainBytes < exp_marked_bytes) {
1343           failures += 1;
1344         }
1345       } else {
1346         // We're not OK if expected marked bytes > actual marked bytes. It means
1347         // we have missed accounting some objects during the actual marking.
1348         failures += 1;
1349       }
1350     }
1351 
1352     // Verify the bit, for this region, in the actual and expected
1353     // (which was just calculated) region bit maps.
1354     // We're not OK if the bit in the calculated expected region
1355     // bitmap is set and the bit in the actual region bitmap is not.
1356     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1357 
1358     bool expected = _exp_region_bm->at(index);
1359     bool actual = _region_bm->at(index);
1360     if (expected && !actual) {
1361       failures += 1;
1362     }
1363 
1364     // Verify that the card bit maps for the cards spanned by the current
1365     // region match. We have an error if we have a set bit in the expected
1366     // bit map and the corresponding bit in the actual bitmap is not set.
1367 
1368     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1369     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1370 
1371     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1372       expected = _exp_card_bm->at(i);
1373       actual = _card_bm->at(i);
1374 
1375       if (expected && !actual) {
1376         failures += 1;
1377       }
1378     }
1379 
1380     _failures += failures;
1381   }
1382 };
1383 
1384 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1385 protected:
1386   G1CollectedHeap* _g1h;
1387   G1ConcurrentMark* _cm;
1388   BitMap* _actual_region_bm;
1389   BitMap* _actual_card_bm;
1390 
1391   uint    _n_workers;
1392 
1393   BitMap* _expected_region_bm;
1394   BitMap* _expected_card_bm;
1395 
1396   int  _failures;
1397 
1398   HeapRegionClaimer _hrclaimer;
1399 
1400 public:
1401   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1402                             BitMap* region_bm, BitMap* card_bm,
1403                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1404     : AbstractGangTask("G1 verify final counting"),
1405       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1406       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1407       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1408       _failures(0),
1409       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1410     assert(VerifyDuringGC, "don't call this otherwise");
1411     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1412     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1413   }
1414 
1415   void work(uint worker_id) {
1416     assert(worker_id < _n_workers, "invariant");
1417 
1418     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1419                                             _actual_region_bm, _actual_card_bm,
1420                                             _expected_region_bm,
1421                                             _expected_card_bm);
1422 
1423     _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
1424 
1425     Atomic::add(verify_cl.failures(), &_failures);
1426   }
1427 
1428   int failures() const { return _failures; }
1429 };
1430 
1431 // Closure that finalizes the liveness counting data.
1432 // Used during the cleanup pause.
1433 // Sets the bits corresponding to the interval [NTAMS, top]
1434 // (which contains the implicitly live objects) in the
1435 // card liveness bitmap. Also sets the bit for each region,
1436 // containing live data, in the region liveness bitmap.
1437 
1438 class FinalCountDataUpdateClosure: public G1CMCountDataClosureBase {
1439  public:
1440   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1441                               BitMap* region_bm,
1442                               BitMap* card_bm) :
1443     G1CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1444 
1445   void doHeapRegion(HeapRegion* hr) {
1446     HeapWord* ntams = hr->next_top_at_mark_start();
1447     HeapWord* top   = hr->top();
1448 
1449     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1450 
1451     // Mark the allocated-since-marking portion...
1452     if (ntams < top) {
1453       // This definitely means the region has live objects.
1454       set_bit_for_region(hr);
1455 
1456       // Now set the bits in the card bitmap for [ntams, top)
1457       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1458       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1459 
1460       // Note: if we're looking at the last region in heap - top
1461       // could be actually just beyond the end of the heap; end_idx
1462       // will then correspond to a (non-existent) card that is also
1463       // just beyond the heap.
1464       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1465         // end of object is not card aligned - increment to cover
1466         // all the cards spanned by the object
1467         end_idx += 1;
1468       }
1469 
1470       assert(end_idx <= _card_bm->size(),
1471              "oob: end_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1472              end_idx, _card_bm->size());
1473       assert(start_idx < _card_bm->size(),
1474              "oob: start_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1475              start_idx, _card_bm->size());
1476 
1477       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1478     }
1479 
1480     // Set the bit for the region if it contains live data
1481     if (hr->next_marked_bytes() > 0) {
1482       set_bit_for_region(hr);
1483     }
1484   }
1485 };
1486 
1487 class G1ParFinalCountTask: public AbstractGangTask {
1488 protected:
1489   G1CollectedHeap* _g1h;
1490   G1ConcurrentMark* _cm;
1491   BitMap* _actual_region_bm;
1492   BitMap* _actual_card_bm;
1493 
1494   uint    _n_workers;
1495   HeapRegionClaimer _hrclaimer;
1496 
1497 public:
1498   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1499     : AbstractGangTask("G1 final counting"),
1500       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1501       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1502       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1503   }
1504 
1505   void work(uint worker_id) {
1506     assert(worker_id < _n_workers, "invariant");
1507 
1508     FinalCountDataUpdateClosure final_update_cl(_g1h,
1509                                                 _actual_region_bm,
1510                                                 _actual_card_bm);
1511 
1512     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1513   }
1514 };
1515 
1516 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1517   G1CollectedHeap* _g1;
1518   size_t _freed_bytes;
1519   FreeRegionList* _local_cleanup_list;
1520   uint _old_regions_removed;
1521   uint _humongous_regions_removed;
1522   HRRSCleanupTask* _hrrs_cleanup_task;
1523 
1524 public:
1525   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1526                              FreeRegionList* local_cleanup_list,
1527                              HRRSCleanupTask* hrrs_cleanup_task) :
1528     _g1(g1),
1529     _freed_bytes(0),
1530     _local_cleanup_list(local_cleanup_list),
1531     _old_regions_removed(0),
1532     _humongous_regions_removed(0),
1533     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1534 
1535   size_t freed_bytes() { return _freed_bytes; }
1536   const uint old_regions_removed() { return _old_regions_removed; }
1537   const uint humongous_regions_removed() { return _humongous_regions_removed; }
1538 
1539   void doHeapRegion(HeapRegion *hr) {
1540     if (hr->is_archive()) {
1541       return;
1542     }
1543     // We use a claim value of zero here because all regions
1544     // were claimed with value 1 in the FinalCount task.
1545     _g1->reset_gc_time_stamps(hr);
1546     hr->note_end_of_marking();
1547 
1548     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1549       _freed_bytes += hr->used();
1550       hr->set_containing_set(NULL);
1551       if (hr->is_humongous()) {
1552         _humongous_regions_removed++;
1553         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1554       } else {
1555         _old_regions_removed++;
1556         _g1->free_region(hr, _local_cleanup_list, true);
1557       }
1558     } else {
1559       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1560     }
1561   }
1562 };
1563 
1564 class G1ParNoteEndTask: public AbstractGangTask {
1565   friend class G1NoteEndOfConcMarkClosure;
1566 
1567 protected:
1568   G1CollectedHeap* _g1h;
1569   FreeRegionList* _cleanup_list;
1570   HeapRegionClaimer _hrclaimer;
1571 
1572 public:
1573   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1574       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1575   }
1576 
1577   void work(uint worker_id) {
1578     FreeRegionList local_cleanup_list("Local Cleanup List");
1579     HRRSCleanupTask hrrs_cleanup_task;
1580     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1581                                            &hrrs_cleanup_task);
1582     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1583 
1584     // Now update the lists
1585     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1586     {
1587       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1588       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1589 
1590       // If we iterate over the global cleanup list at the end of
1591       // cleanup to do this printing we will not guarantee to only
1592       // generate output for the newly-reclaimed regions (the list
1593       // might not be empty at the beginning of cleanup; we might
1594       // still be working on its previous contents). So we do the
1595       // printing here, before we append the new regions to the global
1596       // cleanup list.
1597 
1598       G1HRPrinter* hr_printer = _g1h->hr_printer();
1599       if (hr_printer->is_active()) {
1600         FreeRegionListIterator iter(&local_cleanup_list);
1601         while (iter.more_available()) {
1602           HeapRegion* hr = iter.get_next();
1603           hr_printer->cleanup(hr);
1604         }
1605       }
1606 
1607       _cleanup_list->add_ordered(&local_cleanup_list);
1608       assert(local_cleanup_list.is_empty(), "post-condition");
1609 
1610       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1611     }
1612   }
1613 };
1614 
1615 void G1ConcurrentMark::cleanup() {
1616   // world is stopped at this checkpoint
1617   assert(SafepointSynchronize::is_at_safepoint(),
1618          "world should be stopped");
1619   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1620 
1621   // If a full collection has happened, we shouldn't do this.
1622   if (has_aborted()) {
1623     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1624     return;
1625   }
1626 
1627   g1h->verifier()->verify_region_sets_optional();
1628 
1629   if (VerifyDuringGC) {
1630     HandleMark hm;  // handle scope
1631     g1h->prepare_for_verify();
1632     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1633   }
1634   g1h->verifier()->check_bitmaps("Cleanup Start");
1635 
1636   G1CollectorPolicy* g1p = g1h->g1_policy();
1637   g1p->record_concurrent_mark_cleanup_start();
1638 
1639   double start = os::elapsedTime();
1640 
1641   HeapRegionRemSet::reset_for_cleanup_tasks();
1642 
1643   // Do counting once more with the world stopped for good measure.
1644   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1645 
1646   g1h->workers()->run_task(&g1_par_count_task);
1647 
1648   if (VerifyDuringGC) {
1649     // Verify that the counting data accumulated during marking matches
1650     // that calculated by walking the marking bitmap.
1651 
1652     // Bitmaps to hold expected values
1653     BitMap expected_region_bm(_region_bm.size(), true);
1654     BitMap expected_card_bm(_card_bm.size(), true);
1655 
1656     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1657                                                  &_region_bm,
1658                                                  &_card_bm,
1659                                                  &expected_region_bm,
1660                                                  &expected_card_bm);
1661 
1662     g1h->workers()->run_task(&g1_par_verify_task);
1663 
1664     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1665   }
1666 
1667   size_t start_used_bytes = g1h->used();
1668   g1h->collector_state()->set_mark_in_progress(false);
1669 
1670   double count_end = os::elapsedTime();
1671   double this_final_counting_time = (count_end - start);
1672   _total_counting_time += this_final_counting_time;
1673 
1674   if (log_is_enabled(Trace, gc, liveness)) {
1675     G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1676     _g1h->heap_region_iterate(&cl);
1677   }
1678 
1679   // Install newly created mark bitMap as "prev".
1680   swapMarkBitMaps();
1681 
1682   g1h->reset_gc_time_stamp();
1683 
1684   uint n_workers = _g1h->workers()->active_workers();
1685 
1686   // Note end of marking in all heap regions.
1687   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1688   g1h->workers()->run_task(&g1_par_note_end_task);
1689   g1h->check_gc_time_stamps();
1690 
1691   if (!cleanup_list_is_empty()) {
1692     // The cleanup list is not empty, so we'll have to process it
1693     // concurrently. Notify anyone else that might be wanting free
1694     // regions that there will be more free regions coming soon.
1695     g1h->set_free_regions_coming();
1696   }
1697 
1698   // call below, since it affects the metric by which we sort the heap
1699   // regions.
1700   if (G1ScrubRemSets) {
1701     double rs_scrub_start = os::elapsedTime();
1702     g1h->scrub_rem_set(&_region_bm, &_card_bm);
1703     _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
1704   }
1705 
1706   // this will also free any regions totally full of garbage objects,
1707   // and sort the regions.
1708   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1709 
1710   // Statistics.
1711   double end = os::elapsedTime();
1712   _cleanup_times.add((end - start) * 1000.0);
1713 
1714   // Clean up will have freed any regions completely full of garbage.
1715   // Update the soft reference policy with the new heap occupancy.
1716   Universe::update_heap_info_at_gc();
1717 
1718   if (VerifyDuringGC) {
1719     HandleMark hm;  // handle scope
1720     g1h->prepare_for_verify();
1721     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
1722   }
1723 
1724   g1h->verifier()->check_bitmaps("Cleanup End");
1725 
1726   g1h->verifier()->verify_region_sets_optional();
1727 
1728   // We need to make this be a "collection" so any collection pause that
1729   // races with it goes around and waits for completeCleanup to finish.
1730   g1h->increment_total_collections();
1731 
1732   // Clean out dead classes and update Metaspace sizes.
1733   if (ClassUnloadingWithConcurrentMark) {
1734     ClassLoaderDataGraph::purge();
1735   }
1736   MetaspaceGC::compute_new_size();
1737 
1738   // We reclaimed old regions so we should calculate the sizes to make
1739   // sure we update the old gen/space data.
1740   g1h->g1mm()->update_sizes();
1741   g1h->allocation_context_stats().update_after_mark();
1742 
1743   g1h->trace_heap_after_concurrent_cycle();
1744 }
1745 
1746 void G1ConcurrentMark::completeCleanup() {
1747   if (has_aborted()) return;
1748 
1749   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1750 
1751   _cleanup_list.verify_optional();
1752   FreeRegionList tmp_free_list("Tmp Free List");
1753 
1754   log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1755                                   "cleanup list has %u entries",
1756                                   _cleanup_list.length());
1757 
1758   // No one else should be accessing the _cleanup_list at this point,
1759   // so it is not necessary to take any locks
1760   while (!_cleanup_list.is_empty()) {
1761     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1762     assert(hr != NULL, "Got NULL from a non-empty list");
1763     hr->par_clear();
1764     tmp_free_list.add_ordered(hr);
1765 
1766     // Instead of adding one region at a time to the secondary_free_list,
1767     // we accumulate them in the local list and move them a few at a
1768     // time. This also cuts down on the number of notify_all() calls
1769     // we do during this process. We'll also append the local list when
1770     // _cleanup_list is empty (which means we just removed the last
1771     // region from the _cleanup_list).
1772     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1773         _cleanup_list.is_empty()) {
1774       log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1775                                       "appending %u entries to the secondary_free_list, "
1776                                       "cleanup list still has %u entries",
1777                                       tmp_free_list.length(),
1778                                       _cleanup_list.length());
1779 
1780       {
1781         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1782         g1h->secondary_free_list_add(&tmp_free_list);
1783         SecondaryFreeList_lock->notify_all();
1784       }
1785 #ifndef PRODUCT
1786       if (G1StressConcRegionFreeing) {
1787         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1788           os::sleep(Thread::current(), (jlong) 1, false);
1789         }
1790       }
1791 #endif
1792     }
1793   }
1794   assert(tmp_free_list.is_empty(), "post-condition");
1795 }
1796 
1797 // Supporting Object and Oop closures for reference discovery
1798 // and processing in during marking
1799 
1800 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1801   HeapWord* addr = (HeapWord*)obj;
1802   return addr != NULL &&
1803          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1804 }
1805 
1806 // 'Keep Alive' oop closure used by both serial parallel reference processing.
1807 // Uses the G1CMTask associated with a worker thread (for serial reference
1808 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1809 // trace referent objects.
1810 //
1811 // Using the G1CMTask and embedded local queues avoids having the worker
1812 // threads operating on the global mark stack. This reduces the risk
1813 // of overflowing the stack - which we would rather avoid at this late
1814 // state. Also using the tasks' local queues removes the potential
1815 // of the workers interfering with each other that could occur if
1816 // operating on the global stack.
1817 
1818 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1819   G1ConcurrentMark* _cm;
1820   G1CMTask*         _task;
1821   int               _ref_counter_limit;
1822   int               _ref_counter;
1823   bool              _is_serial;
1824  public:
1825   G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1826     _cm(cm), _task(task), _is_serial(is_serial),
1827     _ref_counter_limit(G1RefProcDrainInterval) {
1828     assert(_ref_counter_limit > 0, "sanity");
1829     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1830     _ref_counter = _ref_counter_limit;
1831   }
1832 
1833   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1834   virtual void do_oop(      oop* p) { do_oop_work(p); }
1835 
1836   template <class T> void do_oop_work(T* p) {
1837     if (!_cm->has_overflown()) {
1838       oop obj = oopDesc::load_decode_heap_oop(p);
1839       _task->deal_with_reference(obj);
1840       _ref_counter--;
1841 
1842       if (_ref_counter == 0) {
1843         // We have dealt with _ref_counter_limit references, pushing them
1844         // and objects reachable from them on to the local stack (and
1845         // possibly the global stack). Call G1CMTask::do_marking_step() to
1846         // process these entries.
1847         //
1848         // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1849         // there's nothing more to do (i.e. we're done with the entries that
1850         // were pushed as a result of the G1CMTask::deal_with_reference() calls
1851         // above) or we overflow.
1852         //
1853         // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1854         // flag while there may still be some work to do. (See the comment at
1855         // the beginning of G1CMTask::do_marking_step() for those conditions -
1856         // one of which is reaching the specified time target.) It is only
1857         // when G1CMTask::do_marking_step() returns without setting the
1858         // has_aborted() flag that the marking step has completed.
1859         do {
1860           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1861           _task->do_marking_step(mark_step_duration_ms,
1862                                  false      /* do_termination */,
1863                                  _is_serial);
1864         } while (_task->has_aborted() && !_cm->has_overflown());
1865         _ref_counter = _ref_counter_limit;
1866       }
1867     }
1868   }
1869 };
1870 
1871 // 'Drain' oop closure used by both serial and parallel reference processing.
1872 // Uses the G1CMTask associated with a given worker thread (for serial
1873 // reference processing the G1CMtask for worker 0 is used). Calls the
1874 // do_marking_step routine, with an unbelievably large timeout value,
1875 // to drain the marking data structures of the remaining entries
1876 // added by the 'keep alive' oop closure above.
1877 
1878 class G1CMDrainMarkingStackClosure: public VoidClosure {
1879   G1ConcurrentMark* _cm;
1880   G1CMTask*         _task;
1881   bool              _is_serial;
1882  public:
1883   G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1884     _cm(cm), _task(task), _is_serial(is_serial) {
1885     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1886   }
1887 
1888   void do_void() {
1889     do {
1890       // We call G1CMTask::do_marking_step() to completely drain the local
1891       // and global marking stacks of entries pushed by the 'keep alive'
1892       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1893       //
1894       // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1895       // if there's nothing more to do (i.e. we've completely drained the
1896       // entries that were pushed as a a result of applying the 'keep alive'
1897       // closure to the entries on the discovered ref lists) or we overflow
1898       // the global marking stack.
1899       //
1900       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1901       // flag while there may still be some work to do. (See the comment at
1902       // the beginning of G1CMTask::do_marking_step() for those conditions -
1903       // one of which is reaching the specified time target.) It is only
1904       // when G1CMTask::do_marking_step() returns without setting the
1905       // has_aborted() flag that the marking step has completed.
1906 
1907       _task->do_marking_step(1000000000.0 /* something very large */,
1908                              true         /* do_termination */,
1909                              _is_serial);
1910     } while (_task->has_aborted() && !_cm->has_overflown());
1911   }
1912 };
1913 
1914 // Implementation of AbstractRefProcTaskExecutor for parallel
1915 // reference processing at the end of G1 concurrent marking
1916 
1917 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
1918 private:
1919   G1CollectedHeap*  _g1h;
1920   G1ConcurrentMark* _cm;
1921   WorkGang*         _workers;
1922   uint              _active_workers;
1923 
1924 public:
1925   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1926                           G1ConcurrentMark* cm,
1927                           WorkGang* workers,
1928                           uint n_workers) :
1929     _g1h(g1h), _cm(cm),
1930     _workers(workers), _active_workers(n_workers) { }
1931 
1932   // Executes the given task using concurrent marking worker threads.
1933   virtual void execute(ProcessTask& task);
1934   virtual void execute(EnqueueTask& task);
1935 };
1936 
1937 class G1CMRefProcTaskProxy: public AbstractGangTask {
1938   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1939   ProcessTask&      _proc_task;
1940   G1CollectedHeap*  _g1h;
1941   G1ConcurrentMark* _cm;
1942 
1943 public:
1944   G1CMRefProcTaskProxy(ProcessTask& proc_task,
1945                        G1CollectedHeap* g1h,
1946                        G1ConcurrentMark* cm) :
1947     AbstractGangTask("Process reference objects in parallel"),
1948     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1949     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1950     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1951   }
1952 
1953   virtual void work(uint worker_id) {
1954     ResourceMark rm;
1955     HandleMark hm;
1956     G1CMTask* task = _cm->task(worker_id);
1957     G1CMIsAliveClosure g1_is_alive(_g1h);
1958     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1959     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1960 
1961     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1962   }
1963 };
1964 
1965 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
1966   assert(_workers != NULL, "Need parallel worker threads.");
1967   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1968 
1969   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1970 
1971   // We need to reset the concurrency level before each
1972   // proxy task execution, so that the termination protocol
1973   // and overflow handling in G1CMTask::do_marking_step() knows
1974   // how many workers to wait for.
1975   _cm->set_concurrency(_active_workers);
1976   _workers->run_task(&proc_task_proxy);
1977 }
1978 
1979 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
1980   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
1981   EnqueueTask& _enq_task;
1982 
1983 public:
1984   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
1985     AbstractGangTask("Enqueue reference objects in parallel"),
1986     _enq_task(enq_task) { }
1987 
1988   virtual void work(uint worker_id) {
1989     _enq_task.work(worker_id);
1990   }
1991 };
1992 
1993 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
1994   assert(_workers != NULL, "Need parallel worker threads.");
1995   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1996 
1997   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
1998 
1999   // Not strictly necessary but...
2000   //
2001   // We need to reset the concurrency level before each
2002   // proxy task execution, so that the termination protocol
2003   // and overflow handling in G1CMTask::do_marking_step() knows
2004   // how many workers to wait for.
2005   _cm->set_concurrency(_active_workers);
2006   _workers->run_task(&enq_task_proxy);
2007 }
2008 
2009 void G1ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2010   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2011 }
2012 
2013 void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2014   if (has_overflown()) {
2015     // Skip processing the discovered references if we have
2016     // overflown the global marking stack. Reference objects
2017     // only get discovered once so it is OK to not
2018     // de-populate the discovered reference lists. We could have,
2019     // but the only benefit would be that, when marking restarts,
2020     // less reference objects are discovered.
2021     return;
2022   }
2023 
2024   ResourceMark rm;
2025   HandleMark   hm;
2026 
2027   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2028 
2029   // Is alive closure.
2030   G1CMIsAliveClosure g1_is_alive(g1h);
2031 
2032   // Inner scope to exclude the cleaning of the string and symbol
2033   // tables from the displayed time.
2034   {
2035     GCTraceTime(Debug, gc) trace("Reference Processing", g1h->gc_timer_cm());
2036 
2037     ReferenceProcessor* rp = g1h->ref_processor_cm();
2038 
2039     // See the comment in G1CollectedHeap::ref_processing_init()
2040     // about how reference processing currently works in G1.
2041 
2042     // Set the soft reference policy
2043     rp->setup_policy(clear_all_soft_refs);
2044     assert(_markStack.isEmpty(), "mark stack should be empty");
2045 
2046     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2047     // in serial reference processing. Note these closures are also
2048     // used for serially processing (by the the current thread) the
2049     // JNI references during parallel reference processing.
2050     //
2051     // These closures do not need to synchronize with the worker
2052     // threads involved in parallel reference processing as these
2053     // instances are executed serially by the current thread (e.g.
2054     // reference processing is not multi-threaded and is thus
2055     // performed by the current thread instead of a gang worker).
2056     //
2057     // The gang tasks involved in parallel reference processing create
2058     // their own instances of these closures, which do their own
2059     // synchronization among themselves.
2060     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2061     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2062 
2063     // We need at least one active thread. If reference processing
2064     // is not multi-threaded we use the current (VMThread) thread,
2065     // otherwise we use the work gang from the G1CollectedHeap and
2066     // we utilize all the worker threads we can.
2067     bool processing_is_mt = rp->processing_is_mt();
2068     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2069     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2070 
2071     // Parallel processing task executor.
2072     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2073                                               g1h->workers(), active_workers);
2074     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2075 
2076     // Set the concurrency level. The phase was already set prior to
2077     // executing the remark task.
2078     set_concurrency(active_workers);
2079 
2080     // Set the degree of MT processing here.  If the discovery was done MT,
2081     // the number of threads involved during discovery could differ from
2082     // the number of active workers.  This is OK as long as the discovered
2083     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2084     rp->set_active_mt_degree(active_workers);
2085 
2086     // Process the weak references.
2087     const ReferenceProcessorStats& stats =
2088         rp->process_discovered_references(&g1_is_alive,
2089                                           &g1_keep_alive,
2090                                           &g1_drain_mark_stack,
2091                                           executor,
2092                                           g1h->gc_timer_cm());
2093     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2094 
2095     // The do_oop work routines of the keep_alive and drain_marking_stack
2096     // oop closures will set the has_overflown flag if we overflow the
2097     // global marking stack.
2098 
2099     assert(_markStack.overflow() || _markStack.isEmpty(),
2100             "mark stack should be empty (unless it overflowed)");
2101 
2102     if (_markStack.overflow()) {
2103       // This should have been done already when we tried to push an
2104       // entry on to the global mark stack. But let's do it again.
2105       set_has_overflown();
2106     }
2107 
2108     assert(rp->num_q() == active_workers, "why not");
2109 
2110     rp->enqueue_discovered_references(executor);
2111 
2112     rp->verify_no_references_recorded();
2113     assert(!rp->discovery_enabled(), "Post condition");
2114   }
2115 
2116   if (has_overflown()) {
2117     // We can not trust g1_is_alive if the marking stack overflowed
2118     return;
2119   }
2120 
2121   assert(_markStack.isEmpty(), "Marking should have completed");
2122 
2123   // Unload Klasses, String, Symbols, Code Cache, etc.
2124   {
2125     GCTraceTime(Debug, gc) trace("Unloading", g1h->gc_timer_cm());
2126 
2127     if (ClassUnloadingWithConcurrentMark) {
2128       bool purged_classes;
2129 
2130       {
2131         GCTraceTime(Trace, gc) trace("System Dictionary Unloading", g1h->gc_timer_cm());
2132         purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2133       }
2134 
2135       {
2136         GCTraceTime(Trace, gc) trace("Parallel Unloading", g1h->gc_timer_cm());
2137         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2138       }
2139     }
2140 
2141     if (G1StringDedup::is_enabled()) {
2142       GCTraceTime(Trace, gc) trace("String Deduplication Unlink", g1h->gc_timer_cm());
2143       G1StringDedup::unlink(&g1_is_alive);
2144     }
2145   }
2146 }
2147 
2148 void G1ConcurrentMark::swapMarkBitMaps() {
2149   G1CMBitMapRO* temp = _prevMarkBitMap;
2150   _prevMarkBitMap    = (G1CMBitMapRO*)_nextMarkBitMap;
2151   _nextMarkBitMap    = (G1CMBitMap*)  temp;
2152 }
2153 
2154 // Closure for marking entries in SATB buffers.
2155 class G1CMSATBBufferClosure : public SATBBufferClosure {
2156 private:
2157   G1CMTask* _task;
2158   G1CollectedHeap* _g1h;
2159 
2160   // This is very similar to G1CMTask::deal_with_reference, but with
2161   // more relaxed requirements for the argument, so this must be more
2162   // circumspect about treating the argument as an object.
2163   void do_entry(void* entry) const {
2164     _task->increment_refs_reached();
2165     HeapRegion* hr = _g1h->heap_region_containing(entry);
2166     if (entry < hr->next_top_at_mark_start()) {
2167       // Until we get here, we don't know whether entry refers to a valid
2168       // object; it could instead have been a stale reference.
2169       oop obj = static_cast<oop>(entry);
2170       assert(obj->is_oop(true /* ignore mark word */),
2171              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2172       _task->make_reference_grey(obj, hr);
2173     }
2174   }
2175 
2176 public:
2177   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
2178     : _task(task), _g1h(g1h) { }
2179 
2180   virtual void do_buffer(void** buffer, size_t size) {
2181     for (size_t i = 0; i < size; ++i) {
2182       do_entry(buffer[i]);
2183     }
2184   }
2185 };
2186 
2187 class G1RemarkThreadsClosure : public ThreadClosure {
2188   G1CMSATBBufferClosure _cm_satb_cl;
2189   G1CMOopClosure _cm_cl;
2190   MarkingCodeBlobClosure _code_cl;
2191   int _thread_parity;
2192 
2193  public:
2194   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
2195     _cm_satb_cl(task, g1h),
2196     _cm_cl(g1h, g1h->concurrent_mark(), task),
2197     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2198     _thread_parity(Threads::thread_claim_parity()) {}
2199 
2200   void do_thread(Thread* thread) {
2201     if (thread->is_Java_thread()) {
2202       if (thread->claim_oops_do(true, _thread_parity)) {
2203         JavaThread* jt = (JavaThread*)thread;
2204 
2205         // In theory it should not be neccessary to explicitly walk the nmethods to find roots for concurrent marking
2206         // however the liveness of oops reachable from nmethods have very complex lifecycles:
2207         // * Alive if on the stack of an executing method
2208         // * Weakly reachable otherwise
2209         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver should be
2210         // live by the SATB invariant but other oops recorded in nmethods may behave differently.
2211         jt->nmethods_do(&_code_cl);
2212 
2213         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2214       }
2215     } else if (thread->is_VM_thread()) {
2216       if (thread->claim_oops_do(true, _thread_parity)) {
2217         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2218       }
2219     }
2220   }
2221 };
2222 
2223 class G1CMRemarkTask: public AbstractGangTask {
2224 private:
2225   G1ConcurrentMark* _cm;
2226 public:
2227   void work(uint worker_id) {
2228     // Since all available tasks are actually started, we should
2229     // only proceed if we're supposed to be active.
2230     if (worker_id < _cm->active_tasks()) {
2231       G1CMTask* task = _cm->task(worker_id);
2232       task->record_start_time();
2233       {
2234         ResourceMark rm;
2235         HandleMark hm;
2236 
2237         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2238         Threads::threads_do(&threads_f);
2239       }
2240 
2241       do {
2242         task->do_marking_step(1000000000.0 /* something very large */,
2243                               true         /* do_termination       */,
2244                               false        /* is_serial            */);
2245       } while (task->has_aborted() && !_cm->has_overflown());
2246       // If we overflow, then we do not want to restart. We instead
2247       // want to abort remark and do concurrent marking again.
2248       task->record_end_time();
2249     }
2250   }
2251 
2252   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
2253     AbstractGangTask("Par Remark"), _cm(cm) {
2254     _cm->terminator()->reset_for_reuse(active_workers);
2255   }
2256 };
2257 
2258 void G1ConcurrentMark::checkpointRootsFinalWork() {
2259   ResourceMark rm;
2260   HandleMark   hm;
2261   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2262 
2263   GCTraceTime(Debug, gc) trace("Finalize Marking", g1h->gc_timer_cm());
2264 
2265   g1h->ensure_parsability(false);
2266 
2267   // this is remark, so we'll use up all active threads
2268   uint active_workers = g1h->workers()->active_workers();
2269   set_concurrency_and_phase(active_workers, false /* concurrent */);
2270   // Leave _parallel_marking_threads at it's
2271   // value originally calculated in the G1ConcurrentMark
2272   // constructor and pass values of the active workers
2273   // through the gang in the task.
2274 
2275   {
2276     StrongRootsScope srs(active_workers);
2277 
2278     G1CMRemarkTask remarkTask(this, active_workers);
2279     // We will start all available threads, even if we decide that the
2280     // active_workers will be fewer. The extra ones will just bail out
2281     // immediately.
2282     g1h->workers()->run_task(&remarkTask);
2283   }
2284 
2285   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2286   guarantee(has_overflown() ||
2287             satb_mq_set.completed_buffers_num() == 0,
2288             "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
2289             BOOL_TO_STR(has_overflown()),
2290             satb_mq_set.completed_buffers_num());
2291 
2292   print_stats();
2293 }
2294 
2295 void G1ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2296   // Note we are overriding the read-only view of the prev map here, via
2297   // the cast.
2298   ((G1CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2299 }
2300 
2301 HeapRegion*
2302 G1ConcurrentMark::claim_region(uint worker_id) {
2303   // "checkpoint" the finger
2304   HeapWord* finger = _finger;
2305 
2306   // _heap_end will not change underneath our feet; it only changes at
2307   // yield points.
2308   while (finger < _heap_end) {
2309     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2310 
2311     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2312 
2313     // Above heap_region_containing may return NULL as we always scan claim
2314     // until the end of the heap. In this case, just jump to the next region.
2315     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2316 
2317     // Is the gap between reading the finger and doing the CAS too long?
2318     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2319     if (res == finger && curr_region != NULL) {
2320       // we succeeded
2321       HeapWord*   bottom        = curr_region->bottom();
2322       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2323 
2324       // notice that _finger == end cannot be guaranteed here since,
2325       // someone else might have moved the finger even further
2326       assert(_finger >= end, "the finger should have moved forward");
2327 
2328       if (limit > bottom) {
2329         return curr_region;
2330       } else {
2331         assert(limit == bottom,
2332                "the region limit should be at bottom");
2333         // we return NULL and the caller should try calling
2334         // claim_region() again.
2335         return NULL;
2336       }
2337     } else {
2338       assert(_finger > finger, "the finger should have moved forward");
2339       // read it again
2340       finger = _finger;
2341     }
2342   }
2343 
2344   return NULL;
2345 }
2346 
2347 #ifndef PRODUCT
2348 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
2349 private:
2350   G1CollectedHeap* _g1h;
2351   const char* _phase;
2352   int _info;
2353 
2354 public:
2355   VerifyNoCSetOops(const char* phase, int info = -1) :
2356     _g1h(G1CollectedHeap::heap()),
2357     _phase(phase),
2358     _info(info)
2359   { }
2360 
2361   void operator()(oop obj) const {
2362     guarantee(obj->is_oop(),
2363               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2364               p2i(obj), _phase, _info);
2365     guarantee(!_g1h->obj_in_cs(obj),
2366               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2367               p2i(obj), _phase, _info);
2368   }
2369 };
2370 
2371 void G1ConcurrentMark::verify_no_cset_oops() {
2372   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2373   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2374     return;
2375   }
2376 
2377   // Verify entries on the global mark stack
2378   _markStack.iterate(VerifyNoCSetOops("Stack"));
2379 
2380   // Verify entries on the task queues
2381   for (uint i = 0; i < _max_worker_id; ++i) {
2382     G1CMTaskQueue* queue = _task_queues->queue(i);
2383     queue->iterate(VerifyNoCSetOops("Queue", i));
2384   }
2385 
2386   // Verify the global finger
2387   HeapWord* global_finger = finger();
2388   if (global_finger != NULL && global_finger < _heap_end) {
2389     // Since we always iterate over all regions, we might get a NULL HeapRegion
2390     // here.
2391     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
2392     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2393               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
2394               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
2395   }
2396 
2397   // Verify the task fingers
2398   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2399   for (uint i = 0; i < parallel_marking_threads(); ++i) {
2400     G1CMTask* task = _tasks[i];
2401     HeapWord* task_finger = task->finger();
2402     if (task_finger != NULL && task_finger < _heap_end) {
2403       // See above note on the global finger verification.
2404       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
2405       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2406                 !task_hr->in_collection_set(),
2407                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2408                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
2409     }
2410   }
2411 }
2412 #endif // PRODUCT
2413 
2414 // Aggregate the counting data that was constructed concurrently
2415 // with marking.
2416 class AggregateCountDataHRClosure: public HeapRegionClosure {
2417   G1CollectedHeap* _g1h;
2418   G1ConcurrentMark* _cm;
2419   CardTableModRefBS* _ct_bs;
2420   BitMap* _cm_card_bm;
2421   uint _max_worker_id;
2422 
2423  public:
2424   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2425                               BitMap* cm_card_bm,
2426                               uint max_worker_id) :
2427     _g1h(g1h), _cm(g1h->concurrent_mark()),
2428     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2429     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2430 
2431   void doHeapRegion(HeapRegion* hr) {
2432     HeapWord* start = hr->bottom();
2433     HeapWord* limit = hr->next_top_at_mark_start();
2434     HeapWord* end = hr->end();
2435 
2436     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2437            "Preconditions not met - "
2438            "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2439            "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2440            p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2441 
2442     assert(hr->next_marked_bytes() == 0, "Precondition");
2443 
2444     if (start == limit) {
2445       // NTAMS of this region has not been set so nothing to do.
2446       return;
2447     }
2448 
2449     // 'start' should be in the heap.
2450     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2451     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2452     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2453 
2454     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2455     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2456     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2457 
2458     // If ntams is not card aligned then we bump card bitmap index
2459     // for limit so that we get the all the cards spanned by
2460     // the object ending at ntams.
2461     // Note: if this is the last region in the heap then ntams
2462     // could be actually just beyond the end of the the heap;
2463     // limit_idx will then  correspond to a (non-existent) card
2464     // that is also outside the heap.
2465     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2466       limit_idx += 1;
2467     }
2468 
2469     assert(limit_idx <= end_idx, "or else use atomics");
2470 
2471     // Aggregate the "stripe" in the count data associated with hr.
2472     uint hrm_index = hr->hrm_index();
2473     size_t marked_bytes = 0;
2474 
2475     for (uint i = 0; i < _max_worker_id; i += 1) {
2476       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2477       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2478 
2479       // Fetch the marked_bytes in this region for task i and
2480       // add it to the running total for this region.
2481       marked_bytes += marked_bytes_array[hrm_index];
2482 
2483       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2484       // into the global card bitmap.
2485       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2486 
2487       while (scan_idx < limit_idx) {
2488         assert(task_card_bm->at(scan_idx) == true, "should be");
2489         _cm_card_bm->set_bit(scan_idx);
2490         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2491 
2492         // BitMap::get_next_one_offset() can handle the case when
2493         // its left_offset parameter is greater than its right_offset
2494         // parameter. It does, however, have an early exit if
2495         // left_offset == right_offset. So let's limit the value
2496         // passed in for left offset here.
2497         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2498         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2499       }
2500     }
2501 
2502     // Update the marked bytes for this region.
2503     hr->add_to_marked_bytes(marked_bytes);
2504   }
2505 };
2506 
2507 class G1AggregateCountDataTask: public AbstractGangTask {
2508 protected:
2509   G1CollectedHeap* _g1h;
2510   G1ConcurrentMark* _cm;
2511   BitMap* _cm_card_bm;
2512   uint _max_worker_id;
2513   uint _active_workers;
2514   HeapRegionClaimer _hrclaimer;
2515 
2516 public:
2517   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2518                            G1ConcurrentMark* cm,
2519                            BitMap* cm_card_bm,
2520                            uint max_worker_id,
2521                            uint n_workers) :
2522       AbstractGangTask("Count Aggregation"),
2523       _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2524       _max_worker_id(max_worker_id),
2525       _active_workers(n_workers),
2526       _hrclaimer(_active_workers) {
2527   }
2528 
2529   void work(uint worker_id) {
2530     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
2531 
2532     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
2533   }
2534 };
2535 
2536 
2537 void G1ConcurrentMark::aggregate_count_data() {
2538   uint n_workers = _g1h->workers()->active_workers();
2539 
2540   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
2541                                            _max_worker_id, n_workers);
2542 
2543   _g1h->workers()->run_task(&g1_par_agg_task);
2544 }
2545 
2546 // Clear the per-worker arrays used to store the per-region counting data
2547 void G1ConcurrentMark::clear_all_count_data() {
2548   // Clear the global card bitmap - it will be filled during
2549   // liveness count aggregation (during remark) and the
2550   // final counting task.
2551   _card_bm.clear();
2552 
2553   // Clear the global region bitmap - it will be filled as part
2554   // of the final counting task.
2555   _region_bm.clear();
2556 
2557   uint max_regions = _g1h->max_regions();
2558   assert(_max_worker_id > 0, "uninitialized");
2559 
2560   for (uint i = 0; i < _max_worker_id; i += 1) {
2561     BitMap* task_card_bm = count_card_bitmap_for(i);
2562     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
2563 
2564     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
2565     assert(marked_bytes_array != NULL, "uninitialized");
2566 
2567     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
2568     task_card_bm->clear();
2569   }
2570 }
2571 
2572 void G1ConcurrentMark::print_stats() {
2573   if (!log_is_enabled(Debug, gc, stats)) {
2574     return;
2575   }
2576   log_debug(gc, stats)("---------------------------------------------------------------------");
2577   for (size_t i = 0; i < _active_tasks; ++i) {
2578     _tasks[i]->print_stats();
2579     log_debug(gc, stats)("---------------------------------------------------------------------");
2580   }
2581 }
2582 
2583 // abandon current marking iteration due to a Full GC
2584 void G1ConcurrentMark::abort() {
2585   if (!cmThread()->during_cycle() || _has_aborted) {
2586     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2587     return;
2588   }
2589 
2590   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2591   // concurrent bitmap clearing.
2592   _nextMarkBitMap->clearAll();
2593 
2594   // Note we cannot clear the previous marking bitmap here
2595   // since VerifyDuringGC verifies the objects marked during
2596   // a full GC against the previous bitmap.
2597 
2598   // Clear the liveness counting data
2599   clear_all_count_data();
2600   // Empty mark stack
2601   reset_marking_state();
2602   for (uint i = 0; i < _max_worker_id; ++i) {
2603     _tasks[i]->clear_region_fields();
2604   }
2605   _first_overflow_barrier_sync.abort();
2606   _second_overflow_barrier_sync.abort();
2607   _has_aborted = true;
2608 
2609   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2610   satb_mq_set.abandon_partial_marking();
2611   // This can be called either during or outside marking, we'll read
2612   // the expected_active value from the SATB queue set.
2613   satb_mq_set.set_active_all_threads(
2614                                  false, /* new active value */
2615                                  satb_mq_set.is_active() /* expected_active */);
2616 
2617   _g1h->trace_heap_after_concurrent_cycle();
2618 
2619   _g1h->register_concurrent_cycle_end();
2620 }
2621 
2622 static void print_ms_time_info(const char* prefix, const char* name,
2623                                NumberSeq& ns) {
2624   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2625                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2626   if (ns.num() > 0) {
2627     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2628                            prefix, ns.sd(), ns.maximum());
2629   }
2630 }
2631 
2632 void G1ConcurrentMark::print_summary_info() {
2633   LogHandle(gc, marking) log;
2634   if (!log.is_trace()) {
2635     return;
2636   }
2637 
2638   log.trace(" Concurrent marking:");
2639   print_ms_time_info("  ", "init marks", _init_times);
2640   print_ms_time_info("  ", "remarks", _remark_times);
2641   {
2642     print_ms_time_info("     ", "final marks", _remark_mark_times);
2643     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2644 
2645   }
2646   print_ms_time_info("  ", "cleanups", _cleanup_times);
2647   log.trace("    Final counting total time = %8.2f s (avg = %8.2f ms).",
2648             _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2649   if (G1ScrubRemSets) {
2650     log.trace("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2651               _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2652   }
2653   log.trace("  Total stop_world time = %8.2f s.",
2654             (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
2655   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2656             cmThread()->vtime_accum(), cmThread()->vtime_mark_accum());
2657 }
2658 
2659 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2660   _parallel_workers->print_worker_threads_on(st);
2661 }
2662 
2663 void G1ConcurrentMark::print_on_error(outputStream* st) const {
2664   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2665       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
2666   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
2667   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
2668 }
2669 
2670 // We take a break if someone is trying to stop the world.
2671 bool G1ConcurrentMark::do_yield_check(uint worker_id) {
2672   if (SuspendibleThreadSet::should_yield()) {
2673     SuspendibleThreadSet::yield();
2674     return true;
2675   } else {
2676     return false;
2677   }
2678 }
2679 
2680 // Closure for iteration over bitmaps
2681 class G1CMBitMapClosure : public BitMapClosure {
2682 private:
2683   // the bitmap that is being iterated over
2684   G1CMBitMap*                 _nextMarkBitMap;
2685   G1ConcurrentMark*           _cm;
2686   G1CMTask*                   _task;
2687 
2688 public:
2689   G1CMBitMapClosure(G1CMTask *task, G1ConcurrentMark* cm, G1CMBitMap* nextMarkBitMap) :
2690     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2691 
2692   bool do_bit(size_t offset) {
2693     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2694     assert(_nextMarkBitMap->isMarked(addr), "invariant");
2695     assert( addr < _cm->finger(), "invariant");
2696     assert(addr >= _task->finger(), "invariant");
2697 
2698     // We move that task's local finger along.
2699     _task->move_finger_to(addr);
2700 
2701     _task->scan_object(oop(addr));
2702     // we only partially drain the local queue and global stack
2703     _task->drain_local_queue(true);
2704     _task->drain_global_stack(true);
2705 
2706     // if the has_aborted flag has been raised, we need to bail out of
2707     // the iteration
2708     return !_task->has_aborted();
2709   }
2710 };
2711 
2712 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2713   ReferenceProcessor* result = g1h->ref_processor_cm();
2714   assert(result != NULL, "CM reference processor should not be NULL");
2715   return result;
2716 }
2717 
2718 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2719                                G1ConcurrentMark* cm,
2720                                G1CMTask* task)
2721   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2722     _g1h(g1h), _cm(cm), _task(task)
2723 { }
2724 
2725 void G1CMTask::setup_for_region(HeapRegion* hr) {
2726   assert(hr != NULL,
2727         "claim_region() should have filtered out NULL regions");
2728   _curr_region  = hr;
2729   _finger       = hr->bottom();
2730   update_region_limit();
2731 }
2732 
2733 void G1CMTask::update_region_limit() {
2734   HeapRegion* hr            = _curr_region;
2735   HeapWord* bottom          = hr->bottom();
2736   HeapWord* limit           = hr->next_top_at_mark_start();
2737 
2738   if (limit == bottom) {
2739     // The region was collected underneath our feet.
2740     // We set the finger to bottom to ensure that the bitmap
2741     // iteration that will follow this will not do anything.
2742     // (this is not a condition that holds when we set the region up,
2743     // as the region is not supposed to be empty in the first place)
2744     _finger = bottom;
2745   } else if (limit >= _region_limit) {
2746     assert(limit >= _finger, "peace of mind");
2747   } else {
2748     assert(limit < _region_limit, "only way to get here");
2749     // This can happen under some pretty unusual circumstances.  An
2750     // evacuation pause empties the region underneath our feet (NTAMS
2751     // at bottom). We then do some allocation in the region (NTAMS
2752     // stays at bottom), followed by the region being used as a GC
2753     // alloc region (NTAMS will move to top() and the objects
2754     // originally below it will be grayed). All objects now marked in
2755     // the region are explicitly grayed, if below the global finger,
2756     // and we do not need in fact to scan anything else. So, we simply
2757     // set _finger to be limit to ensure that the bitmap iteration
2758     // doesn't do anything.
2759     _finger = limit;
2760   }
2761 
2762   _region_limit = limit;
2763 }
2764 
2765 void G1CMTask::giveup_current_region() {
2766   assert(_curr_region != NULL, "invariant");
2767   clear_region_fields();
2768 }
2769 
2770 void G1CMTask::clear_region_fields() {
2771   // Values for these three fields that indicate that we're not
2772   // holding on to a region.
2773   _curr_region   = NULL;
2774   _finger        = NULL;
2775   _region_limit  = NULL;
2776 }
2777 
2778 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2779   if (cm_oop_closure == NULL) {
2780     assert(_cm_oop_closure != NULL, "invariant");
2781   } else {
2782     assert(_cm_oop_closure == NULL, "invariant");
2783   }
2784   _cm_oop_closure = cm_oop_closure;
2785 }
2786 
2787 void G1CMTask::reset(G1CMBitMap* nextMarkBitMap) {
2788   guarantee(nextMarkBitMap != NULL, "invariant");
2789   _nextMarkBitMap                = nextMarkBitMap;
2790   clear_region_fields();
2791 
2792   _calls                         = 0;
2793   _elapsed_time_ms               = 0.0;
2794   _termination_time_ms           = 0.0;
2795   _termination_start_time_ms     = 0.0;
2796 }
2797 
2798 bool G1CMTask::should_exit_termination() {
2799   regular_clock_call();
2800   // This is called when we are in the termination protocol. We should
2801   // quit if, for some reason, this task wants to abort or the global
2802   // stack is not empty (this means that we can get work from it).
2803   return !_cm->mark_stack_empty() || has_aborted();
2804 }
2805 
2806 void G1CMTask::reached_limit() {
2807   assert(_words_scanned >= _words_scanned_limit ||
2808          _refs_reached >= _refs_reached_limit ,
2809          "shouldn't have been called otherwise");
2810   regular_clock_call();
2811 }
2812 
2813 void G1CMTask::regular_clock_call() {
2814   if (has_aborted()) return;
2815 
2816   // First, we need to recalculate the words scanned and refs reached
2817   // limits for the next clock call.
2818   recalculate_limits();
2819 
2820   // During the regular clock call we do the following
2821 
2822   // (1) If an overflow has been flagged, then we abort.
2823   if (_cm->has_overflown()) {
2824     set_has_aborted();
2825     return;
2826   }
2827 
2828   // If we are not concurrent (i.e. we're doing remark) we don't need
2829   // to check anything else. The other steps are only needed during
2830   // the concurrent marking phase.
2831   if (!concurrent()) return;
2832 
2833   // (2) If marking has been aborted for Full GC, then we also abort.
2834   if (_cm->has_aborted()) {
2835     set_has_aborted();
2836     return;
2837   }
2838 
2839   double curr_time_ms = os::elapsedVTime() * 1000.0;
2840 
2841   // (4) We check whether we should yield. If we have to, then we abort.
2842   if (SuspendibleThreadSet::should_yield()) {
2843     // We should yield. To do this we abort the task. The caller is
2844     // responsible for yielding.
2845     set_has_aborted();
2846     return;
2847   }
2848 
2849   // (5) We check whether we've reached our time quota. If we have,
2850   // then we abort.
2851   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2852   if (elapsed_time_ms > _time_target_ms) {
2853     set_has_aborted();
2854     _has_timed_out = true;
2855     return;
2856   }
2857 
2858   // (6) Finally, we check whether there are enough completed STAB
2859   // buffers available for processing. If there are, we abort.
2860   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2861   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2862     // we do need to process SATB buffers, we'll abort and restart
2863     // the marking task to do so
2864     set_has_aborted();
2865     return;
2866   }
2867 }
2868 
2869 void G1CMTask::recalculate_limits() {
2870   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2871   _words_scanned_limit      = _real_words_scanned_limit;
2872 
2873   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2874   _refs_reached_limit       = _real_refs_reached_limit;
2875 }
2876 
2877 void G1CMTask::decrease_limits() {
2878   // This is called when we believe that we're going to do an infrequent
2879   // operation which will increase the per byte scanned cost (i.e. move
2880   // entries to/from the global stack). It basically tries to decrease the
2881   // scanning limit so that the clock is called earlier.
2882 
2883   _words_scanned_limit = _real_words_scanned_limit -
2884     3 * words_scanned_period / 4;
2885   _refs_reached_limit  = _real_refs_reached_limit -
2886     3 * refs_reached_period / 4;
2887 }
2888 
2889 void G1CMTask::move_entries_to_global_stack() {
2890   // local array where we'll store the entries that will be popped
2891   // from the local queue
2892   oop buffer[global_stack_transfer_size];
2893 
2894   int n = 0;
2895   oop obj;
2896   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
2897     buffer[n] = obj;
2898     ++n;
2899   }
2900 
2901   if (n > 0) {
2902     // we popped at least one entry from the local queue
2903 
2904     if (!_cm->mark_stack_push(buffer, n)) {
2905       set_has_aborted();
2906     }
2907   }
2908 
2909   // this operation was quite expensive, so decrease the limits
2910   decrease_limits();
2911 }
2912 
2913 void G1CMTask::get_entries_from_global_stack() {
2914   // local array where we'll store the entries that will be popped
2915   // from the global stack.
2916   oop buffer[global_stack_transfer_size];
2917   int n;
2918   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
2919   assert(n <= global_stack_transfer_size,
2920          "we should not pop more than the given limit");
2921   if (n > 0) {
2922     // yes, we did actually pop at least one entry
2923     for (int i = 0; i < n; ++i) {
2924       bool success = _task_queue->push(buffer[i]);
2925       // We only call this when the local queue is empty or under a
2926       // given target limit. So, we do not expect this push to fail.
2927       assert(success, "invariant");
2928     }
2929   }
2930 
2931   // this operation was quite expensive, so decrease the limits
2932   decrease_limits();
2933 }
2934 
2935 void G1CMTask::drain_local_queue(bool partially) {
2936   if (has_aborted()) return;
2937 
2938   // Decide what the target size is, depending whether we're going to
2939   // drain it partially (so that other tasks can steal if they run out
2940   // of things to do) or totally (at the very end).
2941   size_t target_size;
2942   if (partially) {
2943     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2944   } else {
2945     target_size = 0;
2946   }
2947 
2948   if (_task_queue->size() > target_size) {
2949     oop obj;
2950     bool ret = _task_queue->pop_local(obj);
2951     while (ret) {
2952       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
2953       assert(!_g1h->is_on_master_free_list(
2954                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
2955 
2956       scan_object(obj);
2957 
2958       if (_task_queue->size() <= target_size || has_aborted()) {
2959         ret = false;
2960       } else {
2961         ret = _task_queue->pop_local(obj);
2962       }
2963     }
2964   }
2965 }
2966 
2967 void G1CMTask::drain_global_stack(bool partially) {
2968   if (has_aborted()) return;
2969 
2970   // We have a policy to drain the local queue before we attempt to
2971   // drain the global stack.
2972   assert(partially || _task_queue->size() == 0, "invariant");
2973 
2974   // Decide what the target size is, depending whether we're going to
2975   // drain it partially (so that other tasks can steal if they run out
2976   // of things to do) or totally (at the very end).  Notice that,
2977   // because we move entries from the global stack in chunks or
2978   // because another task might be doing the same, we might in fact
2979   // drop below the target. But, this is not a problem.
2980   size_t target_size;
2981   if (partially) {
2982     target_size = _cm->partial_mark_stack_size_target();
2983   } else {
2984     target_size = 0;
2985   }
2986 
2987   if (_cm->mark_stack_size() > target_size) {
2988     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2989       get_entries_from_global_stack();
2990       drain_local_queue(partially);
2991     }
2992   }
2993 }
2994 
2995 // SATB Queue has several assumptions on whether to call the par or
2996 // non-par versions of the methods. this is why some of the code is
2997 // replicated. We should really get rid of the single-threaded version
2998 // of the code to simplify things.
2999 void G1CMTask::drain_satb_buffers() {
3000   if (has_aborted()) return;
3001 
3002   // We set this so that the regular clock knows that we're in the
3003   // middle of draining buffers and doesn't set the abort flag when it
3004   // notices that SATB buffers are available for draining. It'd be
3005   // very counter productive if it did that. :-)
3006   _draining_satb_buffers = true;
3007 
3008   G1CMSATBBufferClosure satb_cl(this, _g1h);
3009   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3010 
3011   // This keeps claiming and applying the closure to completed buffers
3012   // until we run out of buffers or we need to abort.
3013   while (!has_aborted() &&
3014          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3015     regular_clock_call();
3016   }
3017 
3018   _draining_satb_buffers = false;
3019 
3020   assert(has_aborted() ||
3021          concurrent() ||
3022          satb_mq_set.completed_buffers_num() == 0, "invariant");
3023 
3024   // again, this was a potentially expensive operation, decrease the
3025   // limits to get the regular clock call early
3026   decrease_limits();
3027 }
3028 
3029 void G1CMTask::print_stats() {
3030   log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
3031                        _worker_id, _calls);
3032   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3033                        _elapsed_time_ms, _termination_time_ms);
3034   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3035                        _step_times_ms.num(), _step_times_ms.avg(),
3036                        _step_times_ms.sd());
3037   log_debug(gc, stats)("                    max = %1.2lfms, total = %1.2lfms",
3038                        _step_times_ms.maximum(), _step_times_ms.sum());
3039 }
3040 
3041 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3042   return _task_queues->steal(worker_id, hash_seed, obj);
3043 }
3044 
3045 /*****************************************************************************
3046 
3047     The do_marking_step(time_target_ms, ...) method is the building
3048     block of the parallel marking framework. It can be called in parallel
3049     with other invocations of do_marking_step() on different tasks
3050     (but only one per task, obviously) and concurrently with the
3051     mutator threads, or during remark, hence it eliminates the need
3052     for two versions of the code. When called during remark, it will
3053     pick up from where the task left off during the concurrent marking
3054     phase. Interestingly, tasks are also claimable during evacuation
3055     pauses too, since do_marking_step() ensures that it aborts before
3056     it needs to yield.
3057 
3058     The data structures that it uses to do marking work are the
3059     following:
3060 
3061       (1) Marking Bitmap. If there are gray objects that appear only
3062       on the bitmap (this happens either when dealing with an overflow
3063       or when the initial marking phase has simply marked the roots
3064       and didn't push them on the stack), then tasks claim heap
3065       regions whose bitmap they then scan to find gray objects. A
3066       global finger indicates where the end of the last claimed region
3067       is. A local finger indicates how far into the region a task has
3068       scanned. The two fingers are used to determine how to gray an
3069       object (i.e. whether simply marking it is OK, as it will be
3070       visited by a task in the future, or whether it needs to be also
3071       pushed on a stack).
3072 
3073       (2) Local Queue. The local queue of the task which is accessed
3074       reasonably efficiently by the task. Other tasks can steal from
3075       it when they run out of work. Throughout the marking phase, a
3076       task attempts to keep its local queue short but not totally
3077       empty, so that entries are available for stealing by other
3078       tasks. Only when there is no more work, a task will totally
3079       drain its local queue.
3080 
3081       (3) Global Mark Stack. This handles local queue overflow. During
3082       marking only sets of entries are moved between it and the local
3083       queues, as access to it requires a mutex and more fine-grain
3084       interaction with it which might cause contention. If it
3085       overflows, then the marking phase should restart and iterate
3086       over the bitmap to identify gray objects. Throughout the marking
3087       phase, tasks attempt to keep the global mark stack at a small
3088       length but not totally empty, so that entries are available for
3089       popping by other tasks. Only when there is no more work, tasks
3090       will totally drain the global mark stack.
3091 
3092       (4) SATB Buffer Queue. This is where completed SATB buffers are
3093       made available. Buffers are regularly removed from this queue
3094       and scanned for roots, so that the queue doesn't get too
3095       long. During remark, all completed buffers are processed, as
3096       well as the filled in parts of any uncompleted buffers.
3097 
3098     The do_marking_step() method tries to abort when the time target
3099     has been reached. There are a few other cases when the
3100     do_marking_step() method also aborts:
3101 
3102       (1) When the marking phase has been aborted (after a Full GC).
3103 
3104       (2) When a global overflow (on the global stack) has been
3105       triggered. Before the task aborts, it will actually sync up with
3106       the other tasks to ensure that all the marking data structures
3107       (local queues, stacks, fingers etc.)  are re-initialized so that
3108       when do_marking_step() completes, the marking phase can
3109       immediately restart.
3110 
3111       (3) When enough completed SATB buffers are available. The
3112       do_marking_step() method only tries to drain SATB buffers right
3113       at the beginning. So, if enough buffers are available, the
3114       marking step aborts and the SATB buffers are processed at
3115       the beginning of the next invocation.
3116 
3117       (4) To yield. when we have to yield then we abort and yield
3118       right at the end of do_marking_step(). This saves us from a lot
3119       of hassle as, by yielding we might allow a Full GC. If this
3120       happens then objects will be compacted underneath our feet, the
3121       heap might shrink, etc. We save checking for this by just
3122       aborting and doing the yield right at the end.
3123 
3124     From the above it follows that the do_marking_step() method should
3125     be called in a loop (or, otherwise, regularly) until it completes.
3126 
3127     If a marking step completes without its has_aborted() flag being
3128     true, it means it has completed the current marking phase (and
3129     also all other marking tasks have done so and have all synced up).
3130 
3131     A method called regular_clock_call() is invoked "regularly" (in
3132     sub ms intervals) throughout marking. It is this clock method that
3133     checks all the abort conditions which were mentioned above and
3134     decides when the task should abort. A work-based scheme is used to
3135     trigger this clock method: when the number of object words the
3136     marking phase has scanned or the number of references the marking
3137     phase has visited reach a given limit. Additional invocations to
3138     the method clock have been planted in a few other strategic places
3139     too. The initial reason for the clock method was to avoid calling
3140     vtime too regularly, as it is quite expensive. So, once it was in
3141     place, it was natural to piggy-back all the other conditions on it
3142     too and not constantly check them throughout the code.
3143 
3144     If do_termination is true then do_marking_step will enter its
3145     termination protocol.
3146 
3147     The value of is_serial must be true when do_marking_step is being
3148     called serially (i.e. by the VMThread) and do_marking_step should
3149     skip any synchronization in the termination and overflow code.
3150     Examples include the serial remark code and the serial reference
3151     processing closures.
3152 
3153     The value of is_serial must be false when do_marking_step is
3154     being called by any of the worker threads in a work gang.
3155     Examples include the concurrent marking code (CMMarkingTask),
3156     the MT remark code, and the MT reference processing closures.
3157 
3158  *****************************************************************************/
3159 
3160 void G1CMTask::do_marking_step(double time_target_ms,
3161                                bool do_termination,
3162                                bool is_serial) {
3163   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3164   assert(concurrent() == _cm->concurrent(), "they should be the same");
3165 
3166   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3167   assert(_task_queues != NULL, "invariant");
3168   assert(_task_queue != NULL, "invariant");
3169   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3170 
3171   assert(!_claimed,
3172          "only one thread should claim this task at any one time");
3173 
3174   // OK, this doesn't safeguard again all possible scenarios, as it is
3175   // possible for two threads to set the _claimed flag at the same
3176   // time. But it is only for debugging purposes anyway and it will
3177   // catch most problems.
3178   _claimed = true;
3179 
3180   _start_time_ms = os::elapsedVTime() * 1000.0;
3181 
3182   // If do_stealing is true then do_marking_step will attempt to
3183   // steal work from the other G1CMTasks. It only makes sense to
3184   // enable stealing when the termination protocol is enabled
3185   // and do_marking_step() is not being called serially.
3186   bool do_stealing = do_termination && !is_serial;
3187 
3188   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
3189   _time_target_ms = time_target_ms - diff_prediction_ms;
3190 
3191   // set up the variables that are used in the work-based scheme to
3192   // call the regular clock method
3193   _words_scanned = 0;
3194   _refs_reached  = 0;
3195   recalculate_limits();
3196 
3197   // clear all flags
3198   clear_has_aborted();
3199   _has_timed_out = false;
3200   _draining_satb_buffers = false;
3201 
3202   ++_calls;
3203 
3204   // Set up the bitmap and oop closures. Anything that uses them is
3205   // eventually called from this method, so it is OK to allocate these
3206   // statically.
3207   G1CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3208   G1CMOopClosure    cm_oop_closure(_g1h, _cm, this);
3209   set_cm_oop_closure(&cm_oop_closure);
3210 
3211   if (_cm->has_overflown()) {
3212     // This can happen if the mark stack overflows during a GC pause
3213     // and this task, after a yield point, restarts. We have to abort
3214     // as we need to get into the overflow protocol which happens
3215     // right at the end of this task.
3216     set_has_aborted();
3217   }
3218 
3219   // First drain any available SATB buffers. After this, we will not
3220   // look at SATB buffers before the next invocation of this method.
3221   // If enough completed SATB buffers are queued up, the regular clock
3222   // will abort this task so that it restarts.
3223   drain_satb_buffers();
3224   // ...then partially drain the local queue and the global stack
3225   drain_local_queue(true);
3226   drain_global_stack(true);
3227 
3228   do {
3229     if (!has_aborted() && _curr_region != NULL) {
3230       // This means that we're already holding on to a region.
3231       assert(_finger != NULL, "if region is not NULL, then the finger "
3232              "should not be NULL either");
3233 
3234       // We might have restarted this task after an evacuation pause
3235       // which might have evacuated the region we're holding on to
3236       // underneath our feet. Let's read its limit again to make sure
3237       // that we do not iterate over a region of the heap that
3238       // contains garbage (update_region_limit() will also move
3239       // _finger to the start of the region if it is found empty).
3240       update_region_limit();
3241       // We will start from _finger not from the start of the region,
3242       // as we might be restarting this task after aborting half-way
3243       // through scanning this region. In this case, _finger points to
3244       // the address where we last found a marked object. If this is a
3245       // fresh region, _finger points to start().
3246       MemRegion mr = MemRegion(_finger, _region_limit);
3247 
3248       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
3249              "humongous regions should go around loop once only");
3250 
3251       // Some special cases:
3252       // If the memory region is empty, we can just give up the region.
3253       // If the current region is humongous then we only need to check
3254       // the bitmap for the bit associated with the start of the object,
3255       // scan the object if it's live, and give up the region.
3256       // Otherwise, let's iterate over the bitmap of the part of the region
3257       // that is left.
3258       // If the iteration is successful, give up the region.
3259       if (mr.is_empty()) {
3260         giveup_current_region();
3261         regular_clock_call();
3262       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
3263         if (_nextMarkBitMap->isMarked(mr.start())) {
3264           // The object is marked - apply the closure
3265           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
3266           bitmap_closure.do_bit(offset);
3267         }
3268         // Even if this task aborted while scanning the humongous object
3269         // we can (and should) give up the current region.
3270         giveup_current_region();
3271         regular_clock_call();
3272       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3273         giveup_current_region();
3274         regular_clock_call();
3275       } else {
3276         assert(has_aborted(), "currently the only way to do so");
3277         // The only way to abort the bitmap iteration is to return
3278         // false from the do_bit() method. However, inside the
3279         // do_bit() method we move the _finger to point to the
3280         // object currently being looked at. So, if we bail out, we
3281         // have definitely set _finger to something non-null.
3282         assert(_finger != NULL, "invariant");
3283 
3284         // Region iteration was actually aborted. So now _finger
3285         // points to the address of the object we last scanned. If we
3286         // leave it there, when we restart this task, we will rescan
3287         // the object. It is easy to avoid this. We move the finger by
3288         // enough to point to the next possible object header (the
3289         // bitmap knows by how much we need to move it as it knows its
3290         // granularity).
3291         assert(_finger < _region_limit, "invariant");
3292         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
3293         // Check if bitmap iteration was aborted while scanning the last object
3294         if (new_finger >= _region_limit) {
3295           giveup_current_region();
3296         } else {
3297           move_finger_to(new_finger);
3298         }
3299       }
3300     }
3301     // At this point we have either completed iterating over the
3302     // region we were holding on to, or we have aborted.
3303 
3304     // We then partially drain the local queue and the global stack.
3305     // (Do we really need this?)
3306     drain_local_queue(true);
3307     drain_global_stack(true);
3308 
3309     // Read the note on the claim_region() method on why it might
3310     // return NULL with potentially more regions available for
3311     // claiming and why we have to check out_of_regions() to determine
3312     // whether we're done or not.
3313     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3314       // We are going to try to claim a new region. We should have
3315       // given up on the previous one.
3316       // Separated the asserts so that we know which one fires.
3317       assert(_curr_region  == NULL, "invariant");
3318       assert(_finger       == NULL, "invariant");
3319       assert(_region_limit == NULL, "invariant");
3320       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3321       if (claimed_region != NULL) {
3322         // Yes, we managed to claim one
3323         setup_for_region(claimed_region);
3324         assert(_curr_region == claimed_region, "invariant");
3325       }
3326       // It is important to call the regular clock here. It might take
3327       // a while to claim a region if, for example, we hit a large
3328       // block of empty regions. So we need to call the regular clock
3329       // method once round the loop to make sure it's called
3330       // frequently enough.
3331       regular_clock_call();
3332     }
3333 
3334     if (!has_aborted() && _curr_region == NULL) {
3335       assert(_cm->out_of_regions(),
3336              "at this point we should be out of regions");
3337     }
3338   } while ( _curr_region != NULL && !has_aborted());
3339 
3340   if (!has_aborted()) {
3341     // We cannot check whether the global stack is empty, since other
3342     // tasks might be pushing objects to it concurrently.
3343     assert(_cm->out_of_regions(),
3344            "at this point we should be out of regions");
3345     // Try to reduce the number of available SATB buffers so that
3346     // remark has less work to do.
3347     drain_satb_buffers();
3348   }
3349 
3350   // Since we've done everything else, we can now totally drain the
3351   // local queue and global stack.
3352   drain_local_queue(false);
3353   drain_global_stack(false);
3354 
3355   // Attempt at work stealing from other task's queues.
3356   if (do_stealing && !has_aborted()) {
3357     // We have not aborted. This means that we have finished all that
3358     // we could. Let's try to do some stealing...
3359 
3360     // We cannot check whether the global stack is empty, since other
3361     // tasks might be pushing objects to it concurrently.
3362     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3363            "only way to reach here");
3364     while (!has_aborted()) {
3365       oop obj;
3366       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
3367         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3368                "any stolen object should be marked");
3369         scan_object(obj);
3370 
3371         // And since we're towards the end, let's totally drain the
3372         // local queue and global stack.
3373         drain_local_queue(false);
3374         drain_global_stack(false);
3375       } else {
3376         break;
3377       }
3378     }
3379   }
3380 
3381   // We still haven't aborted. Now, let's try to get into the
3382   // termination protocol.
3383   if (do_termination && !has_aborted()) {
3384     // We cannot check whether the global stack is empty, since other
3385     // tasks might be concurrently pushing objects on it.
3386     // Separated the asserts so that we know which one fires.
3387     assert(_cm->out_of_regions(), "only way to reach here");
3388     assert(_task_queue->size() == 0, "only way to reach here");
3389     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3390 
3391     // The G1CMTask class also extends the TerminatorTerminator class,
3392     // hence its should_exit_termination() method will also decide
3393     // whether to exit the termination protocol or not.
3394     bool finished = (is_serial ||
3395                      _cm->terminator()->offer_termination(this));
3396     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3397     _termination_time_ms +=
3398       termination_end_time_ms - _termination_start_time_ms;
3399 
3400     if (finished) {
3401       // We're all done.
3402 
3403       if (_worker_id == 0) {
3404         // let's allow task 0 to do this
3405         if (concurrent()) {
3406           assert(_cm->concurrent_marking_in_progress(), "invariant");
3407           // we need to set this to false before the next
3408           // safepoint. This way we ensure that the marking phase
3409           // doesn't observe any more heap expansions.
3410           _cm->clear_concurrent_marking_in_progress();
3411         }
3412       }
3413 
3414       // We can now guarantee that the global stack is empty, since
3415       // all other tasks have finished. We separated the guarantees so
3416       // that, if a condition is false, we can immediately find out
3417       // which one.
3418       guarantee(_cm->out_of_regions(), "only way to reach here");
3419       guarantee(_cm->mark_stack_empty(), "only way to reach here");
3420       guarantee(_task_queue->size() == 0, "only way to reach here");
3421       guarantee(!_cm->has_overflown(), "only way to reach here");
3422       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3423     } else {
3424       // Apparently there's more work to do. Let's abort this task. It
3425       // will restart it and we can hopefully find more things to do.
3426       set_has_aborted();
3427     }
3428   }
3429 
3430   // Mainly for debugging purposes to make sure that a pointer to the
3431   // closure which was statically allocated in this frame doesn't
3432   // escape it by accident.
3433   set_cm_oop_closure(NULL);
3434   double end_time_ms = os::elapsedVTime() * 1000.0;
3435   double elapsed_time_ms = end_time_ms - _start_time_ms;
3436   // Update the step history.
3437   _step_times_ms.add(elapsed_time_ms);
3438 
3439   if (has_aborted()) {
3440     // The task was aborted for some reason.
3441     if (_has_timed_out) {
3442       double diff_ms = elapsed_time_ms - _time_target_ms;
3443       // Keep statistics of how well we did with respect to hitting
3444       // our target only if we actually timed out (if we aborted for
3445       // other reasons, then the results might get skewed).
3446       _marking_step_diffs_ms.add(diff_ms);
3447     }
3448 
3449     if (_cm->has_overflown()) {
3450       // This is the interesting one. We aborted because a global
3451       // overflow was raised. This means we have to restart the
3452       // marking phase and start iterating over regions. However, in
3453       // order to do this we have to make sure that all tasks stop
3454       // what they are doing and re-initialize in a safe manner. We
3455       // will achieve this with the use of two barrier sync points.
3456 
3457       if (!is_serial) {
3458         // We only need to enter the sync barrier if being called
3459         // from a parallel context
3460         _cm->enter_first_sync_barrier(_worker_id);
3461 
3462         // When we exit this sync barrier we know that all tasks have
3463         // stopped doing marking work. So, it's now safe to
3464         // re-initialize our data structures. At the end of this method,
3465         // task 0 will clear the global data structures.
3466       }
3467 
3468       // We clear the local state of this task...
3469       clear_region_fields();
3470 
3471       if (!is_serial) {
3472         // ...and enter the second barrier.
3473         _cm->enter_second_sync_barrier(_worker_id);
3474       }
3475       // At this point, if we're during the concurrent phase of
3476       // marking, everything has been re-initialized and we're
3477       // ready to restart.
3478     }
3479   }
3480 
3481   _claimed = false;
3482 }
3483 
3484 G1CMTask::G1CMTask(uint worker_id,
3485                    G1ConcurrentMark* cm,
3486                    size_t* marked_bytes,
3487                    BitMap* card_bm,
3488                    G1CMTaskQueue* task_queue,
3489                    G1CMTaskQueueSet* task_queues)
3490   : _g1h(G1CollectedHeap::heap()),
3491     _worker_id(worker_id), _cm(cm),
3492     _claimed(false),
3493     _nextMarkBitMap(NULL), _hash_seed(17),
3494     _task_queue(task_queue),
3495     _task_queues(task_queues),
3496     _cm_oop_closure(NULL),
3497     _marked_bytes_array(marked_bytes),
3498     _card_bm(card_bm) {
3499   guarantee(task_queue != NULL, "invariant");
3500   guarantee(task_queues != NULL, "invariant");
3501 
3502   _marking_step_diffs_ms.add(0.5);
3503 }
3504 
3505 // These are formatting macros that are used below to ensure
3506 // consistent formatting. The *_H_* versions are used to format the
3507 // header for a particular value and they should be kept consistent
3508 // with the corresponding macro. Also note that most of the macros add
3509 // the necessary white space (as a prefix) which makes them a bit
3510 // easier to compose.
3511 
3512 // All the output lines are prefixed with this string to be able to
3513 // identify them easily in a large log file.
3514 #define G1PPRL_LINE_PREFIX            "###"
3515 
3516 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
3517 #ifdef _LP64
3518 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
3519 #else // _LP64
3520 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
3521 #endif // _LP64
3522 
3523 // For per-region info
3524 #define G1PPRL_TYPE_FORMAT            "   %-4s"
3525 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
3526 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
3527 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
3528 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
3529 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
3530 
3531 // For summary info
3532 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
3533 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
3534 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
3535 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
3536 
3537 G1PrintRegionLivenessInfoClosure::
3538 G1PrintRegionLivenessInfoClosure(const char* phase_name)
3539   : _total_used_bytes(0), _total_capacity_bytes(0),
3540     _total_prev_live_bytes(0), _total_next_live_bytes(0),
3541     _hum_used_bytes(0), _hum_capacity_bytes(0),
3542     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
3543     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
3544   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3545   MemRegion g1_reserved = g1h->g1_reserved();
3546   double now = os::elapsedTime();
3547 
3548   // Print the header of the output.
3549   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3550   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
3551                           G1PPRL_SUM_ADDR_FORMAT("reserved")
3552                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
3553                           p2i(g1_reserved.start()), p2i(g1_reserved.end()),
3554                           HeapRegion::GrainBytes);
3555   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3556   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3557                           G1PPRL_TYPE_H_FORMAT
3558                           G1PPRL_ADDR_BASE_H_FORMAT
3559                           G1PPRL_BYTE_H_FORMAT
3560                           G1PPRL_BYTE_H_FORMAT
3561                           G1PPRL_BYTE_H_FORMAT
3562                           G1PPRL_DOUBLE_H_FORMAT
3563                           G1PPRL_BYTE_H_FORMAT
3564                           G1PPRL_BYTE_H_FORMAT,
3565                           "type", "address-range",
3566                           "used", "prev-live", "next-live", "gc-eff",
3567                           "remset", "code-roots");
3568   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3569                           G1PPRL_TYPE_H_FORMAT
3570                           G1PPRL_ADDR_BASE_H_FORMAT
3571                           G1PPRL_BYTE_H_FORMAT
3572                           G1PPRL_BYTE_H_FORMAT
3573                           G1PPRL_BYTE_H_FORMAT
3574                           G1PPRL_DOUBLE_H_FORMAT
3575                           G1PPRL_BYTE_H_FORMAT
3576                           G1PPRL_BYTE_H_FORMAT,
3577                           "", "",
3578                           "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
3579                           "(bytes)", "(bytes)");
3580 }
3581 
3582 // It takes as a parameter a reference to one of the _hum_* fields, it
3583 // deduces the corresponding value for a region in a humongous region
3584 // series (either the region size, or what's left if the _hum_* field
3585 // is < the region size), and updates the _hum_* field accordingly.
3586 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
3587   size_t bytes = 0;
3588   // The > 0 check is to deal with the prev and next live bytes which
3589   // could be 0.
3590   if (*hum_bytes > 0) {
3591     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
3592     *hum_bytes -= bytes;
3593   }
3594   return bytes;
3595 }
3596 
3597 // It deduces the values for a region in a humongous region series
3598 // from the _hum_* fields and updates those accordingly. It assumes
3599 // that that _hum_* fields have already been set up from the "starts
3600 // humongous" region and we visit the regions in address order.
3601 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
3602                                                      size_t* capacity_bytes,
3603                                                      size_t* prev_live_bytes,
3604                                                      size_t* next_live_bytes) {
3605   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
3606   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
3607   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
3608   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
3609   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
3610 }
3611 
3612 void G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
3613   const char* type       = r->get_type_str();
3614   HeapWord* bottom       = r->bottom();
3615   HeapWord* end          = r->end();
3616   size_t capacity_bytes  = r->capacity();
3617   size_t used_bytes      = r->used();
3618   size_t prev_live_bytes = r->live_bytes();
3619   size_t next_live_bytes = r->next_live_bytes();
3620   double gc_eff          = r->gc_efficiency();
3621   size_t remset_bytes    = r->rem_set()->mem_size();
3622   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
3623 
3624   if (r->is_starts_humongous()) {
3625     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
3626            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
3627            "they should have been zeroed after the last time we used them");
3628     // Set up the _hum_* fields.
3629     _hum_capacity_bytes  = capacity_bytes;
3630     _hum_used_bytes      = used_bytes;
3631     _hum_prev_live_bytes = prev_live_bytes;
3632     _hum_next_live_bytes = next_live_bytes;
3633     get_hum_bytes(&used_bytes, &capacity_bytes,
3634                   &prev_live_bytes, &next_live_bytes);
3635     end = bottom + HeapRegion::GrainWords;
3636   } else if (r->is_continues_humongous()) {
3637     get_hum_bytes(&used_bytes, &capacity_bytes,
3638                   &prev_live_bytes, &next_live_bytes);
3639     assert(end == bottom + HeapRegion::GrainWords, "invariant");
3640   }
3641 
3642   _total_used_bytes      += used_bytes;
3643   _total_capacity_bytes  += capacity_bytes;
3644   _total_prev_live_bytes += prev_live_bytes;
3645   _total_next_live_bytes += next_live_bytes;
3646   _total_remset_bytes    += remset_bytes;
3647   _total_strong_code_roots_bytes += strong_code_roots_bytes;
3648 
3649   // Print a line for this particular region.
3650   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3651                           G1PPRL_TYPE_FORMAT
3652                           G1PPRL_ADDR_BASE_FORMAT
3653                           G1PPRL_BYTE_FORMAT
3654                           G1PPRL_BYTE_FORMAT
3655                           G1PPRL_BYTE_FORMAT
3656                           G1PPRL_DOUBLE_FORMAT
3657                           G1PPRL_BYTE_FORMAT
3658                           G1PPRL_BYTE_FORMAT,
3659                           type, p2i(bottom), p2i(end),
3660                           used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
3661                           remset_bytes, strong_code_roots_bytes);
3662 }
3663 
3664 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3665   // add static memory usages to remembered set sizes
3666   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
3667   // Print the footer of the output.
3668   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3669   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3670                          " SUMMARY"
3671                          G1PPRL_SUM_MB_FORMAT("capacity")
3672                          G1PPRL_SUM_MB_PERC_FORMAT("used")
3673                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
3674                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
3675                          G1PPRL_SUM_MB_FORMAT("remset")
3676                          G1PPRL_SUM_MB_FORMAT("code-roots"),
3677                          bytes_to_mb(_total_capacity_bytes),
3678                          bytes_to_mb(_total_used_bytes),
3679                          perc(_total_used_bytes, _total_capacity_bytes),
3680                          bytes_to_mb(_total_prev_live_bytes),
3681                          perc(_total_prev_live_bytes, _total_capacity_bytes),
3682                          bytes_to_mb(_total_next_live_bytes),
3683                          perc(_total_next_live_bytes, _total_capacity_bytes),
3684                          bytes_to_mb(_total_remset_bytes),
3685                          bytes_to_mb(_total_strong_code_roots_bytes));
3686 }