1 /*
   2  * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMarkThread.inline.hpp"
  30 #include "gc/g1/g1CollectedHeap.inline.hpp"
  31 #include "gc/g1/g1CollectorPolicy.hpp"
  32 #include "gc/g1/g1CollectorState.hpp"
  33 #include "gc/g1/g1ConcurrentMark.inline.hpp"
  34 #include "gc/g1/g1HeapVerifier.hpp"
  35 #include "gc/g1/g1OopClosures.inline.hpp"
  36 #include "gc/g1/g1StringDedup.hpp"
  37 #include "gc/g1/heapRegion.inline.hpp"
  38 #include "gc/g1/heapRegionRemSet.hpp"
  39 #include "gc/g1/heapRegionSet.inline.hpp"
  40 #include "gc/g1/suspendibleThreadSet.hpp"
  41 #include "gc/shared/gcId.hpp"
  42 #include "gc/shared/gcTimer.hpp"
  43 #include "gc/shared/gcTrace.hpp"
  44 #include "gc/shared/gcTraceTime.inline.hpp"
  45 #include "gc/shared/genOopClosures.inline.hpp"
  46 #include "gc/shared/referencePolicy.hpp"
  47 #include "gc/shared/strongRootsScope.hpp"
  48 #include "gc/shared/taskqueue.inline.hpp"
  49 #include "gc/shared/vmGCOperations.hpp"
  50 #include "logging/log.hpp"
  51 #include "memory/allocation.hpp"
  52 #include "memory/resourceArea.hpp"
  53 #include "oops/oop.inline.hpp"
  54 #include "runtime/atomic.inline.hpp"
  55 #include "runtime/handles.inline.hpp"
  56 #include "runtime/java.hpp"
  57 #include "runtime/prefetch.inline.hpp"
  58 #include "services/memTracker.hpp"
  59 
  60 // Concurrent marking bit map wrapper
  61 
  62 G1CMBitMapRO::G1CMBitMapRO(int shifter) :
  63   _bm(),
  64   _shifter(shifter) {
  65   _bmStartWord = 0;
  66   _bmWordSize = 0;
  67 }
  68 
  69 HeapWord* G1CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  70                                                  const HeapWord* limit) const {
  71   // First we must round addr *up* to a possible object boundary.
  72   addr = (HeapWord*)align_size_up((intptr_t)addr,
  73                                   HeapWordSize << _shifter);
  74   size_t addrOffset = heapWordToOffset(addr);
  75   assert(limit != NULL, "limit must not be NULL");
  76   size_t limitOffset = heapWordToOffset(limit);
  77   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  78   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  79   assert(nextAddr >= addr, "get_next_one postcondition");
  80   assert(nextAddr == limit || isMarked(nextAddr),
  81          "get_next_one postcondition");
  82   return nextAddr;
  83 }
  84 
  85 #ifndef PRODUCT
  86 bool G1CMBitMapRO::covers(MemRegion heap_rs) const {
  87   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  88   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
  89          "size inconsistency");
  90   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
  91          _bmWordSize  == heap_rs.word_size();
  92 }
  93 #endif
  94 
  95 void G1CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  96   _bm.print_on_error(st, prefix);
  97 }
  98 
  99 size_t G1CMBitMap::compute_size(size_t heap_size) {
 100   return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
 101 }
 102 
 103 size_t G1CMBitMap::mark_distance() {
 104   return MinObjAlignmentInBytes * BitsPerByte;
 105 }
 106 
 107 void G1CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 108   _bmStartWord = heap.start();
 109   _bmWordSize = heap.word_size();
 110 
 111   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
 112   _bm.set_size(_bmWordSize >> _shifter);
 113 
 114   storage->set_mapping_changed_listener(&_listener);
 115 }
 116 
 117 void G1CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 118   if (zero_filled) {
 119     return;
 120   }
 121   // We need to clear the bitmap on commit, removing any existing information.
 122   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 123   _bm->clearRange(mr);
 124 }
 125 
 126 // Closure used for clearing the given mark bitmap.
 127 class ClearBitmapHRClosure : public HeapRegionClosure {
 128  private:
 129   G1ConcurrentMark* _cm;
 130   G1CMBitMap* _bitmap;
 131   bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 132  public:
 133   ClearBitmapHRClosure(G1ConcurrentMark* cm, G1CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
 134     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
 135   }
 136 
 137   virtual bool doHeapRegion(HeapRegion* r) {
 138     size_t const chunk_size_in_words = M / HeapWordSize;
 139 
 140     HeapWord* cur = r->bottom();
 141     HeapWord* const end = r->end();
 142 
 143     while (cur < end) {
 144       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 145       _bitmap->clearRange(mr);
 146 
 147       cur += chunk_size_in_words;
 148 
 149       // Abort iteration if after yielding the marking has been aborted.
 150       if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
 151         return true;
 152       }
 153       // Repeat the asserts from before the start of the closure. We will do them
 154       // as asserts here to minimize their overhead on the product. However, we
 155       // will have them as guarantees at the beginning / end of the bitmap
 156       // clearing to get some checking in the product.
 157       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
 158       assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
 159     }
 160 
 161     return false;
 162   }
 163 };
 164 
 165 class ParClearNextMarkBitmapTask : public AbstractGangTask {
 166   ClearBitmapHRClosure* _cl;
 167   HeapRegionClaimer     _hrclaimer;
 168   bool                  _suspendible; // If the task is suspendible, workers must join the STS.
 169 
 170 public:
 171   ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
 172       _cl(cl), _suspendible(suspendible), AbstractGangTask("Parallel Clear Bitmap Task"), _hrclaimer(n_workers) {}
 173 
 174   void work(uint worker_id) {
 175     SuspendibleThreadSetJoiner sts_join(_suspendible);
 176     G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
 177   }
 178 };
 179 
 180 void G1CMBitMap::clearAll() {
 181   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 182   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
 183   uint n_workers = g1h->workers()->active_workers();
 184   ParClearNextMarkBitmapTask task(&cl, n_workers, false);
 185   g1h->workers()->run_task(&task);
 186   guarantee(cl.complete(), "Must have completed iteration.");
 187   return;
 188 }
 189 
 190 void G1CMBitMap::clearRange(MemRegion mr) {
 191   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 192   assert(!mr.is_empty(), "unexpected empty region");
 193   // convert address range into offset range
 194   _bm.at_put_range(heapWordToOffset(mr.start()),
 195                    heapWordToOffset(mr.end()), false);
 196 }
 197 
 198 G1CMMarkStack::G1CMMarkStack(G1ConcurrentMark* cm) :
 199   _base(NULL), _cm(cm)
 200 {}
 201 
 202 bool G1CMMarkStack::allocate(size_t capacity) {
 203   // allocate a stack of the requisite depth
 204   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 205   if (!rs.is_reserved()) {
 206     warning("ConcurrentMark MarkStack allocation failure");
 207     return false;
 208   }
 209   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 210   if (!_virtual_space.initialize(rs, rs.size())) {
 211     warning("ConcurrentMark MarkStack backing store failure");
 212     // Release the virtual memory reserved for the marking stack
 213     rs.release();
 214     return false;
 215   }
 216   assert(_virtual_space.committed_size() == rs.size(),
 217          "Didn't reserve backing store for all of G1ConcurrentMark stack?");
 218   _base = (oop*) _virtual_space.low();
 219   setEmpty();
 220   _capacity = (jint) capacity;
 221   _saved_index = -1;
 222   _should_expand = false;
 223   return true;
 224 }
 225 
 226 void G1CMMarkStack::expand() {
 227   // Called, during remark, if we've overflown the marking stack during marking.
 228   assert(isEmpty(), "stack should been emptied while handling overflow");
 229   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 230   // Clear expansion flag
 231   _should_expand = false;
 232   if (_capacity == (jint) MarkStackSizeMax) {
 233     log_trace(gc)("(benign) Can't expand marking stack capacity, at max size limit");
 234     return;
 235   }
 236   // Double capacity if possible
 237   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
 238   // Do not give up existing stack until we have managed to
 239   // get the double capacity that we desired.
 240   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 241                                                            sizeof(oop)));
 242   if (rs.is_reserved()) {
 243     // Release the backing store associated with old stack
 244     _virtual_space.release();
 245     // Reinitialize virtual space for new stack
 246     if (!_virtual_space.initialize(rs, rs.size())) {
 247       fatal("Not enough swap for expanded marking stack capacity");
 248     }
 249     _base = (oop*)(_virtual_space.low());
 250     _index = 0;
 251     _capacity = new_capacity;
 252   } else {
 253     // Failed to double capacity, continue;
 254     log_trace(gc)("(benign) Failed to expand marking stack capacity from " SIZE_FORMAT "K to " SIZE_FORMAT "K",
 255                   _capacity / K, new_capacity / K);
 256   }
 257 }
 258 
 259 void G1CMMarkStack::set_should_expand() {
 260   // If we're resetting the marking state because of an
 261   // marking stack overflow, record that we should, if
 262   // possible, expand the stack.
 263   _should_expand = _cm->has_overflown();
 264 }
 265 
 266 G1CMMarkStack::~G1CMMarkStack() {
 267   if (_base != NULL) {
 268     _base = NULL;
 269     _virtual_space.release();
 270   }
 271 }
 272 
 273 void G1CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 274   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 275   jint start = _index;
 276   jint next_index = start + n;
 277   if (next_index > _capacity) {
 278     _overflow = true;
 279     return;
 280   }
 281   // Otherwise.
 282   _index = next_index;
 283   for (int i = 0; i < n; i++) {
 284     int ind = start + i;
 285     assert(ind < _capacity, "By overflow test above.");
 286     _base[ind] = ptr_arr[i];
 287   }
 288 }
 289 
 290 bool G1CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 291   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 292   jint index = _index;
 293   if (index == 0) {
 294     *n = 0;
 295     return false;
 296   } else {
 297     int k = MIN2(max, index);
 298     jint  new_ind = index - k;
 299     for (int j = 0; j < k; j++) {
 300       ptr_arr[j] = _base[new_ind + j];
 301     }
 302     _index = new_ind;
 303     *n = k;
 304     return true;
 305   }
 306 }
 307 
 308 void G1CMMarkStack::note_start_of_gc() {
 309   assert(_saved_index == -1,
 310          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 311   _saved_index = _index;
 312 }
 313 
 314 void G1CMMarkStack::note_end_of_gc() {
 315   // This is intentionally a guarantee, instead of an assert. If we
 316   // accidentally add something to the mark stack during GC, it
 317   // will be a correctness issue so it's better if we crash. we'll
 318   // only check this once per GC anyway, so it won't be a performance
 319   // issue in any way.
 320   guarantee(_saved_index == _index,
 321             "saved index: %d index: %d", _saved_index, _index);
 322   _saved_index = -1;
 323 }
 324 
 325 G1CMRootRegions::G1CMRootRegions() :
 326   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 327   _should_abort(false),  _next_survivor(NULL) { }
 328 
 329 void G1CMRootRegions::init(G1CollectedHeap* g1h, G1ConcurrentMark* cm) {
 330   _young_list = g1h->young_list();
 331   _cm = cm;
 332 }
 333 
 334 void G1CMRootRegions::prepare_for_scan() {
 335   assert(!scan_in_progress(), "pre-condition");
 336 
 337   // Currently, only survivors can be root regions.
 338   assert(_next_survivor == NULL, "pre-condition");
 339   _next_survivor = _young_list->first_survivor_region();
 340   _scan_in_progress = (_next_survivor != NULL);
 341   _should_abort = false;
 342 }
 343 
 344 HeapRegion* G1CMRootRegions::claim_next() {
 345   if (_should_abort) {
 346     // If someone has set the should_abort flag, we return NULL to
 347     // force the caller to bail out of their loop.
 348     return NULL;
 349   }
 350 
 351   // Currently, only survivors can be root regions.
 352   HeapRegion* res = _next_survivor;
 353   if (res != NULL) {
 354     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 355     // Read it again in case it changed while we were waiting for the lock.
 356     res = _next_survivor;
 357     if (res != NULL) {
 358       if (res == _young_list->last_survivor_region()) {
 359         // We just claimed the last survivor so store NULL to indicate
 360         // that we're done.
 361         _next_survivor = NULL;
 362       } else {
 363         _next_survivor = res->get_next_young_region();
 364       }
 365     } else {
 366       // Someone else claimed the last survivor while we were trying
 367       // to take the lock so nothing else to do.
 368     }
 369   }
 370   assert(res == NULL || res->is_survivor(), "post-condition");
 371 
 372   return res;
 373 }
 374 
 375 void G1CMRootRegions::notify_scan_done() {
 376   MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 377   _scan_in_progress = false;
 378   RootRegionScan_lock->notify_all();
 379 }
 380 
 381 void G1CMRootRegions::cancel_scan() {
 382   notify_scan_done();
 383 }
 384 
 385 void G1CMRootRegions::scan_finished() {
 386   assert(scan_in_progress(), "pre-condition");
 387 
 388   // Currently, only survivors can be root regions.
 389   if (!_should_abort) {
 390     assert(_next_survivor == NULL, "we should have claimed all survivors");
 391   }
 392   _next_survivor = NULL;
 393 
 394   notify_scan_done();
 395 }
 396 
 397 bool G1CMRootRegions::wait_until_scan_finished() {
 398   if (!scan_in_progress()) return false;
 399 
 400   {
 401     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 402     while (scan_in_progress()) {
 403       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 404     }
 405   }
 406   return true;
 407 }
 408 
 409 uint G1ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 410   return MAX2((n_par_threads + 2) / 4, 1U);
 411 }
 412 
 413 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 414   _g1h(g1h),
 415   _markBitMap1(),
 416   _markBitMap2(),
 417   _parallel_marking_threads(0),
 418   _max_parallel_marking_threads(0),
 419   _sleep_factor(0.0),
 420   _marking_task_overhead(1.0),
 421   _cleanup_list("Cleanup List"),
 422   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 423   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
 424             CardTableModRefBS::card_shift,
 425             false /* in_resource_area*/),
 426 
 427   _prevMarkBitMap(&_markBitMap1),
 428   _nextMarkBitMap(&_markBitMap2),
 429 
 430   _markStack(this),
 431   // _finger set in set_non_marking_state
 432 
 433   _max_worker_id(ParallelGCThreads),
 434   // _active_tasks set in set_non_marking_state
 435   // _tasks set inside the constructor
 436   _task_queues(new G1CMTaskQueueSet((int) _max_worker_id)),
 437   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 438 
 439   _has_overflown(false),
 440   _concurrent(false),
 441   _has_aborted(false),
 442   _restart_for_overflow(false),
 443   _concurrent_marking_in_progress(false),
 444   _concurrent_phase_status(ConcPhaseNotStarted),
 445 
 446   // _verbose_level set below
 447 
 448   _init_times(),
 449   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 450   _cleanup_times(),
 451   _total_counting_time(0.0),
 452   _total_rs_scrub_time(0.0),
 453 
 454   _parallel_workers(NULL),
 455 
 456   _count_card_bitmaps(NULL),
 457   _count_marked_bytes(NULL),
 458   _completed_initialization(false) {
 459 
 460   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 461   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 462 
 463   // Create & start a ConcurrentMark thread.
 464   _cmThread = new ConcurrentMarkThread(this);
 465   assert(cmThread() != NULL, "CM Thread should have been created");
 466   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 467   if (_cmThread->osthread() == NULL) {
 468       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 469   }
 470 
 471   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 472   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 473   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 474 
 475   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 476   satb_qs.set_buffer_size(G1SATBBufferSize);
 477 
 478   _root_regions.init(_g1h, this);
 479 
 480   if (ConcGCThreads > ParallelGCThreads) {
 481     warning("Can't have more ConcGCThreads (%u) "
 482             "than ParallelGCThreads (%u).",
 483             ConcGCThreads, ParallelGCThreads);
 484     return;
 485   }
 486   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 487     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 488     // if both are set
 489     _sleep_factor             = 0.0;
 490     _marking_task_overhead    = 1.0;
 491   } else if (G1MarkingOverheadPercent > 0) {
 492     // We will calculate the number of parallel marking threads based
 493     // on a target overhead with respect to the soft real-time goal
 494     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 495     double overall_cm_overhead =
 496       (double) MaxGCPauseMillis * marking_overhead /
 497       (double) GCPauseIntervalMillis;
 498     double cpu_ratio = 1.0 / (double) os::processor_count();
 499     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 500     double marking_task_overhead =
 501       overall_cm_overhead / marking_thread_num *
 502                                               (double) os::processor_count();
 503     double sleep_factor =
 504                        (1.0 - marking_task_overhead) / marking_task_overhead;
 505 
 506     FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
 507     _sleep_factor             = sleep_factor;
 508     _marking_task_overhead    = marking_task_overhead;
 509   } else {
 510     // Calculate the number of parallel marking threads by scaling
 511     // the number of parallel GC threads.
 512     uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
 513     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 514     _sleep_factor             = 0.0;
 515     _marking_task_overhead    = 1.0;
 516   }
 517 
 518   assert(ConcGCThreads > 0, "Should have been set");
 519   _parallel_marking_threads = ConcGCThreads;
 520   _max_parallel_marking_threads = _parallel_marking_threads;
 521 
 522   _parallel_workers = new WorkGang("G1 Marker",
 523        _max_parallel_marking_threads, false, true);
 524   if (_parallel_workers == NULL) {
 525     vm_exit_during_initialization("Failed necessary allocation.");
 526   } else {
 527     _parallel_workers->initialize_workers();
 528   }
 529 
 530   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 531     size_t mark_stack_size =
 532       MIN2(MarkStackSizeMax,
 533           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 534     // Verify that the calculated value for MarkStackSize is in range.
 535     // It would be nice to use the private utility routine from Arguments.
 536     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 537       warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 538               "must be between 1 and " SIZE_FORMAT,
 539               mark_stack_size, MarkStackSizeMax);
 540       return;
 541     }
 542     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 543   } else {
 544     // Verify MarkStackSize is in range.
 545     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 546       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 547         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 548           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 549                   "must be between 1 and " SIZE_FORMAT,
 550                   MarkStackSize, MarkStackSizeMax);
 551           return;
 552         }
 553       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 554         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 555           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 556                   " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 557                   MarkStackSize, MarkStackSizeMax);
 558           return;
 559         }
 560       }
 561     }
 562   }
 563 
 564   if (!_markStack.allocate(MarkStackSize)) {
 565     warning("Failed to allocate CM marking stack");
 566     return;
 567   }
 568 
 569   _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC);
 570   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 571 
 572   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 573   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 574 
 575   BitMap::idx_t card_bm_size = _card_bm.size();
 576 
 577   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 578   _active_tasks = _max_worker_id;
 579 
 580   uint max_regions = _g1h->max_regions();
 581   for (uint i = 0; i < _max_worker_id; ++i) {
 582     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
 583     task_queue->initialize();
 584     _task_queues->register_queue(i, task_queue);
 585 
 586     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 587     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 588 
 589     _tasks[i] = new G1CMTask(i, this,
 590                              _count_marked_bytes[i],
 591                              &_count_card_bitmaps[i],
 592                              task_queue, _task_queues);
 593 
 594     _accum_task_vtime[i] = 0.0;
 595   }
 596 
 597   // Calculate the card number for the bottom of the heap. Used
 598   // in biasing indexes into the accounting card bitmaps.
 599   _heap_bottom_card_num =
 600     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 601                                 CardTableModRefBS::card_shift);
 602 
 603   // Clear all the liveness counting data
 604   clear_all_count_data();
 605 
 606   // so that the call below can read a sensible value
 607   _heap_start = g1h->reserved_region().start();
 608   set_non_marking_state();
 609   _completed_initialization = true;
 610 }
 611 
 612 void G1ConcurrentMark::reset() {
 613   // Starting values for these two. This should be called in a STW
 614   // phase.
 615   MemRegion reserved = _g1h->g1_reserved();
 616   _heap_start = reserved.start();
 617   _heap_end   = reserved.end();
 618 
 619   // Separated the asserts so that we know which one fires.
 620   assert(_heap_start != NULL, "heap bounds should look ok");
 621   assert(_heap_end != NULL, "heap bounds should look ok");
 622   assert(_heap_start < _heap_end, "heap bounds should look ok");
 623 
 624   // Reset all the marking data structures and any necessary flags
 625   reset_marking_state();
 626 
 627   // We do reset all of them, since different phases will use
 628   // different number of active threads. So, it's easiest to have all
 629   // of them ready.
 630   for (uint i = 0; i < _max_worker_id; ++i) {
 631     _tasks[i]->reset(_nextMarkBitMap);
 632   }
 633 
 634   // we need this to make sure that the flag is on during the evac
 635   // pause with initial mark piggy-backed
 636   set_concurrent_marking_in_progress();
 637 }
 638 
 639 
 640 void G1ConcurrentMark::reset_marking_state(bool clear_overflow) {
 641   _markStack.set_should_expand();
 642   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 643   if (clear_overflow) {
 644     clear_has_overflown();
 645   } else {
 646     assert(has_overflown(), "pre-condition");
 647   }
 648   _finger = _heap_start;
 649 
 650   for (uint i = 0; i < _max_worker_id; ++i) {
 651     G1CMTaskQueue* queue = _task_queues->queue(i);
 652     queue->set_empty();
 653   }
 654 }
 655 
 656 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
 657   assert(active_tasks <= _max_worker_id, "we should not have more");
 658 
 659   _active_tasks = active_tasks;
 660   // Need to update the three data structures below according to the
 661   // number of active threads for this phase.
 662   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 663   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 664   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 665 }
 666 
 667 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 668   set_concurrency(active_tasks);
 669 
 670   _concurrent = concurrent;
 671   // We propagate this to all tasks, not just the active ones.
 672   for (uint i = 0; i < _max_worker_id; ++i)
 673     _tasks[i]->set_concurrent(concurrent);
 674 
 675   if (concurrent) {
 676     set_concurrent_marking_in_progress();
 677   } else {
 678     // We currently assume that the concurrent flag has been set to
 679     // false before we start remark. At this point we should also be
 680     // in a STW phase.
 681     assert(!concurrent_marking_in_progress(), "invariant");
 682     assert(out_of_regions(),
 683            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 684            p2i(_finger), p2i(_heap_end));
 685   }
 686 }
 687 
 688 void G1ConcurrentMark::set_non_marking_state() {
 689   // We set the global marking state to some default values when we're
 690   // not doing marking.
 691   reset_marking_state();
 692   _active_tasks = 0;
 693   clear_concurrent_marking_in_progress();
 694 }
 695 
 696 G1ConcurrentMark::~G1ConcurrentMark() {
 697   // The G1ConcurrentMark instance is never freed.
 698   ShouldNotReachHere();
 699 }
 700 
 701 void G1ConcurrentMark::clearNextBitmap() {
 702   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 703 
 704   // Make sure that the concurrent mark thread looks to still be in
 705   // the current cycle.
 706   guarantee(cmThread()->during_cycle(), "invariant");
 707 
 708   // We are finishing up the current cycle by clearing the next
 709   // marking bitmap and getting it ready for the next cycle. During
 710   // this time no other cycle can start. So, let's make sure that this
 711   // is the case.
 712   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 713 
 714   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
 715   ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
 716   _parallel_workers->run_task(&task);
 717 
 718   // Clear the liveness counting data. If the marking has been aborted, the abort()
 719   // call already did that.
 720   if (cl.complete()) {
 721     clear_all_count_data();
 722   }
 723 
 724   // Repeat the asserts from above.
 725   guarantee(cmThread()->during_cycle(), "invariant");
 726   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 727 }
 728 
 729 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 730   G1CMBitMap* _bitmap;
 731   bool _error;
 732  public:
 733   CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
 734   }
 735 
 736   virtual bool doHeapRegion(HeapRegion* r) {
 737     // This closure can be called concurrently to the mutator, so we must make sure
 738     // that the result of the getNextMarkedWordAddress() call is compared to the
 739     // value passed to it as limit to detect any found bits.
 740     // end never changes in G1.
 741     HeapWord* end = r->end();
 742     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 743   }
 744 };
 745 
 746 bool G1ConcurrentMark::nextMarkBitmapIsClear() {
 747   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 748   _g1h->heap_region_iterate(&cl);
 749   return cl.complete();
 750 }
 751 
 752 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 753 public:
 754   bool doHeapRegion(HeapRegion* r) {
 755     r->note_start_of_marking();
 756     return false;
 757   }
 758 };
 759 
 760 void G1ConcurrentMark::checkpointRootsInitialPre() {
 761   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 762   G1CollectorPolicy* g1p = g1h->g1_policy();
 763 
 764   _has_aborted = false;
 765 
 766   // Initialize marking structures. This has to be done in a STW phase.
 767   reset();
 768 
 769   // For each region note start of marking.
 770   NoteStartOfMarkHRClosure startcl;
 771   g1h->heap_region_iterate(&startcl);
 772 }
 773 
 774 
 775 void G1ConcurrentMark::checkpointRootsInitialPost() {
 776   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 777 
 778   // Start Concurrent Marking weak-reference discovery.
 779   ReferenceProcessor* rp = g1h->ref_processor_cm();
 780   // enable ("weak") refs discovery
 781   rp->enable_discovery();
 782   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 783 
 784   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 785   // This is the start of  the marking cycle, we're expected all
 786   // threads to have SATB queues with active set to false.
 787   satb_mq_set.set_active_all_threads(true, /* new active value */
 788                                      false /* expected_active */);
 789 
 790   _root_regions.prepare_for_scan();
 791 
 792   // update_g1_committed() will be called at the end of an evac pause
 793   // when marking is on. So, it's also called at the end of the
 794   // initial-mark pause to update the heap end, if the heap expands
 795   // during it. No need to call it here.
 796 }
 797 
 798 /*
 799  * Notice that in the next two methods, we actually leave the STS
 800  * during the barrier sync and join it immediately afterwards. If we
 801  * do not do this, the following deadlock can occur: one thread could
 802  * be in the barrier sync code, waiting for the other thread to also
 803  * sync up, whereas another one could be trying to yield, while also
 804  * waiting for the other threads to sync up too.
 805  *
 806  * Note, however, that this code is also used during remark and in
 807  * this case we should not attempt to leave / enter the STS, otherwise
 808  * we'll either hit an assert (debug / fastdebug) or deadlock
 809  * (product). So we should only leave / enter the STS if we are
 810  * operating concurrently.
 811  *
 812  * Because the thread that does the sync barrier has left the STS, it
 813  * is possible to be suspended for a Full GC or an evacuation pause
 814  * could occur. This is actually safe, since the entering the sync
 815  * barrier is one of the last things do_marking_step() does, and it
 816  * doesn't manipulate any data structures afterwards.
 817  */
 818 
 819 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 820   bool barrier_aborted;
 821   {
 822     SuspendibleThreadSetLeaver sts_leave(concurrent());
 823     barrier_aborted = !_first_overflow_barrier_sync.enter();
 824   }
 825 
 826   // at this point everyone should have synced up and not be doing any
 827   // more work
 828 
 829   if (barrier_aborted) {
 830     // If the barrier aborted we ignore the overflow condition and
 831     // just abort the whole marking phase as quickly as possible.
 832     return;
 833   }
 834 
 835   // If we're executing the concurrent phase of marking, reset the marking
 836   // state; otherwise the marking state is reset after reference processing,
 837   // during the remark pause.
 838   // If we reset here as a result of an overflow during the remark we will
 839   // see assertion failures from any subsequent set_concurrency_and_phase()
 840   // calls.
 841   if (concurrent()) {
 842     // let the task associated with with worker 0 do this
 843     if (worker_id == 0) {
 844       // task 0 is responsible for clearing the global data structures
 845       // We should be here because of an overflow. During STW we should
 846       // not clear the overflow flag since we rely on it being true when
 847       // we exit this method to abort the pause and restart concurrent
 848       // marking.
 849       reset_marking_state(true /* clear_overflow */);
 850 
 851       log_info(gc)("Concurrent Mark reset for overflow");
 852     }
 853   }
 854 
 855   // after this, each task should reset its own data structures then
 856   // then go into the second barrier
 857 }
 858 
 859 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 860   SuspendibleThreadSetLeaver sts_leave(concurrent());
 861   _second_overflow_barrier_sync.enter();
 862 
 863   // at this point everything should be re-initialized and ready to go
 864 }
 865 
 866 class G1CMConcurrentMarkingTask: public AbstractGangTask {
 867 private:
 868   G1ConcurrentMark*     _cm;
 869   ConcurrentMarkThread* _cmt;
 870 
 871 public:
 872   void work(uint worker_id) {
 873     assert(Thread::current()->is_ConcurrentGC_thread(),
 874            "this should only be done by a conc GC thread");
 875     ResourceMark rm;
 876 
 877     double start_vtime = os::elapsedVTime();
 878 
 879     {
 880       SuspendibleThreadSetJoiner sts_join;
 881 
 882       assert(worker_id < _cm->active_tasks(), "invariant");
 883       G1CMTask* the_task = _cm->task(worker_id);
 884       the_task->record_start_time();
 885       if (!_cm->has_aborted()) {
 886         do {
 887           double start_vtime_sec = os::elapsedVTime();
 888           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 889 
 890           the_task->do_marking_step(mark_step_duration_ms,
 891                                     true  /* do_termination */,
 892                                     false /* is_serial*/);
 893 
 894           double end_vtime_sec = os::elapsedVTime();
 895           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
 896           _cm->clear_has_overflown();
 897 
 898           _cm->do_yield_check(worker_id);
 899 
 900           jlong sleep_time_ms;
 901           if (!_cm->has_aborted() && the_task->has_aborted()) {
 902             sleep_time_ms =
 903               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
 904             {
 905               SuspendibleThreadSetLeaver sts_leave;
 906               os::sleep(Thread::current(), sleep_time_ms, false);
 907             }
 908           }
 909         } while (!_cm->has_aborted() && the_task->has_aborted());
 910       }
 911       the_task->record_end_time();
 912       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
 913     }
 914 
 915     double end_vtime = os::elapsedVTime();
 916     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 917   }
 918 
 919   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm,
 920                             ConcurrentMarkThread* cmt) :
 921       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
 922 
 923   ~G1CMConcurrentMarkingTask() { }
 924 };
 925 
 926 // Calculates the number of active workers for a concurrent
 927 // phase.
 928 uint G1ConcurrentMark::calc_parallel_marking_threads() {
 929   uint n_conc_workers = 0;
 930   if (!UseDynamicNumberOfGCThreads ||
 931       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
 932        !ForceDynamicNumberOfGCThreads)) {
 933     n_conc_workers = max_parallel_marking_threads();
 934   } else {
 935     n_conc_workers =
 936       AdaptiveSizePolicy::calc_default_active_workers(
 937                                    max_parallel_marking_threads(),
 938                                    1, /* Minimum workers */
 939                                    parallel_marking_threads(),
 940                                    Threads::number_of_non_daemon_threads());
 941     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
 942     // that scaling has already gone into "_max_parallel_marking_threads".
 943   }
 944   assert(n_conc_workers > 0, "Always need at least 1");
 945   return n_conc_workers;
 946 }
 947 
 948 void G1ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
 949   // Currently, only survivors can be root regions.
 950   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
 951   G1RootRegionScanClosure cl(_g1h, this, worker_id);
 952 
 953   const uintx interval = PrefetchScanIntervalInBytes;
 954   HeapWord* curr = hr->bottom();
 955   const HeapWord* end = hr->top();
 956   while (curr < end) {
 957     Prefetch::read(curr, interval);
 958     oop obj = oop(curr);
 959     int size = obj->oop_iterate_size(&cl);
 960     assert(size == obj->size(), "sanity");
 961     curr += size;
 962   }
 963 }
 964 
 965 class G1CMRootRegionScanTask : public AbstractGangTask {
 966 private:
 967   G1ConcurrentMark* _cm;
 968 
 969 public:
 970   G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
 971     AbstractGangTask("Root Region Scan"), _cm(cm) { }
 972 
 973   void work(uint worker_id) {
 974     assert(Thread::current()->is_ConcurrentGC_thread(),
 975            "this should only be done by a conc GC thread");
 976 
 977     G1CMRootRegions* root_regions = _cm->root_regions();
 978     HeapRegion* hr = root_regions->claim_next();
 979     while (hr != NULL) {
 980       _cm->scanRootRegion(hr, worker_id);
 981       hr = root_regions->claim_next();
 982     }
 983   }
 984 };
 985 
 986 void G1ConcurrentMark::scanRootRegions() {
 987   // scan_in_progress() will have been set to true only if there was
 988   // at least one root region to scan. So, if it's false, we
 989   // should not attempt to do any further work.
 990   if (root_regions()->scan_in_progress()) {
 991     assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");
 992     GCTraceConcTime(Info, gc) tt("Concurrent Root Region Scan");
 993 
 994     _parallel_marking_threads = calc_parallel_marking_threads();
 995     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
 996            "Maximum number of marking threads exceeded");
 997     uint active_workers = MAX2(1U, parallel_marking_threads());
 998 
 999     G1CMRootRegionScanTask task(this);
1000     _parallel_workers->set_active_workers(active_workers);
1001     _parallel_workers->run_task(&task);
1002 
1003     // It's possible that has_aborted() is true here without actually
1004     // aborting the survivor scan earlier. This is OK as it's
1005     // mainly used for sanity checking.
1006     root_regions()->scan_finished();
1007   }
1008 }
1009 
1010 void G1ConcurrentMark::register_concurrent_phase_start(const char* title) {
1011   uint old_val = 0;
1012   do {
1013     old_val = Atomic::cmpxchg(ConcPhaseStarted, &_concurrent_phase_status, ConcPhaseNotStarted);
1014   } while (old_val != ConcPhaseNotStarted);
1015   _g1h->gc_timer_cm()->register_gc_concurrent_start(title);
1016 }
1017 
1018 void G1ConcurrentMark::register_concurrent_phase_end_common(bool end_timer) {
1019   if (_concurrent_phase_status == ConcPhaseNotStarted) {
1020     return;
1021   }
1022 
1023   uint old_val = Atomic::cmpxchg(ConcPhaseStopping, &_concurrent_phase_status, ConcPhaseStarted);
1024   if (old_val == ConcPhaseStarted) {
1025     _g1h->gc_timer_cm()->register_gc_concurrent_end();
1026     // If 'end_timer' is true, we came here to end timer which needs concurrent phase ended.
1027     // We need to end it before changing the status to 'ConcPhaseNotStarted' to prevent
1028     // starting a new concurrent phase by 'ConcurrentMarkThread'.
1029     if (end_timer) {
1030       _g1h->gc_timer_cm()->register_gc_end();
1031     }
1032     old_val = Atomic::cmpxchg(ConcPhaseNotStarted, &_concurrent_phase_status, ConcPhaseStopping);
1033     assert(old_val == ConcPhaseStopping, "Should not have changed since we entered this scope.");
1034   } else {
1035     do {
1036       // Let other thread finish changing '_concurrent_phase_status' to 'ConcPhaseNotStarted'.
1037       os::naked_short_sleep(1);
1038     } while (_concurrent_phase_status != ConcPhaseNotStarted);
1039   }
1040 }
1041 
1042 void G1ConcurrentMark::register_concurrent_phase_end() {
1043   register_concurrent_phase_end_common(false);
1044 }
1045 
1046 void G1ConcurrentMark::register_concurrent_gc_end() {
1047   register_concurrent_phase_end_common(true);
1048 }
1049 
1050 void G1ConcurrentMark::markFromRoots() {
1051   // we might be tempted to assert that:
1052   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1053   //        "inconsistent argument?");
1054   // However that wouldn't be right, because it's possible that
1055   // a safepoint is indeed in progress as a younger generation
1056   // stop-the-world GC happens even as we mark in this generation.
1057 
1058   _restart_for_overflow = false;
1059 
1060   // _g1h has _n_par_threads
1061   _parallel_marking_threads = calc_parallel_marking_threads();
1062   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1063     "Maximum number of marking threads exceeded");
1064 
1065   uint active_workers = MAX2(1U, parallel_marking_threads());
1066   assert(active_workers > 0, "Should have been set");
1067 
1068   // Parallel task terminator is set in "set_concurrency_and_phase()"
1069   set_concurrency_and_phase(active_workers, true /* concurrent */);
1070 
1071   G1CMConcurrentMarkingTask markingTask(this, cmThread());
1072   _parallel_workers->set_active_workers(active_workers);
1073   _parallel_workers->run_task(&markingTask);
1074   print_stats();
1075 }
1076 
1077 void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1078   // world is stopped at this checkpoint
1079   assert(SafepointSynchronize::is_at_safepoint(),
1080          "world should be stopped");
1081 
1082   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1083 
1084   // If a full collection has happened, we shouldn't do this.
1085   if (has_aborted()) {
1086     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1087     return;
1088   }
1089 
1090   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1091 
1092   if (VerifyDuringGC) {
1093     HandleMark hm;  // handle scope
1094     g1h->prepare_for_verify();
1095     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1096   }
1097   g1h->verifier()->check_bitmaps("Remark Start");
1098 
1099   G1CollectorPolicy* g1p = g1h->g1_policy();
1100   g1p->record_concurrent_mark_remark_start();
1101 
1102   double start = os::elapsedTime();
1103 
1104   checkpointRootsFinalWork();
1105 
1106   double mark_work_end = os::elapsedTime();
1107 
1108   weakRefsWork(clear_all_soft_refs);
1109 
1110   if (has_overflown()) {
1111     // Oops.  We overflowed.  Restart concurrent marking.
1112     _restart_for_overflow = true;
1113     log_develop_trace(gc)("Remark led to restart for overflow.");
1114 
1115     // Verify the heap w.r.t. the previous marking bitmap.
1116     if (VerifyDuringGC) {
1117       HandleMark hm;  // handle scope
1118       g1h->prepare_for_verify();
1119       Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
1120     }
1121 
1122     // Clear the marking state because we will be restarting
1123     // marking due to overflowing the global mark stack.
1124     reset_marking_state();
1125   } else {
1126     {
1127       GCTraceTime(Debug, gc) trace("GC Aggregate Data", g1h->gc_timer_cm());
1128 
1129       // Aggregate the per-task counting data that we have accumulated
1130       // while marking.
1131       aggregate_count_data();
1132     }
1133 
1134     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1135     // We're done with marking.
1136     // This is the end of  the marking cycle, we're expected all
1137     // threads to have SATB queues with active set to true.
1138     satb_mq_set.set_active_all_threads(false, /* new active value */
1139                                        true /* expected_active */);
1140 
1141     if (VerifyDuringGC) {
1142       HandleMark hm;  // handle scope
1143       g1h->prepare_for_verify();
1144       Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
1145     }
1146     g1h->verifier()->check_bitmaps("Remark End");
1147     assert(!restart_for_overflow(), "sanity");
1148     // Completely reset the marking state since marking completed
1149     set_non_marking_state();
1150   }
1151 
1152   // Expand the marking stack, if we have to and if we can.
1153   if (_markStack.should_expand()) {
1154     _markStack.expand();
1155   }
1156 
1157   // Statistics
1158   double now = os::elapsedTime();
1159   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1160   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1161   _remark_times.add((now - start) * 1000.0);
1162 
1163   g1p->record_concurrent_mark_remark_end();
1164 
1165   G1CMIsAliveClosure is_alive(g1h);
1166   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1167 }
1168 
1169 // Base class of the closures that finalize and verify the
1170 // liveness counting data.
1171 class G1CMCountDataClosureBase: public HeapRegionClosure {
1172 protected:
1173   G1CollectedHeap* _g1h;
1174   G1ConcurrentMark* _cm;
1175   CardTableModRefBS* _ct_bs;
1176 
1177   BitMap* _region_bm;
1178   BitMap* _card_bm;
1179 
1180   // Takes a region that's not empty (i.e., it has at least one
1181   // live object in it and sets its corresponding bit on the region
1182   // bitmap to 1.
1183   void set_bit_for_region(HeapRegion* hr) {
1184     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1185     _region_bm->par_at_put(index, true);
1186   }
1187 
1188 public:
1189   G1CMCountDataClosureBase(G1CollectedHeap* g1h,
1190                            BitMap* region_bm, BitMap* card_bm):
1191     _g1h(g1h), _cm(g1h->concurrent_mark()),
1192     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
1193     _region_bm(region_bm), _card_bm(card_bm) { }
1194 };
1195 
1196 // Closure that calculates the # live objects per region. Used
1197 // for verification purposes during the cleanup pause.
1198 class CalcLiveObjectsClosure: public G1CMCountDataClosureBase {
1199   G1CMBitMapRO* _bm;
1200   size_t _region_marked_bytes;
1201 
1202 public:
1203   CalcLiveObjectsClosure(G1CMBitMapRO *bm, G1CollectedHeap* g1h,
1204                          BitMap* region_bm, BitMap* card_bm) :
1205     G1CMCountDataClosureBase(g1h, region_bm, card_bm),
1206     _bm(bm), _region_marked_bytes(0) { }
1207 
1208   bool doHeapRegion(HeapRegion* hr) {
1209     HeapWord* ntams = hr->next_top_at_mark_start();
1210     HeapWord* start = hr->bottom();
1211 
1212     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1213            "Preconditions not met - "
1214            "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
1215            p2i(start), p2i(ntams), p2i(hr->end()));
1216 
1217     // Find the first marked object at or after "start".
1218     start = _bm->getNextMarkedWordAddress(start, ntams);
1219 
1220     size_t marked_bytes = 0;
1221 
1222     while (start < ntams) {
1223       oop obj = oop(start);
1224       int obj_sz = obj->size();
1225       HeapWord* obj_end = start + obj_sz;
1226 
1227       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1228       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1229 
1230       // Note: if we're looking at the last region in heap - obj_end
1231       // could be actually just beyond the end of the heap; end_idx
1232       // will then correspond to a (non-existent) card that is also
1233       // just beyond the heap.
1234       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1235         // end of object is not card aligned - increment to cover
1236         // all the cards spanned by the object
1237         end_idx += 1;
1238       }
1239 
1240       // Set the bits in the card BM for the cards spanned by this object.
1241       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1242 
1243       // Add the size of this object to the number of marked bytes.
1244       marked_bytes += (size_t)obj_sz * HeapWordSize;
1245 
1246       // This will happen if we are handling a humongous object that spans
1247       // several heap regions.
1248       if (obj_end > hr->end()) {
1249         break;
1250       }
1251       // Find the next marked object after this one.
1252       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1253     }
1254 
1255     // Mark the allocated-since-marking portion...
1256     HeapWord* top = hr->top();
1257     if (ntams < top) {
1258       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1259       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1260 
1261       // Note: if we're looking at the last region in heap - top
1262       // could be actually just beyond the end of the heap; end_idx
1263       // will then correspond to a (non-existent) card that is also
1264       // just beyond the heap.
1265       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1266         // end of object is not card aligned - increment to cover
1267         // all the cards spanned by the object
1268         end_idx += 1;
1269       }
1270       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1271 
1272       // This definitely means the region has live objects.
1273       set_bit_for_region(hr);
1274     }
1275 
1276     // Update the live region bitmap.
1277     if (marked_bytes > 0) {
1278       set_bit_for_region(hr);
1279     }
1280 
1281     // Set the marked bytes for the current region so that
1282     // it can be queried by a calling verification routine
1283     _region_marked_bytes = marked_bytes;
1284 
1285     return false;
1286   }
1287 
1288   size_t region_marked_bytes() const { return _region_marked_bytes; }
1289 };
1290 
1291 // Heap region closure used for verifying the counting data
1292 // that was accumulated concurrently and aggregated during
1293 // the remark pause. This closure is applied to the heap
1294 // regions during the STW cleanup pause.
1295 
1296 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1297   G1CollectedHeap* _g1h;
1298   G1ConcurrentMark* _cm;
1299   CalcLiveObjectsClosure _calc_cl;
1300   BitMap* _region_bm;   // Region BM to be verified
1301   BitMap* _card_bm;     // Card BM to be verified
1302 
1303   BitMap* _exp_region_bm; // Expected Region BM values
1304   BitMap* _exp_card_bm;   // Expected card BM values
1305 
1306   int _failures;
1307 
1308 public:
1309   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1310                                 BitMap* region_bm,
1311                                 BitMap* card_bm,
1312                                 BitMap* exp_region_bm,
1313                                 BitMap* exp_card_bm) :
1314     _g1h(g1h), _cm(g1h->concurrent_mark()),
1315     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1316     _region_bm(region_bm), _card_bm(card_bm),
1317     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1318     _failures(0) { }
1319 
1320   int failures() const { return _failures; }
1321 
1322   bool doHeapRegion(HeapRegion* hr) {
1323     int failures = 0;
1324 
1325     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1326     // this region and set the corresponding bits in the expected region
1327     // and card bitmaps.
1328     bool res = _calc_cl.doHeapRegion(hr);
1329     assert(res == false, "should be continuing");
1330 
1331     // Verify the marked bytes for this region.
1332     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1333     size_t act_marked_bytes = hr->next_marked_bytes();
1334 
1335     if (exp_marked_bytes > act_marked_bytes) {
1336       if (hr->is_starts_humongous()) {
1337         // For start_humongous regions, the size of the whole object will be
1338         // in exp_marked_bytes.
1339         HeapRegion* region = hr;
1340         int num_regions;
1341         for (num_regions = 0; region != NULL; num_regions++) {
1342           region = _g1h->next_region_in_humongous(region);
1343         }
1344         if ((num_regions-1) * HeapRegion::GrainBytes >= exp_marked_bytes) {
1345           failures += 1;
1346         } else if (num_regions * HeapRegion::GrainBytes < exp_marked_bytes) {
1347           failures += 1;
1348         }
1349       } else {
1350         // We're not OK if expected marked bytes > actual marked bytes. It means
1351         // we have missed accounting some objects during the actual marking.
1352         failures += 1;
1353       }
1354     }
1355 
1356     // Verify the bit, for this region, in the actual and expected
1357     // (which was just calculated) region bit maps.
1358     // We're not OK if the bit in the calculated expected region
1359     // bitmap is set and the bit in the actual region bitmap is not.
1360     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1361 
1362     bool expected = _exp_region_bm->at(index);
1363     bool actual = _region_bm->at(index);
1364     if (expected && !actual) {
1365       failures += 1;
1366     }
1367 
1368     // Verify that the card bit maps for the cards spanned by the current
1369     // region match. We have an error if we have a set bit in the expected
1370     // bit map and the corresponding bit in the actual bitmap is not set.
1371 
1372     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1373     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1374 
1375     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1376       expected = _exp_card_bm->at(i);
1377       actual = _card_bm->at(i);
1378 
1379       if (expected && !actual) {
1380         failures += 1;
1381       }
1382     }
1383 
1384     _failures += failures;
1385 
1386     // We could stop iteration over the heap when we
1387     // find the first violating region by returning true.
1388     return false;
1389   }
1390 };
1391 
1392 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1393 protected:
1394   G1CollectedHeap* _g1h;
1395   G1ConcurrentMark* _cm;
1396   BitMap* _actual_region_bm;
1397   BitMap* _actual_card_bm;
1398 
1399   uint    _n_workers;
1400 
1401   BitMap* _expected_region_bm;
1402   BitMap* _expected_card_bm;
1403 
1404   int  _failures;
1405 
1406   HeapRegionClaimer _hrclaimer;
1407 
1408 public:
1409   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1410                             BitMap* region_bm, BitMap* card_bm,
1411                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1412     : AbstractGangTask("G1 verify final counting"),
1413       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1414       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1415       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1416       _failures(0),
1417       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1418     assert(VerifyDuringGC, "don't call this otherwise");
1419     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1420     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1421   }
1422 
1423   void work(uint worker_id) {
1424     assert(worker_id < _n_workers, "invariant");
1425 
1426     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1427                                             _actual_region_bm, _actual_card_bm,
1428                                             _expected_region_bm,
1429                                             _expected_card_bm);
1430 
1431     _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
1432 
1433     Atomic::add(verify_cl.failures(), &_failures);
1434   }
1435 
1436   int failures() const { return _failures; }
1437 };
1438 
1439 // Closure that finalizes the liveness counting data.
1440 // Used during the cleanup pause.
1441 // Sets the bits corresponding to the interval [NTAMS, top]
1442 // (which contains the implicitly live objects) in the
1443 // card liveness bitmap. Also sets the bit for each region,
1444 // containing live data, in the region liveness bitmap.
1445 
1446 class FinalCountDataUpdateClosure: public G1CMCountDataClosureBase {
1447  public:
1448   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1449                               BitMap* region_bm,
1450                               BitMap* card_bm) :
1451     G1CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1452 
1453   bool doHeapRegion(HeapRegion* hr) {
1454     HeapWord* ntams = hr->next_top_at_mark_start();
1455     HeapWord* top   = hr->top();
1456 
1457     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1458 
1459     // Mark the allocated-since-marking portion...
1460     if (ntams < top) {
1461       // This definitely means the region has live objects.
1462       set_bit_for_region(hr);
1463 
1464       // Now set the bits in the card bitmap for [ntams, top)
1465       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1466       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1467 
1468       // Note: if we're looking at the last region in heap - top
1469       // could be actually just beyond the end of the heap; end_idx
1470       // will then correspond to a (non-existent) card that is also
1471       // just beyond the heap.
1472       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1473         // end of object is not card aligned - increment to cover
1474         // all the cards spanned by the object
1475         end_idx += 1;
1476       }
1477 
1478       assert(end_idx <= _card_bm->size(),
1479              "oob: end_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1480              end_idx, _card_bm->size());
1481       assert(start_idx < _card_bm->size(),
1482              "oob: start_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1483              start_idx, _card_bm->size());
1484 
1485       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1486     }
1487 
1488     // Set the bit for the region if it contains live data
1489     if (hr->next_marked_bytes() > 0) {
1490       set_bit_for_region(hr);
1491     }
1492 
1493     return false;
1494   }
1495 };
1496 
1497 class G1ParFinalCountTask: public AbstractGangTask {
1498 protected:
1499   G1CollectedHeap* _g1h;
1500   G1ConcurrentMark* _cm;
1501   BitMap* _actual_region_bm;
1502   BitMap* _actual_card_bm;
1503 
1504   uint    _n_workers;
1505   HeapRegionClaimer _hrclaimer;
1506 
1507 public:
1508   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1509     : AbstractGangTask("G1 final counting"),
1510       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1511       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1512       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1513   }
1514 
1515   void work(uint worker_id) {
1516     assert(worker_id < _n_workers, "invariant");
1517 
1518     FinalCountDataUpdateClosure final_update_cl(_g1h,
1519                                                 _actual_region_bm,
1520                                                 _actual_card_bm);
1521 
1522     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1523   }
1524 };
1525 
1526 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1527   G1CollectedHeap* _g1;
1528   size_t _freed_bytes;
1529   FreeRegionList* _local_cleanup_list;
1530   uint _old_regions_removed;
1531   uint _humongous_regions_removed;
1532   HRRSCleanupTask* _hrrs_cleanup_task;
1533 
1534 public:
1535   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1536                              FreeRegionList* local_cleanup_list,
1537                              HRRSCleanupTask* hrrs_cleanup_task) :
1538     _g1(g1),
1539     _freed_bytes(0),
1540     _local_cleanup_list(local_cleanup_list),
1541     _old_regions_removed(0),
1542     _humongous_regions_removed(0),
1543     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1544 
1545   size_t freed_bytes() { return _freed_bytes; }
1546   const uint old_regions_removed() { return _old_regions_removed; }
1547   const uint humongous_regions_removed() { return _humongous_regions_removed; }
1548 
1549   bool doHeapRegion(HeapRegion *hr) {
1550     if (hr->is_archive()) {
1551       return false;
1552     }
1553     // We use a claim value of zero here because all regions
1554     // were claimed with value 1 in the FinalCount task.
1555     _g1->reset_gc_time_stamps(hr);
1556     hr->note_end_of_marking();
1557 
1558     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1559       _freed_bytes += hr->used();
1560       hr->set_containing_set(NULL);
1561       if (hr->is_humongous()) {
1562         _humongous_regions_removed++;
1563         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1564       } else {
1565         _old_regions_removed++;
1566         _g1->free_region(hr, _local_cleanup_list, true);
1567       }
1568     } else {
1569       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1570     }
1571 
1572     return false;
1573   }
1574 };
1575 
1576 class G1ParNoteEndTask: public AbstractGangTask {
1577   friend class G1NoteEndOfConcMarkClosure;
1578 
1579 protected:
1580   G1CollectedHeap* _g1h;
1581   FreeRegionList* _cleanup_list;
1582   HeapRegionClaimer _hrclaimer;
1583 
1584 public:
1585   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1586       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1587   }
1588 
1589   void work(uint worker_id) {
1590     FreeRegionList local_cleanup_list("Local Cleanup List");
1591     HRRSCleanupTask hrrs_cleanup_task;
1592     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1593                                            &hrrs_cleanup_task);
1594     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1595     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1596 
1597     // Now update the lists
1598     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1599     {
1600       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1601       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1602 
1603       // If we iterate over the global cleanup list at the end of
1604       // cleanup to do this printing we will not guarantee to only
1605       // generate output for the newly-reclaimed regions (the list
1606       // might not be empty at the beginning of cleanup; we might
1607       // still be working on its previous contents). So we do the
1608       // printing here, before we append the new regions to the global
1609       // cleanup list.
1610 
1611       G1HRPrinter* hr_printer = _g1h->hr_printer();
1612       if (hr_printer->is_active()) {
1613         FreeRegionListIterator iter(&local_cleanup_list);
1614         while (iter.more_available()) {
1615           HeapRegion* hr = iter.get_next();
1616           hr_printer->cleanup(hr);
1617         }
1618       }
1619 
1620       _cleanup_list->add_ordered(&local_cleanup_list);
1621       assert(local_cleanup_list.is_empty(), "post-condition");
1622 
1623       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1624     }
1625   }
1626 };
1627 
1628 void G1ConcurrentMark::cleanup() {
1629   // world is stopped at this checkpoint
1630   assert(SafepointSynchronize::is_at_safepoint(),
1631          "world should be stopped");
1632   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1633 
1634   // If a full collection has happened, we shouldn't do this.
1635   if (has_aborted()) {
1636     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1637     return;
1638   }
1639 
1640   g1h->verifier()->verify_region_sets_optional();
1641 
1642   if (VerifyDuringGC) {
1643     HandleMark hm;  // handle scope
1644     g1h->prepare_for_verify();
1645     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1646   }
1647   g1h->verifier()->check_bitmaps("Cleanup Start");
1648 
1649   G1CollectorPolicy* g1p = g1h->g1_policy();
1650   g1p->record_concurrent_mark_cleanup_start();
1651 
1652   double start = os::elapsedTime();
1653 
1654   HeapRegionRemSet::reset_for_cleanup_tasks();
1655 
1656   // Do counting once more with the world stopped for good measure.
1657   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1658 
1659   g1h->workers()->run_task(&g1_par_count_task);
1660 
1661   if (VerifyDuringGC) {
1662     // Verify that the counting data accumulated during marking matches
1663     // that calculated by walking the marking bitmap.
1664 
1665     // Bitmaps to hold expected values
1666     BitMap expected_region_bm(_region_bm.size(), true);
1667     BitMap expected_card_bm(_card_bm.size(), true);
1668 
1669     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1670                                                  &_region_bm,
1671                                                  &_card_bm,
1672                                                  &expected_region_bm,
1673                                                  &expected_card_bm);
1674 
1675     g1h->workers()->run_task(&g1_par_verify_task);
1676 
1677     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1678   }
1679 
1680   size_t start_used_bytes = g1h->used();
1681   g1h->collector_state()->set_mark_in_progress(false);
1682 
1683   double count_end = os::elapsedTime();
1684   double this_final_counting_time = (count_end - start);
1685   _total_counting_time += this_final_counting_time;
1686 
1687   if (log_is_enabled(Trace, gc, liveness)) {
1688     G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1689     _g1h->heap_region_iterate(&cl);
1690   }
1691 
1692   // Install newly created mark bitMap as "prev".
1693   swapMarkBitMaps();
1694 
1695   g1h->reset_gc_time_stamp();
1696 
1697   uint n_workers = _g1h->workers()->active_workers();
1698 
1699   // Note end of marking in all heap regions.
1700   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1701   g1h->workers()->run_task(&g1_par_note_end_task);
1702   g1h->check_gc_time_stamps();
1703 
1704   if (!cleanup_list_is_empty()) {
1705     // The cleanup list is not empty, so we'll have to process it
1706     // concurrently. Notify anyone else that might be wanting free
1707     // regions that there will be more free regions coming soon.
1708     g1h->set_free_regions_coming();
1709   }
1710 
1711   // call below, since it affects the metric by which we sort the heap
1712   // regions.
1713   if (G1ScrubRemSets) {
1714     double rs_scrub_start = os::elapsedTime();
1715     g1h->scrub_rem_set(&_region_bm, &_card_bm);
1716     _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
1717   }
1718 
1719   // this will also free any regions totally full of garbage objects,
1720   // and sort the regions.
1721   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1722 
1723   // Statistics.
1724   double end = os::elapsedTime();
1725   _cleanup_times.add((end - start) * 1000.0);
1726 
1727   // Clean up will have freed any regions completely full of garbage.
1728   // Update the soft reference policy with the new heap occupancy.
1729   Universe::update_heap_info_at_gc();
1730 
1731   if (VerifyDuringGC) {
1732     HandleMark hm;  // handle scope
1733     g1h->prepare_for_verify();
1734     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
1735   }
1736 
1737   g1h->verifier()->check_bitmaps("Cleanup End");
1738 
1739   g1h->verifier()->verify_region_sets_optional();
1740 
1741   // We need to make this be a "collection" so any collection pause that
1742   // races with it goes around and waits for completeCleanup to finish.
1743   g1h->increment_total_collections();
1744 
1745   // Clean out dead classes and update Metaspace sizes.
1746   if (ClassUnloadingWithConcurrentMark) {
1747     ClassLoaderDataGraph::purge();
1748   }
1749   MetaspaceGC::compute_new_size();
1750 
1751   // We reclaimed old regions so we should calculate the sizes to make
1752   // sure we update the old gen/space data.
1753   g1h->g1mm()->update_sizes();
1754   g1h->allocation_context_stats().update_after_mark();
1755 
1756   g1h->trace_heap_after_concurrent_cycle();
1757 }
1758 
1759 void G1ConcurrentMark::completeCleanup() {
1760   if (has_aborted()) return;
1761 
1762   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1763 
1764   _cleanup_list.verify_optional();
1765   FreeRegionList tmp_free_list("Tmp Free List");
1766 
1767   log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1768                                   "cleanup list has %u entries",
1769                                   _cleanup_list.length());
1770 
1771   // No one else should be accessing the _cleanup_list at this point,
1772   // so it is not necessary to take any locks
1773   while (!_cleanup_list.is_empty()) {
1774     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1775     assert(hr != NULL, "Got NULL from a non-empty list");
1776     hr->par_clear();
1777     tmp_free_list.add_ordered(hr);
1778 
1779     // Instead of adding one region at a time to the secondary_free_list,
1780     // we accumulate them in the local list and move them a few at a
1781     // time. This also cuts down on the number of notify_all() calls
1782     // we do during this process. We'll also append the local list when
1783     // _cleanup_list is empty (which means we just removed the last
1784     // region from the _cleanup_list).
1785     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1786         _cleanup_list.is_empty()) {
1787       log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1788                                       "appending %u entries to the secondary_free_list, "
1789                                       "cleanup list still has %u entries",
1790                                       tmp_free_list.length(),
1791                                       _cleanup_list.length());
1792 
1793       {
1794         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1795         g1h->secondary_free_list_add(&tmp_free_list);
1796         SecondaryFreeList_lock->notify_all();
1797       }
1798 #ifndef PRODUCT
1799       if (G1StressConcRegionFreeing) {
1800         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1801           os::sleep(Thread::current(), (jlong) 1, false);
1802         }
1803       }
1804 #endif
1805     }
1806   }
1807   assert(tmp_free_list.is_empty(), "post-condition");
1808 }
1809 
1810 // Supporting Object and Oop closures for reference discovery
1811 // and processing in during marking
1812 
1813 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1814   HeapWord* addr = (HeapWord*)obj;
1815   return addr != NULL &&
1816          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1817 }
1818 
1819 // 'Keep Alive' oop closure used by both serial parallel reference processing.
1820 // Uses the G1CMTask associated with a worker thread (for serial reference
1821 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1822 // trace referent objects.
1823 //
1824 // Using the G1CMTask and embedded local queues avoids having the worker
1825 // threads operating on the global mark stack. This reduces the risk
1826 // of overflowing the stack - which we would rather avoid at this late
1827 // state. Also using the tasks' local queues removes the potential
1828 // of the workers interfering with each other that could occur if
1829 // operating on the global stack.
1830 
1831 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1832   G1ConcurrentMark* _cm;
1833   G1CMTask*         _task;
1834   int               _ref_counter_limit;
1835   int               _ref_counter;
1836   bool              _is_serial;
1837  public:
1838   G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1839     _cm(cm), _task(task), _is_serial(is_serial),
1840     _ref_counter_limit(G1RefProcDrainInterval) {
1841     assert(_ref_counter_limit > 0, "sanity");
1842     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1843     _ref_counter = _ref_counter_limit;
1844   }
1845 
1846   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1847   virtual void do_oop(      oop* p) { do_oop_work(p); }
1848 
1849   template <class T> void do_oop_work(T* p) {
1850     if (!_cm->has_overflown()) {
1851       oop obj = oopDesc::load_decode_heap_oop(p);
1852       _task->deal_with_reference(obj);
1853       _ref_counter--;
1854 
1855       if (_ref_counter == 0) {
1856         // We have dealt with _ref_counter_limit references, pushing them
1857         // and objects reachable from them on to the local stack (and
1858         // possibly the global stack). Call G1CMTask::do_marking_step() to
1859         // process these entries.
1860         //
1861         // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1862         // there's nothing more to do (i.e. we're done with the entries that
1863         // were pushed as a result of the G1CMTask::deal_with_reference() calls
1864         // above) or we overflow.
1865         //
1866         // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1867         // flag while there may still be some work to do. (See the comment at
1868         // the beginning of G1CMTask::do_marking_step() for those conditions -
1869         // one of which is reaching the specified time target.) It is only
1870         // when G1CMTask::do_marking_step() returns without setting the
1871         // has_aborted() flag that the marking step has completed.
1872         do {
1873           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1874           _task->do_marking_step(mark_step_duration_ms,
1875                                  false      /* do_termination */,
1876                                  _is_serial);
1877         } while (_task->has_aborted() && !_cm->has_overflown());
1878         _ref_counter = _ref_counter_limit;
1879       }
1880     }
1881   }
1882 };
1883 
1884 // 'Drain' oop closure used by both serial and parallel reference processing.
1885 // Uses the G1CMTask associated with a given worker thread (for serial
1886 // reference processing the G1CMtask for worker 0 is used). Calls the
1887 // do_marking_step routine, with an unbelievably large timeout value,
1888 // to drain the marking data structures of the remaining entries
1889 // added by the 'keep alive' oop closure above.
1890 
1891 class G1CMDrainMarkingStackClosure: public VoidClosure {
1892   G1ConcurrentMark* _cm;
1893   G1CMTask*         _task;
1894   bool              _is_serial;
1895  public:
1896   G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1897     _cm(cm), _task(task), _is_serial(is_serial) {
1898     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1899   }
1900 
1901   void do_void() {
1902     do {
1903       // We call G1CMTask::do_marking_step() to completely drain the local
1904       // and global marking stacks of entries pushed by the 'keep alive'
1905       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1906       //
1907       // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1908       // if there's nothing more to do (i.e. we've completely drained the
1909       // entries that were pushed as a a result of applying the 'keep alive'
1910       // closure to the entries on the discovered ref lists) or we overflow
1911       // the global marking stack.
1912       //
1913       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1914       // flag while there may still be some work to do. (See the comment at
1915       // the beginning of G1CMTask::do_marking_step() for those conditions -
1916       // one of which is reaching the specified time target.) It is only
1917       // when G1CMTask::do_marking_step() returns without setting the
1918       // has_aborted() flag that the marking step has completed.
1919 
1920       _task->do_marking_step(1000000000.0 /* something very large */,
1921                              true         /* do_termination */,
1922                              _is_serial);
1923     } while (_task->has_aborted() && !_cm->has_overflown());
1924   }
1925 };
1926 
1927 // Implementation of AbstractRefProcTaskExecutor for parallel
1928 // reference processing at the end of G1 concurrent marking
1929 
1930 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
1931 private:
1932   G1CollectedHeap*  _g1h;
1933   G1ConcurrentMark* _cm;
1934   WorkGang*         _workers;
1935   uint              _active_workers;
1936 
1937 public:
1938   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1939                           G1ConcurrentMark* cm,
1940                           WorkGang* workers,
1941                           uint n_workers) :
1942     _g1h(g1h), _cm(cm),
1943     _workers(workers), _active_workers(n_workers) { }
1944 
1945   // Executes the given task using concurrent marking worker threads.
1946   virtual void execute(ProcessTask& task);
1947   virtual void execute(EnqueueTask& task);
1948 };
1949 
1950 class G1CMRefProcTaskProxy: public AbstractGangTask {
1951   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1952   ProcessTask&      _proc_task;
1953   G1CollectedHeap*  _g1h;
1954   G1ConcurrentMark* _cm;
1955 
1956 public:
1957   G1CMRefProcTaskProxy(ProcessTask& proc_task,
1958                        G1CollectedHeap* g1h,
1959                        G1ConcurrentMark* cm) :
1960     AbstractGangTask("Process reference objects in parallel"),
1961     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1962     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1963     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1964   }
1965 
1966   virtual void work(uint worker_id) {
1967     ResourceMark rm;
1968     HandleMark hm;
1969     G1CMTask* task = _cm->task(worker_id);
1970     G1CMIsAliveClosure g1_is_alive(_g1h);
1971     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1972     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1973 
1974     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1975   }
1976 };
1977 
1978 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
1979   assert(_workers != NULL, "Need parallel worker threads.");
1980   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1981 
1982   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1983 
1984   // We need to reset the concurrency level before each
1985   // proxy task execution, so that the termination protocol
1986   // and overflow handling in G1CMTask::do_marking_step() knows
1987   // how many workers to wait for.
1988   _cm->set_concurrency(_active_workers);
1989   _workers->run_task(&proc_task_proxy);
1990 }
1991 
1992 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
1993   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
1994   EnqueueTask& _enq_task;
1995 
1996 public:
1997   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
1998     AbstractGangTask("Enqueue reference objects in parallel"),
1999     _enq_task(enq_task) { }
2000 
2001   virtual void work(uint worker_id) {
2002     _enq_task.work(worker_id);
2003   }
2004 };
2005 
2006 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2007   assert(_workers != NULL, "Need parallel worker threads.");
2008   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2009 
2010   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2011 
2012   // Not strictly necessary but...
2013   //
2014   // We need to reset the concurrency level before each
2015   // proxy task execution, so that the termination protocol
2016   // and overflow handling in G1CMTask::do_marking_step() knows
2017   // how many workers to wait for.
2018   _cm->set_concurrency(_active_workers);
2019   _workers->run_task(&enq_task_proxy);
2020 }
2021 
2022 void G1ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2023   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2024 }
2025 
2026 void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2027   if (has_overflown()) {
2028     // Skip processing the discovered references if we have
2029     // overflown the global marking stack. Reference objects
2030     // only get discovered once so it is OK to not
2031     // de-populate the discovered reference lists. We could have,
2032     // but the only benefit would be that, when marking restarts,
2033     // less reference objects are discovered.
2034     return;
2035   }
2036 
2037   ResourceMark rm;
2038   HandleMark   hm;
2039 
2040   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2041 
2042   // Is alive closure.
2043   G1CMIsAliveClosure g1_is_alive(g1h);
2044 
2045   // Inner scope to exclude the cleaning of the string and symbol
2046   // tables from the displayed time.
2047   {
2048     GCTraceTime(Debug, gc) trace("GC Ref Proc", g1h->gc_timer_cm());
2049 
2050     ReferenceProcessor* rp = g1h->ref_processor_cm();
2051 
2052     // See the comment in G1CollectedHeap::ref_processing_init()
2053     // about how reference processing currently works in G1.
2054 
2055     // Set the soft reference policy
2056     rp->setup_policy(clear_all_soft_refs);
2057     assert(_markStack.isEmpty(), "mark stack should be empty");
2058 
2059     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2060     // in serial reference processing. Note these closures are also
2061     // used for serially processing (by the the current thread) the
2062     // JNI references during parallel reference processing.
2063     //
2064     // These closures do not need to synchronize with the worker
2065     // threads involved in parallel reference processing as these
2066     // instances are executed serially by the current thread (e.g.
2067     // reference processing is not multi-threaded and is thus
2068     // performed by the current thread instead of a gang worker).
2069     //
2070     // The gang tasks involved in parallel reference processing create
2071     // their own instances of these closures, which do their own
2072     // synchronization among themselves.
2073     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2074     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2075 
2076     // We need at least one active thread. If reference processing
2077     // is not multi-threaded we use the current (VMThread) thread,
2078     // otherwise we use the work gang from the G1CollectedHeap and
2079     // we utilize all the worker threads we can.
2080     bool processing_is_mt = rp->processing_is_mt();
2081     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2082     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2083 
2084     // Parallel processing task executor.
2085     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2086                                               g1h->workers(), active_workers);
2087     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2088 
2089     // Set the concurrency level. The phase was already set prior to
2090     // executing the remark task.
2091     set_concurrency(active_workers);
2092 
2093     // Set the degree of MT processing here.  If the discovery was done MT,
2094     // the number of threads involved during discovery could differ from
2095     // the number of active workers.  This is OK as long as the discovered
2096     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2097     rp->set_active_mt_degree(active_workers);
2098 
2099     // Process the weak references.
2100     const ReferenceProcessorStats& stats =
2101         rp->process_discovered_references(&g1_is_alive,
2102                                           &g1_keep_alive,
2103                                           &g1_drain_mark_stack,
2104                                           executor,
2105                                           g1h->gc_timer_cm());
2106     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2107 
2108     // The do_oop work routines of the keep_alive and drain_marking_stack
2109     // oop closures will set the has_overflown flag if we overflow the
2110     // global marking stack.
2111 
2112     assert(_markStack.overflow() || _markStack.isEmpty(),
2113             "mark stack should be empty (unless it overflowed)");
2114 
2115     if (_markStack.overflow()) {
2116       // This should have been done already when we tried to push an
2117       // entry on to the global mark stack. But let's do it again.
2118       set_has_overflown();
2119     }
2120 
2121     assert(rp->num_q() == active_workers, "why not");
2122 
2123     rp->enqueue_discovered_references(executor);
2124 
2125     rp->verify_no_references_recorded();
2126     assert(!rp->discovery_enabled(), "Post condition");
2127   }
2128 
2129   if (has_overflown()) {
2130     // We can not trust g1_is_alive if the marking stack overflowed
2131     return;
2132   }
2133 
2134   assert(_markStack.isEmpty(), "Marking should have completed");
2135 
2136   // Unload Klasses, String, Symbols, Code Cache, etc.
2137   {
2138     GCTraceTime(Debug, gc) trace("Unloading", g1h->gc_timer_cm());
2139 
2140     if (ClassUnloadingWithConcurrentMark) {
2141       bool purged_classes;
2142 
2143       {
2144         GCTraceTime(Trace, gc) trace("System Dictionary Unloading", g1h->gc_timer_cm());
2145         purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2146       }
2147 
2148       {
2149         GCTraceTime(Trace, gc) trace("Parallel Unloading", g1h->gc_timer_cm());
2150         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2151       }
2152     }
2153 
2154     if (G1StringDedup::is_enabled()) {
2155       GCTraceTime(Trace, gc) trace("String Deduplication Unlink", g1h->gc_timer_cm());
2156       G1StringDedup::unlink(&g1_is_alive);
2157     }
2158   }
2159 }
2160 
2161 void G1ConcurrentMark::swapMarkBitMaps() {
2162   G1CMBitMapRO* temp = _prevMarkBitMap;
2163   _prevMarkBitMap    = (G1CMBitMapRO*)_nextMarkBitMap;
2164   _nextMarkBitMap    = (G1CMBitMap*)  temp;
2165 }
2166 
2167 // Closure for marking entries in SATB buffers.
2168 class G1CMSATBBufferClosure : public SATBBufferClosure {
2169 private:
2170   G1CMTask* _task;
2171   G1CollectedHeap* _g1h;
2172 
2173   // This is very similar to G1CMTask::deal_with_reference, but with
2174   // more relaxed requirements for the argument, so this must be more
2175   // circumspect about treating the argument as an object.
2176   void do_entry(void* entry) const {
2177     _task->increment_refs_reached();
2178     HeapRegion* hr = _g1h->heap_region_containing(entry);
2179     if (entry < hr->next_top_at_mark_start()) {
2180       // Until we get here, we don't know whether entry refers to a valid
2181       // object; it could instead have been a stale reference.
2182       oop obj = static_cast<oop>(entry);
2183       assert(obj->is_oop(true /* ignore mark word */),
2184              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2185       _task->make_reference_grey(obj, hr);
2186     }
2187   }
2188 
2189 public:
2190   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
2191     : _task(task), _g1h(g1h) { }
2192 
2193   virtual void do_buffer(void** buffer, size_t size) {
2194     for (size_t i = 0; i < size; ++i) {
2195       do_entry(buffer[i]);
2196     }
2197   }
2198 };
2199 
2200 class G1RemarkThreadsClosure : public ThreadClosure {
2201   G1CMSATBBufferClosure _cm_satb_cl;
2202   G1CMOopClosure _cm_cl;
2203   MarkingCodeBlobClosure _code_cl;
2204   int _thread_parity;
2205 
2206  public:
2207   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
2208     _cm_satb_cl(task, g1h),
2209     _cm_cl(g1h, g1h->concurrent_mark(), task),
2210     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2211     _thread_parity(Threads::thread_claim_parity()) {}
2212 
2213   void do_thread(Thread* thread) {
2214     if (thread->is_Java_thread()) {
2215       if (thread->claim_oops_do(true, _thread_parity)) {
2216         JavaThread* jt = (JavaThread*)thread;
2217 
2218         // In theory it should not be neccessary to explicitly walk the nmethods to find roots for concurrent marking
2219         // however the liveness of oops reachable from nmethods have very complex lifecycles:
2220         // * Alive if on the stack of an executing method
2221         // * Weakly reachable otherwise
2222         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver should be
2223         // live by the SATB invariant but other oops recorded in nmethods may behave differently.
2224         jt->nmethods_do(&_code_cl);
2225 
2226         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2227       }
2228     } else if (thread->is_VM_thread()) {
2229       if (thread->claim_oops_do(true, _thread_parity)) {
2230         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2231       }
2232     }
2233   }
2234 };
2235 
2236 class G1CMRemarkTask: public AbstractGangTask {
2237 private:
2238   G1ConcurrentMark* _cm;
2239 public:
2240   void work(uint worker_id) {
2241     // Since all available tasks are actually started, we should
2242     // only proceed if we're supposed to be active.
2243     if (worker_id < _cm->active_tasks()) {
2244       G1CMTask* task = _cm->task(worker_id);
2245       task->record_start_time();
2246       {
2247         ResourceMark rm;
2248         HandleMark hm;
2249 
2250         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2251         Threads::threads_do(&threads_f);
2252       }
2253 
2254       do {
2255         task->do_marking_step(1000000000.0 /* something very large */,
2256                               true         /* do_termination       */,
2257                               false        /* is_serial            */);
2258       } while (task->has_aborted() && !_cm->has_overflown());
2259       // If we overflow, then we do not want to restart. We instead
2260       // want to abort remark and do concurrent marking again.
2261       task->record_end_time();
2262     }
2263   }
2264 
2265   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
2266     AbstractGangTask("Par Remark"), _cm(cm) {
2267     _cm->terminator()->reset_for_reuse(active_workers);
2268   }
2269 };
2270 
2271 void G1ConcurrentMark::checkpointRootsFinalWork() {
2272   ResourceMark rm;
2273   HandleMark   hm;
2274   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2275 
2276   GCTraceTime(Debug, gc) trace("Finalize Marking", g1h->gc_timer_cm());
2277 
2278   g1h->ensure_parsability(false);
2279 
2280   // this is remark, so we'll use up all active threads
2281   uint active_workers = g1h->workers()->active_workers();
2282   set_concurrency_and_phase(active_workers, false /* concurrent */);
2283   // Leave _parallel_marking_threads at it's
2284   // value originally calculated in the G1ConcurrentMark
2285   // constructor and pass values of the active workers
2286   // through the gang in the task.
2287 
2288   {
2289     StrongRootsScope srs(active_workers);
2290 
2291     G1CMRemarkTask remarkTask(this, active_workers);
2292     // We will start all available threads, even if we decide that the
2293     // active_workers will be fewer. The extra ones will just bail out
2294     // immediately.
2295     g1h->workers()->run_task(&remarkTask);
2296   }
2297 
2298   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2299   guarantee(has_overflown() ||
2300             satb_mq_set.completed_buffers_num() == 0,
2301             "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
2302             BOOL_TO_STR(has_overflown()),
2303             satb_mq_set.completed_buffers_num());
2304 
2305   print_stats();
2306 }
2307 
2308 void G1ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2309   // Note we are overriding the read-only view of the prev map here, via
2310   // the cast.
2311   ((G1CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2312 }
2313 
2314 HeapRegion*
2315 G1ConcurrentMark::claim_region(uint worker_id) {
2316   // "checkpoint" the finger
2317   HeapWord* finger = _finger;
2318 
2319   // _heap_end will not change underneath our feet; it only changes at
2320   // yield points.
2321   while (finger < _heap_end) {
2322     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2323 
2324     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2325 
2326     // Above heap_region_containing may return NULL as we always scan claim
2327     // until the end of the heap. In this case, just jump to the next region.
2328     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2329 
2330     // Is the gap between reading the finger and doing the CAS too long?
2331     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2332     if (res == finger && curr_region != NULL) {
2333       // we succeeded
2334       HeapWord*   bottom        = curr_region->bottom();
2335       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2336 
2337       // notice that _finger == end cannot be guaranteed here since,
2338       // someone else might have moved the finger even further
2339       assert(_finger >= end, "the finger should have moved forward");
2340 
2341       if (limit > bottom) {
2342         return curr_region;
2343       } else {
2344         assert(limit == bottom,
2345                "the region limit should be at bottom");
2346         // we return NULL and the caller should try calling
2347         // claim_region() again.
2348         return NULL;
2349       }
2350     } else {
2351       assert(_finger > finger, "the finger should have moved forward");
2352       // read it again
2353       finger = _finger;
2354     }
2355   }
2356 
2357   return NULL;
2358 }
2359 
2360 #ifndef PRODUCT
2361 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
2362 private:
2363   G1CollectedHeap* _g1h;
2364   const char* _phase;
2365   int _info;
2366 
2367 public:
2368   VerifyNoCSetOops(const char* phase, int info = -1) :
2369     _g1h(G1CollectedHeap::heap()),
2370     _phase(phase),
2371     _info(info)
2372   { }
2373 
2374   void operator()(oop obj) const {
2375     guarantee(obj->is_oop(),
2376               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2377               p2i(obj), _phase, _info);
2378     guarantee(!_g1h->obj_in_cs(obj),
2379               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2380               p2i(obj), _phase, _info);
2381   }
2382 };
2383 
2384 void G1ConcurrentMark::verify_no_cset_oops() {
2385   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2386   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2387     return;
2388   }
2389 
2390   // Verify entries on the global mark stack
2391   _markStack.iterate(VerifyNoCSetOops("Stack"));
2392 
2393   // Verify entries on the task queues
2394   for (uint i = 0; i < _max_worker_id; ++i) {
2395     G1CMTaskQueue* queue = _task_queues->queue(i);
2396     queue->iterate(VerifyNoCSetOops("Queue", i));
2397   }
2398 
2399   // Verify the global finger
2400   HeapWord* global_finger = finger();
2401   if (global_finger != NULL && global_finger < _heap_end) {
2402     // Since we always iterate over all regions, we might get a NULL HeapRegion
2403     // here.
2404     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
2405     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2406               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
2407               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
2408   }
2409 
2410   // Verify the task fingers
2411   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2412   for (uint i = 0; i < parallel_marking_threads(); ++i) {
2413     G1CMTask* task = _tasks[i];
2414     HeapWord* task_finger = task->finger();
2415     if (task_finger != NULL && task_finger < _heap_end) {
2416       // See above note on the global finger verification.
2417       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
2418       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2419                 !task_hr->in_collection_set(),
2420                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2421                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
2422     }
2423   }
2424 }
2425 #endif // PRODUCT
2426 
2427 // Aggregate the counting data that was constructed concurrently
2428 // with marking.
2429 class AggregateCountDataHRClosure: public HeapRegionClosure {
2430   G1CollectedHeap* _g1h;
2431   G1ConcurrentMark* _cm;
2432   CardTableModRefBS* _ct_bs;
2433   BitMap* _cm_card_bm;
2434   uint _max_worker_id;
2435 
2436  public:
2437   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2438                               BitMap* cm_card_bm,
2439                               uint max_worker_id) :
2440     _g1h(g1h), _cm(g1h->concurrent_mark()),
2441     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2442     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2443 
2444   bool doHeapRegion(HeapRegion* hr) {
2445     HeapWord* start = hr->bottom();
2446     HeapWord* limit = hr->next_top_at_mark_start();
2447     HeapWord* end = hr->end();
2448 
2449     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2450            "Preconditions not met - "
2451            "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2452            "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2453            p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2454 
2455     assert(hr->next_marked_bytes() == 0, "Precondition");
2456 
2457     if (start == limit) {
2458       // NTAMS of this region has not been set so nothing to do.
2459       return false;
2460     }
2461 
2462     // 'start' should be in the heap.
2463     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2464     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2465     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2466 
2467     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2468     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2469     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2470 
2471     // If ntams is not card aligned then we bump card bitmap index
2472     // for limit so that we get the all the cards spanned by
2473     // the object ending at ntams.
2474     // Note: if this is the last region in the heap then ntams
2475     // could be actually just beyond the end of the the heap;
2476     // limit_idx will then  correspond to a (non-existent) card
2477     // that is also outside the heap.
2478     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2479       limit_idx += 1;
2480     }
2481 
2482     assert(limit_idx <= end_idx, "or else use atomics");
2483 
2484     // Aggregate the "stripe" in the count data associated with hr.
2485     uint hrm_index = hr->hrm_index();
2486     size_t marked_bytes = 0;
2487 
2488     for (uint i = 0; i < _max_worker_id; i += 1) {
2489       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2490       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2491 
2492       // Fetch the marked_bytes in this region for task i and
2493       // add it to the running total for this region.
2494       marked_bytes += marked_bytes_array[hrm_index];
2495 
2496       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
2497       // into the global card bitmap.
2498       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2499 
2500       while (scan_idx < limit_idx) {
2501         assert(task_card_bm->at(scan_idx) == true, "should be");
2502         _cm_card_bm->set_bit(scan_idx);
2503         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2504 
2505         // BitMap::get_next_one_offset() can handle the case when
2506         // its left_offset parameter is greater than its right_offset
2507         // parameter. It does, however, have an early exit if
2508         // left_offset == right_offset. So let's limit the value
2509         // passed in for left offset here.
2510         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2511         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2512       }
2513     }
2514 
2515     // Update the marked bytes for this region.
2516     hr->add_to_marked_bytes(marked_bytes);
2517 
2518     // Next heap region
2519     return false;
2520   }
2521 };
2522 
2523 class G1AggregateCountDataTask: public AbstractGangTask {
2524 protected:
2525   G1CollectedHeap* _g1h;
2526   G1ConcurrentMark* _cm;
2527   BitMap* _cm_card_bm;
2528   uint _max_worker_id;
2529   uint _active_workers;
2530   HeapRegionClaimer _hrclaimer;
2531 
2532 public:
2533   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2534                            G1ConcurrentMark* cm,
2535                            BitMap* cm_card_bm,
2536                            uint max_worker_id,
2537                            uint n_workers) :
2538       AbstractGangTask("Count Aggregation"),
2539       _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2540       _max_worker_id(max_worker_id),
2541       _active_workers(n_workers),
2542       _hrclaimer(_active_workers) {
2543   }
2544 
2545   void work(uint worker_id) {
2546     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
2547 
2548     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
2549   }
2550 };
2551 
2552 
2553 void G1ConcurrentMark::aggregate_count_data() {
2554   uint n_workers = _g1h->workers()->active_workers();
2555 
2556   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
2557                                            _max_worker_id, n_workers);
2558 
2559   _g1h->workers()->run_task(&g1_par_agg_task);
2560 }
2561 
2562 // Clear the per-worker arrays used to store the per-region counting data
2563 void G1ConcurrentMark::clear_all_count_data() {
2564   // Clear the global card bitmap - it will be filled during
2565   // liveness count aggregation (during remark) and the
2566   // final counting task.
2567   _card_bm.clear();
2568 
2569   // Clear the global region bitmap - it will be filled as part
2570   // of the final counting task.
2571   _region_bm.clear();
2572 
2573   uint max_regions = _g1h->max_regions();
2574   assert(_max_worker_id > 0, "uninitialized");
2575 
2576   for (uint i = 0; i < _max_worker_id; i += 1) {
2577     BitMap* task_card_bm = count_card_bitmap_for(i);
2578     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
2579 
2580     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
2581     assert(marked_bytes_array != NULL, "uninitialized");
2582 
2583     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
2584     task_card_bm->clear();
2585   }
2586 }
2587 
2588 void G1ConcurrentMark::print_stats() {
2589   if (!log_is_enabled(Debug, gc, stats)) {
2590     return;
2591   }
2592   log_debug(gc, stats)("---------------------------------------------------------------------");
2593   for (size_t i = 0; i < _active_tasks; ++i) {
2594     _tasks[i]->print_stats();
2595     log_debug(gc, stats)("---------------------------------------------------------------------");
2596   }
2597 }
2598 
2599 // abandon current marking iteration due to a Full GC
2600 void G1ConcurrentMark::abort() {
2601   if (!cmThread()->during_cycle() || _has_aborted) {
2602     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2603     return;
2604   }
2605 
2606   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2607   // concurrent bitmap clearing.
2608   _nextMarkBitMap->clearAll();
2609 
2610   // Note we cannot clear the previous marking bitmap here
2611   // since VerifyDuringGC verifies the objects marked during
2612   // a full GC against the previous bitmap.
2613 
2614   // Clear the liveness counting data
2615   clear_all_count_data();
2616   // Empty mark stack
2617   reset_marking_state();
2618   for (uint i = 0; i < _max_worker_id; ++i) {
2619     _tasks[i]->clear_region_fields();
2620   }
2621   _first_overflow_barrier_sync.abort();
2622   _second_overflow_barrier_sync.abort();
2623   _has_aborted = true;
2624 
2625   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2626   satb_mq_set.abandon_partial_marking();
2627   // This can be called either during or outside marking, we'll read
2628   // the expected_active value from the SATB queue set.
2629   satb_mq_set.set_active_all_threads(
2630                                  false, /* new active value */
2631                                  satb_mq_set.is_active() /* expected_active */);
2632 
2633   _g1h->trace_heap_after_concurrent_cycle();



2634 
2635   _g1h->register_concurrent_cycle_end();
2636 }
2637 
2638 static void print_ms_time_info(const char* prefix, const char* name,
2639                                NumberSeq& ns) {
2640   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2641                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2642   if (ns.num() > 0) {
2643     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2644                            prefix, ns.sd(), ns.maximum());
2645   }
2646 }
2647 
2648 void G1ConcurrentMark::print_summary_info() {
2649   LogHandle(gc, marking) log;
2650   if (!log.is_trace()) {
2651     return;
2652   }
2653 
2654   log.trace(" Concurrent marking:");
2655   print_ms_time_info("  ", "init marks", _init_times);
2656   print_ms_time_info("  ", "remarks", _remark_times);
2657   {
2658     print_ms_time_info("     ", "final marks", _remark_mark_times);
2659     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2660 
2661   }
2662   print_ms_time_info("  ", "cleanups", _cleanup_times);
2663   log.trace("    Final counting total time = %8.2f s (avg = %8.2f ms).",
2664             _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2665   if (G1ScrubRemSets) {
2666     log.trace("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2667               _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2668   }
2669   log.trace("  Total stop_world time = %8.2f s.",
2670             (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
2671   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2672             cmThread()->vtime_accum(), cmThread()->vtime_mark_accum());
2673 }
2674 
2675 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2676   _parallel_workers->print_worker_threads_on(st);
2677 }
2678 
2679 void G1ConcurrentMark::print_on_error(outputStream* st) const {
2680   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2681       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
2682   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
2683   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
2684 }
2685 
2686 // We take a break if someone is trying to stop the world.
2687 bool G1ConcurrentMark::do_yield_check(uint worker_id) {
2688   if (SuspendibleThreadSet::should_yield()) {
2689     SuspendibleThreadSet::yield();
2690     return true;
2691   } else {
2692     return false;
2693   }
2694 }
2695 
2696 // Closure for iteration over bitmaps
2697 class G1CMBitMapClosure : public BitMapClosure {
2698 private:
2699   // the bitmap that is being iterated over
2700   G1CMBitMap*                 _nextMarkBitMap;
2701   G1ConcurrentMark*           _cm;
2702   G1CMTask*                   _task;
2703 
2704 public:
2705   G1CMBitMapClosure(G1CMTask *task, G1ConcurrentMark* cm, G1CMBitMap* nextMarkBitMap) :
2706     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2707 
2708   bool do_bit(size_t offset) {
2709     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2710     assert(_nextMarkBitMap->isMarked(addr), "invariant");
2711     assert( addr < _cm->finger(), "invariant");
2712     assert(addr >= _task->finger(), "invariant");
2713 
2714     // We move that task's local finger along.
2715     _task->move_finger_to(addr);
2716 
2717     _task->scan_object(oop(addr));
2718     // we only partially drain the local queue and global stack
2719     _task->drain_local_queue(true);
2720     _task->drain_global_stack(true);
2721 
2722     // if the has_aborted flag has been raised, we need to bail out of
2723     // the iteration
2724     return !_task->has_aborted();
2725   }
2726 };
2727 
2728 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2729   ReferenceProcessor* result = g1h->ref_processor_cm();
2730   assert(result != NULL, "CM reference processor should not be NULL");
2731   return result;
2732 }
2733 
2734 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2735                                G1ConcurrentMark* cm,
2736                                G1CMTask* task)
2737   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2738     _g1h(g1h), _cm(cm), _task(task)
2739 { }
2740 
2741 void G1CMTask::setup_for_region(HeapRegion* hr) {
2742   assert(hr != NULL,
2743         "claim_region() should have filtered out NULL regions");
2744   _curr_region  = hr;
2745   _finger       = hr->bottom();
2746   update_region_limit();
2747 }
2748 
2749 void G1CMTask::update_region_limit() {
2750   HeapRegion* hr            = _curr_region;
2751   HeapWord* bottom          = hr->bottom();
2752   HeapWord* limit           = hr->next_top_at_mark_start();
2753 
2754   if (limit == bottom) {
2755     // The region was collected underneath our feet.
2756     // We set the finger to bottom to ensure that the bitmap
2757     // iteration that will follow this will not do anything.
2758     // (this is not a condition that holds when we set the region up,
2759     // as the region is not supposed to be empty in the first place)
2760     _finger = bottom;
2761   } else if (limit >= _region_limit) {
2762     assert(limit >= _finger, "peace of mind");
2763   } else {
2764     assert(limit < _region_limit, "only way to get here");
2765     // This can happen under some pretty unusual circumstances.  An
2766     // evacuation pause empties the region underneath our feet (NTAMS
2767     // at bottom). We then do some allocation in the region (NTAMS
2768     // stays at bottom), followed by the region being used as a GC
2769     // alloc region (NTAMS will move to top() and the objects
2770     // originally below it will be grayed). All objects now marked in
2771     // the region are explicitly grayed, if below the global finger,
2772     // and we do not need in fact to scan anything else. So, we simply
2773     // set _finger to be limit to ensure that the bitmap iteration
2774     // doesn't do anything.
2775     _finger = limit;
2776   }
2777 
2778   _region_limit = limit;
2779 }
2780 
2781 void G1CMTask::giveup_current_region() {
2782   assert(_curr_region != NULL, "invariant");
2783   clear_region_fields();
2784 }
2785 
2786 void G1CMTask::clear_region_fields() {
2787   // Values for these three fields that indicate that we're not
2788   // holding on to a region.
2789   _curr_region   = NULL;
2790   _finger        = NULL;
2791   _region_limit  = NULL;
2792 }
2793 
2794 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2795   if (cm_oop_closure == NULL) {
2796     assert(_cm_oop_closure != NULL, "invariant");
2797   } else {
2798     assert(_cm_oop_closure == NULL, "invariant");
2799   }
2800   _cm_oop_closure = cm_oop_closure;
2801 }
2802 
2803 void G1CMTask::reset(G1CMBitMap* nextMarkBitMap) {
2804   guarantee(nextMarkBitMap != NULL, "invariant");
2805   _nextMarkBitMap                = nextMarkBitMap;
2806   clear_region_fields();
2807 
2808   _calls                         = 0;
2809   _elapsed_time_ms               = 0.0;
2810   _termination_time_ms           = 0.0;
2811   _termination_start_time_ms     = 0.0;
2812 }
2813 
2814 bool G1CMTask::should_exit_termination() {
2815   regular_clock_call();
2816   // This is called when we are in the termination protocol. We should
2817   // quit if, for some reason, this task wants to abort or the global
2818   // stack is not empty (this means that we can get work from it).
2819   return !_cm->mark_stack_empty() || has_aborted();
2820 }
2821 
2822 void G1CMTask::reached_limit() {
2823   assert(_words_scanned >= _words_scanned_limit ||
2824          _refs_reached >= _refs_reached_limit ,
2825          "shouldn't have been called otherwise");
2826   regular_clock_call();
2827 }
2828 
2829 void G1CMTask::regular_clock_call() {
2830   if (has_aborted()) return;
2831 
2832   // First, we need to recalculate the words scanned and refs reached
2833   // limits for the next clock call.
2834   recalculate_limits();
2835 
2836   // During the regular clock call we do the following
2837 
2838   // (1) If an overflow has been flagged, then we abort.
2839   if (_cm->has_overflown()) {
2840     set_has_aborted();
2841     return;
2842   }
2843 
2844   // If we are not concurrent (i.e. we're doing remark) we don't need
2845   // to check anything else. The other steps are only needed during
2846   // the concurrent marking phase.
2847   if (!concurrent()) return;
2848 
2849   // (2) If marking has been aborted for Full GC, then we also abort.
2850   if (_cm->has_aborted()) {
2851     set_has_aborted();
2852     return;
2853   }
2854 
2855   double curr_time_ms = os::elapsedVTime() * 1000.0;
2856 
2857   // (4) We check whether we should yield. If we have to, then we abort.
2858   if (SuspendibleThreadSet::should_yield()) {
2859     // We should yield. To do this we abort the task. The caller is
2860     // responsible for yielding.
2861     set_has_aborted();
2862     return;
2863   }
2864 
2865   // (5) We check whether we've reached our time quota. If we have,
2866   // then we abort.
2867   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2868   if (elapsed_time_ms > _time_target_ms) {
2869     set_has_aborted();
2870     _has_timed_out = true;
2871     return;
2872   }
2873 
2874   // (6) Finally, we check whether there are enough completed STAB
2875   // buffers available for processing. If there are, we abort.
2876   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2877   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2878     // we do need to process SATB buffers, we'll abort and restart
2879     // the marking task to do so
2880     set_has_aborted();
2881     return;
2882   }
2883 }
2884 
2885 void G1CMTask::recalculate_limits() {
2886   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2887   _words_scanned_limit      = _real_words_scanned_limit;
2888 
2889   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2890   _refs_reached_limit       = _real_refs_reached_limit;
2891 }
2892 
2893 void G1CMTask::decrease_limits() {
2894   // This is called when we believe that we're going to do an infrequent
2895   // operation which will increase the per byte scanned cost (i.e. move
2896   // entries to/from the global stack). It basically tries to decrease the
2897   // scanning limit so that the clock is called earlier.
2898 
2899   _words_scanned_limit = _real_words_scanned_limit -
2900     3 * words_scanned_period / 4;
2901   _refs_reached_limit  = _real_refs_reached_limit -
2902     3 * refs_reached_period / 4;
2903 }
2904 
2905 void G1CMTask::move_entries_to_global_stack() {
2906   // local array where we'll store the entries that will be popped
2907   // from the local queue
2908   oop buffer[global_stack_transfer_size];
2909 
2910   int n = 0;
2911   oop obj;
2912   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
2913     buffer[n] = obj;
2914     ++n;
2915   }
2916 
2917   if (n > 0) {
2918     // we popped at least one entry from the local queue
2919 
2920     if (!_cm->mark_stack_push(buffer, n)) {
2921       set_has_aborted();
2922     }
2923   }
2924 
2925   // this operation was quite expensive, so decrease the limits
2926   decrease_limits();
2927 }
2928 
2929 void G1CMTask::get_entries_from_global_stack() {
2930   // local array where we'll store the entries that will be popped
2931   // from the global stack.
2932   oop buffer[global_stack_transfer_size];
2933   int n;
2934   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
2935   assert(n <= global_stack_transfer_size,
2936          "we should not pop more than the given limit");
2937   if (n > 0) {
2938     // yes, we did actually pop at least one entry
2939     for (int i = 0; i < n; ++i) {
2940       bool success = _task_queue->push(buffer[i]);
2941       // We only call this when the local queue is empty or under a
2942       // given target limit. So, we do not expect this push to fail.
2943       assert(success, "invariant");
2944     }
2945   }
2946 
2947   // this operation was quite expensive, so decrease the limits
2948   decrease_limits();
2949 }
2950 
2951 void G1CMTask::drain_local_queue(bool partially) {
2952   if (has_aborted()) return;
2953 
2954   // Decide what the target size is, depending whether we're going to
2955   // drain it partially (so that other tasks can steal if they run out
2956   // of things to do) or totally (at the very end).
2957   size_t target_size;
2958   if (partially) {
2959     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2960   } else {
2961     target_size = 0;
2962   }
2963 
2964   if (_task_queue->size() > target_size) {
2965     oop obj;
2966     bool ret = _task_queue->pop_local(obj);
2967     while (ret) {
2968       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
2969       assert(!_g1h->is_on_master_free_list(
2970                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
2971 
2972       scan_object(obj);
2973 
2974       if (_task_queue->size() <= target_size || has_aborted()) {
2975         ret = false;
2976       } else {
2977         ret = _task_queue->pop_local(obj);
2978       }
2979     }
2980   }
2981 }
2982 
2983 void G1CMTask::drain_global_stack(bool partially) {
2984   if (has_aborted()) return;
2985 
2986   // We have a policy to drain the local queue before we attempt to
2987   // drain the global stack.
2988   assert(partially || _task_queue->size() == 0, "invariant");
2989 
2990   // Decide what the target size is, depending whether we're going to
2991   // drain it partially (so that other tasks can steal if they run out
2992   // of things to do) or totally (at the very end).  Notice that,
2993   // because we move entries from the global stack in chunks or
2994   // because another task might be doing the same, we might in fact
2995   // drop below the target. But, this is not a problem.
2996   size_t target_size;
2997   if (partially) {
2998     target_size = _cm->partial_mark_stack_size_target();
2999   } else {
3000     target_size = 0;
3001   }
3002 
3003   if (_cm->mark_stack_size() > target_size) {
3004     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3005       get_entries_from_global_stack();
3006       drain_local_queue(partially);
3007     }
3008   }
3009 }
3010 
3011 // SATB Queue has several assumptions on whether to call the par or
3012 // non-par versions of the methods. this is why some of the code is
3013 // replicated. We should really get rid of the single-threaded version
3014 // of the code to simplify things.
3015 void G1CMTask::drain_satb_buffers() {
3016   if (has_aborted()) return;
3017 
3018   // We set this so that the regular clock knows that we're in the
3019   // middle of draining buffers and doesn't set the abort flag when it
3020   // notices that SATB buffers are available for draining. It'd be
3021   // very counter productive if it did that. :-)
3022   _draining_satb_buffers = true;
3023 
3024   G1CMSATBBufferClosure satb_cl(this, _g1h);
3025   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3026 
3027   // This keeps claiming and applying the closure to completed buffers
3028   // until we run out of buffers or we need to abort.
3029   while (!has_aborted() &&
3030          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3031     regular_clock_call();
3032   }
3033 
3034   _draining_satb_buffers = false;
3035 
3036   assert(has_aborted() ||
3037          concurrent() ||
3038          satb_mq_set.completed_buffers_num() == 0, "invariant");
3039 
3040   // again, this was a potentially expensive operation, decrease the
3041   // limits to get the regular clock call early
3042   decrease_limits();
3043 }
3044 
3045 void G1CMTask::print_stats() {
3046   log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
3047                        _worker_id, _calls);
3048   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3049                        _elapsed_time_ms, _termination_time_ms);
3050   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3051                        _step_times_ms.num(), _step_times_ms.avg(),
3052                        _step_times_ms.sd());
3053   log_debug(gc, stats)("                    max = %1.2lfms, total = %1.2lfms",
3054                        _step_times_ms.maximum(), _step_times_ms.sum());
3055 }
3056 
3057 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3058   return _task_queues->steal(worker_id, hash_seed, obj);
3059 }
3060 
3061 /*****************************************************************************
3062 
3063     The do_marking_step(time_target_ms, ...) method is the building
3064     block of the parallel marking framework. It can be called in parallel
3065     with other invocations of do_marking_step() on different tasks
3066     (but only one per task, obviously) and concurrently with the
3067     mutator threads, or during remark, hence it eliminates the need
3068     for two versions of the code. When called during remark, it will
3069     pick up from where the task left off during the concurrent marking
3070     phase. Interestingly, tasks are also claimable during evacuation
3071     pauses too, since do_marking_step() ensures that it aborts before
3072     it needs to yield.
3073 
3074     The data structures that it uses to do marking work are the
3075     following:
3076 
3077       (1) Marking Bitmap. If there are gray objects that appear only
3078       on the bitmap (this happens either when dealing with an overflow
3079       or when the initial marking phase has simply marked the roots
3080       and didn't push them on the stack), then tasks claim heap
3081       regions whose bitmap they then scan to find gray objects. A
3082       global finger indicates where the end of the last claimed region
3083       is. A local finger indicates how far into the region a task has
3084       scanned. The two fingers are used to determine how to gray an
3085       object (i.e. whether simply marking it is OK, as it will be
3086       visited by a task in the future, or whether it needs to be also
3087       pushed on a stack).
3088 
3089       (2) Local Queue. The local queue of the task which is accessed
3090       reasonably efficiently by the task. Other tasks can steal from
3091       it when they run out of work. Throughout the marking phase, a
3092       task attempts to keep its local queue short but not totally
3093       empty, so that entries are available for stealing by other
3094       tasks. Only when there is no more work, a task will totally
3095       drain its local queue.
3096 
3097       (3) Global Mark Stack. This handles local queue overflow. During
3098       marking only sets of entries are moved between it and the local
3099       queues, as access to it requires a mutex and more fine-grain
3100       interaction with it which might cause contention. If it
3101       overflows, then the marking phase should restart and iterate
3102       over the bitmap to identify gray objects. Throughout the marking
3103       phase, tasks attempt to keep the global mark stack at a small
3104       length but not totally empty, so that entries are available for
3105       popping by other tasks. Only when there is no more work, tasks
3106       will totally drain the global mark stack.
3107 
3108       (4) SATB Buffer Queue. This is where completed SATB buffers are
3109       made available. Buffers are regularly removed from this queue
3110       and scanned for roots, so that the queue doesn't get too
3111       long. During remark, all completed buffers are processed, as
3112       well as the filled in parts of any uncompleted buffers.
3113 
3114     The do_marking_step() method tries to abort when the time target
3115     has been reached. There are a few other cases when the
3116     do_marking_step() method also aborts:
3117 
3118       (1) When the marking phase has been aborted (after a Full GC).
3119 
3120       (2) When a global overflow (on the global stack) has been
3121       triggered. Before the task aborts, it will actually sync up with
3122       the other tasks to ensure that all the marking data structures
3123       (local queues, stacks, fingers etc.)  are re-initialized so that
3124       when do_marking_step() completes, the marking phase can
3125       immediately restart.
3126 
3127       (3) When enough completed SATB buffers are available. The
3128       do_marking_step() method only tries to drain SATB buffers right
3129       at the beginning. So, if enough buffers are available, the
3130       marking step aborts and the SATB buffers are processed at
3131       the beginning of the next invocation.
3132 
3133       (4) To yield. when we have to yield then we abort and yield
3134       right at the end of do_marking_step(). This saves us from a lot
3135       of hassle as, by yielding we might allow a Full GC. If this
3136       happens then objects will be compacted underneath our feet, the
3137       heap might shrink, etc. We save checking for this by just
3138       aborting and doing the yield right at the end.
3139 
3140     From the above it follows that the do_marking_step() method should
3141     be called in a loop (or, otherwise, regularly) until it completes.
3142 
3143     If a marking step completes without its has_aborted() flag being
3144     true, it means it has completed the current marking phase (and
3145     also all other marking tasks have done so and have all synced up).
3146 
3147     A method called regular_clock_call() is invoked "regularly" (in
3148     sub ms intervals) throughout marking. It is this clock method that
3149     checks all the abort conditions which were mentioned above and
3150     decides when the task should abort. A work-based scheme is used to
3151     trigger this clock method: when the number of object words the
3152     marking phase has scanned or the number of references the marking
3153     phase has visited reach a given limit. Additional invocations to
3154     the method clock have been planted in a few other strategic places
3155     too. The initial reason for the clock method was to avoid calling
3156     vtime too regularly, as it is quite expensive. So, once it was in
3157     place, it was natural to piggy-back all the other conditions on it
3158     too and not constantly check them throughout the code.
3159 
3160     If do_termination is true then do_marking_step will enter its
3161     termination protocol.
3162 
3163     The value of is_serial must be true when do_marking_step is being
3164     called serially (i.e. by the VMThread) and do_marking_step should
3165     skip any synchronization in the termination and overflow code.
3166     Examples include the serial remark code and the serial reference
3167     processing closures.
3168 
3169     The value of is_serial must be false when do_marking_step is
3170     being called by any of the worker threads in a work gang.
3171     Examples include the concurrent marking code (CMMarkingTask),
3172     the MT remark code, and the MT reference processing closures.
3173 
3174  *****************************************************************************/
3175 
3176 void G1CMTask::do_marking_step(double time_target_ms,
3177                                bool do_termination,
3178                                bool is_serial) {
3179   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3180   assert(concurrent() == _cm->concurrent(), "they should be the same");
3181 
3182   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3183   assert(_task_queues != NULL, "invariant");
3184   assert(_task_queue != NULL, "invariant");
3185   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3186 
3187   assert(!_claimed,
3188          "only one thread should claim this task at any one time");
3189 
3190   // OK, this doesn't safeguard again all possible scenarios, as it is
3191   // possible for two threads to set the _claimed flag at the same
3192   // time. But it is only for debugging purposes anyway and it will
3193   // catch most problems.
3194   _claimed = true;
3195 
3196   _start_time_ms = os::elapsedVTime() * 1000.0;
3197 
3198   // If do_stealing is true then do_marking_step will attempt to
3199   // steal work from the other G1CMTasks. It only makes sense to
3200   // enable stealing when the termination protocol is enabled
3201   // and do_marking_step() is not being called serially.
3202   bool do_stealing = do_termination && !is_serial;
3203 
3204   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
3205   _time_target_ms = time_target_ms - diff_prediction_ms;
3206 
3207   // set up the variables that are used in the work-based scheme to
3208   // call the regular clock method
3209   _words_scanned = 0;
3210   _refs_reached  = 0;
3211   recalculate_limits();
3212 
3213   // clear all flags
3214   clear_has_aborted();
3215   _has_timed_out = false;
3216   _draining_satb_buffers = false;
3217 
3218   ++_calls;
3219 
3220   // Set up the bitmap and oop closures. Anything that uses them is
3221   // eventually called from this method, so it is OK to allocate these
3222   // statically.
3223   G1CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3224   G1CMOopClosure    cm_oop_closure(_g1h, _cm, this);
3225   set_cm_oop_closure(&cm_oop_closure);
3226 
3227   if (_cm->has_overflown()) {
3228     // This can happen if the mark stack overflows during a GC pause
3229     // and this task, after a yield point, restarts. We have to abort
3230     // as we need to get into the overflow protocol which happens
3231     // right at the end of this task.
3232     set_has_aborted();
3233   }
3234 
3235   // First drain any available SATB buffers. After this, we will not
3236   // look at SATB buffers before the next invocation of this method.
3237   // If enough completed SATB buffers are queued up, the regular clock
3238   // will abort this task so that it restarts.
3239   drain_satb_buffers();
3240   // ...then partially drain the local queue and the global stack
3241   drain_local_queue(true);
3242   drain_global_stack(true);
3243 
3244   do {
3245     if (!has_aborted() && _curr_region != NULL) {
3246       // This means that we're already holding on to a region.
3247       assert(_finger != NULL, "if region is not NULL, then the finger "
3248              "should not be NULL either");
3249 
3250       // We might have restarted this task after an evacuation pause
3251       // which might have evacuated the region we're holding on to
3252       // underneath our feet. Let's read its limit again to make sure
3253       // that we do not iterate over a region of the heap that
3254       // contains garbage (update_region_limit() will also move
3255       // _finger to the start of the region if it is found empty).
3256       update_region_limit();
3257       // We will start from _finger not from the start of the region,
3258       // as we might be restarting this task after aborting half-way
3259       // through scanning this region. In this case, _finger points to
3260       // the address where we last found a marked object. If this is a
3261       // fresh region, _finger points to start().
3262       MemRegion mr = MemRegion(_finger, _region_limit);
3263 
3264       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
3265              "humongous regions should go around loop once only");
3266 
3267       // Some special cases:
3268       // If the memory region is empty, we can just give up the region.
3269       // If the current region is humongous then we only need to check
3270       // the bitmap for the bit associated with the start of the object,
3271       // scan the object if it's live, and give up the region.
3272       // Otherwise, let's iterate over the bitmap of the part of the region
3273       // that is left.
3274       // If the iteration is successful, give up the region.
3275       if (mr.is_empty()) {
3276         giveup_current_region();
3277         regular_clock_call();
3278       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
3279         if (_nextMarkBitMap->isMarked(mr.start())) {
3280           // The object is marked - apply the closure
3281           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
3282           bitmap_closure.do_bit(offset);
3283         }
3284         // Even if this task aborted while scanning the humongous object
3285         // we can (and should) give up the current region.
3286         giveup_current_region();
3287         regular_clock_call();
3288       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3289         giveup_current_region();
3290         regular_clock_call();
3291       } else {
3292         assert(has_aborted(), "currently the only way to do so");
3293         // The only way to abort the bitmap iteration is to return
3294         // false from the do_bit() method. However, inside the
3295         // do_bit() method we move the _finger to point to the
3296         // object currently being looked at. So, if we bail out, we
3297         // have definitely set _finger to something non-null.
3298         assert(_finger != NULL, "invariant");
3299 
3300         // Region iteration was actually aborted. So now _finger
3301         // points to the address of the object we last scanned. If we
3302         // leave it there, when we restart this task, we will rescan
3303         // the object. It is easy to avoid this. We move the finger by
3304         // enough to point to the next possible object header (the
3305         // bitmap knows by how much we need to move it as it knows its
3306         // granularity).
3307         assert(_finger < _region_limit, "invariant");
3308         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
3309         // Check if bitmap iteration was aborted while scanning the last object
3310         if (new_finger >= _region_limit) {
3311           giveup_current_region();
3312         } else {
3313           move_finger_to(new_finger);
3314         }
3315       }
3316     }
3317     // At this point we have either completed iterating over the
3318     // region we were holding on to, or we have aborted.
3319 
3320     // We then partially drain the local queue and the global stack.
3321     // (Do we really need this?)
3322     drain_local_queue(true);
3323     drain_global_stack(true);
3324 
3325     // Read the note on the claim_region() method on why it might
3326     // return NULL with potentially more regions available for
3327     // claiming and why we have to check out_of_regions() to determine
3328     // whether we're done or not.
3329     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3330       // We are going to try to claim a new region. We should have
3331       // given up on the previous one.
3332       // Separated the asserts so that we know which one fires.
3333       assert(_curr_region  == NULL, "invariant");
3334       assert(_finger       == NULL, "invariant");
3335       assert(_region_limit == NULL, "invariant");
3336       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3337       if (claimed_region != NULL) {
3338         // Yes, we managed to claim one
3339         setup_for_region(claimed_region);
3340         assert(_curr_region == claimed_region, "invariant");
3341       }
3342       // It is important to call the regular clock here. It might take
3343       // a while to claim a region if, for example, we hit a large
3344       // block of empty regions. So we need to call the regular clock
3345       // method once round the loop to make sure it's called
3346       // frequently enough.
3347       regular_clock_call();
3348     }
3349 
3350     if (!has_aborted() && _curr_region == NULL) {
3351       assert(_cm->out_of_regions(),
3352              "at this point we should be out of regions");
3353     }
3354   } while ( _curr_region != NULL && !has_aborted());
3355 
3356   if (!has_aborted()) {
3357     // We cannot check whether the global stack is empty, since other
3358     // tasks might be pushing objects to it concurrently.
3359     assert(_cm->out_of_regions(),
3360            "at this point we should be out of regions");
3361     // Try to reduce the number of available SATB buffers so that
3362     // remark has less work to do.
3363     drain_satb_buffers();
3364   }
3365 
3366   // Since we've done everything else, we can now totally drain the
3367   // local queue and global stack.
3368   drain_local_queue(false);
3369   drain_global_stack(false);
3370 
3371   // Attempt at work stealing from other task's queues.
3372   if (do_stealing && !has_aborted()) {
3373     // We have not aborted. This means that we have finished all that
3374     // we could. Let's try to do some stealing...
3375 
3376     // We cannot check whether the global stack is empty, since other
3377     // tasks might be pushing objects to it concurrently.
3378     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3379            "only way to reach here");
3380     while (!has_aborted()) {
3381       oop obj;
3382       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
3383         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3384                "any stolen object should be marked");
3385         scan_object(obj);
3386 
3387         // And since we're towards the end, let's totally drain the
3388         // local queue and global stack.
3389         drain_local_queue(false);
3390         drain_global_stack(false);
3391       } else {
3392         break;
3393       }
3394     }
3395   }
3396 
3397   // We still haven't aborted. Now, let's try to get into the
3398   // termination protocol.
3399   if (do_termination && !has_aborted()) {
3400     // We cannot check whether the global stack is empty, since other
3401     // tasks might be concurrently pushing objects on it.
3402     // Separated the asserts so that we know which one fires.
3403     assert(_cm->out_of_regions(), "only way to reach here");
3404     assert(_task_queue->size() == 0, "only way to reach here");
3405     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3406 
3407     // The G1CMTask class also extends the TerminatorTerminator class,
3408     // hence its should_exit_termination() method will also decide
3409     // whether to exit the termination protocol or not.
3410     bool finished = (is_serial ||
3411                      _cm->terminator()->offer_termination(this));
3412     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3413     _termination_time_ms +=
3414       termination_end_time_ms - _termination_start_time_ms;
3415 
3416     if (finished) {
3417       // We're all done.
3418 
3419       if (_worker_id == 0) {
3420         // let's allow task 0 to do this
3421         if (concurrent()) {
3422           assert(_cm->concurrent_marking_in_progress(), "invariant");
3423           // we need to set this to false before the next
3424           // safepoint. This way we ensure that the marking phase
3425           // doesn't observe any more heap expansions.
3426           _cm->clear_concurrent_marking_in_progress();
3427         }
3428       }
3429 
3430       // We can now guarantee that the global stack is empty, since
3431       // all other tasks have finished. We separated the guarantees so
3432       // that, if a condition is false, we can immediately find out
3433       // which one.
3434       guarantee(_cm->out_of_regions(), "only way to reach here");
3435       guarantee(_cm->mark_stack_empty(), "only way to reach here");
3436       guarantee(_task_queue->size() == 0, "only way to reach here");
3437       guarantee(!_cm->has_overflown(), "only way to reach here");
3438       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3439     } else {
3440       // Apparently there's more work to do. Let's abort this task. It
3441       // will restart it and we can hopefully find more things to do.
3442       set_has_aborted();
3443     }
3444   }
3445 
3446   // Mainly for debugging purposes to make sure that a pointer to the
3447   // closure which was statically allocated in this frame doesn't
3448   // escape it by accident.
3449   set_cm_oop_closure(NULL);
3450   double end_time_ms = os::elapsedVTime() * 1000.0;
3451   double elapsed_time_ms = end_time_ms - _start_time_ms;
3452   // Update the step history.
3453   _step_times_ms.add(elapsed_time_ms);
3454 
3455   if (has_aborted()) {
3456     // The task was aborted for some reason.
3457     if (_has_timed_out) {
3458       double diff_ms = elapsed_time_ms - _time_target_ms;
3459       // Keep statistics of how well we did with respect to hitting
3460       // our target only if we actually timed out (if we aborted for
3461       // other reasons, then the results might get skewed).
3462       _marking_step_diffs_ms.add(diff_ms);
3463     }
3464 
3465     if (_cm->has_overflown()) {
3466       // This is the interesting one. We aborted because a global
3467       // overflow was raised. This means we have to restart the
3468       // marking phase and start iterating over regions. However, in
3469       // order to do this we have to make sure that all tasks stop
3470       // what they are doing and re-initialize in a safe manner. We
3471       // will achieve this with the use of two barrier sync points.
3472 
3473       if (!is_serial) {
3474         // We only need to enter the sync barrier if being called
3475         // from a parallel context
3476         _cm->enter_first_sync_barrier(_worker_id);
3477 
3478         // When we exit this sync barrier we know that all tasks have
3479         // stopped doing marking work. So, it's now safe to
3480         // re-initialize our data structures. At the end of this method,
3481         // task 0 will clear the global data structures.
3482       }
3483 
3484       // We clear the local state of this task...
3485       clear_region_fields();
3486 
3487       if (!is_serial) {
3488         // ...and enter the second barrier.
3489         _cm->enter_second_sync_barrier(_worker_id);
3490       }
3491       // At this point, if we're during the concurrent phase of
3492       // marking, everything has been re-initialized and we're
3493       // ready to restart.
3494     }
3495   }
3496 
3497   _claimed = false;
3498 }
3499 
3500 G1CMTask::G1CMTask(uint worker_id,
3501                    G1ConcurrentMark* cm,
3502                    size_t* marked_bytes,
3503                    BitMap* card_bm,
3504                    G1CMTaskQueue* task_queue,
3505                    G1CMTaskQueueSet* task_queues)
3506   : _g1h(G1CollectedHeap::heap()),
3507     _worker_id(worker_id), _cm(cm),
3508     _claimed(false),
3509     _nextMarkBitMap(NULL), _hash_seed(17),
3510     _task_queue(task_queue),
3511     _task_queues(task_queues),
3512     _cm_oop_closure(NULL),
3513     _marked_bytes_array(marked_bytes),
3514     _card_bm(card_bm) {
3515   guarantee(task_queue != NULL, "invariant");
3516   guarantee(task_queues != NULL, "invariant");
3517 
3518   _marking_step_diffs_ms.add(0.5);
3519 }
3520 
3521 // These are formatting macros that are used below to ensure
3522 // consistent formatting. The *_H_* versions are used to format the
3523 // header for a particular value and they should be kept consistent
3524 // with the corresponding macro. Also note that most of the macros add
3525 // the necessary white space (as a prefix) which makes them a bit
3526 // easier to compose.
3527 
3528 // All the output lines are prefixed with this string to be able to
3529 // identify them easily in a large log file.
3530 #define G1PPRL_LINE_PREFIX            "###"
3531 
3532 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
3533 #ifdef _LP64
3534 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
3535 #else // _LP64
3536 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
3537 #endif // _LP64
3538 
3539 // For per-region info
3540 #define G1PPRL_TYPE_FORMAT            "   %-4s"
3541 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
3542 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
3543 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
3544 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
3545 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
3546 
3547 // For summary info
3548 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
3549 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
3550 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
3551 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
3552 
3553 G1PrintRegionLivenessInfoClosure::
3554 G1PrintRegionLivenessInfoClosure(const char* phase_name)
3555   : _total_used_bytes(0), _total_capacity_bytes(0),
3556     _total_prev_live_bytes(0), _total_next_live_bytes(0),
3557     _hum_used_bytes(0), _hum_capacity_bytes(0),
3558     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
3559     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
3560   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3561   MemRegion g1_reserved = g1h->g1_reserved();
3562   double now = os::elapsedTime();
3563 
3564   // Print the header of the output.
3565   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3566   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
3567                           G1PPRL_SUM_ADDR_FORMAT("reserved")
3568                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
3569                           p2i(g1_reserved.start()), p2i(g1_reserved.end()),
3570                           HeapRegion::GrainBytes);
3571   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3572   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3573                           G1PPRL_TYPE_H_FORMAT
3574                           G1PPRL_ADDR_BASE_H_FORMAT
3575                           G1PPRL_BYTE_H_FORMAT
3576                           G1PPRL_BYTE_H_FORMAT
3577                           G1PPRL_BYTE_H_FORMAT
3578                           G1PPRL_DOUBLE_H_FORMAT
3579                           G1PPRL_BYTE_H_FORMAT
3580                           G1PPRL_BYTE_H_FORMAT,
3581                           "type", "address-range",
3582                           "used", "prev-live", "next-live", "gc-eff",
3583                           "remset", "code-roots");
3584   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3585                           G1PPRL_TYPE_H_FORMAT
3586                           G1PPRL_ADDR_BASE_H_FORMAT
3587                           G1PPRL_BYTE_H_FORMAT
3588                           G1PPRL_BYTE_H_FORMAT
3589                           G1PPRL_BYTE_H_FORMAT
3590                           G1PPRL_DOUBLE_H_FORMAT
3591                           G1PPRL_BYTE_H_FORMAT
3592                           G1PPRL_BYTE_H_FORMAT,
3593                           "", "",
3594                           "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
3595                           "(bytes)", "(bytes)");
3596 }
3597 
3598 // It takes as a parameter a reference to one of the _hum_* fields, it
3599 // deduces the corresponding value for a region in a humongous region
3600 // series (either the region size, or what's left if the _hum_* field
3601 // is < the region size), and updates the _hum_* field accordingly.
3602 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
3603   size_t bytes = 0;
3604   // The > 0 check is to deal with the prev and next live bytes which
3605   // could be 0.
3606   if (*hum_bytes > 0) {
3607     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
3608     *hum_bytes -= bytes;
3609   }
3610   return bytes;
3611 }
3612 
3613 // It deduces the values for a region in a humongous region series
3614 // from the _hum_* fields and updates those accordingly. It assumes
3615 // that that _hum_* fields have already been set up from the "starts
3616 // humongous" region and we visit the regions in address order.
3617 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
3618                                                      size_t* capacity_bytes,
3619                                                      size_t* prev_live_bytes,
3620                                                      size_t* next_live_bytes) {
3621   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
3622   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
3623   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
3624   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
3625   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
3626 }
3627 
3628 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
3629   const char* type       = r->get_type_str();
3630   HeapWord* bottom       = r->bottom();
3631   HeapWord* end          = r->end();
3632   size_t capacity_bytes  = r->capacity();
3633   size_t used_bytes      = r->used();
3634   size_t prev_live_bytes = r->live_bytes();
3635   size_t next_live_bytes = r->next_live_bytes();
3636   double gc_eff          = r->gc_efficiency();
3637   size_t remset_bytes    = r->rem_set()->mem_size();
3638   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
3639 
3640   if (r->is_starts_humongous()) {
3641     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
3642            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
3643            "they should have been zeroed after the last time we used them");
3644     // Set up the _hum_* fields.
3645     _hum_capacity_bytes  = capacity_bytes;
3646     _hum_used_bytes      = used_bytes;
3647     _hum_prev_live_bytes = prev_live_bytes;
3648     _hum_next_live_bytes = next_live_bytes;
3649     get_hum_bytes(&used_bytes, &capacity_bytes,
3650                   &prev_live_bytes, &next_live_bytes);
3651     end = bottom + HeapRegion::GrainWords;
3652   } else if (r->is_continues_humongous()) {
3653     get_hum_bytes(&used_bytes, &capacity_bytes,
3654                   &prev_live_bytes, &next_live_bytes);
3655     assert(end == bottom + HeapRegion::GrainWords, "invariant");
3656   }
3657 
3658   _total_used_bytes      += used_bytes;
3659   _total_capacity_bytes  += capacity_bytes;
3660   _total_prev_live_bytes += prev_live_bytes;
3661   _total_next_live_bytes += next_live_bytes;
3662   _total_remset_bytes    += remset_bytes;
3663   _total_strong_code_roots_bytes += strong_code_roots_bytes;
3664 
3665   // Print a line for this particular region.
3666   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3667                           G1PPRL_TYPE_FORMAT
3668                           G1PPRL_ADDR_BASE_FORMAT
3669                           G1PPRL_BYTE_FORMAT
3670                           G1PPRL_BYTE_FORMAT
3671                           G1PPRL_BYTE_FORMAT
3672                           G1PPRL_DOUBLE_FORMAT
3673                           G1PPRL_BYTE_FORMAT
3674                           G1PPRL_BYTE_FORMAT,
3675                           type, p2i(bottom), p2i(end),
3676                           used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
3677                           remset_bytes, strong_code_roots_bytes);
3678 
3679   return false;
3680 }
3681 
3682 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3683   // add static memory usages to remembered set sizes
3684   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
3685   // Print the footer of the output.
3686   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3687   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3688                          " SUMMARY"
3689                          G1PPRL_SUM_MB_FORMAT("capacity")
3690                          G1PPRL_SUM_MB_PERC_FORMAT("used")
3691                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
3692                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
3693                          G1PPRL_SUM_MB_FORMAT("remset")
3694                          G1PPRL_SUM_MB_FORMAT("code-roots"),
3695                          bytes_to_mb(_total_capacity_bytes),
3696                          bytes_to_mb(_total_used_bytes),
3697                          perc(_total_used_bytes, _total_capacity_bytes),
3698                          bytes_to_mb(_total_prev_live_bytes),
3699                          perc(_total_prev_live_bytes, _total_capacity_bytes),
3700                          bytes_to_mb(_total_next_live_bytes),
3701                          perc(_total_next_live_bytes, _total_capacity_bytes),
3702                          bytes_to_mb(_total_remset_bytes),
3703                          bytes_to_mb(_total_strong_code_roots_bytes));
3704 }
--- EOF ---