1 /*
   2  * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMark.inline.hpp"
  30 #include "gc/g1/concurrentMarkThread.inline.hpp"
  31 #include "gc/g1/g1CollectedHeap.inline.hpp"
  32 #include "gc/g1/g1CollectorPolicy.hpp"
  33 #include "gc/g1/g1CollectorState.hpp"
  34 #include "gc/g1/g1ErgoVerbose.hpp"
  35 #include "gc/g1/g1Log.hpp"
  36 #include "gc/g1/g1OopClosures.inline.hpp"
  37 #include "gc/g1/g1RemSet.hpp"
  38 #include "gc/g1/g1StringDedup.hpp"
  39 #include "gc/g1/heapRegion.inline.hpp"
  40 #include "gc/g1/heapRegionManager.inline.hpp"
  41 #include "gc/g1/heapRegionRemSet.hpp"
  42 #include "gc/g1/heapRegionSet.inline.hpp"
  43 #include "gc/g1/suspendibleThreadSet.hpp"
  44 #include "gc/shared/gcId.hpp"
  45 #include "gc/shared/gcTimer.hpp"
  46 #include "gc/shared/gcTrace.hpp"
  47 #include "gc/shared/gcTraceTime.hpp"
  48 #include "gc/shared/genOopClosures.inline.hpp"
  49 #include "gc/shared/referencePolicy.hpp"
  50 #include "gc/shared/strongRootsScope.hpp"
  51 #include "gc/shared/taskqueue.inline.hpp"
  52 #include "gc/shared/vmGCOperations.hpp"
  53 #include "memory/allocation.hpp"
  54 #include "memory/resourceArea.hpp"
  55 #include "oops/oop.inline.hpp"
  56 #include "runtime/atomic.inline.hpp"
  57 #include "runtime/handles.inline.hpp"
  58 #include "runtime/java.hpp"
  59 #include "runtime/prefetch.inline.hpp"
  60 #include "services/memTracker.hpp"
  61 
  62 // Concurrent marking bit map wrapper
  63 
  64 CMBitMapRO::CMBitMapRO(int shifter) :
  65   _bm(),
  66   _shifter(shifter) {
  67   _bmStartWord = 0;
  68   _bmWordSize = 0;
  69 }
  70 
  71 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  72                                                const HeapWord* limit) const {
  73   // First we must round addr *up* to a possible object boundary.
  74   addr = (HeapWord*)align_size_up((intptr_t)addr,
  75                                   HeapWordSize << _shifter);
  76   size_t addrOffset = heapWordToOffset(addr);
  77   if (limit == NULL) {
  78     limit = _bmStartWord + _bmWordSize;
  79   }
  80   size_t limitOffset = heapWordToOffset(limit);
  81   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  82   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  83   assert(nextAddr >= addr, "get_next_one postcondition");
  84   assert(nextAddr == limit || isMarked(nextAddr),
  85          "get_next_one postcondition");
  86   return nextAddr;
  87 }
  88 
  89 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
  90                                                  const HeapWord* limit) const {
  91   size_t addrOffset = heapWordToOffset(addr);
  92   if (limit == NULL) {
  93     limit = _bmStartWord + _bmWordSize;
  94   }
  95   size_t limitOffset = heapWordToOffset(limit);
  96   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  97   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  98   assert(nextAddr >= addr, "get_next_one postcondition");
  99   assert(nextAddr == limit || !isMarked(nextAddr),
 100          "get_next_one postcondition");
 101   return nextAddr;
 102 }
 103 
 104 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 105   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 106   return (int) (diff >> _shifter);
 107 }
 108 
 109 #ifndef PRODUCT
 110 bool CMBitMapRO::covers(MemRegion heap_rs) const {
 111   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 112   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
 113          "size inconsistency");
 114   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
 115          _bmWordSize  == heap_rs.word_size();
 116 }
 117 #endif
 118 
 119 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
 120   _bm.print_on_error(st, prefix);
 121 }
 122 
 123 size_t CMBitMap::compute_size(size_t heap_size) {
 124   return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
 125 }
 126 
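// Number of heap bytes covered by each byte of the mark bitmap: one mark
// bit per minimum-object-alignment-sized chunk of heap.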
 127 size_t CMBitMap::mark_distance() {
 128   return MinObjAlignmentInBytes * BitsPerByte;
 129 }
 130 
 131 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 132   _bmStartWord = heap.start();
 133   _bmWordSize = heap.word_size();
 134 
 135   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
 136   _bm.set_size(_bmWordSize >> _shifter);
 137 
 138   storage->set_mapping_changed_listener(&_listener);
 139 }
 140 
 141 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 142   if (zero_filled) {
 143     return;
 144   }
 145   // We need to clear the bitmap on commit, removing any existing information.
 146   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 147   _bm->clearRange(mr);
 148 }
 149 
 150 // Closure used for clearing the given mark bitmap.
 151 class ClearBitmapHRClosure : public HeapRegionClosure {
 152  private:
 153   ConcurrentMark* _cm;
 154   CMBitMap* _bitmap;
 155   bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 156  public:
 157   ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
 158     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
 159   }
 160 
 161   virtual bool doHeapRegion(HeapRegion* r) {
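    // Clear the part of the bitmap covering this region in chunks of one
    // heap megabyte, so that (when _may_yield is set) we can yield to a
    // pending safepoint between chunks.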
 162     size_t const chunk_size_in_words = M / HeapWordSize;
 163 
 164     HeapWord* cur = r->bottom();
 165     HeapWord* const end = r->end();
 166 
 167     while (cur < end) {
 168       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 169       _bitmap->clearRange(mr);
 170 
 171       cur += chunk_size_in_words;
 172 
 173       // Abort iteration if after yielding the marking has been aborted.
 174       if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
 175         return true;
 176       }
 177       // Repeat the asserts from before the start of the closure. We will do them
 178       // as asserts here to minimize their overhead on the product. However, we
 179       // will have them as guarantees at the beginning / end of the bitmap
 180       // clearing to get some checking in the product.
 181       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
 182       assert(!_may_yield || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
 183     }
 184 
 185     return false;
 186   }
 187 };
 188 
 189 class ParClearNextMarkBitmapTask : public AbstractGangTask {
 190   ClearBitmapHRClosure* _cl;
 191   HeapRegionClaimer     _hrclaimer;
 192   bool                  _suspendible; // If the task is suspendible, workers must join the STS.
 193 
 194 public:
 195   ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
  196       AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _hrclaimer(n_workers), _suspendible(suspendible) {}
 197 
 198   void work(uint worker_id) {
 199     SuspendibleThreadSetJoiner sts_join(_suspendible);
 200     G1CollectedHeap::heap()->heap_region_par_iterate(_cl, worker_id, &_hrclaimer, true);
 201   }
 202 };
 203 
 204 void CMBitMap::clearAll() {
 205   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 206   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
 207   uint n_workers = g1h->workers()->active_workers();
 208   ParClearNextMarkBitmapTask task(&cl, n_workers, false);
 209   g1h->workers()->run_task(&task);
 210   guarantee(cl.complete(), "Must have completed iteration.");
 211   return;
 212 }
 213 
 214 void CMBitMap::markRange(MemRegion mr) {
  215   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 216   assert(!mr.is_empty(), "unexpected empty region");
 217   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 218           ((HeapWord *) mr.end())),
 219          "markRange memory region end is not card aligned");
 220   // convert address range into offset range
 221   _bm.at_put_range(heapWordToOffset(mr.start()),
 222                    heapWordToOffset(mr.end()), true);
 223 }
 224 
 225 void CMBitMap::clearRange(MemRegion mr) {
  226   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 227   assert(!mr.is_empty(), "unexpected empty region");
 228   // convert address range into offset range
 229   _bm.at_put_range(heapWordToOffset(mr.start()),
 230                    heapWordToOffset(mr.end()), false);
 231 }
 232 
 233 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 234                                             HeapWord* end_addr) {
 235   HeapWord* start = getNextMarkedWordAddress(addr);
 236   start = MIN2(start, end_addr);
 237   HeapWord* end   = getNextUnmarkedWordAddress(start);
 238   end = MIN2(end, end_addr);
 239   assert(start <= end, "Consistency check");
 240   MemRegion mr(start, end);
 241   if (!mr.is_empty()) {
 242     clearRange(mr);
 243   }
 244   return mr;
 245 }
 246 
 247 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 248   _base(NULL), _cm(cm)
 249 {}
 250 
 251 bool CMMarkStack::allocate(size_t capacity) {
 252   // allocate a stack of the requisite depth
 253   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 254   if (!rs.is_reserved()) {
 255     warning("ConcurrentMark MarkStack allocation failure");
 256     return false;
 257   }
 258   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 259   if (!_virtual_space.initialize(rs, rs.size())) {
 260     warning("ConcurrentMark MarkStack backing store failure");
 261     // Release the virtual memory reserved for the marking stack
 262     rs.release();
 263     return false;
 264   }
 265   assert(_virtual_space.committed_size() == rs.size(),
 266          "Didn't reserve backing store for all of ConcurrentMark stack?");
 267   _base = (oop*) _virtual_space.low();
 268   setEmpty();
 269   _capacity = (jint) capacity;
 270   _saved_index = -1;
 271   _should_expand = false;
 272   return true;
 273 }
 274 
 275 void CMMarkStack::expand() {
  276   // Called during remark if we've overflowed the marking stack during marking.
  277   assert(isEmpty(), "stack should have been emptied while handling overflow");
 278   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 279   // Clear expansion flag
 280   _should_expand = false;
 281   if (_capacity == (jint) MarkStackSizeMax) {
 282     if (PrintGCDetails && Verbose) {
 283       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
 284     }
 285     return;
 286   }
 287   // Double capacity if possible
 288   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  289   // Do not give up the existing stack until we have managed to
  290   // get the doubled capacity that we desired.
 291   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 292                                                            sizeof(oop)));
 293   if (rs.is_reserved()) {
 294     // Release the backing store associated with old stack
 295     _virtual_space.release();
 296     // Reinitialize virtual space for new stack
 297     if (!_virtual_space.initialize(rs, rs.size())) {
 298       fatal("Not enough swap for expanded marking stack capacity");
 299     }
 300     _base = (oop*)(_virtual_space.low());
 301     _index = 0;
 302     _capacity = new_capacity;
 303   } else {
 304     if (PrintGCDetails && Verbose) {
 305       // Failed to double capacity, continue;
 306       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
 307                           SIZE_FORMAT "K to " SIZE_FORMAT "K",
 308                           _capacity / K, new_capacity / K);
 309     }
 310   }
 311 }
 312 
 313 void CMMarkStack::set_should_expand() {
  314   // If we're resetting the marking state because of a
 315   // marking stack overflow, record that we should, if
 316   // possible, expand the stack.
 317   _should_expand = _cm->has_overflown();
 318 }
 319 
 320 CMMarkStack::~CMMarkStack() {
 321   if (_base != NULL) {
 322     _base = NULL;
 323     _virtual_space.release();
 324   }
 325 }
 326 
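// Push an array of n entries onto the mark stack as one atomic operation,
// serialized by ParGCRareEvent_lock. If the entries do not fit, the stack
// is flagged as overflown and nothing is pushed.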
 327 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 328   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 329   jint start = _index;
 330   jint next_index = start + n;
 331   if (next_index > _capacity) {
 332     _overflow = true;
 333     return;
 334   }
 335   // Otherwise.
 336   _index = next_index;
 337   for (int i = 0; i < n; i++) {
 338     int ind = start + i;
 339     assert(ind < _capacity, "By overflow test above.");
 340     _base[ind] = ptr_arr[i];
 341   }
 342 }
 343 
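// Pop up to max entries from the mark stack into ptr_arr, under the same
// lock. *n is set to the number of entries actually popped; returns false
// if the stack was empty.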
 344 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 345   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 346   jint index = _index;
 347   if (index == 0) {
 348     *n = 0;
 349     return false;
 350   } else {
 351     int k = MIN2(max, index);
 352     jint  new_ind = index - k;
 353     for (int j = 0; j < k; j++) {
 354       ptr_arr[j] = _base[new_ind + j];
 355     }
 356     _index = new_ind;
 357     *n = k;
 358     return true;
 359   }
 360 }
 361 
 362 void CMMarkStack::note_start_of_gc() {
 363   assert(_saved_index == -1,
 364          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 365   _saved_index = _index;
 366 }
 367 
 368 void CMMarkStack::note_end_of_gc() {
 369   // This is intentionally a guarantee, instead of an assert. If we
 370   // accidentally add something to the mark stack during GC, it
  371   // will be a correctness issue so it's better if we crash. We'll
 372   // only check this once per GC anyway, so it won't be a performance
 373   // issue in any way.
 374   guarantee(_saved_index == _index,
 375             "saved index: %d index: %d", _saved_index, _index);
 376   _saved_index = -1;
 377 }
 378 
 379 CMRootRegions::CMRootRegions() :
 380   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 381   _should_abort(false),  _next_survivor(NULL) { }
 382 
 383 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 384   _young_list = g1h->young_list();
 385   _cm = cm;
 386 }
 387 
 388 void CMRootRegions::prepare_for_scan() {
 389   assert(!scan_in_progress(), "pre-condition");
 390 
 391   // Currently, only survivors can be root regions.
 392   assert(_next_survivor == NULL, "pre-condition");
 393   _next_survivor = _young_list->first_survivor_region();
 394   _scan_in_progress = (_next_survivor != NULL);
 395   _should_abort = false;
 396 }
 397 
 398 HeapRegion* CMRootRegions::claim_next() {
 399   if (_should_abort) {
 400     // If someone has set the should_abort flag, we return NULL to
 401     // force the caller to bail out of their loop.
 402     return NULL;
 403   }
 404 
 405   // Currently, only survivors can be root regions.
 406   HeapRegion* res = _next_survivor;
 407   if (res != NULL) {
 408     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 409     // Read it again in case it changed while we were waiting for the lock.
 410     res = _next_survivor;
 411     if (res != NULL) {
 412       if (res == _young_list->last_survivor_region()) {
 413         // We just claimed the last survivor so store NULL to indicate
 414         // that we're done.
 415         _next_survivor = NULL;
 416       } else {
 417         _next_survivor = res->get_next_young_region();
 418       }
 419     } else {
 420       // Someone else claimed the last survivor while we were trying
 421       // to take the lock so nothing else to do.
 422     }
 423   }
 424   assert(res == NULL || res->is_survivor(), "post-condition");
 425 
 426   return res;
 427 }
 428 
 429 void CMRootRegions::scan_finished() {
 430   assert(scan_in_progress(), "pre-condition");
 431 
 432   // Currently, only survivors can be root regions.
 433   if (!_should_abort) {
 434     assert(_next_survivor == NULL, "we should have claimed all survivors");
 435   }
 436   _next_survivor = NULL;
 437 
 438   {
 439     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 440     _scan_in_progress = false;
 441     RootRegionScan_lock->notify_all();
 442   }
 443 }
 444 
 445 bool CMRootRegions::wait_until_scan_finished() {
 446   if (!scan_in_progress()) return false;
 447 
 448   {
 449     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 450     while (scan_in_progress()) {
 451       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 452     }
 453   }
 454   return true;
 455 }
 456 
 457 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 458 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 459 #endif // _MSC_VER
 460 
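// Scale the number of marking threads off the number of parallel GC
// threads: roughly one marking thread per four parallel threads, and
// always at least one.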
 461 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 462   return MAX2((n_par_threads + 2) / 4, 1U);
 463 }
 464 
 465 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 466   _g1h(g1h),
 467   _markBitMap1(),
 468   _markBitMap2(),
 469   _parallel_marking_threads(0),
 470   _max_parallel_marking_threads(0),
 471   _sleep_factor(0.0),
 472   _marking_task_overhead(1.0),
 473   _cleanup_sleep_factor(0.0),
 474   _cleanup_task_overhead(1.0),
 475   _cleanup_list("Cleanup List"),
 476   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 477   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
 478             CardTableModRefBS::card_shift,
 479             false /* in_resource_area*/),
 480 
 481   _prevMarkBitMap(&_markBitMap1),
 482   _nextMarkBitMap(&_markBitMap2),
 483 
 484   _markStack(this),
 485   // _finger set in set_non_marking_state
 486 
 487   _max_worker_id(ParallelGCThreads),
 488   // _active_tasks set in set_non_marking_state
 489   // _tasks set inside the constructor
 490   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 491   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 492 
 493   _has_overflown(false),
 494   _concurrent(false),
 495   _has_aborted(false),
 496   _restart_for_overflow(false),
 497   _concurrent_marking_in_progress(false),
 498 
 499   // _verbose_level set below
 500 
 501   _init_times(),
 502   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 503   _cleanup_times(),
 504   _total_counting_time(0.0),
 505   _total_rs_scrub_time(0.0),
 506 
 507   _parallel_workers(NULL),
 508 
 509   _count_card_bitmaps(NULL),
 510   _count_marked_bytes(NULL),
 511   _completed_initialization(false) {
 512   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 513   if (verbose_level < no_verbose) {
 514     verbose_level = no_verbose;
 515   }
 516   if (verbose_level > high_verbose) {
 517     verbose_level = high_verbose;
 518   }
 519   _verbose_level = verbose_level;
 520 
 521   if (verbose_low()) {
 522     gclog_or_tty->print_cr("[global] init, heap start = " PTR_FORMAT ", "
 523                            "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
 524   }
 525 
 526   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 527   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 528 
 529   // Create & start a ConcurrentMark thread.
 530   _cmThread = new ConcurrentMarkThread(this);
 531   assert(cmThread() != NULL, "CM Thread should have been created");
 532   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 533   if (_cmThread->osthread() == NULL) {
 534       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 535   }
 536 
 537   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 538   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 539   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 540 
 541   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 542   satb_qs.set_buffer_size(G1SATBBufferSize);
 543 
 544   _root_regions.init(_g1h, this);
 545 
 546   if (ConcGCThreads > ParallelGCThreads) {
 547     warning("Can't have more ConcGCThreads (%u) "
 548             "than ParallelGCThreads (%u).",
 549             ConcGCThreads, ParallelGCThreads);
 550     return;
 551   }
 552   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 553     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 554     // if both are set
 555     _sleep_factor             = 0.0;
 556     _marking_task_overhead    = 1.0;
 557   } else if (G1MarkingOverheadPercent > 0) {
 558     // We will calculate the number of parallel marking threads based
 559     // on a target overhead with respect to the soft real-time goal
 560     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 561     double overall_cm_overhead =
 562       (double) MaxGCPauseMillis * marking_overhead /
 563       (double) GCPauseIntervalMillis;
 564     double cpu_ratio = 1.0 / (double) os::processor_count();
 565     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 566     double marking_task_overhead =
 567       overall_cm_overhead / marking_thread_num *
 568                                               (double) os::processor_count();
 569     double sleep_factor =
 570                        (1.0 - marking_task_overhead) / marking_task_overhead;
 571 
 572     FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
 573     _sleep_factor             = sleep_factor;
 574     _marking_task_overhead    = marking_task_overhead;
 575   } else {
 576     // Calculate the number of parallel marking threads by scaling
 577     // the number of parallel GC threads.
 578     uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
 579     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 580     _sleep_factor             = 0.0;
 581     _marking_task_overhead    = 1.0;
 582   }
 583 
 584   assert(ConcGCThreads > 0, "Should have been set");
 585   _parallel_marking_threads = ConcGCThreads;
 586   _max_parallel_marking_threads = _parallel_marking_threads;
 587 
 588   if (parallel_marking_threads() > 1) {
 589     _cleanup_task_overhead = 1.0;
 590   } else {
 591     _cleanup_task_overhead = marking_task_overhead();
 592   }
 593   _cleanup_sleep_factor =
 594                    (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 595 
 596 #if 0
 597   gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 598   gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 599   gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 600   gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 601   gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 602 #endif
 603 
 604   _parallel_workers = new WorkGang("G1 Marker",
 605        _max_parallel_marking_threads, false, true);
 606   if (_parallel_workers == NULL) {
 607     vm_exit_during_initialization("Failed necessary allocation.");
 608   } else {
 609     _parallel_workers->initialize_workers();
 610   }
 611 
 612   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 613     size_t mark_stack_size =
 614       MIN2(MarkStackSizeMax,
 615           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 616     // Verify that the calculated value for MarkStackSize is in range.
 617     // It would be nice to use the private utility routine from Arguments.
 618     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 619       warning("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 620               "must be between 1 and " SIZE_FORMAT,
 621               mark_stack_size, MarkStackSizeMax);
 622       return;
 623     }
 624     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 625   } else {
 626     // Verify MarkStackSize is in range.
 627     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 628       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 629         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 630           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 631                   "must be between 1 and " SIZE_FORMAT,
 632                   MarkStackSize, MarkStackSizeMax);
 633           return;
 634         }
 635       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 636         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 637           warning("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 638                   " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 639                   MarkStackSize, MarkStackSizeMax);
 640           return;
 641         }
 642       }
 643     }
 644   }
 645 
 646   if (!_markStack.allocate(MarkStackSize)) {
 647     warning("Failed to allocate CM marking stack");
 648     return;
 649   }
 650 
 651   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 652   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 653 
 654   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 655   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 656 
 657   BitMap::idx_t card_bm_size = _card_bm.size();
 658 
 659   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 660   _active_tasks = _max_worker_id;
 661 
 662   uint max_regions = _g1h->max_regions();
 663   for (uint i = 0; i < _max_worker_id; ++i) {
 664     CMTaskQueue* task_queue = new CMTaskQueue();
 665     task_queue->initialize();
 666     _task_queues->register_queue(i, task_queue);
 667 
 668     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 669     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 670 
 671     _tasks[i] = new CMTask(i, this,
 672                            _count_marked_bytes[i],
 673                            &_count_card_bitmaps[i],
 674                            task_queue, _task_queues);
 675 
 676     _accum_task_vtime[i] = 0.0;
 677   }
 678 
 679   // Calculate the card number for the bottom of the heap. Used
 680   // in biasing indexes into the accounting card bitmaps.
 681   _heap_bottom_card_num =
 682     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 683                                 CardTableModRefBS::card_shift);
 684 
 685   // Clear all the liveness counting data
 686   clear_all_count_data();
 687 
 688   // so that the call below can read a sensible value
 689   _heap_start = g1h->reserved_region().start();
 690   set_non_marking_state();
 691   _completed_initialization = true;
 692 }
 693 
 694 void ConcurrentMark::reset() {
 695   // Starting values for these two. This should be called in a STW
 696   // phase.
 697   MemRegion reserved = _g1h->g1_reserved();
 698   _heap_start = reserved.start();
 699   _heap_end   = reserved.end();
 700 
 701   // Separated the asserts so that we know which one fires.
 702   assert(_heap_start != NULL, "heap bounds should look ok");
 703   assert(_heap_end != NULL, "heap bounds should look ok");
 704   assert(_heap_start < _heap_end, "heap bounds should look ok");
 705 
 706   // Reset all the marking data structures and any necessary flags
 707   reset_marking_state();
 708 
 709   if (verbose_low()) {
 710     gclog_or_tty->print_cr("[global] resetting");
 711   }
 712 
 713   // We do reset all of them, since different phases will use
  714   // a different number of active threads. So, it's easiest to have all
 715   // of them ready.
 716   for (uint i = 0; i < _max_worker_id; ++i) {
 717     _tasks[i]->reset(_nextMarkBitMap);
 718   }
 719 
  720   // We need this to make sure that the flag is on during the evacuation
  721   // pause with the initial mark piggy-backed on it.
 722   set_concurrent_marking_in_progress();
 723 }
 724 
 725 
 726 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 727   _markStack.set_should_expand();
 728   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 729   if (clear_overflow) {
 730     clear_has_overflown();
 731   } else {
 732     assert(has_overflown(), "pre-condition");
 733   }
 734   _finger = _heap_start;
 735 
 736   for (uint i = 0; i < _max_worker_id; ++i) {
 737     CMTaskQueue* queue = _task_queues->queue(i);
 738     queue->set_empty();
 739   }
 740 }
 741 
 742 void ConcurrentMark::set_concurrency(uint active_tasks) {
 743   assert(active_tasks <= _max_worker_id, "we should not have more");
 744 
 745   _active_tasks = active_tasks;
 746   // Need to update the three data structures below according to the
 747   // number of active threads for this phase.
 748   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 749   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 750   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 751 }
 752 
 753 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 754   set_concurrency(active_tasks);
 755 
 756   _concurrent = concurrent;
 757   // We propagate this to all tasks, not just the active ones.
 758   for (uint i = 0; i < _max_worker_id; ++i)
 759     _tasks[i]->set_concurrent(concurrent);
 760 
 761   if (concurrent) {
 762     set_concurrent_marking_in_progress();
 763   } else {
 764     // We currently assume that the concurrent flag has been set to
 765     // false before we start remark. At this point we should also be
 766     // in a STW phase.
 767     assert(!concurrent_marking_in_progress(), "invariant");
 768     assert(out_of_regions(),
 769            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 770            p2i(_finger), p2i(_heap_end));
 771   }
 772 }
 773 
 774 void ConcurrentMark::set_non_marking_state() {
 775   // We set the global marking state to some default values when we're
 776   // not doing marking.
 777   reset_marking_state();
 778   _active_tasks = 0;
 779   clear_concurrent_marking_in_progress();
 780 }
 781 
 782 ConcurrentMark::~ConcurrentMark() {
 783   // The ConcurrentMark instance is never freed.
 784   ShouldNotReachHere();
 785 }
 786 
 787 void ConcurrentMark::clearNextBitmap() {
 788   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 789 
  790   // Make sure that the concurrent mark thread appears to still be in
 791   // the current cycle.
 792   guarantee(cmThread()->during_cycle(), "invariant");
 793 
 794   // We are finishing up the current cycle by clearing the next
 795   // marking bitmap and getting it ready for the next cycle. During
 796   // this time no other cycle can start. So, let's make sure that this
 797   // is the case.
 798   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 799 
 800   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
 801   ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
 802   _parallel_workers->run_task(&task);
 803 
 804   // Clear the liveness counting data. If the marking has been aborted, the abort()
 805   // call already did that.
 806   if (cl.complete()) {
 807     clear_all_count_data();
 808   }
 809 
 810   // Repeat the asserts from above.
 811   guarantee(cmThread()->during_cycle(), "invariant");
 812   guarantee(!g1h->collector_state()->mark_in_progress(), "invariant");
 813 }
 814 
 815 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 816   CMBitMap* _bitmap;
 817   bool _error;
 818  public:
 819   CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
 820   }
 821 
 822   virtual bool doHeapRegion(HeapRegion* r) {
  823     // This closure can be called concurrently with the mutator, so we must make sure
 824     // that the result of the getNextMarkedWordAddress() call is compared to the
 825     // value passed to it as limit to detect any found bits.
 826     HeapWord* end = r->end();
 827     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 828   }
 829 };
 830 
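// Returns true if no bits are set in the next mark bitmap for any region,
// i.e. the whole bitmap is clear.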
 831 bool ConcurrentMark::nextMarkBitmapIsClear() {
 832   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 833   _g1h->heap_region_iterate(&cl);
 834   return cl.complete();
 835 }
 836 
 837 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 838 public:
 839   bool doHeapRegion(HeapRegion* r) {
 840     r->note_start_of_marking();
 841     return false;
 842   }
 843 };
 844 
 845 void ConcurrentMark::checkpointRootsInitialPre() {
 846   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 847   G1CollectorPolicy* g1p = g1h->g1_policy();
 848 
 849   _has_aborted = false;
 850 
 851   // Initialize marking structures. This has to be done in a STW phase.
 852   reset();
 853 
 854   // For each region note start of marking.
 855   NoteStartOfMarkHRClosure startcl;
 856   g1h->heap_region_iterate(&startcl);
 857 }
 858 
 859 
 860 void ConcurrentMark::checkpointRootsInitialPost() {
 861   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 862 
 863   // If we force an overflow during remark, the remark operation will
 864   // actually abort and we'll restart concurrent marking. If we always
 865   // force an overflow during remark we'll never actually complete the
 866   // marking phase. So, we initialize this here, at the start of the
  867   // cycle, so that the remaining overflow number will decrease at
 868   // every remark and we'll eventually not need to cause one.
 869   force_overflow_stw()->init();
 870 
 871   // Start Concurrent Marking weak-reference discovery.
 872   ReferenceProcessor* rp = g1h->ref_processor_cm();
 873   // enable ("weak") refs discovery
 874   rp->enable_discovery();
 875   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 876 
 877   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  878   // This is the start of the marking cycle; we expect all
  879   // threads to have SATB queues with active set to false.
 880   satb_mq_set.set_active_all_threads(true, /* new active value */
 881                                      false /* expected_active */);
 882 
 883   _root_regions.prepare_for_scan();
 884 
 885   // update_g1_committed() will be called at the end of an evac pause
 886   // when marking is on. So, it's also called at the end of the
 887   // initial-mark pause to update the heap end, if the heap expands
 888   // during it. No need to call it here.
 889 }
 890 
 891 /*
 892  * Notice that in the next two methods, we actually leave the STS
 893  * during the barrier sync and join it immediately afterwards. If we
 894  * do not do this, the following deadlock can occur: one thread could
 895  * be in the barrier sync code, waiting for the other thread to also
 896  * sync up, whereas another one could be trying to yield, while also
 897  * waiting for the other threads to sync up too.
 898  *
 899  * Note, however, that this code is also used during remark and in
 900  * this case we should not attempt to leave / enter the STS, otherwise
 901  * we'll either hit an assert (debug / fastdebug) or deadlock
 902  * (product). So we should only leave / enter the STS if we are
 903  * operating concurrently.
 904  *
 905  * Because the thread that does the sync barrier has left the STS, it
  906  * is possible for it to be suspended for a Full GC or for an evacuation
  907  * pause to occur. This is actually safe, since entering the sync
  908  * barrier is one of the last things do_marking_step() does, and it
 909  * doesn't manipulate any data structures afterwards.
 910  */
 911 
 912 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 913   bool barrier_aborted;
 914 
 915   if (verbose_low()) {
 916     gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
 917   }
 918 
 919   {
 920     SuspendibleThreadSetLeaver sts_leave(concurrent());
 921     barrier_aborted = !_first_overflow_barrier_sync.enter();
 922   }
 923 
 924   // at this point everyone should have synced up and not be doing any
 925   // more work
 926 
 927   if (verbose_low()) {
 928     if (barrier_aborted) {
 929       gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
 930     } else {
 931       gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
 932     }
 933   }
 934 
 935   if (barrier_aborted) {
 936     // If the barrier aborted we ignore the overflow condition and
 937     // just abort the whole marking phase as quickly as possible.
 938     return;
 939   }
 940 
 941   // If we're executing the concurrent phase of marking, reset the marking
 942   // state; otherwise the marking state is reset after reference processing,
 943   // during the remark pause.
 944   // If we reset here as a result of an overflow during the remark we will
 945   // see assertion failures from any subsequent set_concurrency_and_phase()
 946   // calls.
 947   if (concurrent()) {
  948     // let the task associated with worker 0 do this
 949     if (worker_id == 0) {
 950       // task 0 is responsible for clearing the global data structures
 951       // We should be here because of an overflow. During STW we should
 952       // not clear the overflow flag since we rely on it being true when
 953       // we exit this method to abort the pause and restart concurrent
 954       // marking.
 955       reset_marking_state(true /* clear_overflow */);
 956       force_overflow()->update();
 957 
 958       if (G1Log::fine()) {
 959         gclog_or_tty->gclog_stamp();
 960         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 961       }
 962     }
 963   }
 964 
  965   // after this, each task should reset its own data structures and
 966   // then go into the second barrier
 967 }
 968 
 969 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 970   bool barrier_aborted;
 971 
 972   if (verbose_low()) {
 973     gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
 974   }
 975 
 976   {
 977     SuspendibleThreadSetLeaver sts_leave(concurrent());
 978     barrier_aborted = !_second_overflow_barrier_sync.enter();
 979   }
 980 
 981   // at this point everything should be re-initialized and ready to go
 982 
 983   if (verbose_low()) {
 984     if (barrier_aborted) {
 985       gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
 986     } else {
 987       gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
 988     }
 989   }
 990 }
 991 
 992 #ifndef PRODUCT
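// Debug-only (non-product) support for artificially forcing the marking to
// overflow: G1ConcMarkForceOverflow controls how many forced overflows are
// requested via should_force(), to exercise the overflow handling code.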
 993 void ForceOverflowSettings::init() {
 994   _num_remaining = G1ConcMarkForceOverflow;
 995   _force = false;
 996   update();
 997 }
 998 
 999 void ForceOverflowSettings::update() {
1000   if (_num_remaining > 0) {
1001     _num_remaining -= 1;
1002     _force = true;
1003   } else {
1004     _force = false;
1005   }
1006 }
1007 
1008 bool ForceOverflowSettings::should_force() {
1009   if (_force) {
1010     _force = false;
1011     return true;
1012   } else {
1013     return false;
1014   }
1015 }
1016 #endif // !PRODUCT
1017 
1018 class CMConcurrentMarkingTask: public AbstractGangTask {
1019 private:
1020   ConcurrentMark*       _cm;
1021   ConcurrentMarkThread* _cmt;
1022 
1023 public:
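  // Repeatedly invoke do_marking_step() for this worker until marking
  // completes or is aborted, sleeping between aborted steps (scaled by the
  // sleep factor) to limit concurrent marking overhead.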
1024   void work(uint worker_id) {
1025     assert(Thread::current()->is_ConcurrentGC_thread(),
1026            "this should only be done by a conc GC thread");
1027     ResourceMark rm;
1028 
1029     double start_vtime = os::elapsedVTime();
1030 
1031     {
1032       SuspendibleThreadSetJoiner sts_join;
1033 
1034       assert(worker_id < _cm->active_tasks(), "invariant");
1035       CMTask* the_task = _cm->task(worker_id);
1036       the_task->record_start_time();
1037       if (!_cm->has_aborted()) {
1038         do {
1039           double start_vtime_sec = os::elapsedVTime();
1040           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1041 
1042           the_task->do_marking_step(mark_step_duration_ms,
1043                                     true  /* do_termination */,
1044                                     false /* is_serial*/);
1045 
1046           double end_vtime_sec = os::elapsedVTime();
1047           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1048           _cm->clear_has_overflown();
1049 
1050           _cm->do_yield_check(worker_id);
1051 
1052           jlong sleep_time_ms;
1053           if (!_cm->has_aborted() && the_task->has_aborted()) {
1054             sleep_time_ms =
1055               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1056             {
1057               SuspendibleThreadSetLeaver sts_leave;
1058               os::sleep(Thread::current(), sleep_time_ms, false);
1059             }
1060           }
1061         } while (!_cm->has_aborted() && the_task->has_aborted());
1062       }
1063       the_task->record_end_time();
1064       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1065     }
1066 
1067     double end_vtime = os::elapsedVTime();
1068     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1069   }
1070 
1071   CMConcurrentMarkingTask(ConcurrentMark* cm,
1072                           ConcurrentMarkThread* cmt) :
1073       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1074 
1075   ~CMConcurrentMarkingTask() { }
1076 };
1077 
1078 // Calculates the number of active workers for a concurrent
1079 // phase.
1080 uint ConcurrentMark::calc_parallel_marking_threads() {
1081   uint n_conc_workers = 0;
1082   if (!UseDynamicNumberOfGCThreads ||
1083       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1084        !ForceDynamicNumberOfGCThreads)) {
1085     n_conc_workers = max_parallel_marking_threads();
1086   } else {
1087     n_conc_workers =
1088       AdaptiveSizePolicy::calc_default_active_workers(
1089                                    max_parallel_marking_threads(),
1090                                    1, /* Minimum workers */
1091                                    parallel_marking_threads(),
1092                                    Threads::number_of_non_daemon_threads());
1093     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1094     // that scaling has already gone into "_max_parallel_marking_threads".
1095   }
1096   assert(n_conc_workers > 0, "Always need at least 1");
1097   return n_conc_workers;
1098 }
1099 
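// Scan a single root region, iterating over all objects between the
// region's bottom and top and applying G1RootRegionScanClosure to the
// references they contain.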
1100 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1101   // Currently, only survivors can be root regions.
1102   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1103   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1104 
1105   const uintx interval = PrefetchScanIntervalInBytes;
1106   HeapWord* curr = hr->bottom();
1107   const HeapWord* end = hr->top();
1108   while (curr < end) {
1109     Prefetch::read(curr, interval);
1110     oop obj = oop(curr);
1111     int size = obj->oop_iterate_size(&cl);
1112     assert(size == obj->size(), "sanity");
1113     curr += size;
1114   }
1115 }
1116 
1117 class CMRootRegionScanTask : public AbstractGangTask {
1118 private:
1119   ConcurrentMark* _cm;
1120 
1121 public:
1122   CMRootRegionScanTask(ConcurrentMark* cm) :
1123     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1124 
1125   void work(uint worker_id) {
1126     assert(Thread::current()->is_ConcurrentGC_thread(),
1127            "this should only be done by a conc GC thread");
1128 
1129     CMRootRegions* root_regions = _cm->root_regions();
1130     HeapRegion* hr = root_regions->claim_next();
1131     while (hr != NULL) {
1132       _cm->scanRootRegion(hr, worker_id);
1133       hr = root_regions->claim_next();
1134     }
1135   }
1136 };
1137 
1138 void ConcurrentMark::scanRootRegions() {
1139   double scan_start = os::elapsedTime();
1140 
1141   // Start of concurrent marking.
1142   ClassLoaderDataGraph::clear_claimed_marks();
1143 
1144   // scan_in_progress() will have been set to true only if there was
1145   // at least one root region to scan. So, if it's false, we
1146   // should not attempt to do any further work.
1147   if (root_regions()->scan_in_progress()) {
1148     if (G1Log::fine()) {
1149       gclog_or_tty->gclog_stamp();
1150       gclog_or_tty->print_cr("[GC concurrent-root-region-scan-start]");
1151     }
1152 
1153     _parallel_marking_threads = calc_parallel_marking_threads();
1154     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1155            "Maximum number of marking threads exceeded");
1156     uint active_workers = MAX2(1U, parallel_marking_threads());
1157 
1158     CMRootRegionScanTask task(this);
1159     _parallel_workers->set_active_workers(active_workers);
1160     _parallel_workers->run_task(&task);
1161 
1162     if (G1Log::fine()) {
1163       gclog_or_tty->gclog_stamp();
1164       gclog_or_tty->print_cr("[GC concurrent-root-region-scan-end, %1.7lf secs]", os::elapsedTime() - scan_start);
1165     }
1166 
1167     // It's possible that has_aborted() is true here without actually
1168     // aborting the survivor scan earlier. This is OK as it's
1169     // mainly used for sanity checking.
1170     root_regions()->scan_finished();
1171   }
1172 }
1173 
1174 void ConcurrentMark::markFromRoots() {
1175   // we might be tempted to assert that:
1176   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1177   //        "inconsistent argument?");
1178   // However that wouldn't be right, because it's possible that
 1179   // a safepoint is indeed in progress: a younger generation
 1180   // stop-the-world GC can happen even as we mark in this generation.
1181 
1182   _restart_for_overflow = false;
1183   force_overflow_conc()->init();
1184 
1185   // _g1h has _n_par_threads
1186   _parallel_marking_threads = calc_parallel_marking_threads();
1187   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1188     "Maximum number of marking threads exceeded");
1189 
1190   uint active_workers = MAX2(1U, parallel_marking_threads());
1191   assert(active_workers > 0, "Should have been set");
1192 
1193   // Parallel task terminator is set in "set_concurrency_and_phase()"
1194   set_concurrency_and_phase(active_workers, true /* concurrent */);
1195 
1196   CMConcurrentMarkingTask markingTask(this, cmThread());
1197   _parallel_workers->set_active_workers(active_workers);
1198   _parallel_workers->run_task(&markingTask);
1199   print_stats();
1200 }
1201 
1202 // Helper class to get rid of some boilerplate code.
1203 class G1CMTraceTime : public StackObj {
1204   GCTraceTimeImpl _gc_trace_time;
1205   static bool doit_and_prepend(bool doit) {
1206     if (doit) {
1207       gclog_or_tty->put(' ');
1208     }
1209     return doit;
1210   }
1211 
1212  public:
1213   G1CMTraceTime(const char* title, bool doit)
1214     : _gc_trace_time(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm()) {
1215   }
1216 };
1217 
1218 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1219   // world is stopped at this checkpoint
1220   assert(SafepointSynchronize::is_at_safepoint(),
1221          "world should be stopped");
1222 
1223   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1224 
1225   // If a full collection has happened, we shouldn't do this.
1226   if (has_aborted()) {
1227     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1228     return;
1229   }
1230 
1231   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1232 
1233   if (VerifyDuringGC) {
1234     HandleMark hm;  // handle scope
1235     g1h->prepare_for_verify();
1236     Universe::verify(VerifyOption_G1UsePrevMarking,
1237                      " VerifyDuringGC:(before)");
1238   }
1239   g1h->check_bitmaps("Remark Start");
1240 
1241   G1CollectorPolicy* g1p = g1h->g1_policy();
1242   g1p->record_concurrent_mark_remark_start();
1243 
1244   double start = os::elapsedTime();
1245 
1246   checkpointRootsFinalWork();
1247 
1248   double mark_work_end = os::elapsedTime();
1249 
1250   weakRefsWork(clear_all_soft_refs);
1251 
1252   if (has_overflown()) {
1253     // Oops.  We overflowed.  Restart concurrent marking.
1254     _restart_for_overflow = true;
1255     if (G1TraceMarkStackOverflow) {
1256       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1257     }
1258 
1259     // Verify the heap w.r.t. the previous marking bitmap.
1260     if (VerifyDuringGC) {
1261       HandleMark hm;  // handle scope
1262       g1h->prepare_for_verify();
1263       Universe::verify(VerifyOption_G1UsePrevMarking,
1264                        " VerifyDuringGC:(overflow)");
1265     }
1266 
1267     // Clear the marking state because we will be restarting
1268     // marking due to overflowing the global mark stack.
1269     reset_marking_state();
1270   } else {
1271     {
1272       G1CMTraceTime trace("GC aggregate-data", G1Log::finer());
1273 
1274       // Aggregate the per-task counting data that we have accumulated
1275       // while marking.
1276       aggregate_count_data();
1277     }
1278 
1279     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1280     // We're done with marking.
 1281     // This is the end of the marking cycle; we expect all
 1282     // threads to have SATB queues with active set to true.
1283     satb_mq_set.set_active_all_threads(false, /* new active value */
1284                                        true /* expected_active */);
1285 
1286     if (VerifyDuringGC) {
1287       HandleMark hm;  // handle scope
1288       g1h->prepare_for_verify();
1289       Universe::verify(VerifyOption_G1UseNextMarking,
1290                        " VerifyDuringGC:(after)");
1291     }
1292     g1h->check_bitmaps("Remark End");
1293     assert(!restart_for_overflow(), "sanity");
1294     // Completely reset the marking state since marking completed
1295     set_non_marking_state();
1296   }
1297 
1298   // Expand the marking stack, if we have to and if we can.
1299   if (_markStack.should_expand()) {
1300     _markStack.expand();
1301   }
1302 
1303   // Statistics
1304   double now = os::elapsedTime();
1305   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1306   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1307   _remark_times.add((now - start) * 1000.0);
1308 
1309   g1p->record_concurrent_mark_remark_end();
1310 
1311   G1CMIsAliveClosure is_alive(g1h);
1312   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1313 }
1314 
1315 // Base class of the closures that finalize and verify the
1316 // liveness counting data.
1317 class CMCountDataClosureBase: public HeapRegionClosure {
1318 protected:
1319   G1CollectedHeap* _g1h;
1320   ConcurrentMark* _cm;
1321   CardTableModRefBS* _ct_bs;
1322 
1323   BitMap* _region_bm;
1324   BitMap* _card_bm;
1325 
1326   // Takes a region that's not empty (i.e., it has at least one
 1327   // live object in it) and sets its corresponding bit on the region
1328   // bitmap to 1.
1329   void set_bit_for_region(HeapRegion* hr) {
1330     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1331     _region_bm->par_at_put(index, true);
1332   }
1333 
1334 public:
1335   CMCountDataClosureBase(G1CollectedHeap* g1h,
1336                          BitMap* region_bm, BitMap* card_bm):
1337     _g1h(g1h), _cm(g1h->concurrent_mark()),
1338     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
1339     _region_bm(region_bm), _card_bm(card_bm) { }
1340 };
1341 
1342 // Closure that calculates the # live objects per region. Used
1343 // for verification purposes during the cleanup pause.
1344 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1345   CMBitMapRO* _bm;
1346   size_t _region_marked_bytes;
1347 
1348 public:
1349   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1350                          BitMap* region_bm, BitMap* card_bm) :
1351     CMCountDataClosureBase(g1h, region_bm, card_bm),
1352     _bm(bm), _region_marked_bytes(0) { }
1353 
1354   bool doHeapRegion(HeapRegion* hr) {
1355     HeapWord* ntams = hr->next_top_at_mark_start();
1356     HeapWord* start = hr->bottom();
1357 
1358     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1359            "Preconditions not met - "
1360            "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
1361            p2i(start), p2i(ntams), p2i(hr->end()));
1362 
1363     // Find the first marked object at or after "start".
1364     start = _bm->getNextMarkedWordAddress(start, ntams);
1365 
1366     size_t marked_bytes = 0;
1367 
1368     while (start < ntams) {
1369       oop obj = oop(start);
1370       int obj_sz = obj->size();
1371       HeapWord* obj_end = start + obj_sz;
1372 
1373       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1374       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1375 
 1376       // Note: if we're looking at the last region in the heap - obj_end
 1377       // could actually be just beyond the end of the heap; end_idx
1378       // will then correspond to a (non-existent) card that is also
1379       // just beyond the heap.
1380       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1381         // end of object is not card aligned - increment to cover
1382         // all the cards spanned by the object
1383         end_idx += 1;
1384       }
1385 
1386       // Set the bits in the card BM for the cards spanned by this object.
1387       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1388 
1389       // Add the size of this object to the number of marked bytes.
1390       marked_bytes += (size_t)obj_sz * HeapWordSize;
1391 
1392       if (obj_end > hr->end()) {
1393         break;
1394       }
1395       // Find the next marked object after this one.
1396       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1397     }
1398 
1399     // Mark the allocated-since-marking portion...
1400     HeapWord* top = hr->top();
1401     if (ntams < top) {
1402       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1403       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1404 
 1405       // Note: if we're looking at the last region in the heap - top
 1406       // could actually be just beyond the end of the heap; end_idx
1407       // will then correspond to a (non-existent) card that is also
1408       // just beyond the heap.
1409       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
 1410         // top is not card aligned - increment to cover all the
 1411         // cards spanned by the allocated-since-marking portion
1412         end_idx += 1;
1413       }
1414       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1415 
1416       // This definitely means the region has live objects.
1417       set_bit_for_region(hr);
1418     }
1419 
1420     // Update the live region bitmap.
1421     if (marked_bytes > 0) {
1422       set_bit_for_region(hr);
1423     }
1424 
1425     // Set the marked bytes for the current region so that
1426     // it can be queried by a calling verification routine
1427     _region_marked_bytes = marked_bytes;
1428 
1429     return false;
1430   }
1431 
1432   size_t region_marked_bytes() const { return _region_marked_bytes; }
1433 };
1434 
1435 // Heap region closure used for verifying the counting data
1436 // that was accumulated concurrently and aggregated during
1437 // the remark pause. This closure is applied to the heap
1438 // regions during the STW cleanup pause.
1439 
1440 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1441   G1CollectedHeap* _g1h;
1442   ConcurrentMark* _cm;
1443   CalcLiveObjectsClosure _calc_cl;
1444   BitMap* _region_bm;   // Region BM to be verified
1445   BitMap* _card_bm;     // Card BM to be verified
1446   bool _verbose;        // verbose output?
1447 
1448   BitMap* _exp_region_bm; // Expected Region BM values
1449   BitMap* _exp_card_bm;   // Expected card BM values
1450 
1451   int _failures;
1452 
1453 public:
1454   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1455                                 BitMap* region_bm,
1456                                 BitMap* card_bm,
1457                                 BitMap* exp_region_bm,
1458                                 BitMap* exp_card_bm,
1459                                 bool verbose) :
1460     _g1h(g1h), _cm(g1h->concurrent_mark()),
1461     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1462     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1463     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1464     _failures(0) { }
1465 
1466   int failures() const { return _failures; }
1467 
1468   bool doHeapRegion(HeapRegion* hr) {
1469     int failures = 0;
1470 
1471     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1472     // this region and set the corresponding bits in the expected region
1473     // and card bitmaps.
1474     bool res = _calc_cl.doHeapRegion(hr);
1475     assert(res == false, "should be continuing");
1476 
1477     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1478                     Mutex::_no_safepoint_check_flag);
1479 
1480     // Verify the marked bytes for this region.
1481     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1482     size_t act_marked_bytes = hr->next_marked_bytes();
1483 
1484     // We're not OK if expected marked bytes > actual marked bytes. It means
1485     // we have missed accounting for some objects during the actual marking.
1486     if (exp_marked_bytes > act_marked_bytes) {
1487       if (_verbose) {
1488         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1489                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1490                                hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
1491       }
1492       failures += 1;
1493     }
1494 
1495     // Verify the bit, for this region, in the actual and expected
1496     // (which was just calculated) region bit maps.
1497     // We're not OK if the bit in the calculated expected region
1498     // bitmap is set and the bit in the actual region bitmap is not.
1499     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1500 
1501     bool expected = _exp_region_bm->at(index);
1502     bool actual = _region_bm->at(index);
1503     if (expected && !actual) {
1504       if (_verbose) {
1505         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1506                                "expected: %s, actual: %s",
1507                                hr->hrm_index(),
1508                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1509       }
1510       failures += 1;
1511     }
1512 
1513     // Verify that the card bit maps for the cards spanned by the current
1514     // region match. We have an error if we have a set bit in the expected
1515     // bit map and the corresponding bit in the actual bitmap is not set.
1516 
1517     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1518     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1519 
1520     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1521       expected = _exp_card_bm->at(i);
1522       actual = _card_bm->at(i);
1523 
1524       if (expected && !actual) {
1525         if (_verbose) {
1526           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1527                                  "expected: %s, actual: %s",
1528                                  hr->hrm_index(), i,
1529                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1530         }
1531         failures += 1;
1532       }
1533     }
1534 
1535     if (failures > 0 && _verbose)  {
1536       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1537                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1538                              HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
1539                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1540     }
1541 
1542     _failures += failures;
1543 
1544     // We could stop iteration over the heap when we
1545     // find the first violating region by returning true.
1546     return false;
1547   }
1548 };
1549 
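     // Gang task used, when VerifyDuringGC is set, to verify the liveness
     // counting data in parallel: each worker applies the closure above to its
     // share of the heap regions and the failure counts are summed atomically.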
1550 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1551 protected:
1552   G1CollectedHeap* _g1h;
1553   ConcurrentMark* _cm;
1554   BitMap* _actual_region_bm;
1555   BitMap* _actual_card_bm;
1556 
1557   uint    _n_workers;
1558 
1559   BitMap* _expected_region_bm;
1560   BitMap* _expected_card_bm;
1561 
1562   int  _failures;
1563   bool _verbose;
1564 
1565   HeapRegionClaimer _hrclaimer;
1566 
1567 public:
1568   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1569                             BitMap* region_bm, BitMap* card_bm,
1570                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1571     : AbstractGangTask("G1 verify final counting"),
1572       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1573       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1574       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1575       _failures(0), _verbose(false),
1576       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1577     assert(VerifyDuringGC, "don't call this otherwise");
1578     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1579     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1580 
1581     _verbose = _cm->verbose_medium();
1582   }
1583 
1584   void work(uint worker_id) {
1585     assert(worker_id < _n_workers, "invariant");
1586 
1587     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1588                                             _actual_region_bm, _actual_card_bm,
1589                                             _expected_region_bm,
1590                                             _expected_card_bm,
1591                                             _verbose);
1592 
1593     _g1h->heap_region_par_iterate(&verify_cl, worker_id, &_hrclaimer);
1594 
1595     Atomic::add(verify_cl.failures(), &_failures);
1596   }
1597 
1598   int failures() const { return _failures; }
1599 };
1600 
1601 // Closure that finalizes the liveness counting data.
1602 // Used during the cleanup pause.
1603 // Sets the bits corresponding to the interval [NTAMS, top]
1604 // (which contains the implicitly live objects) in the
1605 // card liveness bitmap. Also sets the bit for each region,
1606 // containing live data, in the region liveness bitmap.
1607 
1608 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1609  public:
1610   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1611                               BitMap* region_bm,
1612                               BitMap* card_bm) :
1613     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1614 
1615   bool doHeapRegion(HeapRegion* hr) {
1616     HeapWord* ntams = hr->next_top_at_mark_start();
1617     HeapWord* top   = hr->top();
1618 
1619     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1620 
1621     // Mark the allocated-since-marking portion...
1622     if (ntams < top) {
1623       // This definitely means the region has live objects.
1624       set_bit_for_region(hr);
1625 
1626       // Now set the bits in the card bitmap for [ntams, top)
1627       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1628       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1629 
1630       // Note: if we're looking at the last region in the heap, top
1631       // could actually be just beyond the end of the heap; end_idx
1632       // will then correspond to a (non-existent) card that is also
1633       // just beyond the heap.
1634       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1635         // top is not card aligned - increment to cover all the
1636         // cards spanned by the allocated-since-marking portion
1637         end_idx += 1;
1638       }
1639 
1640       assert(end_idx <= _card_bm->size(),
1641              "oob: end_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1642              end_idx, _card_bm->size());
1643       assert(start_idx < _card_bm->size(),
1644              "oob: start_idx=  " SIZE_FORMAT ", bitmap size= " SIZE_FORMAT,
1645              start_idx, _card_bm->size());
1646 
1647       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1648     }
1649 
1650     // Set the bit for the region if it contains live data
1651     if (hr->next_marked_bytes() > 0) {
1652       set_bit_for_region(hr);
1653     }
1654 
1655     return false;
1656   }
1657 };
1658 
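     // Gang task that finalizes the liveness counting data during the cleanup
     // pause by applying FinalCountDataUpdateClosure to the heap regions in
     // parallel.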
1659 class G1ParFinalCountTask: public AbstractGangTask {
1660 protected:
1661   G1CollectedHeap* _g1h;
1662   ConcurrentMark* _cm;
1663   BitMap* _actual_region_bm;
1664   BitMap* _actual_card_bm;
1665 
1666   uint    _n_workers;
1667   HeapRegionClaimer _hrclaimer;
1668 
1669 public:
1670   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1671     : AbstractGangTask("G1 final counting"),
1672       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1673       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1674       _n_workers(_g1h->workers()->active_workers()), _hrclaimer(_n_workers) {
1675   }
1676 
1677   void work(uint worker_id) {
1678     assert(worker_id < _n_workers, "invariant");
1679 
1680     FinalCountDataUpdateClosure final_update_cl(_g1h,
1681                                                 _actual_region_bm,
1682                                                 _actual_card_bm);
1683 
1684     _g1h->heap_region_par_iterate(&final_update_cl, worker_id, &_hrclaimer);
1685   }
1686 };
1687 
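     // Heap region closure applied at the end of marking. It notes the end of
     // marking for each region and frees any non-young, non-archive region that
     // contains no live data, accumulating the freed bytes and the counts of
     // removed old/humongous regions; remembered sets of retained regions are
     // queued for cleanup.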
1688 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1689   G1CollectedHeap* _g1;
1690   size_t _freed_bytes;
1691   FreeRegionList* _local_cleanup_list;
1692   HeapRegionSetCount _old_regions_removed;
1693   HeapRegionSetCount _humongous_regions_removed;
1694   HRRSCleanupTask* _hrrs_cleanup_task;
1695 
1696 public:
1697   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1698                              FreeRegionList* local_cleanup_list,
1699                              HRRSCleanupTask* hrrs_cleanup_task) :
1700     _g1(g1),
1701     _freed_bytes(0),
1702     _local_cleanup_list(local_cleanup_list),
1703     _old_regions_removed(),
1704     _humongous_regions_removed(),
1705     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1706 
1707   size_t freed_bytes() { return _freed_bytes; }
1708   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1709   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1710 
1711   bool doHeapRegion(HeapRegion *hr) {
1712     if (hr->is_archive()) {
1713       return false;
1714     }
1715     // We use a claim value of zero here because all regions
1716     // were claimed with value 1 in the FinalCount task.
1717     _g1->reset_gc_time_stamps(hr);
1718     hr->note_end_of_marking();
1719 
1720     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1721       _freed_bytes += hr->used();
1722       hr->set_containing_set(NULL);
1723       if (hr->is_humongous()) {
1724         _humongous_regions_removed.increment(1u, hr->capacity());
1725         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1726       } else {
1727         _old_regions_removed.increment(1u, hr->capacity());
1728         _g1->free_region(hr, _local_cleanup_list, true);
1729       }
1730     } else {
1731       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1732     }
1733 
1734     return false;
1735   }
1736 };
1737 
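     // Gang task wrapping G1NoteEndOfConcMarkClosure: each worker processes its
     // share of the regions and then merges its local results (freed bytes,
     // removed region counts, locally freed regions) into the global state.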
1738 class G1ParNoteEndTask: public AbstractGangTask {
1739   friend class G1NoteEndOfConcMarkClosure;
1740 
1741 protected:
1742   G1CollectedHeap* _g1h;
1743   FreeRegionList* _cleanup_list;
1744   HeapRegionClaimer _hrclaimer;
1745 
1746 public:
1747   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1748       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1749   }
1750 
1751   void work(uint worker_id) {
1752     FreeRegionList local_cleanup_list("Local Cleanup List");
1753     HRRSCleanupTask hrrs_cleanup_task;
1754     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1755                                            &hrrs_cleanup_task);
1756     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1757     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1758 
1759     // Now update the lists
1760     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1761     {
1762       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1763       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1764 
1765       // If we iterate over the global cleanup list at the end of
1766       // cleanup to do this printing we will not guarantee to only
1767       // generate output for the newly-reclaimed regions (the list
1768       // might not be empty at the beginning of cleanup; we might
1769       // still be working on its previous contents). So we do the
1770       // printing here, before we append the new regions to the global
1771       // cleanup list.
1772 
1773       G1HRPrinter* hr_printer = _g1h->hr_printer();
1774       if (hr_printer->is_active()) {
1775         FreeRegionListIterator iter(&local_cleanup_list);
1776         while (iter.more_available()) {
1777           HeapRegion* hr = iter.get_next();
1778           hr_printer->cleanup(hr);
1779         }
1780       }
1781 
1782       _cleanup_list->add_ordered(&local_cleanup_list);
1783       assert(local_cleanup_list.is_empty(), "post-condition");
1784 
1785       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1786     }
1787   }
1788 };
1789 
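     // Gang task that scrubs the remembered sets in parallel using the region
     // and card liveness bitmaps accumulated during marking.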
1790 class G1ParScrubRemSetTask: public AbstractGangTask {
1791 protected:
1792   G1RemSet* _g1rs;
1793   BitMap* _region_bm;
1794   BitMap* _card_bm;
1795   HeapRegionClaimer _hrclaimer;
1796 
1797 public:
1798   G1ParScrubRemSetTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
1799       AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), _region_bm(region_bm), _card_bm(card_bm), _hrclaimer(n_workers) {
1800   }
1801 
1802   void work(uint worker_id) {
1803     _g1rs->scrub(_region_bm, _card_bm, worker_id, &_hrclaimer);
1804   }
1805 
1806 };
1807 
1808 void ConcurrentMark::cleanup() {
1809   // world is stopped at this checkpoint
1810   assert(SafepointSynchronize::is_at_safepoint(),
1811          "world should be stopped");
1812   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1813 
1814   // If a full collection has happened, we shouldn't do this.
1815   if (has_aborted()) {
1816     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1817     return;
1818   }
1819 
1820   g1h->verify_region_sets_optional();
1821 
1822   if (VerifyDuringGC) {
1823     HandleMark hm;  // handle scope
1824     g1h->prepare_for_verify();
1825     Universe::verify(VerifyOption_G1UsePrevMarking,
1826                      " VerifyDuringGC:(before)");
1827   }
1828   g1h->check_bitmaps("Cleanup Start");
1829 
1830   G1CollectorPolicy* g1p = g1h->g1_policy();
1831   g1p->record_concurrent_mark_cleanup_start();
1832 
1833   double start = os::elapsedTime();
1834 
1835   HeapRegionRemSet::reset_for_cleanup_tasks();
1836 
1837   // Do counting once more with the world stopped for good measure.
1838   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1839 
1840   g1h->workers()->run_task(&g1_par_count_task);
1841 
1842   if (VerifyDuringGC) {
1843     // Verify that the counting data accumulated during marking matches
1844     // that calculated by walking the marking bitmap.
1845 
1846     // Bitmaps to hold expected values
1847     BitMap expected_region_bm(_region_bm.size(), true);
1848     BitMap expected_card_bm(_card_bm.size(), true);
1849 
1850     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1851                                                  &_region_bm,
1852                                                  &_card_bm,
1853                                                  &expected_region_bm,
1854                                                  &expected_card_bm);
1855 
1856     g1h->workers()->run_task(&g1_par_verify_task);
1857 
1858     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1859   }
1860 
1861   size_t start_used_bytes = g1h->used();
1862   g1h->collector_state()->set_mark_in_progress(false);
1863 
1864   double count_end = os::elapsedTime();
1865   double this_final_counting_time = (count_end - start);
1866   _total_counting_time += this_final_counting_time;
1867 
1868   if (G1PrintRegionLivenessInfo) {
1869     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1870     _g1h->heap_region_iterate(&cl);
1871   }
1872 
1873   // Install newly created mark bitMap as "prev".
1874   swapMarkBitMaps();
1875 
1876   g1h->reset_gc_time_stamp();
1877 
1878   uint n_workers = _g1h->workers()->active_workers();
1879 
1880   // Note end of marking in all heap regions.
1881   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1882   g1h->workers()->run_task(&g1_par_note_end_task);
1883   g1h->check_gc_time_stamps();
1884 
1885   if (!cleanup_list_is_empty()) {
1886     // The cleanup list is not empty, so we'll have to process it
1887     // concurrently. Notify anyone else that might be wanting free
1888     // regions that there will be more free regions coming soon.
1889     g1h->set_free_regions_coming();
1890   }
1891 
1892   // This scrubbing must happen before the record_concurrent_mark_cleanup_end()
1893   // call below, since it affects the metric by which we sort the heap regions.
1894   if (G1ScrubRemSets) {
1895     double rs_scrub_start = os::elapsedTime();
1896     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
1897     g1h->workers()->run_task(&g1_par_scrub_rs_task);
1898 
1899     double rs_scrub_end = os::elapsedTime();
1900     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1901     _total_rs_scrub_time += this_rs_scrub_time;
1902   }
1903 
1904   // this will also free any regions totally full of garbage objects,
1905   // and sort the regions.
1906   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1907 
1908   // Statistics.
1909   double end = os::elapsedTime();
1910   _cleanup_times.add((end - start) * 1000.0);
1911 
1912   if (G1Log::fine()) {
1913     g1h->g1_policy()->print_heap_transition(start_used_bytes);
1914   }
1915 
1916   // Clean up will have freed any regions completely full of garbage.
1917   // Update the soft reference policy with the new heap occupancy.
1918   Universe::update_heap_info_at_gc();
1919 
1920   if (VerifyDuringGC) {
1921     HandleMark hm;  // handle scope
1922     g1h->prepare_for_verify();
1923     Universe::verify(VerifyOption_G1UsePrevMarking,
1924                      " VerifyDuringGC:(after)");
1925   }
1926 
1927   g1h->check_bitmaps("Cleanup End");
1928 
1929   g1h->verify_region_sets_optional();
1930 
1931   // We need to make this be a "collection" so any collection pause that
1932   // races with it goes around and waits for completeCleanup to finish.
1933   g1h->increment_total_collections();
1934 
1935   // Clean out dead classes and update Metaspace sizes.
1936   if (ClassUnloadingWithConcurrentMark) {
1937     ClassLoaderDataGraph::purge();
1938   }
1939   MetaspaceGC::compute_new_size();
1940 
1941   // We reclaimed old regions so we should calculate the sizes to make
1942   // sure we update the old gen/space data.
1943   g1h->g1mm()->update_sizes();
1944   g1h->allocation_context_stats().update_after_mark();
1945 
1946   g1h->trace_heap_after_concurrent_cycle();
1947 }
1948 
1949 void ConcurrentMark::completeCleanup() {
1950   if (has_aborted()) return;
1951 
1952   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1953 
1954   _cleanup_list.verify_optional();
1955   FreeRegionList tmp_free_list("Tmp Free List");
1956 
1957   if (G1ConcRegionFreeingVerbose) {
1958     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1959                            "cleanup list has %u entries",
1960                            _cleanup_list.length());
1961   }
1962 
1963   // No one else should be accessing the _cleanup_list at this point,
1964   // so it is not necessary to take any locks
1965   while (!_cleanup_list.is_empty()) {
1966     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1967     assert(hr != NULL, "Got NULL from a non-empty list");
1968     hr->par_clear();
1969     tmp_free_list.add_ordered(hr);
1970 
1971     // Instead of adding one region at a time to the secondary_free_list,
1972     // we accumulate them in the local list and move them a few at a
1973     // time. This also cuts down on the number of notify_all() calls
1974     // we do during this process. We'll also append the local list when
1975     // _cleanup_list is empty (which means we just removed the last
1976     // region from the _cleanup_list).
1977     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1978         _cleanup_list.is_empty()) {
1979       if (G1ConcRegionFreeingVerbose) {
1980         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1981                                "appending %u entries to the secondary_free_list, "
1982                                "cleanup list still has %u entries",
1983                                tmp_free_list.length(),
1984                                _cleanup_list.length());
1985       }
1986 
1987       {
1988         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1989         g1h->secondary_free_list_add(&tmp_free_list);
1990         SecondaryFreeList_lock->notify_all();
1991       }
1992 #ifndef PRODUCT
1993       if (G1StressConcRegionFreeing) {
1994         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1995           os::sleep(Thread::current(), (jlong) 1, false);
1996         }
1997       }
1998 #endif
1999     }
2000   }
2001   assert(tmp_free_list.is_empty(), "post-condition");
2002 }
2003 
2004 // Supporting Object and Oop closures for reference discovery
2005 // and processing in during marking
2006 
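     // An object is considered alive if its address is non-NULL and it either
     // lies outside the G1 reserved heap or is not "ill" (dead) with respect to
     // the current marking.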
2007 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2008   HeapWord* addr = (HeapWord*)obj;
2009   return addr != NULL &&
2010          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2011 }
2012 
2013 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2014 // Uses the CMTask associated with a worker thread (for serial reference
2015 // processing the CMTask for worker 0 is used) to preserve (mark) and
2016 // trace referent objects.
2017 //
2018 // Using the CMTask and embedded local queues avoids having the worker
2019 // threads operating on the global mark stack. This reduces the risk
2020 // of overflowing the stack - which we would rather avoid at this late
2021 // stage. Also using the tasks' local queues removes the potential
2022 // of the workers interfering with each other that could occur if
2023 // operating on the global stack.
2024 
2025 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2026   ConcurrentMark* _cm;
2027   CMTask*         _task;
2028   int             _ref_counter_limit;
2029   int             _ref_counter;
2030   bool            _is_serial;
2031  public:
2032   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2033     _cm(cm), _task(task), _is_serial(is_serial),
2034     _ref_counter_limit(G1RefProcDrainInterval) {
2035     assert(_ref_counter_limit > 0, "sanity");
2036     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2037     _ref_counter = _ref_counter_limit;
2038   }
2039 
2040   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2041   virtual void do_oop(      oop* p) { do_oop_work(p); }
2042 
2043   template <class T> void do_oop_work(T* p) {
2044     if (!_cm->has_overflown()) {
2045       oop obj = oopDesc::load_decode_heap_oop(p);
2046       if (_cm->verbose_high()) {
2047         gclog_or_tty->print_cr("\t[%u] we're looking at location "
2048                                "*" PTR_FORMAT " = " PTR_FORMAT,
2049                                _task->worker_id(), p2i(p), p2i((void*) obj));
2050       }
2051 
2052       _task->deal_with_reference(obj);
2053       _ref_counter--;
2054 
2055       if (_ref_counter == 0) {
2056         // We have dealt with _ref_counter_limit references, pushing them
2057         // and objects reachable from them on to the local stack (and
2058         // possibly the global stack). Call CMTask::do_marking_step() to
2059         // process these entries.
2060         //
2061         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2062         // there's nothing more to do (i.e. we're done with the entries that
2063         // were pushed as a result of the CMTask::deal_with_reference() calls
2064         // above) or we overflow.
2065         //
2066         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2067         // flag while there may still be some work to do. (See the comment at
2068         // the beginning of CMTask::do_marking_step() for those conditions -
2069         // one of which is reaching the specified time target.) It is only
2070         // when CMTask::do_marking_step() returns without setting the
2071         // has_aborted() flag that the marking step has completed.
2072         do {
2073           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2074           _task->do_marking_step(mark_step_duration_ms,
2075                                  false      /* do_termination */,
2076                                  _is_serial);
2077         } while (_task->has_aborted() && !_cm->has_overflown());
2078         _ref_counter = _ref_counter_limit;
2079       }
2080     } else {
2081       if (_cm->verbose_high()) {
2082          gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2083       }
2084     }
2085   }
2086 };
2087 
2088 // 'Drain' oop closure used by both serial and parallel reference processing.
2089 // Uses the CMTask associated with a given worker thread (for serial
2090 // reference processing the CMTask for worker 0 is used). Calls the
2091 // do_marking_step routine, with an unbelievably large timeout value,
2092 // to drain the marking data structures of the remaining entries
2093 // added by the 'keep alive' oop closure above.
2094 
2095 class G1CMDrainMarkingStackClosure: public VoidClosure {
2096   ConcurrentMark* _cm;
2097   CMTask*         _task;
2098   bool            _is_serial;
2099  public:
2100   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2101     _cm(cm), _task(task), _is_serial(is_serial) {
2102     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2103   }
2104 
2105   void do_void() {
2106     do {
2107       if (_cm->verbose_high()) {
2108         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2109                                _task->worker_id(), BOOL_TO_STR(_is_serial));
2110       }
2111 
2112       // We call CMTask::do_marking_step() to completely drain the local
2113       // and global marking stacks of entries pushed by the 'keep alive'
2114       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2115       //
2116       // CMTask::do_marking_step() is called in a loop, which we'll exit
2117       // if there's nothing more to do (i.e. we've completely drained the
2118       // entries that were pushed as a result of applying the 'keep alive'
2119       // closure to the entries on the discovered ref lists) or we overflow
2120       // the global marking stack.
2121       //
2122       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2123       // flag while there may still be some work to do. (See the comment at
2124       // the beginning of CMTask::do_marking_step() for those conditions -
2125       // one of which is reaching the specified time target.) It is only
2126       // when CMTask::do_marking_step() returns without setting the
2127       // has_aborted() flag that the marking step has completed.
2128 
2129       _task->do_marking_step(1000000000.0 /* something very large */,
2130                              true         /* do_termination */,
2131                              _is_serial);
2132     } while (_task->has_aborted() && !_cm->has_overflown());
2133   }
2134 };
2135 
2136 // Implementation of AbstractRefProcTaskExecutor for parallel
2137 // reference processing at the end of G1 concurrent marking
2138 
2139 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2140 private:
2141   G1CollectedHeap* _g1h;
2142   ConcurrentMark*  _cm;
2143   WorkGang*        _workers;
2144   uint             _active_workers;
2145 
2146 public:
2147   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2148                           ConcurrentMark* cm,
2149                           WorkGang* workers,
2150                           uint n_workers) :
2151     _g1h(g1h), _cm(cm),
2152     _workers(workers), _active_workers(n_workers) { }
2153 
2154   // Executes the given task using concurrent marking worker threads.
2155   virtual void execute(ProcessTask& task);
2156   virtual void execute(EnqueueTask& task);
2157 };
2158 
2159 class G1CMRefProcTaskProxy: public AbstractGangTask {
2160   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2161   ProcessTask&     _proc_task;
2162   G1CollectedHeap* _g1h;
2163   ConcurrentMark*  _cm;
2164 
2165 public:
2166   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2167                      G1CollectedHeap* g1h,
2168                      ConcurrentMark* cm) :
2169     AbstractGangTask("Process reference objects in parallel"),
2170     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2171     ReferenceProcessor* rp = _g1h->ref_processor_cm();
2172     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2173   }
2174 
2175   virtual void work(uint worker_id) {
2176     ResourceMark rm;
2177     HandleMark hm;
2178     CMTask* task = _cm->task(worker_id);
2179     G1CMIsAliveClosure g1_is_alive(_g1h);
2180     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2181     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2182 
2183     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2184   }
2185 };
2186 
2187 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2188   assert(_workers != NULL, "Need parallel worker threads.");
2189   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2190 
2191   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2192 
2193   // We need to reset the concurrency level before each
2194   // proxy task execution, so that the termination protocol
2195   // and overflow handling in CMTask::do_marking_step() knows
2196   // how many workers to wait for.
2197   _cm->set_concurrency(_active_workers);
2198   _workers->run_task(&proc_task_proxy);
2199 }
2200 
2201 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2202   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2203   EnqueueTask& _enq_task;
2204 
2205 // and processing during marking
2206   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2207     AbstractGangTask("Enqueue reference objects in parallel"),
2208     _enq_task(enq_task) { }
2209 
2210   virtual void work(uint worker_id) {
2211     _enq_task.work(worker_id);
2212   }
2213 };
2214 
2215 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2216   assert(_workers != NULL, "Need parallel worker threads.");
2217   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2218 
2219   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2220 
2221   // Not strictly necessary but...
2222   //
2223   // We need to reset the concurrency level before each
2224   // proxy task execution, so that the termination protocol
2225   // and overflow handling in CMTask::do_marking_step() knows
2226   // how many workers to wait for.
2227   _cm->set_concurrency(_active_workers);
2228   _workers->run_task(&enq_task_proxy);
2229 }
2230 
2231 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2232   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2233 }
2234 
2235 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2236   if (has_overflown()) {
2237     // Skip processing the discovered references if we have
2238     // overflown the global marking stack. Reference objects
2239     // only get discovered once so it is OK to not
2240     // de-populate the discovered reference lists. We could have,
2241     // but the only benefit would be that, when marking restarts,
2242     // fewer reference objects are discovered.
2243     return;
2244   }
2245 
2246   ResourceMark rm;
2247   HandleMark   hm;
2248 
2249   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2250 
2251   // Is alive closure.
2252   G1CMIsAliveClosure g1_is_alive(g1h);
2253 
2254   // Inner scope to exclude the cleaning of the string and symbol
2255   // tables from the displayed time.
2256   {
2257     G1CMTraceTime t("GC ref-proc", G1Log::finer());
2258 
2259     ReferenceProcessor* rp = g1h->ref_processor_cm();
2260 
2261     // See the comment in G1CollectedHeap::ref_processing_init()
2262     // about how reference processing currently works in G1.
2263 
2264     // Set the soft reference policy
2265     rp->setup_policy(clear_all_soft_refs);
2266     assert(_markStack.isEmpty(), "mark stack should be empty");
2267 
2268     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2269     // in serial reference processing. Note these closures are also
2270     // used for serially processing (by the current thread) the
2271     // JNI references during parallel reference processing.
2272     //
2273     // These closures do not need to synchronize with the worker
2274     // threads involved in parallel reference processing as these
2275     // instances are executed serially by the current thread (e.g.
2276     // reference processing is not multi-threaded and is thus
2277     // performed by the current thread instead of a gang worker).
2278     //
2279     // The gang tasks involved in parallel reference processing create
2280     // their own instances of these closures, which do their own
2281     // synchronization among themselves.
2282     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2283     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2284 
2285     // We need at least one active thread. If reference processing
2286     // is not multi-threaded we use the current (VMThread) thread,
2287     // otherwise we use the work gang from the G1CollectedHeap and
2288     // we utilize all the worker threads we can.
2289     bool processing_is_mt = rp->processing_is_mt();
2290     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2291     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2292 
2293     // Parallel processing task executor.
2294     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2295                                               g1h->workers(), active_workers);
2296     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2297 
2298     // Set the concurrency level. The phase was already set prior to
2299     // executing the remark task.
2300     set_concurrency(active_workers);
2301 
2302     // Set the degree of MT processing here.  If the discovery was done MT,
2303     // the number of threads involved during discovery could differ from
2304     // the number of active workers.  This is OK as long as the discovered
2305     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2306     rp->set_active_mt_degree(active_workers);
2307 
2308     // Process the weak references.
2309     const ReferenceProcessorStats& stats =
2310         rp->process_discovered_references(&g1_is_alive,
2311                                           &g1_keep_alive,
2312                                           &g1_drain_mark_stack,
2313                                           executor,
2314                                           g1h->gc_timer_cm());
2315     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2316 
2317     // The do_oop work routines of the keep_alive and drain_marking_stack
2318     // oop closures will set the has_overflown flag if we overflow the
2319     // global marking stack.
2320 
2321     assert(_markStack.overflow() || _markStack.isEmpty(),
2322             "mark stack should be empty (unless it overflowed)");
2323 
2324     if (_markStack.overflow()) {
2325       // This should have been done already when we tried to push an
2326       // entry on to the global mark stack. But let's do it again.
2327       set_has_overflown();
2328     }
2329 
2330     assert(rp->num_q() == active_workers, "Discovered reference queue count should match active workers");
2331 
2332     rp->enqueue_discovered_references(executor);
2333 
2334     rp->verify_no_references_recorded();
2335     assert(!rp->discovery_enabled(), "Post condition");
2336   }
2337 
2338   if (has_overflown()) {
2339     // We can not trust g1_is_alive if the marking stack overflowed
2340     return;
2341   }
2342 
2343   assert(_markStack.isEmpty(), "Marking should have completed");
2344 
2345   // Unload Klasses, String, Symbols, Code Cache, etc.
2346   {
2347     G1CMTraceTime trace("Unloading", G1Log::finer());
2348 
2349     if (ClassUnloadingWithConcurrentMark) {
2350       bool purged_classes;
2351 
2352       {
2353         G1CMTraceTime trace("System Dictionary Unloading", G1Log::finest());
2354         purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2355       }
2356 
2357       {
2358         G1CMTraceTime trace("Parallel Unloading", G1Log::finest());
2359         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2360       }
2361     }
2362 
2363     if (G1StringDedup::is_enabled()) {
2364       G1CMTraceTime trace("String Deduplication Unlink", G1Log::finest());
2365       G1StringDedup::unlink(&g1_is_alive);
2366     }
2367   }
2368 }
2369 
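     // Swap the roles of the two marking bitmaps: the "next" bitmap, which now
     // holds the completed marking information, becomes the new "prev" bitmap,
     // and the old "prev" bitmap will be cleared and reused by the next marking
     // cycle.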
2370 void ConcurrentMark::swapMarkBitMaps() {
2371   CMBitMapRO* temp = _prevMarkBitMap;
2372   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2373   _nextMarkBitMap  = (CMBitMap*)  temp;
2374 }
2375 
2376 // Closure for marking entries in SATB buffers.
2377 class CMSATBBufferClosure : public SATBBufferClosure {
2378 private:
2379   CMTask* _task;
2380   G1CollectedHeap* _g1h;
2381 
2382   // This is very similar to CMTask::deal_with_reference, but with
2383   // more relaxed requirements for the argument, so this must be more
2384   // circumspect about treating the argument as an object.
2385   void do_entry(void* entry) const {
2386     _task->increment_refs_reached();
2387     HeapRegion* hr = _g1h->heap_region_containing(entry);
2388     if (entry < hr->next_top_at_mark_start()) {
2389       // Until we get here, we don't know whether entry refers to a valid
2390       // object; it could instead have been a stale reference.
2391       oop obj = static_cast<oop>(entry);
2392       assert(obj->is_oop(true /* ignore mark word */),
2393              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2394       _task->make_reference_grey(obj, hr);
2395     }
2396   }
2397 
2398 public:
2399   CMSATBBufferClosure(CMTask* task, G1CollectedHeap* g1h)
2400     : _task(task), _g1h(g1h) { }
2401 
2402   virtual void do_buffer(void** buffer, size_t size) {
2403     for (size_t i = 0; i < size; ++i) {
2404       do_entry(buffer[i]);
2405     }
2406   }
2407 };
2408 
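     // Thread closure used during remark: for each Java thread it claims, it
     // scans the thread's nmethods for marking roots and drains the thread's
     // SATB buffer; for the VM thread it drains the shared SATB queue.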
2409 class G1RemarkThreadsClosure : public ThreadClosure {
2410   CMSATBBufferClosure _cm_satb_cl;
2411   G1CMOopClosure _cm_cl;
2412   MarkingCodeBlobClosure _code_cl;
2413   int _thread_parity;
2414 
2415  public:
2416   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task) :
2417     _cm_satb_cl(task, g1h),
2418     _cm_cl(g1h, g1h->concurrent_mark(), task),
2419     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2420     _thread_parity(Threads::thread_claim_parity()) {}
2421 
2422   void do_thread(Thread* thread) {
2423     if (thread->is_Java_thread()) {
2424       if (thread->claim_oops_do(true, _thread_parity)) {
2425         JavaThread* jt = (JavaThread*)thread;
2426 
2427         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2428         // however, the oops reachable from nmethods have very complex lifecycles:
2429         // * Alive if on the stack of an executing method
2430         // * Weakly reachable otherwise
2431         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver should be
2432         // live by the SATB invariant but other oops recorded in nmethods may behave differently.
2433         jt->nmethods_do(&_code_cl);
2434 
2435         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2436       }
2437     } else if (thread->is_VM_thread()) {
2438       if (thread->claim_oops_do(true, _thread_parity)) {
2439         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2440       }
2441     }
2442   }
2443 };
2444 
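     // Gang task that performs the parallel part of the remark pause: each
     // active task first processes thread roots and SATB buffers via
     // G1RemarkThreadsClosure and then calls do_marking_step() until it either
     // completes or the global mark stack overflows.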
2445 class CMRemarkTask: public AbstractGangTask {
2446 private:
2447   ConcurrentMark* _cm;
2448 public:
2449   void work(uint worker_id) {
2450     // Since all available tasks are actually started, we should
2451     // only proceed if we're supposed to be active.
2452     if (worker_id < _cm->active_tasks()) {
2453       CMTask* task = _cm->task(worker_id);
2454       task->record_start_time();
2455       {
2456         ResourceMark rm;
2457         HandleMark hm;
2458 
2459         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2460         Threads::threads_do(&threads_f);
2461       }
2462 
2463       do {
2464         task->do_marking_step(1000000000.0 /* something very large */,
2465                               true         /* do_termination       */,
2466                               false        /* is_serial            */);
2467       } while (task->has_aborted() && !_cm->has_overflown());
2468       // If we overflow, then we do not want to restart. We instead
2469       // want to abort remark and do concurrent marking again.
2470       task->record_end_time();
2471     }
2472   }
2473 
2474   CMRemarkTask(ConcurrentMark* cm, uint active_workers) :
2475     AbstractGangTask("Par Remark"), _cm(cm) {
2476     _cm->terminator()->reset_for_reuse(active_workers);
2477   }
2478 };
2479 
2480 void ConcurrentMark::checkpointRootsFinalWork() {
2481   ResourceMark rm;
2482   HandleMark   hm;
2483   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2484 
2485   G1CMTraceTime trace("Finalize Marking", G1Log::finer());
2486 
2487   g1h->ensure_parsability(false);
2488 
2489   // this is remark, so we'll use up all active threads
2490   uint active_workers = g1h->workers()->active_workers();
2491   set_concurrency_and_phase(active_workers, false /* concurrent */);
2492   // Leave _parallel_marking_threads at its
2493   // value originally calculated in the ConcurrentMark
2494   // constructor and pass values of the active workers
2495   // through the gang in the task.
2496 
2497   {
2498     StrongRootsScope srs(active_workers);
2499 
2500     CMRemarkTask remarkTask(this, active_workers);
2501     // We will start all available threads, even if we decide that the
2502     // active_workers will be fewer. The extra ones will just bail out
2503     // immediately.
2504     g1h->workers()->run_task(&remarkTask);
2505   }
2506 
2507   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2508   guarantee(has_overflown() ||
2509             satb_mq_set.completed_buffers_num() == 0,
2510             "Invariant: has_overflown = %s, num buffers = %d",
2511             BOOL_TO_STR(has_overflown()),
2512             satb_mq_set.completed_buffers_num());
2513 
2514   print_stats();
2515 }
2516 
2517 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2518   // Note we are overriding the read-only view of the prev map here, via
2519   // the cast.
2520   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2521 }
2522 
2523 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2524   _nextMarkBitMap->clearRange(mr);
2525 }
2526 
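     // Claims the next region to be scanned by a marking task by atomically
     // advancing the global finger past that region. Returns the claimed region
     // if its next_top_at_mark_start() is above bottom; otherwise returns NULL
     // (the claimed region was empty, the CAS lost a race, or the finger has
     // reached the end of the heap) and the caller may retry.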
2527 HeapRegion*
2528 ConcurrentMark::claim_region(uint worker_id) {
2529   // "checkpoint" the finger
2530   HeapWord* finger = _finger;
2531 
2532   // _heap_end will not change underneath our feet; it only changes at
2533   // yield points.
2534   while (finger < _heap_end) {
2535     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2536 
2537     // Note on how this code handles humongous regions. In the
2538     // normal case the finger will reach the start of a "starts
2539     // humongous" (SH) region. Its end will either be the end of the
2540     // last "continues humongous" (CH) region in the sequence, or the
2541     // standard end of the SH region (if the SH is the only region in
2542     // the sequence). That way claim_region() will skip over the CH
2543     // regions. However, there is a subtle race between a CM thread
2544     // executing this method and a mutator thread doing a humongous
2545     // object allocation. The two are not mutually exclusive as the CM
2546     // thread does not need to hold the Heap_lock when it gets
2547     // here. So there is a chance that claim_region() will come across
2548     // a free region that's in the progress of becoming a SH or a CH
2549     // region. In the former case, it will either
2550     //   a) Miss the update to the region's end, in which case it will
2551     //      visit every subsequent CH region, will find their bitmaps
2552     //      empty, and do nothing, or
2553     //   b) Will observe the update of the region's end (in which case
2554     //      it will skip the subsequent CH regions).
2555     // If it comes across a region that suddenly becomes CH, the
2556     // scenario will be similar to b). So, the race between
2557     // claim_region() and a humongous object allocation might force us
2558     // to do a bit of unnecessary work (due to some unnecessary bitmap
2559     // iterations) but it should not introduce any correctness issues.
2560     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2561 
2562     // heap_region_containing() above may return NULL as we always claim up
2563     // to the end of the heap. In this case, just jump to the next region.
2564     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2565 
2566     // Is the gap between reading the finger and doing the CAS too long?
2567     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2568     if (res == finger && curr_region != NULL) {
2569       // we succeeded
2570       HeapWord*   bottom        = curr_region->bottom();
2571       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2572 
2573       if (verbose_low()) {
2574         gclog_or_tty->print_cr("[%u] curr_region = " PTR_FORMAT " "
2575                                "[" PTR_FORMAT ", " PTR_FORMAT "), "
2576                                "limit = " PTR_FORMAT,
2577                                worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
2578       }
2579 
2580       // notice that _finger == end cannot be guaranteed here since
2581       // someone else might have moved the finger even further
2582       assert(_finger >= end, "the finger should have moved forward");
2583 
2584       if (verbose_low()) {
2585         gclog_or_tty->print_cr("[%u] we were successful with region = "
2586                                PTR_FORMAT, worker_id, p2i(curr_region));
2587       }
2588 
2589       if (limit > bottom) {
2590         if (verbose_low()) {
2591           gclog_or_tty->print_cr("[%u] region " PTR_FORMAT " is not empty, "
2592                                  "returning it ", worker_id, p2i(curr_region));
2593         }
2594         return curr_region;
2595       } else {
2596         assert(limit == bottom,
2597                "the region limit should be at bottom");
2598         if (verbose_low()) {
2599           gclog_or_tty->print_cr("[%u] region " PTR_FORMAT " is empty, "
2600                                  "returning NULL", worker_id, p2i(curr_region));
2601         }
2602         // we return NULL and the caller should try calling
2603         // claim_region() again.
2604         return NULL;
2605       }
2606     } else {
2607       assert(_finger > finger, "the finger should have moved forward");
2608       if (verbose_low()) {
2609         if (curr_region == NULL) {
2610           gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, "
2611                                  "global finger = " PTR_FORMAT ", "
2612                                  "our finger = " PTR_FORMAT,
2613                                  worker_id, p2i(_finger), p2i(finger));
2614         } else {
2615           gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
2616                                  "global finger = " PTR_FORMAT ", "
2617                                  "our finger = " PTR_FORMAT,
2618                                  worker_id, p2i(_finger), p2i(finger));
2619         }
2620       }
2621 
2622       // read it again
2623       finger = _finger;
2624     }
2625   }
2626 
2627   return NULL;
2628 }
2629 
2630 #ifndef PRODUCT
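     // Functor that checks that a given oop is a valid object and does not point
     // into the collection set. Applied by verify_no_cset_oops() below to the
     // entries on the global mark stack and the per-task queues.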
2631 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
2632 private:
2633   G1CollectedHeap* _g1h;
2634   const char* _phase;
2635   int _info;
2636 
2637 public:
2638   VerifyNoCSetOops(const char* phase, int info = -1) :
2639     _g1h(G1CollectedHeap::heap()),
2640     _phase(phase),
2641     _info(info)
2642   { }
2643 
2644   void operator()(oop obj) const {
2645     guarantee(obj->is_oop(),
2646               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2647               p2i(obj), _phase, _info);
2648     guarantee(!_g1h->obj_in_cs(obj),
2649               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2650               p2i(obj), _phase, _info);
2651   }
2652 };
2653 
2654 void ConcurrentMark::verify_no_cset_oops() {
2655   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2656   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2657     return;
2658   }
2659 
2660   // Verify entries on the global mark stack
2661   _markStack.iterate(VerifyNoCSetOops("Stack"));
2662 
2663   // Verify entries on the task queues
2664   for (uint i = 0; i < _max_worker_id; ++i) {
2665     CMTaskQueue* queue = _task_queues->queue(i);
2666     queue->iterate(VerifyNoCSetOops("Queue", i));
2667   }
2668 
2669   // Verify the global finger
2670   HeapWord* global_finger = finger();
2671   if (global_finger != NULL && global_finger < _heap_end) {
2672     // The global finger always points to a heap region boundary. We
2673     // use heap_region_containing() to get the containing region
2674     // given that the global finger could be pointing to a free region
2675     // which subsequently becomes a continues humongous region. If that
2676     // happens, heap_region_containing() will return the bottom of the
2677     // corresponding starts humongous region and the check below will
2678     // not hold any more.
2679     // Since we always iterate over all regions, we might get a NULL HeapRegion
2680     // here.
2681     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
2682     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2683               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
2684               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
2685   }
2686 
2687   // Verify the task fingers
2688   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2689   for (uint i = 0; i < parallel_marking_threads(); ++i) {
2690     CMTask* task = _tasks[i];
2691     HeapWord* task_finger = task->finger();
2692     if (task_finger != NULL && task_finger < _heap_end) {
2693       // See above note on the global finger verification.
2694       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
2695       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2696                 !task_hr->in_collection_set(),
2697                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2698                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
2699     }
2700   }
2701 }
2702 #endif // PRODUCT
2703 
2704 // Aggregate the counting data that was constructed concurrently
2705 // with marking.
2706 class AggregateCountDataHRClosure: public HeapRegionClosure {
2707   G1CollectedHeap* _g1h;
2708   ConcurrentMark* _cm;
2709   CardTableModRefBS* _ct_bs;
2710   BitMap* _cm_card_bm;
2711   uint _max_worker_id;
2712 
2713  public:
2714   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2715                               BitMap* cm_card_bm,
2716                               uint max_worker_id) :
2717     _g1h(g1h), _cm(g1h->concurrent_mark()),
2718     _ct_bs(barrier_set_cast<CardTableModRefBS>(g1h->barrier_set())),
2719     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
2720 
2721   bool doHeapRegion(HeapRegion* hr) {
2722     HeapWord* start = hr->bottom();
2723     HeapWord* limit = hr->next_top_at_mark_start();
2724     HeapWord* end = hr->end();
2725 
2726     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2727            "Preconditions not met - "
2728            "start: " PTR_FORMAT ", limit: " PTR_FORMAT ", "
2729            "top: " PTR_FORMAT ", end: " PTR_FORMAT,
2730            p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()));
2731 
2732     assert(hr->next_marked_bytes() == 0, "Precondition");
2733 
2734     if (start == limit) {
2735       // NTAMS of this region has not been set so nothing to do.
2736       return false;
2737     }
2738 
2739     // 'start' should be in the heap.
2740     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2741     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2742     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2743 
2744     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2745     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2746     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2747 
2748     // If ntams is not card aligned then we bump the card bitmap index
2749     // for limit so that we get all the cards spanned by
2750     // the object ending at ntams.
2751     // Note: if this is the last region in the heap then ntams
2752     // could actually be just beyond the end of the heap;
2753     // limit_idx will then correspond to a (non-existent) card
2754     // that is also outside the heap.
2755     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2756       limit_idx += 1;
2757     }
2758 
2759     assert(limit_idx <= end_idx, "or else use atomics");
2760 
2761     // Aggregate the "stripe" in the count data associated with hr.
2762     uint hrm_index = hr->hrm_index();
2763     size_t marked_bytes = 0;
2764 
2765     for (uint i = 0; i < _max_worker_id; i += 1) {
2766       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2767       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2768 
2769       // Fetch the marked_bytes in this region for task i and
2770       // add it to the running total for this region.
2771       marked_bytes += marked_bytes_array[hrm_index];
2772 
2773       // Now union the per-worker card bitmaps [0, max_worker_id), over
2774       // the range [start_idx, limit_idx), into the global card bitmap.
2775       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2776 
2777       while (scan_idx < limit_idx) {
2778         assert(task_card_bm->at(scan_idx) == true, "should be");
2779         _cm_card_bm->set_bit(scan_idx);
2780         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2781 
2782         // BitMap::get_next_one_offset() can handle the case when
2783         // its left_offset parameter is greater than its right_offset
2784         // parameter. It does, however, have an early exit if
2785         // left_offset == right_offset. So let's limit the value
2786         // passed in for left offset here.
2787         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2788         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2789       }
2790     }
2791 
2792     // Update the marked bytes for this region.
2793     hr->add_to_marked_bytes(marked_bytes);
2794 
2795     // Next heap region
2796     return false;
2797   }
2798 };
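     // Conceptually, the closure above folds the per-worker counting data
     // into the global structures. A rough sketch of the effect for each
     // region hr (illustrative names, not the real accessors):
     //
     //   size_t total = 0;
     //   for (uint w = 0; w < max_worker_id; w++) {
     //     total += worker_marked_bytes[w][hr->hrm_index()];
     //     // ... and OR worker w's card bits in [start_idx, limit_idx)
     //     // into the global card bitmap ...
     //   }
     //   hr->add_to_marked_bytes(total);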
2799 
2800 class G1AggregateCountDataTask: public AbstractGangTask {
2801 protected:
2802   G1CollectedHeap* _g1h;
2803   ConcurrentMark* _cm;
2804   BitMap* _cm_card_bm;
2805   uint _max_worker_id;
2806   uint _active_workers;
2807   HeapRegionClaimer _hrclaimer;
2808 
2809 public:
2810   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2811                            ConcurrentMark* cm,
2812                            BitMap* cm_card_bm,
2813                            uint max_worker_id,
2814                            uint n_workers) :
2815       AbstractGangTask("Count Aggregation"),
2816       _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2817       _max_worker_id(max_worker_id),
2818       _active_workers(n_workers),
2819       _hrclaimer(_active_workers) {
2820   }
2821 
2822   void work(uint worker_id) {
2823     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
2824 
2825     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
2826   }
2827 };
2828 
2829 
2830 void ConcurrentMark::aggregate_count_data() {
2831   uint n_workers = _g1h->workers()->active_workers();
2832 
2833   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
2834                                            _max_worker_id, n_workers);
2835 
2836   _g1h->workers()->run_task(&g1_par_agg_task);
2837 }
2838 
2839 // Clear the per-worker arrays used to store the per-region counting data
2840 void ConcurrentMark::clear_all_count_data() {
2841   // Clear the global card bitmap - it will be filled during
2842   // liveness count aggregation (during remark) and the
2843   // final counting task.
2844   _card_bm.clear();
2845 
2846   // Clear the global region bitmap - it will be filled as part
2847   // of the final counting task.
2848   _region_bm.clear();
2849 
2850   uint max_regions = _g1h->max_regions();
2851   assert(_max_worker_id > 0, "uninitialized");
2852 
2853   for (uint i = 0; i < _max_worker_id; i += 1) {
2854     BitMap* task_card_bm = count_card_bitmap_for(i);
2855     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
2856 
2857     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
2858     assert(marked_bytes_array != NULL, "uninitialized");
2859 
2860     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
2861     task_card_bm->clear();
2862   }
2863 }
2864 
2865 void ConcurrentMark::print_stats() {
2866   if (verbose_stats()) {
2867     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2868     for (size_t i = 0; i < _active_tasks; ++i) {
2869       _tasks[i]->print_stats();
2870       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2871     }
2872   }
2873 }
2874 
2875 // abandon current marking iteration due to a Full GC
2876 void ConcurrentMark::abort() {
2877   if (!cmThread()->during_cycle() || _has_aborted) {
2878     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2879     return;
2880   }
2881 
2882   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2883   // concurrent bitmap clearing.
2884   _nextMarkBitMap->clearAll();
2885 
2886   // Note we cannot clear the previous marking bitmap here
2887   // since VerifyDuringGC verifies the objects marked during
2888   // a full GC against the previous bitmap.
2889 
2890   // Clear the liveness counting data
2891   clear_all_count_data();
2892   // Empty mark stack
2893   reset_marking_state();
2894   for (uint i = 0; i < _max_worker_id; ++i) {
2895     _tasks[i]->clear_region_fields();
2896   }
2897   _first_overflow_barrier_sync.abort();
2898   _second_overflow_barrier_sync.abort();
2899   _has_aborted = true;
2900 
2901   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2902   satb_mq_set.abandon_partial_marking();
2903   // This can be called either during or outside marking, we'll read
2904   // the expected_active value from the SATB queue set.
2905   satb_mq_set.set_active_all_threads(
2906                                  false, /* new active value */
2907                                  satb_mq_set.is_active() /* expected_active */);
2908 
2909   _g1h->trace_heap_after_concurrent_cycle();
2910   _g1h->register_concurrent_cycle_end();
2911 }
2912 
2913 static void print_ms_time_info(const char* prefix, const char* name,
2914                                NumberSeq& ns) {
2915   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2916                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2917   if (ns.num() > 0) {
2918     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2919                            prefix, ns.sd(), ns.maximum());
2920   }
2921 }
2922 
2923 void ConcurrentMark::print_summary_info() {
2924   gclog_or_tty->print_cr(" Concurrent marking:");
2925   print_ms_time_info("  ", "init marks", _init_times);
2926   print_ms_time_info("  ", "remarks", _remark_times);
2927   {
2928     print_ms_time_info("     ", "final marks", _remark_mark_times);
2929     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2930 
2931   }
2932   print_ms_time_info("  ", "cleanups", _cleanup_times);
2933   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
2934                          _total_counting_time,
2935                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2936                           (double)_cleanup_times.num()
2937                          : 0.0));
2938   if (G1ScrubRemSets) {
2939     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2940                            _total_rs_scrub_time,
2941                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2942                             (double)_cleanup_times.num()
2943                            : 0.0));
2944   }
2945   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
2946                          (_init_times.sum() + _remark_times.sum() +
2947                           _cleanup_times.sum())/1000.0);
2948   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
2949                 "(%8.2f s marking).",
2950                 cmThread()->vtime_accum(),
2951                 cmThread()->vtime_mark_accum());
2952 }
2953 
2954 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2955   _parallel_workers->print_worker_threads_on(st);
2956 }
2957 
2958 void ConcurrentMark::print_on_error(outputStream* st) const {
2959   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2960       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
2961   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
2962   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
2963 }
2964 
2965 // We take a break if someone is trying to stop the world.
2966 bool ConcurrentMark::do_yield_check(uint worker_id) {
2967   if (SuspendibleThreadSet::should_yield()) {
2968     if (worker_id == 0) {
2969       _g1h->g1_policy()->record_concurrent_pause();
2970     }
2971     SuspendibleThreadSet::yield();
2972     return true;
2973   } else {
2974     return false;
2975   }
2976 }
2977 
2978 #ifndef PRODUCT
2979 // for debugging purposes
2980 void ConcurrentMark::print_finger() {
2981   gclog_or_tty->print_cr("heap [" PTR_FORMAT ", " PTR_FORMAT "), global finger = " PTR_FORMAT,
2982                          p2i(_heap_start), p2i(_heap_end), p2i(_finger));
2983   for (uint i = 0; i < _max_worker_id; ++i) {
2984     gclog_or_tty->print("   %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
2985   }
2986   gclog_or_tty->cr();
2987 }
2988 #endif
2989 
2990 // Closure for iteration over bitmaps
2991 class CMBitMapClosure : public BitMapClosure {
2992 private:
2993   // the bitmap that is being iterated over
2994   CMBitMap*                   _nextMarkBitMap;
2995   ConcurrentMark*             _cm;
2996   CMTask*                     _task;
2997 
2998 public:
2999   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3000     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3001 
3002   bool do_bit(size_t offset) {
3003     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3004     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3005     assert(addr < _cm->finger(), "invariant");
3006 
3007     statsOnly( _task->increase_objs_found_on_bitmap() );
3008     assert(addr >= _task->finger(), "invariant");
3009 
3010     // We move this task's local finger along.
3011     _task->move_finger_to(addr);
3012 
3013     _task->scan_object(oop(addr));
3014     // we only partially drain the local queue and global stack
3015     _task->drain_local_queue(true);
3016     _task->drain_global_stack(true);
3017 
3018     // if the has_aborted flag has been raised, we need to bail out of
3019     // the iteration
3020     return !_task->has_aborted();
3021   }
3022 };
3023 
3024 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
3025   ReferenceProcessor* result = NULL;
3026   if (G1UseConcMarkReferenceProcessing) {
3027     result = g1h->ref_processor_cm();
3028     assert(result != NULL, "should not be NULL");
3029   }
3030   return result;
3031 }
3032 
3033 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3034                                ConcurrentMark* cm,
3035                                CMTask* task)
3036   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
3037     _g1h(g1h), _cm(cm), _task(task)
3038 { }
3039 
3040 void CMTask::setup_for_region(HeapRegion* hr) {
3041   assert(hr != NULL,
3042         "claim_region() should have filtered out NULL regions");
3043 
3044   if (_cm->verbose_low()) {
3045     gclog_or_tty->print_cr("[%u] setting up for region " PTR_FORMAT,
3046                            _worker_id, p2i(hr));
3047   }
3048 
3049   _curr_region  = hr;
3050   _finger       = hr->bottom();
3051   update_region_limit();
3052 }
3053 
3054 void CMTask::update_region_limit() {
3055   HeapRegion* hr            = _curr_region;
3056   HeapWord* bottom          = hr->bottom();
3057   HeapWord* limit           = hr->next_top_at_mark_start();
3058 
3059   if (limit == bottom) {
3060     if (_cm->verbose_low()) {
3061       gclog_or_tty->print_cr("[%u] found an empty region "
3062                              "[" PTR_FORMAT ", " PTR_FORMAT ")",
3063                              _worker_id, p2i(bottom), p2i(limit));
3064     }
3065     // The region was collected underneath our feet.
3066     // We set the finger to bottom to ensure that the bitmap
3067     // iteration that will follow this will not do anything.
3068     // (this is not a condition that holds when we set the region up,
3069     // as the region is not supposed to be empty in the first place)
3070     _finger = bottom;
3071   } else if (limit >= _region_limit) {
3072     assert(limit >= _finger, "peace of mind");
3073   } else {
3074     assert(limit < _region_limit, "only way to get here");
3075     // This can happen under some pretty unusual circumstances.  An
3076     // evacuation pause empties the region underneath our feet (NTAMS
3077     // at bottom). We then do some allocation in the region (NTAMS
3078     // stays at bottom), followed by the region being used as a GC
3079     // alloc region (NTAMS will move to top() and the objects
3080     // originally below it will be grayed). All objects now marked in
3081     // the region are explicitly grayed, if below the global finger,
3082     // and in fact we do not need to scan anything else. So, we simply
3083     // set _finger to limit to ensure that the bitmap iteration
3084     // doesn't do anything.
3085     _finger = limit;
3086   }
3087 
3088   _region_limit = limit;
3089 }
3090 
3091 void CMTask::giveup_current_region() {
3092   assert(_curr_region != NULL, "invariant");
3093   if (_cm->verbose_low()) {
3094     gclog_or_tty->print_cr("[%u] giving up region " PTR_FORMAT,
3095                            _worker_id, p2i(_curr_region));
3096   }
3097   clear_region_fields();
3098 }
3099 
3100 void CMTask::clear_region_fields() {
3101   // Set these three fields to values that indicate that we're not
3102   // holding on to a region.
3103   _curr_region   = NULL;
3104   _finger        = NULL;
3105   _region_limit  = NULL;
3106 }
3107 
3108 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3109   if (cm_oop_closure == NULL) {
3110     assert(_cm_oop_closure != NULL, "invariant");
3111   } else {
3112     assert(_cm_oop_closure == NULL, "invariant");
3113   }
3114   _cm_oop_closure = cm_oop_closure;
3115 }
3116 
3117 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3118   guarantee(nextMarkBitMap != NULL, "invariant");
3119 
3120   if (_cm->verbose_low()) {
3121     gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3122   }
3123 
3124   _nextMarkBitMap                = nextMarkBitMap;
3125   clear_region_fields();
3126 
3127   _calls                         = 0;
3128   _elapsed_time_ms               = 0.0;
3129   _termination_time_ms           = 0.0;
3130   _termination_start_time_ms     = 0.0;
3131 
3132 #if _MARKING_STATS_
3133   _aborted                       = 0;
3134   _aborted_overflow              = 0;
3135   _aborted_cm_aborted            = 0;
3136   _aborted_yield                 = 0;
3137   _aborted_timed_out             = 0;
3138   _aborted_satb                  = 0;
3139   _aborted_termination           = 0;
3140   _steal_attempts                = 0;
3141   _steals                        = 0;
3142   _local_pushes                  = 0;
3143   _local_pops                    = 0;
3144   _local_max_size                = 0;
3145   _objs_scanned                  = 0;
3146   _global_pushes                 = 0;
3147   _global_pops                   = 0;
3148   _global_max_size               = 0;
3149   _global_transfers_to           = 0;
3150   _global_transfers_from         = 0;
3151   _regions_claimed               = 0;
3152   _objs_found_on_bitmap          = 0;
3153   _satb_buffers_processed        = 0;
3154 #endif // _MARKING_STATS_
3155 }
3156 
3157 bool CMTask::should_exit_termination() {
3158   regular_clock_call();
3159   // This is called when we are in the termination protocol. We should
3160   // quit if, for some reason, this task wants to abort or the global
3161   // stack is not empty (this means that we can get work from it).
3162   return !_cm->mark_stack_empty() || has_aborted();
3163 }
3164 
3165 void CMTask::reached_limit() {
3166   assert(_words_scanned >= _words_scanned_limit ||
3167          _refs_reached >= _refs_reached_limit,
3168          "shouldn't have been called otherwise");
3169   regular_clock_call();
3170 }
3171 
3172 void CMTask::regular_clock_call() {
3173   if (has_aborted()) return;
3174 
3175   // First, we need to recalculate the words scanned and refs reached
3176   // limits for the next clock call.
3177   recalculate_limits();
3178 
3179   // During the regular clock call we do the following
3180 
3181   // (1) If an overflow has been flagged, then we abort.
3182   if (_cm->has_overflown()) {
3183     set_has_aborted();
3184     return;
3185   }
3186 
3187   // If we are not concurrent (i.e. we're doing remark) we don't need
3188   // to check anything else. The other steps are only needed during
3189   // the concurrent marking phase.
3190   if (!concurrent()) return;
3191 
3192   // (2) If marking has been aborted for Full GC, then we also abort.
3193   if (_cm->has_aborted()) {
3194     set_has_aborted();
3195     statsOnly( ++_aborted_cm_aborted );
3196     return;
3197   }
3198 
3199   double curr_time_ms = os::elapsedVTime() * 1000.0;
3200 
3201   // (3) If marking stats are enabled, then we update the step history.
3202 #if _MARKING_STATS_
3203   if (_words_scanned >= _words_scanned_limit) {
3204     ++_clock_due_to_scanning;
3205   }
3206   if (_refs_reached >= _refs_reached_limit) {
3207     ++_clock_due_to_marking;
3208   }
3209 
3210   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3211   _interval_start_time_ms = curr_time_ms;
3212   _all_clock_intervals_ms.add(last_interval_ms);
3213 
3214   if (_cm->verbose_medium()) {
3215       gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3216                         "scanned = " SIZE_FORMAT "%s, refs reached = " SIZE_FORMAT "%s",
3217                         _worker_id, last_interval_ms,
3218                         _words_scanned,
3219                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3220                         _refs_reached,
3221                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3222   }
3223 #endif // _MARKING_STATS_
3224 
3225   // (4) We check whether we should yield. If we have to, then we abort.
3226   if (SuspendibleThreadSet::should_yield()) {
3227     // We should yield. To do this we abort the task. The caller is
3228     // responsible for yielding.
3229     set_has_aborted();
3230     statsOnly( ++_aborted_yield );
3231     return;
3232   }
3233 
3234   // (5) We check whether we've reached our time quota. If we have,
3235   // then we abort.
3236   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3237   if (elapsed_time_ms > _time_target_ms) {
3238     set_has_aborted();
3239     _has_timed_out = true;
3240     statsOnly( ++_aborted_timed_out );
3241     return;
3242   }
3243 
3244   // (6) Finally, we check whether there are enough completed SATB
3245   // buffers available for processing. If there are, we abort.
3246   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3247   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3248     if (_cm->verbose_low()) {
3249       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3250                              _worker_id);
3251     }
3252     // we do need to process SATB buffers, we'll abort and restart
3253     // the marking task to do so
3254     set_has_aborted();
3255     statsOnly( ++_aborted_satb );
3256     return;
3257   }
3258 }
3259 
3260 void CMTask::recalculate_limits() {
3261   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3262   _words_scanned_limit      = _real_words_scanned_limit;
3263 
3264   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3265   _refs_reached_limit       = _real_refs_reached_limit;
3266 }
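     // In other words, after each clock call the task may scan roughly
     // another words_scanned_period words (or visit refs_reached_period
     // references) before the next clock call is triggered. A rough sketch
     // of the triggering check (the real checks live in the scanning code;
     // see also reached_limit() above):
     //
     //   if (_words_scanned >= _words_scanned_limit ||
     //       _refs_reached  >= _refs_reached_limit) {
     //     reached_limit();  // which calls regular_clock_call()
     //   }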
3267 
3268 void CMTask::decrease_limits() {
3269   // This is called when we believe that we're going to do an infrequent
3270   // operation which will increase the per-byte scanned cost (i.e. move
3271   // entries to/from the global stack). It basically tries to decrease the
3272   // scanning limits so that the clock is called earlier.
3273 
3274   if (_cm->verbose_medium()) {
3275     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3276   }
3277 
3278   _words_scanned_limit = _real_words_scanned_limit -
3279     3 * words_scanned_period / 4;
3280   _refs_reached_limit  = _real_refs_reached_limit -
3281     3 * refs_reached_period / 4;
3282 }
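     // Rough arithmetic: if the limits had just been recalculated, pulling
     // them back by 3/4 of a period leaves at most about a quarter of a
     // period's worth of scanning before the next clock call, so the clock
     // fires sooner after one of these expensive operations. (Illustrative
     // reasoning only.)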
3283 
3284 void CMTask::move_entries_to_global_stack() {
3285   // local array where we'll store the entries that will be popped
3286   // from the local queue
3287   oop buffer[global_stack_transfer_size];
3288 
3289   int n = 0;
3290   oop obj;
3291   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3292     buffer[n] = obj;
3293     ++n;
3294   }
3295 
3296   if (n > 0) {
3297     // we popped at least one entry from the local queue
3298 
3299     statsOnly( ++_global_transfers_to; _local_pops += n );
3300 
3301     if (!_cm->mark_stack_push(buffer, n)) {
3302       if (_cm->verbose_low()) {
3303         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3304                                _worker_id);
3305       }
3306       set_has_aborted();
3307     } else {
3308       // the transfer was successful
3309 
3310       if (_cm->verbose_medium()) {
3311         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3312                                _worker_id, n);
3313       }
3314       statsOnly( size_t tmp_size = _cm->mark_stack_size();
3315                  if (tmp_size > _global_max_size) {
3316                    _global_max_size = tmp_size;
3317                  }
3318                  _global_pushes += n );
3319     }
3320   }
3321 
3322   // this operation was quite expensive, so decrease the limits
3323   decrease_limits();
3324 }
3325 
3326 void CMTask::get_entries_from_global_stack() {
3327   // local array where we'll store the entries that will be popped
3328   // from the global stack.
3329   oop buffer[global_stack_transfer_size];
3330   int n;
3331   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3332   assert(n <= global_stack_transfer_size,
3333          "we should not pop more than the given limit");
3334   if (n > 0) {
3335     // yes, we did actually pop at least one entry
3336 
3337     statsOnly( ++_global_transfers_from; _global_pops += n );
3338     if (_cm->verbose_medium()) {
3339       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3340                              _worker_id, n);
3341     }
3342     for (int i = 0; i < n; ++i) {
3343       bool success = _task_queue->push(buffer[i]);
3344       // We only call this when the local queue is empty or under a
3345       // given target limit. So, we do not expect this push to fail.
3346       assert(success, "invariant");
3347     }
3348 
3349     statsOnly( size_t tmp_size = (size_t)_task_queue->size();
3350                if (tmp_size > _local_max_size) {
3351                  _local_max_size = tmp_size;
3352                }
3353                _local_pushes += n );
3354   }
3355 
3356   // this operation was quite expensive, so decrease the limits
3357   decrease_limits();
3358 }
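     // Note: both transfer directions above move at most
     // global_stack_transfer_size entries at a time. As the design comment
     // before do_marking_step() explains, the global mark stack is only
     // touched in such batches to limit fine-grained synchronization on the
     // shared structure.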
3359 
3360 void CMTask::drain_local_queue(bool partially) {
3361   if (has_aborted()) return;
3362 
3363   // Decide what the target size is, depending on whether we're going to
3364   // drain it partially (so that other tasks can steal if they run out
3365   // of things to do) or totally (at the very end).
3366   size_t target_size;
3367   if (partially) {
3368     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3369   } else {
3370     target_size = 0;
3371   }
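       // For example, if _task_queue->max_elems() were 16384 and
       // GCDrainStackTargetSize were 64, a partial drain would stop once the
       // queue drops to 64 entries, leaving some entries available for other
       // tasks to steal. (Hypothetical numbers, for illustration only.)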
3372 
3373   if (_task_queue->size() > target_size) {
3374     if (_cm->verbose_high()) {
3375       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3376                              _worker_id, target_size);
3377     }
3378 
3379     oop obj;
3380     bool ret = _task_queue->pop_local(obj);
3381     while (ret) {
3382       statsOnly( ++_local_pops );
3383 
3384       if (_cm->verbose_high()) {
3385         gclog_or_tty->print_cr("[%u] popped " PTR_FORMAT, _worker_id,
3386                                p2i((void*) obj));
3387       }
3388 
3389       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3390       assert(!_g1h->is_on_master_free_list(
3391                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3392 
3393       scan_object(obj);
3394 
3395       if (_task_queue->size() <= target_size || has_aborted()) {
3396         ret = false;
3397       } else {
3398         ret = _task_queue->pop_local(obj);
3399       }
3400     }
3401 
3402     if (_cm->verbose_high()) {
3403       gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3404                              _worker_id, _task_queue->size());
3405     }
3406   }
3407 }
3408 
3409 void CMTask::drain_global_stack(bool partially) {
3410   if (has_aborted()) return;
3411 
3412   // We have a policy to drain the local queue before we attempt to
3413   // drain the global stack.
3414   assert(partially || _task_queue->size() == 0, "invariant");
3415 
3416   // Decide what the target size is, depending on whether we're going to
3417   // drain it partially (so that other tasks can steal if they run out
3418   // of things to do) or totally (at the very end). Notice that,
3419   // because we move entries from the global stack in chunks, or
3420   // because another task might be doing the same, we might in fact
3421   // drop below the target. But this is not a problem.
3422   size_t target_size;
3423   if (partially) {
3424     target_size = _cm->partial_mark_stack_size_target();
3425   } else {
3426     target_size = 0;
3427   }
3428 
3429   if (_cm->mark_stack_size() > target_size) {
3430     if (_cm->verbose_low()) {
3431       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3432                              _worker_id, target_size);
3433     }
3434 
3435     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3436       get_entries_from_global_stack();
3437       drain_local_queue(partially);
3438     }
3439 
3440     if (_cm->verbose_low()) {
3441       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3442                              _worker_id, _cm->mark_stack_size());
3443     }
3444   }
3445 }
3446 
3447 // The SATB queue has several assumptions on whether to call the par or
3448 // non-par versions of the methods. This is why some of the code is
3449 // replicated. We should really get rid of the single-threaded version
3450 // of the code to simplify things.
3451 void CMTask::drain_satb_buffers() {
3452   if (has_aborted()) return;
3453 
3454   // We set this so that the regular clock knows that we're in the
3455   // middle of draining buffers and doesn't set the abort flag when it
3456   // notices that SATB buffers are available for draining. It'd be
3457   // very counterproductive if it did that. :-)
3458   _draining_satb_buffers = true;
3459 
3460   CMSATBBufferClosure satb_cl(this, _g1h);
3461   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3462 
3463   // This keeps claiming and applying the closure to completed buffers
3464   // until we run out of buffers or we need to abort.
3465   while (!has_aborted() &&
3466          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
3467     if (_cm->verbose_medium()) {
3468       gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
3469     }
3470     statsOnly( ++_satb_buffers_processed );
3471     regular_clock_call();
3472   }
3473 
3474   _draining_satb_buffers = false;
3475 
3476   assert(has_aborted() ||
3477          concurrent() ||
3478          satb_mq_set.completed_buffers_num() == 0, "invariant");
3479 
3480   // again, this was a potentially expensive operation, decrease the
3481   // limits to get the regular clock call early
3482   decrease_limits();
3483 }
3484 
3485 void CMTask::print_stats() {
3486   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
3487                          _worker_id, _calls);
3488   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3489                          _elapsed_time_ms, _termination_time_ms);
3490   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3491                          _step_times_ms.num(), _step_times_ms.avg(),
3492                          _step_times_ms.sd());
3493   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3494                          _step_times_ms.maximum(), _step_times_ms.sum());
3495 
3496 #if _MARKING_STATS_
3497   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3498                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3499                          _all_clock_intervals_ms.sd());
3500   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3501                          _all_clock_intervals_ms.maximum(),
3502                          _all_clock_intervals_ms.sum());
3503   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = " SIZE_FORMAT ", marking = " SIZE_FORMAT,
3504                          _clock_due_to_scanning, _clock_due_to_marking);
3505   gclog_or_tty->print_cr("  Objects: scanned = " SIZE_FORMAT ", found on the bitmap = " SIZE_FORMAT,
3506                          _objs_scanned, _objs_found_on_bitmap);
3507   gclog_or_tty->print_cr("  Local Queue:  pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT,
3508                          _local_pushes, _local_pops, _local_max_size);
3509   gclog_or_tty->print_cr("  Global Stack: pushes = " SIZE_FORMAT ", pops = " SIZE_FORMAT ", max size = " SIZE_FORMAT,
3510                          _global_pushes, _global_pops, _global_max_size);
3511   gclog_or_tty->print_cr("                transfers to = " SIZE_FORMAT ", transfers from = " SIZE_FORMAT,
3512                          _global_transfers_to, _global_transfers_from);
3513   gclog_or_tty->print_cr("  Regions: claimed = " SIZE_FORMAT, _regions_claimed);
3514   gclog_or_tty->print_cr("  SATB buffers: processed = " SIZE_FORMAT, _satb_buffers_processed);
3515   gclog_or_tty->print_cr("  Steals: attempts = " SIZE_FORMAT ", successes = " SIZE_FORMAT,
3516                          _steal_attempts, _steals);
3517   gclog_or_tty->print_cr("  Aborted: " SIZE_FORMAT ", due to", _aborted);
3518   gclog_or_tty->print_cr("    overflow: " SIZE_FORMAT ", global abort: " SIZE_FORMAT ", yield: " SIZE_FORMAT,
3519                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3520   gclog_or_tty->print_cr("    time out: " SIZE_FORMAT ", SATB: " SIZE_FORMAT ", termination: " SIZE_FORMAT,
3521                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3522 #endif // _MARKING_STATS_
3523 }
3524 
3525 bool ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
3526   return _task_queues->steal(worker_id, hash_seed, obj);
3527 }
3528 
3529 /*****************************************************************************
3530 
3531     The do_marking_step(time_target_ms, ...) method is the building
3532     block of the parallel marking framework. It can be called in parallel
3533     with other invocations of do_marking_step() on different tasks
3534     (but only one per task, obviously) and concurrently with the
3535     mutator threads, or during remark, hence it eliminates the need
3536     for two versions of the code. When called during remark, it will
3537     pick up from where the task left off during the concurrent marking
3538     phase. Interestingly, tasks are also claimable during evacuation
3539     pauses, since do_marking_step() ensures that it aborts before
3540     it needs to yield.
3541 
3542     The data structures that it uses to do marking work are the
3543     following:
3544 
3545       (1) Marking Bitmap. If there are gray objects that appear only
3546       on the bitmap (this happens either when dealing with an overflow
3547       or when the initial marking phase has simply marked the roots
3548       and didn't push them on the stack), then tasks claim heap
3549       regions whose bitmap they then scan to find gray objects. A
3550       global finger indicates where the end of the last claimed region
3551       is. A local finger indicates how far into the region a task has
3552       scanned. The two fingers are used to determine how to gray an
3553       object (i.e. whether simply marking it is OK, as it will be
3554       visited by a task in the future, or whether it also needs to be
3555       pushed on a stack).
3556 
3557       (2) Local Queue. The local queue of the task which is accessed
3558       reasonably efficiently by the task. Other tasks can steal from
3559       it when they run out of work. Throughout the marking phase, a
3560       task attempts to keep its local queue short but not totally
3561       empty, so that entries are available for stealing by other
3562       tasks. Only when there is no more work will a task totally
3563       drain its local queue.
3564 
3565       (3) Global Mark Stack. This handles local queue overflow. During
3566       marking only sets of entries are moved between it and the local
3567       queues, as access to it requires a mutex and more fine-grained
3568       interaction with it might cause contention. If it
3569       overflows, then the marking phase should restart and iterate
3570       over the bitmap to identify gray objects. Throughout the marking
3571       phase, tasks attempt to keep the global mark stack at a small
3572       length but not totally empty, so that entries are available for
3573       popping by other tasks. Only when there is no more work will
3574       tasks totally drain the global mark stack.
3575 
3576       (4) SATB Buffer Queue. This is where completed SATB buffers are
3577       made available. Buffers are regularly removed from this queue
3578       and scanned for roots, so that the queue doesn't get too
3579       long. During remark, all completed buffers are processed, as
3580       well as the filled in parts of any uncompleted buffers.
3581 
3582     The do_marking_step() method tries to abort when the time target
3583     has been reached. There are a few other cases when the
3584     do_marking_step() method also aborts:
3585 
3586       (1) When the marking phase has been aborted (after a Full GC).
3587 
3588       (2) When a global overflow (on the global stack) has been
3589       triggered. Before the task aborts, it will actually sync up with
3590       the other tasks to ensure that all the marking data structures
3591       (local queues, stacks, fingers etc.)  are re-initialized so that
3592       when do_marking_step() completes, the marking phase can
3593       immediately restart.
3594 
3595       (3) When enough completed SATB buffers are available. The
3596       do_marking_step() method only tries to drain SATB buffers right
3597       at the beginning. So, if enough buffers are available, the
3598       marking step aborts and the SATB buffers are processed at
3599       the beginning of the next invocation.
3600 
3601       (4) To yield. When we have to yield, we abort and yield
3602       right at the end of do_marking_step(). This saves us a lot
3603       of hassle as, by yielding, we might allow a Full GC. If this
3604       happens then objects will be compacted underneath our feet, the
3605       heap might shrink, etc. We avoid checking for this by just
3606       aborting and doing the yield right at the end.
3607 
3608     From the above it follows that the do_marking_step() method should
3609     be called in a loop (or, otherwise, regularly) until it completes.
3610 
3611     If a marking step completes without its has_aborted() flag being
3612     true, it means it has completed the current marking phase (and
3613     also all other marking tasks have done so and have all synced up).
3614 
3615     A method called regular_clock_call() is invoked "regularly" (at
3616     sub-ms intervals) throughout marking. It is this clock method that
3617     checks all the abort conditions which were mentioned above and
3618     decides when the task should abort. A work-based scheme is used to
3619     trigger this clock method: when the number of object words the
3620     marking phase has scanned or the number of references the marking
3621     phase has visited reaches a given limit. Additional invocations of
3622     the clock method have been planted in a few other strategic places
3623     too. The initial reason for the clock method was to avoid calling
3624     vtime too regularly, as it is quite expensive. So, once it was in
3625     place, it was natural to piggy-back all the other conditions on it
3626     too and not constantly check them throughout the code.
3627 
3628     If do_termination is true then do_marking_step will enter its
3629     termination protocol.
3630 
3631     The value of is_serial must be true when do_marking_step is being
3632     called serially (i.e. by the VMThread) and do_marking_step should
3633     skip any synchronization in the termination and overflow code.
3634     Examples include the serial remark code and the serial reference
3635     processing closures.
3636 
3637     The value of is_serial must be false when do_marking_step is
3638     being called by any of the worker threads in a work gang.
3639     Examples include the concurrent marking code (CMMarkingTask),
3640     the MT remark code, and the MT reference processing closures.
3641 
3642  *****************************************************************************/
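     // A minimal sketch of the driver loop implied by the description above
     // (illustrative only; the actual callers are the concurrent marking,
     // remark and reference processing code):
     //
     //   do {
     //     task->do_marking_step(target_ms,
     //                           true  /* do_termination */,
     //                           false /* is_serial */);
     //     // yield / handle overflow between steps as needed
     //   } while (task->has_aborted());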
3643 
3644 void CMTask::do_marking_step(double time_target_ms,
3645                              bool do_termination,
3646                              bool is_serial) {
3647   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3648   assert(concurrent() == _cm->concurrent(), "they should be the same");
3649 
3650   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3651   assert(_task_queues != NULL, "invariant");
3652   assert(_task_queue != NULL, "invariant");
3653   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3654 
3655   assert(!_claimed,
3656          "only one thread should claim this task at any one time");
3657 
3658   // OK, this doesn't safeguard against all possible scenarios, as it is
3659   // possible for two threads to set the _claimed flag at the same
3660   // time. But it is only for debugging purposes anyway and it will
3661   // catch most problems.
3662   _claimed = true;
3663 
3664   _start_time_ms = os::elapsedVTime() * 1000.0;
3665   statsOnly( _interval_start_time_ms = _start_time_ms );
3666 
3667   // If do_stealing is true then do_marking_step will attempt to
3668   // steal work from the other CMTasks. It only makes sense to
3669   // enable stealing when the termination protocol is enabled
3670   // and do_marking_step() is not being called serially.
3671   bool do_stealing = do_termination && !is_serial;
3672 
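       // diff_prediction_ms estimates how much a marking step typically
       // overshoots its time target (see the _marking_step_diffs_ms update
       // near the end of this method), so we aim for a slightly smaller
       // target to compensate.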
3673   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
3674   _time_target_ms = time_target_ms - diff_prediction_ms;
3675 
3676   // set up the variables that are used in the work-based scheme to
3677   // call the regular clock method
3678   _words_scanned = 0;
3679   _refs_reached  = 0;
3680   recalculate_limits();
3681 
3682   // clear all flags
3683   clear_has_aborted();
3684   _has_timed_out = false;
3685   _draining_satb_buffers = false;
3686 
3687   ++_calls;
3688 
3689   if (_cm->verbose_low()) {
3690     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
3691                            "target = %1.2lfms >>>>>>>>>>",
3692                            _worker_id, _calls, _time_target_ms);
3693   }
3694 
3695   // Set up the bitmap and oop closures. Anything that uses them is
3696   // eventually called from this method, so it is OK to allocate these
3697   // statically.
3698   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3699   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3700   set_cm_oop_closure(&cm_oop_closure);
3701 
3702   if (_cm->has_overflown()) {
3703     // This can happen if the mark stack overflows during a GC pause
3704     // and this task, after a yield point, restarts. We have to abort
3705     // as we need to get into the overflow protocol which happens
3706     // right at the end of this task.
3707     set_has_aborted();
3708   }
3709 
3710   // First drain any available SATB buffers. After this, we will not
3711   // look at SATB buffers before the next invocation of this method.
3712   // If enough completed SATB buffers are queued up, the regular clock
3713   // will abort this task so that it restarts.
3714   drain_satb_buffers();
3715   // ...then partially drain the local queue and the global stack
3716   drain_local_queue(true);
3717   drain_global_stack(true);
3718 
3719   do {
3720     if (!has_aborted() && _curr_region != NULL) {
3721       // This means that we're already holding on to a region.
3722       assert(_finger != NULL, "if region is not NULL, then the finger "
3723              "should not be NULL either");
3724 
3725       // We might have restarted this task after an evacuation pause
3726       // which might have evacuated the region we're holding on to
3727       // underneath our feet. Let's read its limit again to make sure
3728       // that we do not iterate over a region of the heap that
3729       // contains garbage (update_region_limit() will also move
3730       // _finger to the start of the region if it is found empty).
3731       update_region_limit();
3732       // We will start from _finger not from the start of the region,
3733       // as we might be restarting this task after aborting half-way
3734       // through scanning this region. In this case, _finger points to
3735       // the address where we last found a marked object. If this is a
3736       // fresh region, _finger points to start().
3737       MemRegion mr = MemRegion(_finger, _region_limit);
3738 
3739       if (_cm->verbose_low()) {
3740         gclog_or_tty->print_cr("[%u] we're scanning part "
3741                                "[" PTR_FORMAT ", " PTR_FORMAT ") "
3742                                "of region " HR_FORMAT,
3743                                _worker_id, p2i(_finger), p2i(_region_limit),
3744                                HR_FORMAT_PARAMS(_curr_region));
3745       }
3746 
3747       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
3748              "humongous regions should go around loop once only");
3749 
3750       // Some special cases:
3751       // If the memory region is empty, we can just give up the region.
3752       // If the current region is humongous then we only need to check
3753       // the bitmap for the bit associated with the start of the object,
3754       // scan the object if it's live, and give up the region.
3755       // Otherwise, let's iterate over the bitmap of the part of the region
3756       // that is left.
3757       // If the iteration is successful, give up the region.
3758       if (mr.is_empty()) {
3759         giveup_current_region();
3760         regular_clock_call();
3761       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
3762         if (_nextMarkBitMap->isMarked(mr.start())) {
3763           // The object is marked - apply the closure
3764           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
3765           bitmap_closure.do_bit(offset);
3766         }
3767         // Even if this task aborted while scanning the humongous object
3768         // we can (and should) give up the current region.
3769         giveup_current_region();
3770         regular_clock_call();
3771       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3772         giveup_current_region();
3773         regular_clock_call();
3774       } else {
3775         assert(has_aborted(), "currently the only way to do so");
3776         // The only way to abort the bitmap iteration is to return
3777         // false from the do_bit() method. However, inside the
3778         // do_bit() method we move the _finger to point to the
3779         // object currently being looked at. So, if we bail out, we
3780         // have definitely set _finger to something non-null.
3781         assert(_finger != NULL, "invariant");
3782 
3783         // Region iteration was actually aborted. So now _finger
3784         // points to the address of the object we last scanned. If we
3785         // leave it there, when we restart this task, we will rescan
3786         // the object. It is easy to avoid this. We move the finger by
3787         // enough to point to the next possible object header (the
3788         // bitmap knows by how much we need to move it as it knows its
3789         // granularity).
3790         assert(_finger < _region_limit, "invariant");
3791         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
3792         // Check if bitmap iteration was aborted while scanning the last object
3793         if (new_finger >= _region_limit) {
3794           giveup_current_region();
3795         } else {
3796           move_finger_to(new_finger);
3797         }
3798       }
3799     }
3800     // At this point we have either completed iterating over the
3801     // region we were holding on to, or we have aborted.
3802 
3803     // We then partially drain the local queue and the global stack.
3804     // (Do we really need this?)
3805     drain_local_queue(true);
3806     drain_global_stack(true);
3807 
3808     // Read the note on the claim_region() method on why it might
3809     // return NULL with potentially more regions available for
3810     // claiming and why we have to check out_of_regions() to determine
3811     // whether we're done or not.
3812     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3813       // We are going to try to claim a new region. We should have
3814       // given up on the previous one.
3815       // Separated the asserts so that we know which one fires.
3816       assert(_curr_region  == NULL, "invariant");
3817       assert(_finger       == NULL, "invariant");
3818       assert(_region_limit == NULL, "invariant");
3819       if (_cm->verbose_low()) {
3820         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
3821       }
3822       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3823       if (claimed_region != NULL) {
3824         // Yes, we managed to claim one
3825         statsOnly( ++_regions_claimed );
3826 
3827         if (_cm->verbose_low()) {
3828           gclog_or_tty->print_cr("[%u] we successfully claimed "
3829                                  "region " PTR_FORMAT,
3830                                  _worker_id, p2i(claimed_region));
3831         }
3832 
3833         setup_for_region(claimed_region);
3834         assert(_curr_region == claimed_region, "invariant");
3835       }
3836       // It is important to call the regular clock here. It might take
3837       // a while to claim a region if, for example, we hit a large
3838       // block of empty regions. So we need to call the regular clock
3839       // method once round the loop to make sure it's called
3840       // frequently enough.
3841       regular_clock_call();
3842     }
3843 
3844     if (!has_aborted() && _curr_region == NULL) {
3845       assert(_cm->out_of_regions(),
3846              "at this point we should be out of regions");
3847     }
3848   } while ( _curr_region != NULL && !has_aborted());
3849 
3850   if (!has_aborted()) {
3851     // We cannot check whether the global stack is empty, since other
3852     // tasks might be pushing objects to it concurrently.
3853     assert(_cm->out_of_regions(),
3854            "at this point we should be out of regions");
3855 
3856     if (_cm->verbose_low()) {
3857       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
3858     }
3859 
3860     // Try to reduce the number of available SATB buffers so that
3861     // remark has less work to do.
3862     drain_satb_buffers();
3863   }
3864 
3865   // Since we've done everything else, we can now totally drain the
3866   // local queue and global stack.
3867   drain_local_queue(false);
3868   drain_global_stack(false);
3869 
3870   // Attempt at work stealing from other tasks' queues.
3871   if (do_stealing && !has_aborted()) {
3872     // We have not aborted. This means that we have finished all that
3873     // we could. Let's try to do some stealing...
3874 
3875     // We cannot check whether the global stack is empty, since other
3876     // tasks might be pushing objects to it concurrently.
3877     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3878            "only way to reach here");
3879 
3880     if (_cm->verbose_low()) {
3881       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
3882     }
3883 
3884     while (!has_aborted()) {
3885       oop obj;
3886       statsOnly( ++_steal_attempts );
3887 
3888       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
3889         if (_cm->verbose_medium()) {
3890           gclog_or_tty->print_cr("[%u] stolen " PTR_FORMAT " successfully",
3891                                  _worker_id, p2i((void*) obj));
3892         }
3893 
3894         statsOnly( ++_steals );
3895 
3896         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3897                "any stolen object should be marked");
3898         scan_object(obj);
3899 
3900         // And since we're towards the end, let's totally drain the
3901         // local queue and global stack.
3902         drain_local_queue(false);
3903         drain_global_stack(false);
3904       } else {
3905         break;
3906       }
3907     }
3908   }
3909 
3910   // If we are about to wrap up and go into termination, check if we
3911   // should raise the overflow flag.
3912   if (do_termination && !has_aborted()) {
3913     if (_cm->force_overflow()->should_force()) {
3914       _cm->set_has_overflown();
3915       regular_clock_call();
3916     }
3917   }
3918 
3919   // We still haven't aborted. Now, let's try to get into the
3920   // termination protocol.
3921   if (do_termination && !has_aborted()) {
3922     // We cannot check whether the global stack is empty, since other
3923     // tasks might be concurrently pushing objects on it.
3924     // Separated the asserts so that we know which one fires.
3925     assert(_cm->out_of_regions(), "only way to reach here");
3926     assert(_task_queue->size() == 0, "only way to reach here");
3927 
3928     if (_cm->verbose_low()) {
3929       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
3930     }
3931 
3932     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3933 
3934     // The CMTask class also extends the TerminatorTerminator class,
3935     // hence its should_exit_termination() method will also decide
3936     // whether to exit the termination protocol or not.
3937     bool finished = (is_serial ||
3938                      _cm->terminator()->offer_termination(this));
3939     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3940     _termination_time_ms +=
3941       termination_end_time_ms - _termination_start_time_ms;
3942 
3943     if (finished) {
3944       // We're all done.
3945 
3946       if (_worker_id == 0) {
3947         // let's allow task 0 to do this
3948         if (concurrent()) {
3949           assert(_cm->concurrent_marking_in_progress(), "invariant");
3950           // we need to set this to false before the next
3951           // safepoint. This way we ensure that the marking phase
3952           // doesn't observe any more heap expansions.
3953           _cm->clear_concurrent_marking_in_progress();
3954         }
3955       }
3956 
3957       // We can now guarantee that the global stack is empty, since
3958       // all other tasks have finished. We separated the guarantees so
3959       // that, if a condition is false, we can immediately find out
3960       // which one.
3961       guarantee(_cm->out_of_regions(), "only way to reach here");
3962       guarantee(_cm->mark_stack_empty(), "only way to reach here");
3963       guarantee(_task_queue->size() == 0, "only way to reach here");
3964       guarantee(!_cm->has_overflown(), "only way to reach here");
3965       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3966 
3967       if (_cm->verbose_low()) {
3968         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
3969       }
3970     } else {
3971       // Apparently there's more work to do. Let's abort this task. It
3972       // will restart it and we can hopefully find more things to do.
3973 
3974       if (_cm->verbose_low()) {
3975         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
3976                                _worker_id);
3977       }
3978 
3979       set_has_aborted();
3980       statsOnly( ++_aborted_termination );
3981     }
3982   }
3983 
3984   // Mainly for debugging purposes to make sure that a pointer to the
3985   // closure which was statically allocated in this frame doesn't
3986   // escape it by accident.
3987   set_cm_oop_closure(NULL);
3988   double end_time_ms = os::elapsedVTime() * 1000.0;
3989   double elapsed_time_ms = end_time_ms - _start_time_ms;
3990   // Update the step history.
3991   _step_times_ms.add(elapsed_time_ms);
3992 
3993   if (has_aborted()) {
3994     // The task was aborted for some reason.
3995 
3996     statsOnly( ++_aborted );
3997 
3998     if (_has_timed_out) {
3999       double diff_ms = elapsed_time_ms - _time_target_ms;
4000       // Keep statistics of how well we did with respect to hitting
4001       // our target only if we actually timed out (if we aborted for
4002       // other reasons, then the results might get skewed).
4003       _marking_step_diffs_ms.add(diff_ms);
4004     }
4005 
4006     if (_cm->has_overflown()) {
4007       // This is the interesting one. We aborted because a global
4008       // overflow was raised. This means we have to restart the
4009       // marking phase and start iterating over regions. However, in
4010       // order to do this we have to make sure that all tasks stop
4011       // what they are doing and re-initialize in a safe manner. We
4012       // will achieve this with the use of two barrier sync points.
4013 
4014       if (_cm->verbose_low()) {
4015         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4016       }
4017 
4018       if (!is_serial) {
4019         // We only need to enter the sync barrier if being called
4020         // from a parallel context
4021         _cm->enter_first_sync_barrier(_worker_id);
4022 
4023         // When we exit this sync barrier we know that all tasks have
4024         // stopped doing marking work. So, it's now safe to
4025         // re-initialize our data structures. At the end of this method,
4026         // task 0 will clear the global data structures.
4027       }
4028 
4029       statsOnly( ++_aborted_overflow );
4030 
4031       // We clear the local state of this task...
4032       clear_region_fields();
4033 
4034       if (!is_serial) {
4035         // ...and enter the second barrier.
4036         _cm->enter_second_sync_barrier(_worker_id);
4037       }
4038       // At this point, if we're during the concurrent phase of
4039       // marking, everything has been re-initialized and we're
4040       // ready to restart.
4041     }
4042 
4043     if (_cm->verbose_low()) {
4044       gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4045                              "elapsed = %1.2lfms <<<<<<<<<<",
4046                              _worker_id, _time_target_ms, elapsed_time_ms);
4047       if (_cm->has_aborted()) {
4048         gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4049                                _worker_id);
4050       }
4051     }
4052   } else {
4053     if (_cm->verbose_low()) {
4054       gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4055                              "elapsed = %1.2lfms <<<<<<<<<<",
4056                              _worker_id, _time_target_ms, elapsed_time_ms);
4057     }
4058   }
4059 
4060   _claimed = false;
4061 }
4062 
4063 CMTask::CMTask(uint worker_id,
4064                ConcurrentMark* cm,
4065                size_t* marked_bytes,
4066                BitMap* card_bm,
4067                CMTaskQueue* task_queue,
4068                CMTaskQueueSet* task_queues)
4069   : _g1h(G1CollectedHeap::heap()),
4070     _worker_id(worker_id), _cm(cm),
4071     _claimed(false),
4072     _nextMarkBitMap(NULL), _hash_seed(17),
4073     _task_queue(task_queue),
4074     _task_queues(task_queues),
4075     _cm_oop_closure(NULL),
4076     _marked_bytes_array(marked_bytes),
4077     _card_bm(card_bm) {
4078   guarantee(task_queue != NULL, "invariant");
4079   guarantee(task_queues != NULL, "invariant");
4080 
4081   statsOnly( _clock_due_to_scanning = 0;
4082              _clock_due_to_marking  = 0 );
4083 
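       // Seed the step-time diff history so that the first prediction drawn
       // from it is not based on an empty sample set. (Reading the 0.5 ms
       // value as an arbitrary initial estimate is an assumption; the intent
       // is not documented here.)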
4084   _marking_step_diffs_ms.add(0.5);
4085 }
4086 
4087 // These are formatting macros that are used below to ensure
4088 // consistent formatting. The *_H_* versions are used to format the
4089 // header for a particular value and they should be kept consistent
4090 // with the corresponding macro. Also note that most of the macros add
4091 // the necessary white space (as a prefix) which makes them a bit
4092 // easier to compose.
4093 
4094 // All the output lines are prefixed with this string to be able to
4095 // identify them easily in a large log file.
4096 #define G1PPRL_LINE_PREFIX            "###"
4097 
4098 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
4099 #ifdef _LP64
4100 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4101 #else // _LP64
4102 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4103 #endif // _LP64
4104 
4105 // For per-region info
4106 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4107 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4108 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
4109 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4110 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4111 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4112 
4113 // For summary info
4114 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
4115 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
4116 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
4117 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
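
     // Purely illustrative: composed together, the per-region formats above
     // produce log lines roughly of the shape (addresses and values made up,
     // column widths not to scale):
     //
     //   ###   OLD 0x00000000f0000000-0x00000000f0100000    1048576    524288    262144          12.3       4096          0
     //
     // and the summary formats produce lines such as:
     //
     //   ### SUMMARY  capacity: 512.00 MB  used: 256.00 MB / 50.00 % ...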
4118 
4119 G1PrintRegionLivenessInfoClosure::
4120 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4121   : _out(out),
4122     _total_used_bytes(0), _total_capacity_bytes(0),
4123     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4124     _hum_used_bytes(0), _hum_capacity_bytes(0),
4125     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
4126     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
4127   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4128   MemRegion g1_reserved = g1h->g1_reserved();
4129   double now = os::elapsedTime();
4130 
4131   // Print the header of the output.
4132   _out->cr();
4133   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4134   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4135                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4136                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4137                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4138                  HeapRegion::GrainBytes);
4139   _out->print_cr(G1PPRL_LINE_PREFIX);
4140   _out->print_cr(G1PPRL_LINE_PREFIX
4141                  G1PPRL_TYPE_H_FORMAT
4142                  G1PPRL_ADDR_BASE_H_FORMAT
4143                  G1PPRL_BYTE_H_FORMAT
4144                  G1PPRL_BYTE_H_FORMAT
4145                  G1PPRL_BYTE_H_FORMAT
4146                  G1PPRL_DOUBLE_H_FORMAT
4147                  G1PPRL_BYTE_H_FORMAT
4148                  G1PPRL_BYTE_H_FORMAT,
4149                  "type", "address-range",
4150                  "used", "prev-live", "next-live", "gc-eff",
4151                  "remset", "code-roots");
4152   _out->print_cr(G1PPRL_LINE_PREFIX
4153                  G1PPRL_TYPE_H_FORMAT
4154                  G1PPRL_ADDR_BASE_H_FORMAT
4155                  G1PPRL_BYTE_H_FORMAT
4156                  G1PPRL_BYTE_H_FORMAT
4157                  G1PPRL_BYTE_H_FORMAT
4158                  G1PPRL_DOUBLE_H_FORMAT
4159                  G1PPRL_BYTE_H_FORMAT
4160                  G1PPRL_BYTE_H_FORMAT,
4161                  "", "",
4162                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4163                  "(bytes)", "(bytes)");
4164 }
4165 
4166 // Takes a pointer to one of the _hum_* fields, deduces the corresponding
4167 // value for a region in a humongous region series (either the full region
4168 // size, or whatever is left if the _hum_* field holds less than the region
4169 // size), and updates that _hum_* field accordingly.
4170 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4171   size_t bytes = 0;
4172   // The > 0 check is to deal with the prev and next live bytes which
4173   // could be 0.
4174   if (*hum_bytes > 0) {
4175     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4176     *hum_bytes -= bytes;
4177   }
4178   return bytes;
4179 }
4180 
4181 // It deduces the values for a region in a humongous region series
4182 // from the _hum_* fields and updates those accordingly. It assumes
4183 // that the _hum_* fields have already been set up from the "starts
4184 // humongous" region and that we visit the regions in address order.
4185 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4186                                                      size_t* capacity_bytes,
4187                                                      size_t* prev_live_bytes,
4188                                                      size_t* next_live_bytes) {
4189   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4190   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4191   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4192   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4193   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4194 }
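
     // Worked example (numbers are illustrative): with GrainBytes = 1M, a
     // humongous object occupying 2.5M spans three regions and the full 2.5M
     // is attributed to the "starts humongous" region. doHeapRegion() below
     // seeds _hum_used_bytes with that 2.5M, and the three calls for the
     // series then draw 1M, 1M and 0.5M from it respectively, draining the
     // field back to zero.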
4195 
4196 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4197   const char* type       = r->get_type_str();
4198   HeapWord* bottom       = r->bottom();
4199   HeapWord* end          = r->end();
4200   size_t capacity_bytes  = r->capacity();
4201   size_t used_bytes      = r->used();
4202   size_t prev_live_bytes = r->live_bytes();
4203   size_t next_live_bytes = r->next_live_bytes();
4204   double gc_eff          = r->gc_efficiency();
4205   size_t remset_bytes    = r->rem_set()->mem_size();
4206   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4207 
4208   if (r->is_starts_humongous()) {
4209     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4210            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4211            "they should have been zeroed after the last time we used them");
4212     // Set up the _hum_* fields.
4213     _hum_capacity_bytes  = capacity_bytes;
4214     _hum_used_bytes      = used_bytes;
4215     _hum_prev_live_bytes = prev_live_bytes;
4216     _hum_next_live_bytes = next_live_bytes;
4217     get_hum_bytes(&used_bytes, &capacity_bytes,
4218                   &prev_live_bytes, &next_live_bytes);
4219     end = bottom + HeapRegion::GrainWords;
4220   } else if (r->is_continues_humongous()) {
4221     get_hum_bytes(&used_bytes, &capacity_bytes,
4222                   &prev_live_bytes, &next_live_bytes);
4223     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4224   }
4225 
4226   _total_used_bytes      += used_bytes;
4227   _total_capacity_bytes  += capacity_bytes;
4228   _total_prev_live_bytes += prev_live_bytes;
4229   _total_next_live_bytes += next_live_bytes;
4230   _total_remset_bytes    += remset_bytes;
4231   _total_strong_code_roots_bytes += strong_code_roots_bytes;
4232 
4233   // Print a line for this particular region.
4234   _out->print_cr(G1PPRL_LINE_PREFIX
4235                  G1PPRL_TYPE_FORMAT
4236                  G1PPRL_ADDR_BASE_FORMAT
4237                  G1PPRL_BYTE_FORMAT
4238                  G1PPRL_BYTE_FORMAT
4239                  G1PPRL_BYTE_FORMAT
4240                  G1PPRL_DOUBLE_FORMAT
4241                  G1PPRL_BYTE_FORMAT
4242                  G1PPRL_BYTE_FORMAT,
4243                  type, p2i(bottom), p2i(end),
4244                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4245                  remset_bytes, strong_code_roots_bytes);
4246 
4247   return false;
4248 }
4249 
4250 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4251   // Add static memory usage to the remembered set sizes.
4252   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
4253   // Print the footer of the output.
4254   _out->print_cr(G1PPRL_LINE_PREFIX);
4255   _out->print_cr(G1PPRL_LINE_PREFIX
4256                  " SUMMARY"
4257                  G1PPRL_SUM_MB_FORMAT("capacity")
4258                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4259                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4260                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
4261                  G1PPRL_SUM_MB_FORMAT("remset")
4262                  G1PPRL_SUM_MB_FORMAT("code-roots"),
4263                  bytes_to_mb(_total_capacity_bytes),
4264                  bytes_to_mb(_total_used_bytes),
4265                  perc(_total_used_bytes, _total_capacity_bytes),
4266                  bytes_to_mb(_total_prev_live_bytes),
4267                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4268                  bytes_to_mb(_total_next_live_bytes),
4269                  perc(_total_next_live_bytes, _total_capacity_bytes),
4270                  bytes_to_mb(_total_remset_bytes),
4271                  bytes_to_mb(_total_strong_code_roots_bytes));
4272   _out->cr();
4273 }