1 /*
   2  * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/symbolTable.hpp"
  27 #include "code/codeCache.hpp"
  28 #include "gc_implementation/g1/concurrentMark.inline.hpp"
  29 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  30 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  31 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  32 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
  33 #include "gc_implementation/g1/g1Log.hpp"
  34 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
  35 #include "gc_implementation/g1/g1RemSet.hpp"
  36 #include "gc_implementation/g1/heapRegion.inline.hpp"
  37 #include "gc_implementation/g1/heapRegionManager.inline.hpp"
  38 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  39 #include "gc_implementation/g1/heapRegionSet.inline.hpp"
  40 #include "gc_implementation/shared/vmGCOperations.hpp"
  41 #include "gc_implementation/shared/gcTimer.hpp"
  42 #include "gc_implementation/shared/gcTrace.hpp"
  43 #include "gc_implementation/shared/gcTraceTime.hpp"
  44 #include "memory/allocation.hpp"
  45 #include "memory/genOopClosures.inline.hpp"
  46 #include "memory/referencePolicy.hpp"
  47 #include "memory/resourceArea.hpp"
  48 #include "oops/oop.inline.hpp"
  49 #include "runtime/handles.inline.hpp"
  50 #include "runtime/java.hpp"
  51 #include "runtime/atomic.inline.hpp"
  52 #include "runtime/prefetch.inline.hpp"
  53 #include "services/memTracker.hpp"
  54 
  55 // Concurrent marking bit map wrapper
  56 
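     // CMBitMapRO is the read-only side of the marking bitmap: one bit covers
     // (1 << _shifter) heap words. The covered range (_bmStartWord/_bmWordSize)
     // and the backing map are filled in later by CMBitMap::initialize().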
  57 CMBitMapRO::CMBitMapRO(int shifter) :
  58   _bm(),
  59   _shifter(shifter) {
  60   _bmStartWord = 0;
  61   _bmWordSize = 0;
  62 }
  63 
  64 HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  65                                                const HeapWord* limit) const {
  66   // First we must round addr *up* to a possible object boundary.
  67   addr = (HeapWord*)align_size_up((intptr_t)addr,
  68                                   HeapWordSize << _shifter);
  69   size_t addrOffset = heapWordToOffset(addr);
  70   if (limit == NULL) {
  71     limit = _bmStartWord + _bmWordSize;
  72   }
  73   size_t limitOffset = heapWordToOffset(limit);
  74   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  75   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  76   assert(nextAddr >= addr, "get_next_one postcondition");
  77   assert(nextAddr == limit || isMarked(nextAddr),
  78          "get_next_one postcondition");
  79   return nextAddr;
  80 }
  81 
  82 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
  83                                                  const HeapWord* limit) const {
  84   size_t addrOffset = heapWordToOffset(addr);
  85   if (limit == NULL) {
  86     limit = _bmStartWord + _bmWordSize;
  87   }
  88   size_t limitOffset = heapWordToOffset(limit);
  89   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  90   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  91   assert(nextAddr >= addr, "get_next_one postcondition");
  92   assert(nextAddr == limit || !isMarked(nextAddr),
  93          "get_next_one postcondition");
  94   return nextAddr;
  95 }
  96 
  97 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  98   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  99   return (int) (diff >> _shifter);
 100 }
 101 
 102 #ifndef PRODUCT
 103 bool CMBitMapRO::covers(MemRegion heap_rs) const {
 104   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 105   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
 106          "size inconsistency");
 107   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
 108          _bmWordSize  == heap_rs.word_size();
 109 }
 110 #endif
 111 
 112 void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
 113   _bm.print_on_error(st, prefix);
 114 }
 115 
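     // One mark bit covers MinObjAlignmentInBytes of heap, so each bitmap byte
     // covers mark_distance() == MinObjAlignmentInBytes * BitsPerByte bytes of
     // heap; compute_size() therefore returns the bitmap size in bytes.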
 116 size_t CMBitMap::compute_size(size_t heap_size) {
 117   return heap_size / mark_distance();
 118 }
 119 
 120 size_t CMBitMap::mark_distance() {
 121   return MinObjAlignmentInBytes * BitsPerByte;
 122 }
 123 
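     // The bitmap's backing memory is reserved up front but committed lazily,
     // region by region, through the G1RegionToSpaceMapper; the listener below
     // clears the freshly committed portion of the bitmap on each commit.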
 124 void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 125   _bmStartWord = heap.start();
 126   _bmWordSize = heap.word_size();
 127 
 128   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
 129   _bm.set_size(_bmWordSize >> _shifter);
 130 
 131   storage->set_mapping_changed_listener(&_listener);
 132 }
 133 
 134 void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions) {
 135   // We need to clear the bitmap on commit, removing any existing information.
 136   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 137   _bm->clearRange(mr);
 138 }
 139 
 140 // Closure used for clearing the given mark bitmap.
 141 class ClearBitmapHRClosure : public HeapRegionClosure {
 142  private:
 143   ConcurrentMark* _cm;
 144   CMBitMap* _bitmap;
 145   bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 146  public:
 147   ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
 148     assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
 149   }
 150 
 151   virtual bool doHeapRegion(HeapRegion* r) {
 152     size_t const chunk_size_in_words = M / HeapWordSize;
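         // Clear the bits for this region in chunks of 1M of heap at a time,
         // so that when _may_yield is set we can call _cm->do_yield_check()
         // between chunks instead of clearing the whole region at once.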
 153 
 154     HeapWord* cur = r->bottom();
 155     HeapWord* const end = r->end();
 156 
 157     while (cur < end) {
 158       MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 159       _bitmap->clearRange(mr);
 160 
 161       cur += chunk_size_in_words;
 162 
 163       // Abort the iteration if, after yielding, marking has been aborted.
 164       if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
 165         return true;
 166       }
 167       // Repeat the asserts from before the start of the closure. We will do them
 168       // as asserts here to minimize their overhead on the product. However, we
 169       // will have them as guarantees at the beginning / end of the bitmap
 170       // clearing to get some checking in the product.
 171       assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
 172       assert(!_may_yield || !G1CollectedHeap::heap()->mark_in_progress(), "invariant");
 173     }
 174 
 175     return false;
 176   }
 177 };
 178 
 179 void CMBitMap::clearAll() {
 180   ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
 181   G1CollectedHeap::heap()->heap_region_iterate(&cl);
 182   guarantee(cl.complete(), "Must have completed iteration.");
 183   return;
 184 }
 185 
 186 void CMBitMap::markRange(MemRegion mr) {
 187   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 188   assert(!mr.is_empty(), "unexpected empty region");
 189   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 190           ((HeapWord *) mr.end())),
 191          "markRange memory region end is not card aligned");
 192   // convert address range into offset range
 193   _bm.at_put_range(heapWordToOffset(mr.start()),
 194                    heapWordToOffset(mr.end()), true);
 195 }
 196 
 197 void CMBitMap::clearRange(MemRegion mr) {
 198   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 199   assert(!mr.is_empty(), "unexpected empty region");
 200   // convert address range into offset range
 201   _bm.at_put_range(heapWordToOffset(mr.start()),
 202                    heapWordToOffset(mr.end()), false);
 203 }
 204 
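     // Returns the first contiguous run of marked words at or after addr,
     // truncated at end_addr, and clears the corresponding bits; the result
     // is empty if no marked word is found below end_addr.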
 205 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 206                                             HeapWord* end_addr) {
 207   HeapWord* start = getNextMarkedWordAddress(addr);
 208   start = MIN2(start, end_addr);
 209   HeapWord* end   = getNextUnmarkedWordAddress(start);
 210   end = MIN2(end, end_addr);
 211   assert(start <= end, "Consistency check");
 212   MemRegion mr(start, end);
 213   if (!mr.is_empty()) {
 214     clearRange(mr);
 215   }
 216   return mr;
 217 }
 218 
 219 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 220   _base(NULL), _cm(cm)
 221 #ifdef ASSERT
 222   , _drain_in_progress(false)
 223   , _drain_in_progress_yields(false)
 224 #endif
 225 {}
 226 
 227 bool CMMarkStack::allocate(size_t capacity) {
 228   // allocate a stack of the requisite depth
 229   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 230   if (!rs.is_reserved()) {
 231     warning("ConcurrentMark MarkStack allocation failure");
 232     return false;
 233   }
 234   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 235   if (!_virtual_space.initialize(rs, rs.size())) {
 236     warning("ConcurrentMark MarkStack backing store failure");
 237     // Release the virtual memory reserved for the marking stack
 238     rs.release();
 239     return false;
 240   }
 241   assert(_virtual_space.committed_size() == rs.size(),
 242          "Didn't reserve backing store for all of ConcurrentMark stack?");
 243   _base = (oop*) _virtual_space.low();
 244   setEmpty();
 245   _capacity = (jint) capacity;
 246   _saved_index = -1;
 247   _should_expand = false;
 248   NOT_PRODUCT(_max_depth = 0);
 249   return true;
 250 }
 251 
 252 void CMMarkStack::expand() {
 253   // Called during remark if we've overflown the marking stack during marking.
 254   assert(isEmpty(), "stack should have been emptied while handling overflow");
 255   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 256   // Clear expansion flag
 257   _should_expand = false;
 258   if (_capacity == (jint) MarkStackSizeMax) {
 259     if (PrintGCDetails && Verbose) {
 260       gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
 261     }
 262     return;
 263   }
 264   // Double capacity if possible
 265   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
 266   // Do not give up the existing stack until we have managed to
 267   // get the doubled capacity that we desired.
 268   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 269                                                            sizeof(oop)));
 270   if (rs.is_reserved()) {
 271     // Release the backing store associated with old stack
 272     _virtual_space.release();
 273     // Reinitialize virtual space for new stack
 274     if (!_virtual_space.initialize(rs, rs.size())) {
 275       fatal("Not enough swap for expanded marking stack capacity");
 276     }
 277     _base = (oop*)(_virtual_space.low());
 278     _index = 0;
 279     _capacity = new_capacity;
 280   } else {
 281     if (PrintGCDetails && Verbose) {
 282       // Failed to double capacity, continue;
 283       gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
 284                           SIZE_FORMAT"K to " SIZE_FORMAT"K",
 285                           _capacity / K, new_capacity / K);
 286     }
 287   }
 288 }
 289 
 290 void CMMarkStack::set_should_expand() {
 291   // If we're resetting the marking state because of a
 292   // marking stack overflow, record that we should, if
 293   // possible, expand the stack.
 294   _should_expand = _cm->has_overflown();
 295 }
 296 
 297 CMMarkStack::~CMMarkStack() {
 298   if (_base != NULL) {
 299     _base = NULL;
 300     _virtual_space.release();
 301   }
 302 }
 303 
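     // Lock-free push used concurrently by multiple workers: claim the next
     // slot by CAS-ing _index forward and, on success, publish the entry into
     // the claimed slot; if the CAS fails another thread won the slot, so retry.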
 304 void CMMarkStack::par_push(oop ptr) {
 305   while (true) {
 306     if (isFull()) {
 307       _overflow = true;
 308       return;
 309     }
 310     // Otherwise...
 311     jint index = _index;
 312     jint next_index = index+1;
 313     jint res = Atomic::cmpxchg(next_index, &_index, index);
 314     if (res == index) {
 315       _base[index] = ptr;
 316       // Note that we don't maintain this atomically.  We could, but it
 317       // doesn't seem necessary.
 318       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 319       return;
 320     }
 321     // Otherwise, we need to try again.
 322   }
 323 }
 324 
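     // Bulk variant of par_push(): claims n consecutive slots with a single
     // CAS on _index and copies the whole array into them.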
 325 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 326   while (true) {
 327     if (isFull()) {
 328       _overflow = true;
 329       return;
 330     }
 331     // Otherwise...
 332     jint index = _index;
 333     jint next_index = index + n;
 334     if (next_index > _capacity) {
 335       _overflow = true;
 336       return;
 337     }
 338     jint res = Atomic::cmpxchg(next_index, &_index, index);
 339     if (res == index) {
 340       for (int i = 0; i < n; i++) {
 341         int  ind = index + i;
 342         assert(ind < _capacity, "By overflow test above.");
 343         _base[ind] = ptr_arr[i];
 344       }
 345       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 346       return;
 347     }
 348     // Otherwise, we need to try again.
 349   }
 350 }
 351 
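     // Unlike the CAS-based variants above, this bulk push (and par_pop_arr()
     // below) serializes callers on ParGCRareEvent_lock.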
 352 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 353   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 354   jint start = _index;
 355   jint next_index = start + n;
 356   if (next_index > _capacity) {
 357     _overflow = true;
 358     return;
 359   }
 360   // Otherwise.
 361   _index = next_index;
 362   for (int i = 0; i < n; i++) {
 363     int ind = start + i;
 364     assert(ind < _capacity, "By overflow test above.");
 365     _base[ind] = ptr_arr[i];
 366   }
 367   NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 368 }
 369 
 370 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 371   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 372   jint index = _index;
 373   if (index == 0) {
 374     *n = 0;
 375     return false;
 376   } else {
 377     int k = MIN2(max, index);
 378     jint  new_ind = index - k;
 379     for (int j = 0; j < k; j++) {
 380       ptr_arr[j] = _base[new_ind + j];
 381     }
 382     _index = new_ind;
 383     *n = k;
 384     return true;
 385   }
 386 }
 387 
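     // Applies cl to every entry popped off the stack. If yield_after is true
     // the drain may stop early after a successful yield check; in that case
     // false is returned to indicate that the stack has not been emptied.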
 388 template<class OopClosureClass>
 389 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 390   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 391          || SafepointSynchronize::is_at_safepoint(),
 392          "Drain recursion must be yield-safe.");
 393   bool res = true;
 394   debug_only(_drain_in_progress = true);
 395   debug_only(_drain_in_progress_yields = yield_after);
 396   while (!isEmpty()) {
 397     oop newOop = pop();
 398     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 399     assert(newOop->is_oop(), "Expected an oop");
 400     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 401            "only grey objects on this stack");
 402     newOop->oop_iterate(cl);
 403     if (yield_after && _cm->do_yield_check()) {
 404       res = false;
 405       break;
 406     }
 407   }
 408   debug_only(_drain_in_progress = false);
 409   return res;
 410 }
 411 
 412 void CMMarkStack::note_start_of_gc() {
 413   assert(_saved_index == -1,
 414          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 415   _saved_index = _index;
 416 }
 417 
 418 void CMMarkStack::note_end_of_gc() {
 419   // This is intentionally a guarantee, instead of an assert. If we
 420   // accidentally add something to the mark stack during GC, it
 421   // will be a correctness issue so it's better if we crash. We'll
 422   // only check this once per GC anyway, so it won't be a performance
 423   // issue in any way.
 424   guarantee(_saved_index == _index,
 425             err_msg("saved index: %d index: %d", _saved_index, _index));
 426   _saved_index = -1;
 427 }
 428 
 429 void CMMarkStack::oops_do(OopClosure* f) {
 430   assert(_saved_index == _index,
 431          err_msg("saved index: %d index: %d", _saved_index, _index));
 432   for (int i = 0; i < _index; i += 1) {
 433     f->do_oop(&_base[i]);
 434   }
 435 }
 436 
 437 bool ConcurrentMark::not_yet_marked(oop obj) const {
 438   return _g1h->is_obj_ill(obj);
 439 }
 440 
 441 CMRootRegions::CMRootRegions() :
 442   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 443   _should_abort(false),  _next_survivor(NULL) { }
 444 
 445 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 446   _young_list = g1h->young_list();
 447   _cm = cm;
 448 }
 449 
 450 void CMRootRegions::prepare_for_scan() {
 451   assert(!scan_in_progress(), "pre-condition");
 452 
 453   // Currently, only survivors can be root regions.
 454   assert(_next_survivor == NULL, "pre-condition");
 455   _next_survivor = _young_list->first_survivor_region();
 456   _scan_in_progress = (_next_survivor != NULL);
 457   _should_abort = false;
 458 }
 459 
 460 HeapRegion* CMRootRegions::claim_next() {
 461   if (_should_abort) {
 462     // If someone has set the should_abort flag, we return NULL to
 463     // force the caller to bail out of their loop.
 464     return NULL;
 465   }
 466 
 467   // Currently, only survivors can be root regions.
 468   HeapRegion* res = _next_survivor;
 469   if (res != NULL) {
 470     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 471     // Read it again in case it changed while we were waiting for the lock.
 472     res = _next_survivor;
 473     if (res != NULL) {
 474       if (res == _young_list->last_survivor_region()) {
 475         // We just claimed the last survivor so store NULL to indicate
 476         // that we're done.
 477         _next_survivor = NULL;
 478       } else {
 479         _next_survivor = res->get_next_young_region();
 480       }
 481     } else {
 482       // Someone else claimed the last survivor while we were trying
 483       // to take the lock so nothing else to do.
 484     }
 485   }
 486   assert(res == NULL || res->is_survivor(), "post-condition");
 487 
 488   return res;
 489 }
 490 
 491 void CMRootRegions::scan_finished() {
 492   assert(scan_in_progress(), "pre-condition");
 493 
 494   // Currently, only survivors can be root regions.
 495   if (!_should_abort) {
 496     assert(_next_survivor == NULL, "we should have claimed all survivors");
 497   }
 498   _next_survivor = NULL;
 499 
 500   {
 501     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 502     _scan_in_progress = false;
 503     RootRegionScan_lock->notify_all();
 504   }
 505 }
 506 
 507 bool CMRootRegions::wait_until_scan_finished() {
 508   if (!scan_in_progress()) return false;
 509 
 510   {
 511     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 512     while (scan_in_progress()) {
 513       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 514     }
 515   }
 516   return true;
 517 }
 518 
 519 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 520 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 521 #endif // _MSC_VER
 522 
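     // Roughly one marking thread for every four parallel GC threads, but
     // always at least one.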
 523 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 524   return MAX2((n_par_threads + 2) / 4, 1U);
 525 }
 526 
 527 ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 528   _g1h(g1h),
 529   _markBitMap1(),
 530   _markBitMap2(),
 531   _parallel_marking_threads(0),
 532   _max_parallel_marking_threads(0),
 533   _sleep_factor(0.0),
 534   _marking_task_overhead(1.0),
 535   _cleanup_sleep_factor(0.0),
 536   _cleanup_task_overhead(1.0),
 537   _cleanup_list("Cleanup List"),
 538   _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
 539   _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
 540             CardTableModRefBS::card_shift,
 541             false /* in_resource_area*/),
 542 
 543   _prevMarkBitMap(&_markBitMap1),
 544   _nextMarkBitMap(&_markBitMap2),
 545 
 546   _markStack(this),
 547   // _finger set in set_non_marking_state
 548 
 549   _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
 550   // _active_tasks set in set_non_marking_state
 551   // _tasks set inside the constructor
 552   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
 553   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 554 
 555   _has_overflown(false),
 556   _concurrent(false),
 557   _has_aborted(false),
 558   _aborted_gc_id(GCId::undefined()),
 559   _restart_for_overflow(false),
 560   _concurrent_marking_in_progress(false),
 561 
 562   // _verbose_level set below
 563 
 564   _init_times(),
 565   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 566   _cleanup_times(),
 567   _total_counting_time(0.0),
 568   _total_rs_scrub_time(0.0),
 569 
 570   _parallel_workers(NULL),
 571 
 572   _count_card_bitmaps(NULL),
 573   _count_marked_bytes(NULL),
 574   _completed_initialization(false) {
 575   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 576   if (verbose_level < no_verbose) {
 577     verbose_level = no_verbose;
 578   }
 579   if (verbose_level > high_verbose) {
 580     verbose_level = high_verbose;
 581   }
 582   _verbose_level = verbose_level;
 583 
 584   if (verbose_low()) {
 585     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 586                            "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
 587   }
 588 
 589   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 590   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 591 
 592   // Create & start a ConcurrentMark thread.
 593   _cmThread = new ConcurrentMarkThread(this);
 594   assert(cmThread() != NULL, "CM Thread should have been created");
 595   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 596   if (_cmThread->osthread() == NULL) {
 597       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 598   }
 599 
 600   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 601   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 602   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 603 
 604   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 605   satb_qs.set_buffer_size(G1SATBBufferSize);
 606 
 607   _root_regions.init(_g1h, this);
 608 
 609   if (ConcGCThreads > ParallelGCThreads) {
 610     warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
 611             "than ParallelGCThreads (" UINTX_FORMAT ").",
 612             ConcGCThreads, ParallelGCThreads);
 613     return;
 614   }
 615   if (ParallelGCThreads == 0) {
 616     // if we are not running with any parallel GC threads we will not
 617     // spawn any marking threads either
 618     _parallel_marking_threads =       0;
 619     _max_parallel_marking_threads =   0;
 620     _sleep_factor             =     0.0;
 621     _marking_task_overhead    =     1.0;
 622   } else {
 623     if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 624       // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 625       // if both are set
 626       _sleep_factor             = 0.0;
 627       _marking_task_overhead    = 1.0;
 628     } else if (G1MarkingOverheadPercent > 0) {
 629       // We will calculate the number of parallel marking threads based
 630       // on a target overhead with respect to the soft real-time goal
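           // For example (illustrative numbers only): with MaxGCPauseMillis=200,
           // GCPauseIntervalMillis=1000, G1MarkingOverheadPercent=10 and 8 CPUs,
           // overall_cm_overhead = 200 * 0.10 / 1000 = 0.02 and cpu_ratio = 0.125,
           // so ConcGCThreads is set to ceil(0.02 / 0.125) = 1, with a marking
           // task overhead of 0.02 * 8 = 0.16 and a sleep factor of
           // (1.0 - 0.16) / 0.16 = 5.25.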
 631       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 632       double overall_cm_overhead =
 633         (double) MaxGCPauseMillis * marking_overhead /
 634         (double) GCPauseIntervalMillis;
 635       double cpu_ratio = 1.0 / (double) os::processor_count();
 636       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 637       double marking_task_overhead =
 638         overall_cm_overhead / marking_thread_num *
 639                                                 (double) os::processor_count();
 640       double sleep_factor =
 641                          (1.0 - marking_task_overhead) / marking_task_overhead;
 642 
 643       FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
 644       _sleep_factor             = sleep_factor;
 645       _marking_task_overhead    = marking_task_overhead;
 646     } else {
 647       // Calculate the number of parallel marking threads by scaling
 648       // the number of parallel GC threads.
 649       uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
 650       FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
 651       _sleep_factor             = 0.0;
 652       _marking_task_overhead    = 1.0;
 653     }
 654 
 655     assert(ConcGCThreads > 0, "Should have been set");
 656     _parallel_marking_threads = (uint) ConcGCThreads;
 657     _max_parallel_marking_threads = _parallel_marking_threads;
 658 
 659     if (parallel_marking_threads() > 1) {
 660       _cleanup_task_overhead = 1.0;
 661     } else {
 662       _cleanup_task_overhead = marking_task_overhead();
 663     }
 664     _cleanup_sleep_factor =
 665                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 666 
 667 #if 0
 668     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 669     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 670     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 671     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 672     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 673 #endif
 674 
 675     guarantee(parallel_marking_threads() > 0, "peace of mind");
 676     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
 677          _max_parallel_marking_threads, false, true);
 678     if (_parallel_workers == NULL) {
 679       vm_exit_during_initialization("Failed necessary allocation.");
 680     } else {
 681       _parallel_workers->initialize_workers();
 682     }
 683   }
 684 
 685   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 686     uintx mark_stack_size =
 687       MIN2(MarkStackSizeMax,
 688           MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 689     // Verify that the calculated value for MarkStackSize is in range.
 690     // It would be nice to use the private utility routine from Arguments.
 691     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 692       warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
 693               "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
 694               mark_stack_size, (uintx) 1, MarkStackSizeMax);
 695       return;
 696     }
 697     FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
 698   } else {
 699     // Verify MarkStackSize is in range.
 700     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 701       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 702         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 703           warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
 704                   "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
 705                   MarkStackSize, (uintx) 1, MarkStackSizeMax);
 706           return;
 707         }
 708       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 709         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 710           warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
 711                   " or for MarkStackSizeMax (" UINTX_FORMAT ")",
 712                   MarkStackSize, MarkStackSizeMax);
 713           return;
 714         }
 715       }
 716     }
 717   }
 718 
 719   if (!_markStack.allocate(MarkStackSize)) {
 720     warning("Failed to allocate CM marking stack");
 721     return;
 722   }
 723 
 724   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
 725   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 726 
 727   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_worker_id, mtGC);
 728   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);
 729 
 730   BitMap::idx_t card_bm_size = _card_bm.size();
 731 
 732   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 733   _active_tasks = _max_worker_id;
 734 
 735   size_t max_regions = (size_t) _g1h->max_regions();
 736   for (uint i = 0; i < _max_worker_id; ++i) {
 737     CMTaskQueue* task_queue = new CMTaskQueue();
 738     task_queue->initialize();
 739     _task_queues->register_queue(i, task_queue);
 740 
 741     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 742     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);
 743 
 744     _tasks[i] = new CMTask(i, this,
 745                            _count_marked_bytes[i],
 746                            &_count_card_bitmaps[i],
 747                            task_queue, _task_queues);
 748 
 749     _accum_task_vtime[i] = 0.0;
 750   }
 751 
 752   // Calculate the card number for the bottom of the heap. Used
 753   // in biasing indexes into the accounting card bitmaps.
 754   _heap_bottom_card_num =
 755     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 756                                 CardTableModRefBS::card_shift);
 757 
 758   // Clear all the liveness counting data
 759   clear_all_count_data();
 760 
 761   // so that the call below can read a sensible value
 762   _heap_start = g1h->reserved_region().start();
 763   set_non_marking_state();
 764   _completed_initialization = true;
 765 }
 766 
 767 void ConcurrentMark::reset() {
 768   // Starting values for these two. This should be called in a STW
 769   // phase.
 770   MemRegion reserved = _g1h->g1_reserved();
 771   _heap_start = reserved.start();
 772   _heap_end   = reserved.end();
 773 
 774   // Separated the asserts so that we know which one fires.
 775   assert(_heap_start != NULL, "heap bounds should look ok");
 776   assert(_heap_end != NULL, "heap bounds should look ok");
 777   assert(_heap_start < _heap_end, "heap bounds should look ok");
 778 
 779   // Reset all the marking data structures and any necessary flags
 780   reset_marking_state();
 781 
 782   if (verbose_low()) {
 783     gclog_or_tty->print_cr("[global] resetting");
 784   }
 785 
 786   // We do reset all of them, since different phases will use a
 787   // different number of active threads. So, it's easiest to have all
 788   // of them ready.
 789   for (uint i = 0; i < _max_worker_id; ++i) {
 790     _tasks[i]->reset(_nextMarkBitMap);
 791   }
 792 
 793   // We need this to make sure that the flag is on during the evacuation
 794   // pause that has the initial mark piggy-backed on it.
 795   set_concurrent_marking_in_progress();
 796 }
 797 
 798 
 799 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 800   _markStack.set_should_expand();
 801   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 802   if (clear_overflow) {
 803     clear_has_overflown();
 804   } else {
 805     assert(has_overflown(), "pre-condition");
 806   }
 807   _finger = _heap_start;
 808 
 809   for (uint i = 0; i < _max_worker_id; ++i) {
 810     CMTaskQueue* queue = _task_queues->queue(i);
 811     queue->set_empty();
 812   }
 813 }
 814 
 815 void ConcurrentMark::set_concurrency(uint active_tasks) {
 816   assert(active_tasks <= _max_worker_id, "we should not have more");
 817 
 818   _active_tasks = active_tasks;
 819   // Need to update the three data structures below according to the
 820   // number of active threads for this phase.
 821   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 822   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 823   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 824 }
 825 
 826 void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 827   set_concurrency(active_tasks);
 828 
 829   _concurrent = concurrent;
 830   // We propagate this to all tasks, not just the active ones.
 831   for (uint i = 0; i < _max_worker_id; ++i)
 832     _tasks[i]->set_concurrent(concurrent);
 833 
 834   if (concurrent) {
 835     set_concurrent_marking_in_progress();
 836   } else {
 837     // We currently assume that the concurrent flag has been set to
 838     // false before we start remark. At this point we should also be
 839     // in a STW phase.
 840     assert(!concurrent_marking_in_progress(), "invariant");
 841     assert(out_of_regions(),
 842            err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
 843                    p2i(_finger), p2i(_heap_end)));
 844   }
 845 }
 846 
 847 void ConcurrentMark::set_non_marking_state() {
 848   // We set the global marking state to some default values when we're
 849   // not doing marking.
 850   reset_marking_state();
 851   _active_tasks = 0;
 852   clear_concurrent_marking_in_progress();
 853 }
 854 
 855 ConcurrentMark::~ConcurrentMark() {
 856   // The ConcurrentMark instance is never freed.
 857   ShouldNotReachHere();
 858 }
 859 
 860 void ConcurrentMark::clearNextBitmap() {
 861   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 862 
 863   // Make sure that the concurrent mark thread still appears to be in
 864   // the current cycle.
 865   guarantee(cmThread()->during_cycle(), "invariant");
 866 
 867   // We are finishing up the current cycle by clearing the next
 868   // marking bitmap and getting it ready for the next cycle. During
 869   // this time no other cycle can start. So, let's make sure that this
 870   // is the case.
 871   guarantee(!g1h->mark_in_progress(), "invariant");
 872 
 873   ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
 874   g1h->heap_region_iterate(&cl);
 875 
 876   // Clear the liveness counting data. If the marking has been aborted, the abort()
 877   // call already did that.
 878   if (cl.complete()) {
 879     clear_all_count_data();
 880   }
 881 
 882   // Repeat the asserts from above.
 883   guarantee(cmThread()->during_cycle(), "invariant");
 884   guarantee(!g1h->mark_in_progress(), "invariant");
 885 }
 886 
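     // Closure that checks whether a region has any bits set in the given
     // bitmap. It terminates the iteration (doHeapRegion() returns true) as
     // soon as a marked word is found, so a completed iteration means the
     // bitmap is clear.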
 887 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 888   CMBitMap* _bitmap;
 889   bool _error;
 890  public:
 891   CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
 892   }
 893 
 894   virtual bool doHeapRegion(HeapRegion* r) {
 895     return _bitmap->getNextMarkedWordAddress(r->bottom(), r->end()) != r->end();
 896   }
 897 };
 898 
 899 bool ConcurrentMark::nextMarkBitmapIsClear() {
 900   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 901   _g1h->heap_region_iterate(&cl);
 902   return cl.complete();
 903 }
 904 
 905 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 906 public:
 907   bool doHeapRegion(HeapRegion* r) {
 908     if (!r->continuesHumongous()) {
 909       r->note_start_of_marking();
 910     }
 911     return false;
 912   }
 913 };
 914 
 915 void ConcurrentMark::checkpointRootsInitialPre() {
 916   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 917   G1CollectorPolicy* g1p = g1h->g1_policy();
 918 
 919   _has_aborted = false;
 920 
 921 #ifndef PRODUCT
 922   if (G1PrintReachableAtInitialMark) {
 923     print_reachable("at-cycle-start",
 924                     VerifyOption_G1UsePrevMarking, true /* all */);
 925   }
 926 #endif
 927 
 928   // Initialize marking structures. This has to be done in a STW phase.
 929   reset();
 930 
 931   // For each region note start of marking.
 932   NoteStartOfMarkHRClosure startcl;
 933   g1h->heap_region_iterate(&startcl);
 934 }
 935 
 936 
 937 void ConcurrentMark::checkpointRootsInitialPost() {
 938   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 939 
 940   // If we force an overflow during remark, the remark operation will
 941   // actually abort and we'll restart concurrent marking. If we always
 942   // force an overflow during remark we'll never actually complete the
 943   // marking phase. So, we initialize this here, at the start of the
 944   // cycle, so that the remaining overflow count will decrease at
 945   // every remark and we'll eventually not need to force one.
 946   force_overflow_stw()->init();
 947 
 948   // Start Concurrent Marking weak-reference discovery.
 949   ReferenceProcessor* rp = g1h->ref_processor_cm();
 950   // enable ("weak") refs discovery
 951   rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
 952   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 953 
 954   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 955   // This is the start of the marking cycle; we expect all
 956   // threads to have SATB queues with active set to false.
 957   satb_mq_set.set_active_all_threads(true, /* new active value */
 958                                      false /* expected_active */);
 959 
 960   _root_regions.prepare_for_scan();
 961 
 962   // update_g1_committed() will be called at the end of an evac pause
 963   // when marking is on. So, it's also called at the end of the
 964   // initial-mark pause to update the heap end, if the heap expands
 965   // during it. No need to call it here.
 966 }
 967 
 968 /*
 969  * Notice that in the next two methods, we actually leave the STS
 970  * during the barrier sync and join it immediately afterwards. If we
 971  * do not do this, the following deadlock can occur: one thread could
 972  * be in the barrier sync code, waiting for the other thread to also
 973  * sync up, whereas another one could be trying to yield, while also
 974  * waiting for the other threads to sync up too.
 975  *
 976  * Note, however, that this code is also used during remark and in
 977  * this case we should not attempt to leave / enter the STS, otherwise
 978  * we'll either hit an assert (debug / fastdebug) or deadlock
 979  * (product). So we should only leave / enter the STS if we are
 980  * operating concurrently.
 981  *
 982  * Because the thread that does the sync barrier has left the STS, it
 983  * is possible for it to be suspended while a Full GC or an evacuation
 984  * pause occurs. This is actually safe, since entering the sync
 985  * barrier is one of the last things do_marking_step() does, and it
 986  * doesn't manipulate any data structures afterwards.
 987  */
 988 
 989 void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 990   if (verbose_low()) {
 991     gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
 992   }
 993 
 994   if (concurrent()) {
 995     SuspendibleThreadSet::leave();
 996   }
 997 
 998   bool barrier_aborted = !_first_overflow_barrier_sync.enter();
 999 
1000   if (concurrent()) {
1001     SuspendibleThreadSet::join();
1002   }
1003   // at this point everyone should have synced up and not be doing any
1004   // more work
1005 
1006   if (verbose_low()) {
1007     if (barrier_aborted) {
1008       gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
1009     } else {
1010       gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
1011     }
1012   }
1013 
1014   if (barrier_aborted) {
1015     // If the barrier aborted we ignore the overflow condition and
1016     // just abort the whole marking phase as quickly as possible.
1017     return;
1018   }
1019 
1020   // If we're executing the concurrent phase of marking, reset the marking
1021   // state; otherwise the marking state is reset after reference processing,
1022   // during the remark pause.
1023   // If we reset here as a result of an overflow during the remark we will
1024   // see assertion failures from any subsequent set_concurrency_and_phase()
1025   // calls.
1026   if (concurrent()) {
1027     // let the task associated with worker 0 do this
1028     if (worker_id == 0) {
1029       // task 0 is responsible for clearing the global data structures
1030       // We should be here because of an overflow. During STW we should
1031       // not clear the overflow flag since we rely on it being true when
1032       // we exit this method to abort the pause and restart concurrent
1033       // marking.
1034       reset_marking_state(true /* clear_overflow */);
1035       force_overflow()->update();
1036 
1037       if (G1Log::fine()) {
1038         gclog_or_tty->gclog_stamp(concurrent_gc_id());
1039         gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
1040       }
1041     }
1042   }
1043 
1044   // after this, each task should reset its own data structures and
1045   // then go into the second barrier
1046 }
1047 
1048 void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
1049   if (verbose_low()) {
1050     gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
1051   }
1052 
1053   if (concurrent()) {
1054     SuspendibleThreadSet::leave();
1055   }
1056 
1057   bool barrier_aborted = !_second_overflow_barrier_sync.enter();
1058 
1059   if (concurrent()) {
1060     SuspendibleThreadSet::join();
1061   }
1062   // at this point everything should be re-initialized and ready to go
1063 
1064   if (verbose_low()) {
1065     if (barrier_aborted) {
1066       gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
1067     } else {
1068       gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
1069     }
1070   }
1071 }
1072 
1073 #ifndef PRODUCT
1074 void ForceOverflowSettings::init() {
1075   _num_remaining = G1ConcMarkForceOverflow;
1076   _force = false;
1077   update();
1078 }
1079 
1080 void ForceOverflowSettings::update() {
1081   if (_num_remaining > 0) {
1082     _num_remaining -= 1;
1083     _force = true;
1084   } else {
1085     _force = false;
1086   }
1087 }
1088 
1089 bool ForceOverflowSettings::should_force() {
1090   if (_force) {
1091     _force = false;
1092     return true;
1093   } else {
1094     return false;
1095   }
1096 }
1097 #endif // !PRODUCT
1098 
1099 class CMConcurrentMarkingTask: public AbstractGangTask {
1100 private:
1101   ConcurrentMark*       _cm;
1102   ConcurrentMarkThread* _cmt;
1103 
1104 public:
1105   void work(uint worker_id) {
1106     assert(Thread::current()->is_ConcurrentGC_thread(),
1107            "this should only be done by a conc GC thread");
1108     ResourceMark rm;
1109 
1110     double start_vtime = os::elapsedVTime();
1111 
1112     SuspendibleThreadSet::join();
1113 
1114     assert(worker_id < _cm->active_tasks(), "invariant");
1115     CMTask* the_task = _cm->task(worker_id);
1116     the_task->record_start_time();
1117     if (!_cm->has_aborted()) {
1118       do {
1119         double start_vtime_sec = os::elapsedVTime();
1120         double start_time_sec = os::elapsedTime();
1121         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1122 
1123         the_task->do_marking_step(mark_step_duration_ms,
1124                                   true  /* do_termination */,
1125                                   false /* is_serial*/);
1126 
1127         double end_time_sec = os::elapsedTime();
1128         double end_vtime_sec = os::elapsedVTime();
1129         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1130         double elapsed_time_sec = end_time_sec - start_time_sec;
1131         _cm->clear_has_overflown();
1132 
1133         bool ret = _cm->do_yield_check(worker_id);
1134 
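             // If this task aborted because it used up its time quantum (and
             // marking as a whole has not aborted), sleep for a time proportional
             // to the virtual time just spent marking, scaled by the sleep
             // factor, to throttle concurrent marking overhead.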
1135         jlong sleep_time_ms;
1136         if (!_cm->has_aborted() && the_task->has_aborted()) {
1137           sleep_time_ms =
1138             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1139           SuspendibleThreadSet::leave();
1140           os::sleep(Thread::current(), sleep_time_ms, false);
1141           SuspendibleThreadSet::join();
1142         }
1143         double end_time2_sec = os::elapsedTime();
1144         double elapsed_time2_sec = end_time2_sec - start_time_sec;
1145 
1146 #if 0
1147           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1148                                  "overhead %1.4lf",
1149                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1150                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
1151           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1152                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1153 #endif
1154       } while (!_cm->has_aborted() && the_task->has_aborted());
1155     }
1156     the_task->record_end_time();
1157     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1158 
1159     SuspendibleThreadSet::leave();
1160 
1161     double end_vtime = os::elapsedVTime();
1162     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
1163   }
1164 
1165   CMConcurrentMarkingTask(ConcurrentMark* cm,
1166                           ConcurrentMarkThread* cmt) :
1167       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1168 
1169   ~CMConcurrentMarkingTask() { }
1170 };
1171 
1172 // Calculates the number of active workers for a concurrent
1173 // phase.
1174 uint ConcurrentMark::calc_parallel_marking_threads() {
1175   if (G1CollectedHeap::use_parallel_gc_threads()) {
1176     uint n_conc_workers = 0;
1177     if (!UseDynamicNumberOfGCThreads ||
1178         (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1179          !ForceDynamicNumberOfGCThreads)) {
1180       n_conc_workers = max_parallel_marking_threads();
1181     } else {
1182       n_conc_workers =
1183         AdaptiveSizePolicy::calc_default_active_workers(
1184                                      max_parallel_marking_threads(),
1185                                      1, /* Minimum workers */
1186                                      parallel_marking_threads(),
1187                                      Threads::number_of_non_daemon_threads());
1188       // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1189       // that scaling has already gone into "_max_parallel_marking_threads".
1190     }
1191     assert(n_conc_workers > 0, "Always need at least 1");
1192     return n_conc_workers;
1193   }
1194   // If we are not running with any parallel GC threads we will not
1195   // have spawned any marking threads either. Hence the number of
1196   // concurrent workers should be 0.
1197   return 0;
1198 }
1199 
1200 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1201   // Currently, only survivors can be root regions.
1202   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1203   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1204 
1205   const uintx interval = PrefetchScanIntervalInBytes;
1206   HeapWord* curr = hr->bottom();
1207   const HeapWord* end = hr->top();
1208   while (curr < end) {
1209     Prefetch::read(curr, interval);
1210     oop obj = oop(curr);
1211     int size = obj->oop_iterate(&cl);
1212     assert(size == obj->size(), "sanity");
1213     curr += size;
1214   }
1215 }
1216 
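     // Gang task in which each worker repeatedly claims a root region from
     // CMRootRegions and scans it, until claim_next() returns NULL.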
1217 class CMRootRegionScanTask : public AbstractGangTask {
1218 private:
1219   ConcurrentMark* _cm;
1220 
1221 public:
1222   CMRootRegionScanTask(ConcurrentMark* cm) :
1223     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1224 
1225   void work(uint worker_id) {
1226     assert(Thread::current()->is_ConcurrentGC_thread(),
1227            "this should only be done by a conc GC thread");
1228 
1229     CMRootRegions* root_regions = _cm->root_regions();
1230     HeapRegion* hr = root_regions->claim_next();
1231     while (hr != NULL) {
1232       _cm->scanRootRegion(hr, worker_id);
1233       hr = root_regions->claim_next();
1234     }
1235   }
1236 };
1237 
1238 void ConcurrentMark::scanRootRegions() {
1239   // Start of concurrent marking.
1240   ClassLoaderDataGraph::clear_claimed_marks();
1241 
1242   // scan_in_progress() will have been set to true only if there was
1243   // at least one root region to scan. So, if it's false, we
1244   // should not attempt to do any further work.
1245   if (root_regions()->scan_in_progress()) {
1246     _parallel_marking_threads = calc_parallel_marking_threads();
1247     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1248            "Maximum number of marking threads exceeded");
1249     uint active_workers = MAX2(1U, parallel_marking_threads());
1250 
1251     CMRootRegionScanTask task(this);
1252     if (use_parallel_marking_threads()) {
1253       _parallel_workers->set_active_workers((int) active_workers);
1254       _parallel_workers->run_task(&task);
1255     } else {
1256       task.work(0);
1257     }
1258 
1259     // It's possible that has_aborted() is true here without actually
1260     // aborting the survivor scan earlier. This is OK as it's
1261     // mainly used for sanity checking.
1262     root_regions()->scan_finished();
1263   }
1264 }
1265 
1266 void ConcurrentMark::markFromRoots() {
1267   // we might be tempted to assert that:
1268   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1269   //        "inconsistent argument?");
1270   // However that wouldn't be right, because it's possible that
1271   // a safepoint is indeed in progress as a younger generation
1272   // stop-the-world GC happens even as we mark in this generation.
1273 
1274   _restart_for_overflow = false;
1275   force_overflow_conc()->init();
1276 
1277   // _g1h has _n_par_threads
1278   _parallel_marking_threads = calc_parallel_marking_threads();
1279   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1280     "Maximum number of marking threads exceeded");
1281 
1282   uint active_workers = MAX2(1U, parallel_marking_threads());
1283 
1284   // Parallel task terminator is set in "set_concurrency_and_phase()"
1285   set_concurrency_and_phase(active_workers, true /* concurrent */);
1286 
1287   CMConcurrentMarkingTask markingTask(this, cmThread());
1288   if (use_parallel_marking_threads()) {
1289     _parallel_workers->set_active_workers((int)active_workers);
1290     // Don't set _n_par_threads because it affects MT in process_roots()
1291     // and the decisions on that MT processing are made elsewhere.
1292     assert(_parallel_workers->active_workers() > 0, "Should have been set");
1293     _parallel_workers->run_task(&markingTask);
1294   } else {
1295     markingTask.work(0);
1296   }
1297   print_stats();
1298 }
1299 
1300 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1301   // world is stopped at this checkpoint
1302   assert(SafepointSynchronize::is_at_safepoint(),
1303          "world should be stopped");
1304 
1305   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1306 
1307   // If a full collection has happened, we shouldn't do this.
1308   if (has_aborted()) {
1309     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1310     return;
1311   }
1312 
1313   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1314 
1315   if (VerifyDuringGC) {
1316     HandleMark hm;  // handle scope
1317     Universe::heap()->prepare_for_verify();
1318     Universe::verify(VerifyOption_G1UsePrevMarking,
1319                      " VerifyDuringGC:(before)");
1320   }
1321   g1h->check_bitmaps("Remark Start");
1322 
1323   G1CollectorPolicy* g1p = g1h->g1_policy();
1324   g1p->record_concurrent_mark_remark_start();
1325 
1326   double start = os::elapsedTime();
1327 
1328   checkpointRootsFinalWork();
1329 
1330   double mark_work_end = os::elapsedTime();
1331 
1332   weakRefsWork(clear_all_soft_refs);
1333 
1334   if (has_overflown()) {
1335     // Oops.  We overflowed.  Restart concurrent marking.
1336     _restart_for_overflow = true;
1337     if (G1TraceMarkStackOverflow) {
1338       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1339     }
1340 
1341     // Verify the heap w.r.t. the previous marking bitmap.
1342     if (VerifyDuringGC) {
1343       HandleMark hm;  // handle scope
1344       Universe::heap()->prepare_for_verify();
1345       Universe::verify(VerifyOption_G1UsePrevMarking,
1346                        " VerifyDuringGC:(overflow)");
1347     }
1348 
1349     // Clear the marking state because we will be restarting
1350     // marking due to overflowing the global mark stack.
1351     reset_marking_state();
1352   } else {
1353     // Aggregate the per-task counting data that we have accumulated
1354     // while marking.
1355     aggregate_count_data();
1356 
1357     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1358     // We're done with marking.
1359     // This is the end of the marking cycle; we expect all
1360     // threads to have SATB queues with active set to true.
1361     satb_mq_set.set_active_all_threads(false, /* new active value */
1362                                        true /* expected_active */);
1363 
1364     if (VerifyDuringGC) {
1365       HandleMark hm;  // handle scope
1366       Universe::heap()->prepare_for_verify();
1367       Universe::verify(VerifyOption_G1UseNextMarking,
1368                        " VerifyDuringGC:(after)");
1369     }
1370     g1h->check_bitmaps("Remark End");
1371     assert(!restart_for_overflow(), "sanity");
1372     // Completely reset the marking state since marking completed
1373     set_non_marking_state();
1374   }
1375 
1376   // Expand the marking stack, if we have to and if we can.
1377   if (_markStack.should_expand()) {
1378     _markStack.expand();
1379   }
1380 
1381   // Statistics
1382   double now = os::elapsedTime();
1383   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1384   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1385   _remark_times.add((now - start) * 1000.0);
1386 
1387   g1p->record_concurrent_mark_remark_end();
1388 
1389   G1CMIsAliveClosure is_alive(g1h);
1390   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1391 }
1392 
1393 // Base class of the closures that finalize and verify the
1394 // liveness counting data.
1395 class CMCountDataClosureBase: public HeapRegionClosure {
1396 protected:
1397   G1CollectedHeap* _g1h;
1398   ConcurrentMark* _cm;
1399   CardTableModRefBS* _ct_bs;
1400 
1401   BitMap* _region_bm;
1402   BitMap* _card_bm;
1403 
1404   // Takes a region that's not empty (i.e., it has at least one
1405   // live object in it) and sets its corresponding bit on the region
1406   // bitmap to 1. If the region is "starts humongous" it will also set
1407   // to 1 the bits on the region bitmap that correspond to its
1408   // associated "continues humongous" regions.
1409   void set_bit_for_region(HeapRegion* hr) {
1410     assert(!hr->continuesHumongous(), "should have filtered those out");
1411 
1412     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1413     if (!hr->startsHumongous()) {
1414       // Normal (non-humongous) case: just set the bit.
1415       _region_bm->par_at_put(index, true);
1416     } else {
1417       // Starts humongous case: calculate how many regions are part of
1418       // this humongous region and then set the bit range.
1419       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1420       _region_bm->par_at_put_range(index, end_index, true);
1421     }
1422   }
1423 
1424 public:
1425   CMCountDataClosureBase(G1CollectedHeap* g1h,
1426                          BitMap* region_bm, BitMap* card_bm):
1427     _g1h(g1h), _cm(g1h->concurrent_mark()),
1428     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
1429     _region_bm(region_bm), _card_bm(card_bm) { }
1430 };
1431 
1432 // Closure that calculates the # live objects per region. Used
1433 // for verification purposes during the cleanup pause.
1434 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1435   CMBitMapRO* _bm;
1436   size_t _region_marked_bytes;
1437 
1438 public:
1439   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1440                          BitMap* region_bm, BitMap* card_bm) :
1441     CMCountDataClosureBase(g1h, region_bm, card_bm),
1442     _bm(bm), _region_marked_bytes(0) { }
1443 
1444   bool doHeapRegion(HeapRegion* hr) {
1445 
1446     if (hr->continuesHumongous()) {
1447       // We will ignore these here and process them when their
1448       // associated "starts humongous" region is processed (see
1449       // set_bit_for_region()). Note that we cannot rely on their
1450       // associated "starts humongous" region to have its bit set to
1451       // 1 since, due to the region chunking in the parallel region
1452       // iteration, a "continues humongous" region might be visited
1453       // before its associated "starts humongous".
1454       return false;
1455     }
1456 
1457     HeapWord* ntams = hr->next_top_at_mark_start();
1458     HeapWord* start = hr->bottom();
1459 
1460     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1461            err_msg("Preconditions not met - "
1462                    "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1463                    p2i(start), p2i(ntams), p2i(hr->end())));
1464 
1465     // Find the first marked object at or after "start".
1466     start = _bm->getNextMarkedWordAddress(start, ntams);
1467 
1468     size_t marked_bytes = 0;
1469 
1470     while (start < ntams) {
1471       oop obj = oop(start);
1472       int obj_sz = obj->size();
1473       HeapWord* obj_end = start + obj_sz;
1474 
1475       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1476       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1477 
1478       // Note: if we're looking at the last region in heap - obj_end
1479       // could actually be just beyond the end of the heap; end_idx
1480       // will then correspond to a (non-existent) card that is also
1481       // just beyond the heap.
1482       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1483         // end of object is not card aligned - increment to cover
1484         // all the cards spanned by the object
1485         end_idx += 1;
1486       }
1487 
1488       // Set the bits in the card BM for the cards spanned by this object.
1489       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1490 
1491       // Add the size of this object to the number of marked bytes.
1492       marked_bytes += (size_t)obj_sz * HeapWordSize;
1493 
1494       // Find the next marked object after this one.
1495       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1496     }
1497 
1498     // Mark the allocated-since-marking portion...
1499     HeapWord* top = hr->top();
1500     if (ntams < top) {
1501       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1502       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1503 
1504       // Note: if we're looking at the last region in heap - top
1505       // could actually be just beyond the end of the heap; end_idx
1506       // will then correspond to a (non-existent) card that is also
1507       // just beyond the heap.
1508       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1509         // top is not card aligned - increment to cover
1510         // all the cards spanned by [ntams, top)
1511         end_idx += 1;
1512       }
1513       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1514 
1515       // This definitely means the region has live objects.
1516       set_bit_for_region(hr);
1517     }
1518 
1519     // Update the live region bitmap.
1520     if (marked_bytes > 0) {
1521       set_bit_for_region(hr);
1522     }
1523 
1524     // Set the marked bytes for the current region so that
1525     // it can be queried by a calling verification routine
1526     _region_marked_bytes = marked_bytes;
1527 
1528     return false;
1529   }
1530 
1531   size_t region_marked_bytes() const { return _region_marked_bytes; }
1532 };
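
// Illustration only (not part of HotSpot): a self-contained sketch of the
// half-open card-index computation performed by CalcLiveObjectsClosure above,
// assuming hypothetical 512-byte cards and zero-based byte addresses. The
// helper name and the card size are assumptions for this sketch, not the
// real CardTableModRefBS constants or card_bitmap_index_for().
static inline size_t sketch_exclusive_card_index(size_t end_byte) {
  const size_t sketch_card_shift = 9;                        // 2^9 = 512-byte cards (assumed)
  size_t end_idx = end_byte >> sketch_card_shift;            // card containing the end address
  if ((end_byte & ((size_t(1) << sketch_card_shift) - 1)) != 0) {
    end_idx += 1;                                            // end not card aligned: cover the last spanned card
  }
  return end_idx;                                            // exclusive upper bound of the spanned cards
}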
1533 
1534 // Heap region closure used for verifying the counting data
1535 // that was accumulated concurrently and aggregated during
1536 // the remark pause. This closure is applied to the heap
1537 // regions during the STW cleanup pause.
1538 
1539 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1540   G1CollectedHeap* _g1h;
1541   ConcurrentMark* _cm;
1542   CalcLiveObjectsClosure _calc_cl;
1543   BitMap* _region_bm;   // Region BM to be verified
1544   BitMap* _card_bm;     // Card BM to be verified
1545   bool _verbose;        // verbose output?
1546 
1547   BitMap* _exp_region_bm; // Expected Region BM values
1548   BitMap* _exp_card_bm;   // Expected card BM values
1549 
1550   int _failures;
1551 
1552 public:
1553   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1554                                 BitMap* region_bm,
1555                                 BitMap* card_bm,
1556                                 BitMap* exp_region_bm,
1557                                 BitMap* exp_card_bm,
1558                                 bool verbose) :
1559     _g1h(g1h), _cm(g1h->concurrent_mark()),
1560     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1561     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1562     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1563     _failures(0) { }
1564 
1565   int failures() const { return _failures; }
1566 
1567   bool doHeapRegion(HeapRegion* hr) {
1568     if (hr->continuesHumongous()) {
1569       // We will ignore these here and process them when their
1570       // associated "starts humongous" region is processed (see
1571       // set_bit_for_region()). Note that we cannot rely on their
1572       // associated "starts humongous" region to have its bit set to
1573       // 1 since, due to the region chunking in the parallel region
1574       // iteration, a "continues humongous" region might be visited
1575       // before its associated "starts humongous".
1576       return false;
1577     }
1578 
1579     int failures = 0;
1580 
1581     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1582     // this region and set the corresponding bits in the expected region
1583     // and card bitmaps.
1584     bool res = _calc_cl.doHeapRegion(hr);
1585     assert(res == false, "should be continuing");
1586 
1587     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1588                     Mutex::_no_safepoint_check_flag);
1589 
1590     // Verify the marked bytes for this region.
1591     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1592     size_t act_marked_bytes = hr->next_marked_bytes();
1593 
1594     // We're not OK if expected marked bytes > actual marked bytes. It means
1595     // we have missed accounting some objects during the actual marking.
1596     if (exp_marked_bytes > act_marked_bytes) {
1597       if (_verbose) {
1598         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1599                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1600                                hr->hrm_index(), exp_marked_bytes, act_marked_bytes);
1601       }
1602       failures += 1;
1603     }
1604 
1605     // Verify the bit, for this region, in the actual and expected
1606     // (which was just calculated) region bit maps.
1607     // We're not OK if the bit in the calculated expected region
1608     // bitmap is set and the bit in the actual region bitmap is not.
1609     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1610 
1611     bool expected = _exp_region_bm->at(index);
1612     bool actual = _region_bm->at(index);
1613     if (expected && !actual) {
1614       if (_verbose) {
1615         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1616                                "expected: %s, actual: %s",
1617                                hr->hrm_index(),
1618                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1619       }
1620       failures += 1;
1621     }
1622 
1623     // Verify that the card bit maps for the cards spanned by the current
1624     // region match. We have an error if we have a set bit in the expected
1625     // bit map and the corresponding bit in the actual bitmap is not set.
1626 
1627     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1628     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1629 
1630     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1631       expected = _exp_card_bm->at(i);
1632       actual = _card_bm->at(i);
1633 
1634       if (expected && !actual) {
1635         if (_verbose) {
1636           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1637                                  "expected: %s, actual: %s",
1638                                  hr->hrm_index(), i,
1639                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1640         }
1641         failures += 1;
1642       }
1643     }
1644 
1645     if (failures > 0 && _verbose)  {
1646       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1647                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1648                              HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()),
1649                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1650     }
1651 
1652     _failures += failures;
1653 
1654     // We could stop iteration over the heap when we
1655     // find the first violating region by returning true.
1656     return false;
1657   }
1658 };
1659 
1660 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1661 protected:
1662   G1CollectedHeap* _g1h;
1663   ConcurrentMark* _cm;
1664   BitMap* _actual_region_bm;
1665   BitMap* _actual_card_bm;
1666 
1667   uint    _n_workers;
1668 
1669   BitMap* _expected_region_bm;
1670   BitMap* _expected_card_bm;
1671 
1672   int  _failures;
1673   bool _verbose;
1674 
1675 public:
1676   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1677                             BitMap* region_bm, BitMap* card_bm,
1678                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1679     : AbstractGangTask("G1 verify final counting"),
1680       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1681       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1682       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1683       _failures(0), _verbose(false),
1684       _n_workers(0) {
1685     assert(VerifyDuringGC, "don't call this otherwise");
1686 
1687     // Use the value already set as the number of active threads
1688     // in the call to run_task().
1689     if (G1CollectedHeap::use_parallel_gc_threads()) {
1690       assert( _g1h->workers()->active_workers() > 0,
1691         "Should have been previously set");
1692       _n_workers = _g1h->workers()->active_workers();
1693     } else {
1694       _n_workers = 1;
1695     }
1696 
1697     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1698     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1699 
1700     _verbose = _cm->verbose_medium();
1701   }
1702 
1703   void work(uint worker_id) {
1704     assert(worker_id < _n_workers, "invariant");
1705 
1706     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1707                                             _actual_region_bm, _actual_card_bm,
1708                                             _expected_region_bm,
1709                                             _expected_card_bm,
1710                                             _verbose);
1711 
1712     if (G1CollectedHeap::use_parallel_gc_threads()) {
1713       _g1h->heap_region_par_iterate_chunked(&verify_cl,
1714                                             worker_id,
1715                                             _n_workers,
1716                                             HeapRegion::VerifyCountClaimValue);
1717     } else {
1718       _g1h->heap_region_iterate(&verify_cl);
1719     }
1720 
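    // Add this worker's failure count to the shared total; Atomic::add handles concurrent updates from other workers.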
1721     Atomic::add(verify_cl.failures(), &_failures);
1722   }
1723 
1724   int failures() const { return _failures; }
1725 };
1726 
1727 // Closure that finalizes the liveness counting data.
1728 // Used during the cleanup pause.
1729 // Sets the bits corresponding to the interval [NTAMS, top]
1730 // (which contains the implicitly live objects) in the
1731 // card liveness bitmap. Also sets the bit for each region,
1732 // containing live data, in the region liveness bitmap.
1733 
1734 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1735  public:
1736   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1737                               BitMap* region_bm,
1738                               BitMap* card_bm) :
1739     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1740 
1741   bool doHeapRegion(HeapRegion* hr) {
1742 
1743     if (hr->continuesHumongous()) {
1744       // We will ignore these here and process them when their
1745       // associated "starts humongous" region is processed (see
1746       // set_bit_for_region()). Note that we cannot rely on their
1747       // associated "starts humongous" region to have its bit set to
1748       // 1 since, due to the region chunking in the parallel region
1749       // iteration, a "continues humongous" region might be visited
1750       // before its associated "starts humongous".
1751       return false;
1752     }
1753 
1754     HeapWord* ntams = hr->next_top_at_mark_start();
1755     HeapWord* top   = hr->top();
1756 
1757     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1758 
1759     // Mark the allocated-since-marking portion...
1760     if (ntams < top) {
1761       // This definitely means the region has live objects.
1762       set_bit_for_region(hr);
1763 
1764       // Now set the bits in the card bitmap for [ntams, top)
1765       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1766       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1767 
1768       // Note: if we're looking at the last region in heap - top
1769       // could actually be just beyond the end of the heap; end_idx
1770       // will then correspond to a (non-existent) card that is also
1771       // just beyond the heap.
1772       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1773         // top is not card aligned - increment to cover
1774         // all the cards spanned by [ntams, top)
1775         end_idx += 1;
1776       }
1777 
1778       assert(end_idx <= _card_bm->size(),
1779              err_msg("oob: end_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1780                      end_idx, _card_bm->size()));
1781       assert(start_idx < _card_bm->size(),
1782              err_msg("oob: start_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1783                      start_idx, _card_bm->size()));
1784 
1785       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1786     }
1787 
1788     // Set the bit for the region if it contains live data
1789     if (hr->next_marked_bytes() > 0) {
1790       set_bit_for_region(hr);
1791     }
1792 
1793     return false;
1794   }
1795 };
1796 
1797 class G1ParFinalCountTask: public AbstractGangTask {
1798 protected:
1799   G1CollectedHeap* _g1h;
1800   ConcurrentMark* _cm;
1801   BitMap* _actual_region_bm;
1802   BitMap* _actual_card_bm;
1803 
1804   uint    _n_workers;
1805 
1806 public:
1807   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1808     : AbstractGangTask("G1 final counting"),
1809       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1810       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1811       _n_workers(0) {
1812     // Use the value already set as the number of active threads
1813     // in the call to run_task().
1814     if (G1CollectedHeap::use_parallel_gc_threads()) {
1815       assert( _g1h->workers()->active_workers() > 0,
1816         "Should have been previously set");
1817       _n_workers = _g1h->workers()->active_workers();
1818     } else {
1819       _n_workers = 1;
1820     }
1821   }
1822 
1823   void work(uint worker_id) {
1824     assert(worker_id < _n_workers, "invariant");
1825 
1826     FinalCountDataUpdateClosure final_update_cl(_g1h,
1827                                                 _actual_region_bm,
1828                                                 _actual_card_bm);
1829 
1830     if (G1CollectedHeap::use_parallel_gc_threads()) {
1831       _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1832                                             worker_id,
1833                                             _n_workers,
1834                                             HeapRegion::FinalCountClaimValue);
1835     } else {
1836       _g1h->heap_region_iterate(&final_update_cl);
1837     }
1838   }
1839 };
1840 
1841 class G1ParNoteEndTask;
1842 
1843 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1844   G1CollectedHeap* _g1;
1845   size_t _max_live_bytes;
1846   uint _regions_claimed;
1847   size_t _freed_bytes;
1848   FreeRegionList* _local_cleanup_list;
1849   HeapRegionSetCount _old_regions_removed;
1850   HeapRegionSetCount _humongous_regions_removed;
1851   HRRSCleanupTask* _hrrs_cleanup_task;
1852   double _claimed_region_time;
1853   double _max_region_time;
1854 
1855 public:
1856   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1857                              FreeRegionList* local_cleanup_list,
1858                              HRRSCleanupTask* hrrs_cleanup_task) :
1859     _g1(g1),
1860     _max_live_bytes(0), _regions_claimed(0),
1861     _freed_bytes(0),
1862     _claimed_region_time(0.0), _max_region_time(0.0),
1863     _local_cleanup_list(local_cleanup_list),
1864     _old_regions_removed(),
1865     _humongous_regions_removed(),
1866     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1867 
1868   size_t freed_bytes() { return _freed_bytes; }
1869   const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; }
1870   const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; }
1871 
1872   bool doHeapRegion(HeapRegion *hr) {
1873     if (hr->continuesHumongous()) {
1874       return false;
1875     }
1876     // We use a claim value of zero here because all regions
1877     // were claimed with value 1 in the FinalCount task.
1878     _g1->reset_gc_time_stamps(hr);
1879     double start = os::elapsedTime();
1880     _regions_claimed++;
1881     hr->note_end_of_marking();
1882     _max_live_bytes += hr->max_live_bytes();
1883 
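    // Reclaim regions that are non-empty but contain no live data and are not young;
    // otherwise just do the remembered set cleanup work for the region.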
1884     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1885       _freed_bytes += hr->used();
1886       hr->set_containing_set(NULL);
1887       if (hr->isHumongous()) {
1888         assert(hr->startsHumongous(), "we should only see starts humongous");
1889         _humongous_regions_removed.increment(1u, hr->capacity());
1890         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1891       } else {
1892         _old_regions_removed.increment(1u, hr->capacity());
1893         _g1->free_region(hr, _local_cleanup_list, true);
1894       }
1895     } else {
1896       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1897     }
1898 
1899     double region_time = (os::elapsedTime() - start);
1900     _claimed_region_time += region_time;
1901     if (region_time > _max_region_time) {
1902       _max_region_time = region_time;
1903     }
1904     return false;
1905   }
1906 
1907   size_t max_live_bytes() { return _max_live_bytes; }
1908   uint regions_claimed() { return _regions_claimed; }
1909   double claimed_region_time_sec() { return _claimed_region_time; }
1910   double max_region_time_sec() { return _max_region_time; }
1911 };
1912 
1913 class G1ParNoteEndTask: public AbstractGangTask {
1914   friend class G1NoteEndOfConcMarkClosure;
1915 
1916 protected:
1917   G1CollectedHeap* _g1h;
1918   size_t _max_live_bytes;
1919   size_t _freed_bytes;
1920   FreeRegionList* _cleanup_list;
1921 
1922 public:
1923   G1ParNoteEndTask(G1CollectedHeap* g1h,
1924                    FreeRegionList* cleanup_list) :
1925     AbstractGangTask("G1 note end"), _g1h(g1h),
1926     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1927 
1928   void work(uint worker_id) {
1929     double start = os::elapsedTime();
1930     FreeRegionList local_cleanup_list("Local Cleanup List");
1931     HRRSCleanupTask hrrs_cleanup_task;
1932     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1933                                            &hrrs_cleanup_task);
1934     if (G1CollectedHeap::use_parallel_gc_threads()) {
1935       _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1936                                             _g1h->workers()->active_workers(),
1937                                             HeapRegion::NoteEndClaimValue);
1938     } else {
1939       _g1h->heap_region_iterate(&g1_note_end);
1940     }
1941     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1942 
1943     // Now update the lists
1944     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1945     {
1946       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1947       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1948       _max_live_bytes += g1_note_end.max_live_bytes();
1949       _freed_bytes += g1_note_end.freed_bytes();
1950 
1951       // If we iterate over the global cleanup list at the end of
1952       // cleanup to do this printing we will not guarantee to only
1953       // generate output for the newly-reclaimed regions (the list
1954       // might not be empty at the beginning of cleanup; we might
1955       // still be working on its previous contents). So we do the
1956       // printing here, before we append the new regions to the global
1957       // cleanup list.
1958 
1959       G1HRPrinter* hr_printer = _g1h->hr_printer();
1960       if (hr_printer->is_active()) {
1961         FreeRegionListIterator iter(&local_cleanup_list);
1962         while (iter.more_available()) {
1963           HeapRegion* hr = iter.get_next();
1964           hr_printer->cleanup(hr);
1965         }
1966       }
1967 
1968       _cleanup_list->add_ordered(&local_cleanup_list);
1969       assert(local_cleanup_list.is_empty(), "post-condition");
1970 
1971       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1972     }
1973   }
1974   size_t max_live_bytes() { return _max_live_bytes; }
1975   size_t freed_bytes() { return _freed_bytes; }
1976 };
1977 
1978 class G1ParScrubRemSetTask: public AbstractGangTask {
1979 protected:
1980   G1RemSet* _g1rs;
1981   BitMap* _region_bm;
1982   BitMap* _card_bm;
1983 public:
1984   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1985                        BitMap* region_bm, BitMap* card_bm) :
1986     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1987     _region_bm(region_bm), _card_bm(card_bm) { }
1988 
1989   void work(uint worker_id) {
1990     if (G1CollectedHeap::use_parallel_gc_threads()) {
1991       _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1992                        HeapRegion::ScrubRemSetClaimValue);
1993     } else {
1994       _g1rs->scrub(_region_bm, _card_bm);
1995     }
1996   }
1997 
1998 };
1999 
2000 void ConcurrentMark::cleanup() {
2001   // world is stopped at this checkpoint
2002   assert(SafepointSynchronize::is_at_safepoint(),
2003          "world should be stopped");
2004   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2005 
2006   // If a full collection has happened, we shouldn't do this.
2007   if (has_aborted()) {
2008     g1h->set_marking_complete(); // So bitmap clearing isn't confused
2009     return;
2010   }
2011 
2012   g1h->verify_region_sets_optional();
2013 
2014   if (VerifyDuringGC) {
2015     HandleMark hm;  // handle scope
2016     Universe::heap()->prepare_for_verify();
2017     Universe::verify(VerifyOption_G1UsePrevMarking,
2018                      " VerifyDuringGC:(before)");
2019   }
2020   g1h->check_bitmaps("Cleanup Start");
2021 
2022   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
2023   g1p->record_concurrent_mark_cleanup_start();
2024 
2025   double start = os::elapsedTime();
2026 
2027   HeapRegionRemSet::reset_for_cleanup_tasks();
2028 
2029   uint n_workers;
2030 
2031   // Do counting once more with the world stopped for good measure.
2032   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
2033 
2034   if (G1CollectedHeap::use_parallel_gc_threads()) {
2035     assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
2036            "sanity check");
2037 
2038     g1h->set_par_threads();
2039     n_workers = g1h->n_par_threads();
2040     assert(g1h->n_par_threads() == n_workers,
2041            "Should not have been reset");
2042     g1h->workers()->run_task(&g1_par_count_task);
2043     // Done with the parallel phase so reset to 0.
2044     g1h->set_par_threads(0);
2045 
2046     assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
2047            "sanity check");
2048   } else {
2049     n_workers = 1;
2050     g1_par_count_task.work(0);
2051   }
2052 
2053   if (VerifyDuringGC) {
2054     // Verify that the counting data accumulated during marking matches
2055     // that calculated by walking the marking bitmap.
2056 
2057     // Bitmaps to hold expected values
2058     BitMap expected_region_bm(_region_bm.size(), true);
2059     BitMap expected_card_bm(_card_bm.size(), true);
2060 
2061     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
2062                                                  &_region_bm,
2063                                                  &_card_bm,
2064                                                  &expected_region_bm,
2065                                                  &expected_card_bm);
2066 
2067     if (G1CollectedHeap::use_parallel_gc_threads()) {
2068       g1h->set_par_threads((int)n_workers);
2069       g1h->workers()->run_task(&g1_par_verify_task);
2070       // Done with the parallel phase so reset to 0.
2071       g1h->set_par_threads(0);
2072 
2073       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
2074              "sanity check");
2075     } else {
2076       g1_par_verify_task.work(0);
2077     }
2078 
2079     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
2080   }
2081 
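  // Record heap usage before regions are freed (by the note-end task below) so we
  // can print the size transition at the end of cleanup.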
2082   size_t start_used_bytes = g1h->used();
2083   g1h->set_marking_complete();
2084 
2085   double count_end = os::elapsedTime();
2086   double this_final_counting_time = (count_end - start);
2087   _total_counting_time += this_final_counting_time;
2088 
2089   if (G1PrintRegionLivenessInfo) {
2090     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
2091     _g1h->heap_region_iterate(&cl);
2092   }
2093 
2094   // Install newly created mark bitMap as "prev".
2095   swapMarkBitMaps();
2096 
2097   g1h->reset_gc_time_stamp();
2098 
2099   // Note end of marking in all heap regions.
2100   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
2101   if (G1CollectedHeap::use_parallel_gc_threads()) {
2102     g1h->set_par_threads((int)n_workers);
2103     g1h->workers()->run_task(&g1_par_note_end_task);
2104     g1h->set_par_threads(0);
2105 
2106     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
2107            "sanity check");
2108   } else {
2109     g1_par_note_end_task.work(0);
2110   }
2111   g1h->check_gc_time_stamps();
2112 
2113   if (!cleanup_list_is_empty()) {
2114     // The cleanup list is not empty, so we'll have to process it
2115     // concurrently. Notify anyone else that might be wanting free
2116     // regions that there will be more free regions coming soon.
2117     g1h->set_free_regions_coming();
2118   }
2119 
2120   // Scrub the remembered sets before the record_concurrent_mark_cleanup_end()
2121   // call below, since scrubbing affects the metric by which we sort the heap regions.
2122   if (G1ScrubRemSets) {
2123     double rs_scrub_start = os::elapsedTime();
2124     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
2125     if (G1CollectedHeap::use_parallel_gc_threads()) {
2126       g1h->set_par_threads((int)n_workers);
2127       g1h->workers()->run_task(&g1_par_scrub_rs_task);
2128       g1h->set_par_threads(0);
2129 
2130       assert(g1h->check_heap_region_claim_values(
2131                                             HeapRegion::ScrubRemSetClaimValue),
2132              "sanity check");
2133     } else {
2134       g1_par_scrub_rs_task.work(0);
2135     }
2136 
2137     double rs_scrub_end = os::elapsedTime();
2138     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
2139     _total_rs_scrub_time += this_rs_scrub_time;
2140   }
2141 
2142   // this will also free any regions totally full of garbage objects,
2143   // and sort the regions.
2144   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
2145 
2146   // Statistics.
2147   double end = os::elapsedTime();
2148   _cleanup_times.add((end - start) * 1000.0);
2149 
2150   if (G1Log::fine()) {
2151     g1h->print_size_transition(gclog_or_tty,
2152                                start_used_bytes,
2153                                g1h->used(),
2154                                g1h->capacity());
2155   }
2156 
2157   // Clean up will have freed any regions completely full of garbage.
2158   // Update the soft reference policy with the new heap occupancy.
2159   Universe::update_heap_info_at_gc();
2160 
2161   if (VerifyDuringGC) {
2162     HandleMark hm;  // handle scope
2163     Universe::heap()->prepare_for_verify();
2164     Universe::verify(VerifyOption_G1UsePrevMarking,
2165                      " VerifyDuringGC:(after)");
2166   }
2167 
2168   g1h->check_bitmaps("Cleanup End");
2169 
2170   g1h->verify_region_sets_optional();
2171 
2172   // We need to make this be a "collection" so any collection pause that
2173   // races with it goes around and waits for completeCleanup to finish.
2174   g1h->increment_total_collections();
2175 
2176   // Clean out dead classes and update Metaspace sizes.
2177   if (ClassUnloadingWithConcurrentMark) {
2178     ClassLoaderDataGraph::purge();
2179   }
2180   MetaspaceGC::compute_new_size();
2181 
2182   // We reclaimed old regions so we should calculate the sizes to make
2183   // sure we update the old gen/space data.
2184   g1h->g1mm()->update_sizes();
2185 
2186   g1h->trace_heap_after_concurrent_cycle();
2187 }
2188 
2189 void ConcurrentMark::completeCleanup() {
2190   if (has_aborted()) return;
2191 
2192   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2193 
2194   _cleanup_list.verify_optional();
2195   FreeRegionList tmp_free_list("Tmp Free List");
2196 
2197   if (G1ConcRegionFreeingVerbose) {
2198     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2199                            "cleanup list has %u entries",
2200                            _cleanup_list.length());
2201   }
2202 
2203   // No one else should be accessing the _cleanup_list at this point,
2204   // so it is not necessary to take any locks
2205   while (!_cleanup_list.is_empty()) {
2206     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
2207     assert(hr != NULL, "Got NULL from a non-empty list");
2208     hr->par_clear();
2209     tmp_free_list.add_ordered(hr);
2210 
2211     // Instead of adding one region at a time to the secondary_free_list,
2212     // we accumulate them in the local list and move them a few at a
2213     // time. This also cuts down on the number of notify_all() calls
2214     // we do during this process. We'll also append the local list when
2215     // _cleanup_list is empty (which means we just removed the last
2216     // region from the _cleanup_list).
2217     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2218         _cleanup_list.is_empty()) {
2219       if (G1ConcRegionFreeingVerbose) {
2220         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2221                                "appending %u entries to the secondary_free_list, "
2222                                "cleanup list still has %u entries",
2223                                tmp_free_list.length(),
2224                                _cleanup_list.length());
2225       }
2226 
2227       {
2228         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2229         g1h->secondary_free_list_add(&tmp_free_list);
2230         SecondaryFreeList_lock->notify_all();
2231       }
2232 
2233       if (G1StressConcRegionFreeing) {
2234         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2235           os::sleep(Thread::current(), (jlong) 1, false);
2236         }
2237       }
2238     }
2239   }
2240   assert(tmp_free_list.is_empty(), "post-condition");
2241 }
2242 
2243 // Supporting Object and Oop closures for reference discovery
2244 // and processing during marking
2245 
2246 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2247   HeapWord* addr = (HeapWord*)obj;
2248   return addr != NULL &&
2249          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2250 }
2251 
2252 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2253 // Uses the CMTask associated with a worker thread (for serial reference
2254 // processing the CMTask for worker 0 is used) to preserve (mark) and
2255 // trace referent objects.
2256 //
2257 // Using the CMTask and embedded local queues avoids having the worker
2258 // threads operating on the global mark stack. This reduces the risk
2259 // of overflowing the stack - which we would rather avoid at this late
2260 // stage. Also using the tasks' local queues removes the potential
2261 // of the workers interfering with each other that could occur if
2262 // operating on the global stack.
2263 
2264 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2265   ConcurrentMark* _cm;
2266   CMTask*         _task;
2267   int             _ref_counter_limit;
2268   int             _ref_counter;
2269   bool            _is_serial;
2270  public:
2271   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2272     _cm(cm), _task(task), _is_serial(is_serial),
2273     _ref_counter_limit(G1RefProcDrainInterval) {
2274     assert(_ref_counter_limit > 0, "sanity");
2275     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2276     _ref_counter = _ref_counter_limit;
2277   }
2278 
2279   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2280   virtual void do_oop(      oop* p) { do_oop_work(p); }
2281 
2282   template <class T> void do_oop_work(T* p) {
2283     if (!_cm->has_overflown()) {
2284       oop obj = oopDesc::load_decode_heap_oop(p);
2285       if (_cm->verbose_high()) {
2286         gclog_or_tty->print_cr("\t[%u] we're looking at location "
2287                                "*"PTR_FORMAT" = "PTR_FORMAT,
2288                                _task->worker_id(), p2i(p), p2i((void*) obj));
2289       }
2290 
2291       _task->deal_with_reference(obj);
2292       _ref_counter--;
2293 
2294       if (_ref_counter == 0) {
2295         // We have dealt with _ref_counter_limit references, pushing them
2296         // and objects reachable from them on to the local stack (and
2297         // possibly the global stack). Call CMTask::do_marking_step() to
2298         // process these entries.
2299         //
2300         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2301         // there's nothing more to do (i.e. we're done with the entries that
2302         // were pushed as a result of the CMTask::deal_with_reference() calls
2303         // above) or we overflow.
2304         //
2305         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2306         // flag while there may still be some work to do. (See the comment at
2307         // the beginning of CMTask::do_marking_step() for those conditions -
2308         // one of which is reaching the specified time target.) It is only
2309         // when CMTask::do_marking_step() returns without setting the
2310         // has_aborted() flag that the marking step has completed.
2311         do {
2312           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2313           _task->do_marking_step(mark_step_duration_ms,
2314                                  false      /* do_termination */,
2315                                  _is_serial);
2316         } while (_task->has_aborted() && !_cm->has_overflown());
2317         _ref_counter = _ref_counter_limit;
2318       }
2319     } else {
2320       if (_cm->verbose_high()) {
2321          gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id());
2322       }
2323     }
2324   }
2325 };
2326 
2327 // 'Drain' oop closure used by both serial and parallel reference processing.
2328 // Uses the CMTask associated with a given worker thread (for serial
2329 // reference processing the CMTask for worker 0 is used). Calls the
2330 // do_marking_step routine, with an unbelievably large timeout value,
2331 // to drain the marking data structures of the remaining entries
2332 // added by the 'keep alive' oop closure above.
2333 
2334 class G1CMDrainMarkingStackClosure: public VoidClosure {
2335   ConcurrentMark* _cm;
2336   CMTask*         _task;
2337   bool            _is_serial;
2338  public:
2339   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2340     _cm(cm), _task(task), _is_serial(is_serial) {
2341     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
2342   }
2343 
2344   void do_void() {
2345     do {
2346       if (_cm->verbose_high()) {
2347         gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s",
2348                                _task->worker_id(), BOOL_TO_STR(_is_serial));
2349       }
2350 
2351       // We call CMTask::do_marking_step() to completely drain the local
2352       // and global marking stacks of entries pushed by the 'keep alive'
2353       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2354       //
2355       // CMTask::do_marking_step() is called in a loop, which we'll exit
2356       // if there's nothing more to do (i.e. we've completely drained the
2357       // entries that were pushed as a result of applying the 'keep alive'
2358       // closure to the entries on the discovered ref lists) or we overflow
2359       // the global marking stack.
2360       //
2361       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2362       // flag while there may still be some work to do. (See the comment at
2363       // the beginning of CMTask::do_marking_step() for those conditions -
2364       // one of which is reaching the specified time target.) It is only
2365       // when CMTask::do_marking_step() returns without setting the
2366       // has_aborted() flag that the marking step has completed.
2367 
2368       _task->do_marking_step(1000000000.0 /* something very large */,
2369                              true         /* do_termination */,
2370                              _is_serial);
2371     } while (_task->has_aborted() && !_cm->has_overflown());
2372   }
2373 };
2374 
2375 // Implementation of AbstractRefProcTaskExecutor for parallel
2376 // reference processing at the end of G1 concurrent marking
2377 
2378 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2379 private:
2380   G1CollectedHeap* _g1h;
2381   ConcurrentMark*  _cm;
2382   WorkGang*        _workers;
2383   int              _active_workers;
2384 
2385 public:
2386   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2387                         ConcurrentMark* cm,
2388                         WorkGang* workers,
2389                         int n_workers) :
2390     _g1h(g1h), _cm(cm),
2391     _workers(workers), _active_workers(n_workers) { }
2392 
2393   // Executes the given task using concurrent marking worker threads.
2394   virtual void execute(ProcessTask& task);
2395   virtual void execute(EnqueueTask& task);
2396 };
2397 
2398 class G1CMRefProcTaskProxy: public AbstractGangTask {
2399   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2400   ProcessTask&     _proc_task;
2401   G1CollectedHeap* _g1h;
2402   ConcurrentMark*  _cm;
2403 
2404 public:
2405   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2406                      G1CollectedHeap* g1h,
2407                      ConcurrentMark* cm) :
2408     AbstractGangTask("Process reference objects in parallel"),
2409     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2410     ReferenceProcessor* rp = _g1h->ref_processor_cm();
2411     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2412   }
2413 
2414   virtual void work(uint worker_id) {
2415     ResourceMark rm;
2416     HandleMark hm;
2417     CMTask* task = _cm->task(worker_id);
2418     G1CMIsAliveClosure g1_is_alive(_g1h);
2419     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2420     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2421 
2422     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2423   }
2424 };
2425 
2426 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2427   assert(_workers != NULL, "Need parallel worker threads.");
2428   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2429 
2430   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2431 
2432   // We need to reset the concurrency level before each
2433   // proxy task execution, so that the termination protocol
2434   // and overflow handling in CMTask::do_marking_step() knows
2435   // how many workers to wait for.
2436   _cm->set_concurrency(_active_workers);
2437   _g1h->set_par_threads(_active_workers);
2438   _workers->run_task(&proc_task_proxy);
2439   _g1h->set_par_threads(0);
2440 }
2441 
2442 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2443   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2444   EnqueueTask& _enq_task;
2445 
2446 public:
2447   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2448     AbstractGangTask("Enqueue reference objects in parallel"),
2449     _enq_task(enq_task) { }
2450 
2451   virtual void work(uint worker_id) {
2452     _enq_task.work(worker_id);
2453   }
2454 };
2455 
2456 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2457   assert(_workers != NULL, "Need parallel worker threads.");
2458   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2459 
2460   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2461 
2462   // Not strictly necessary but...
2463   //
2464   // We need to reset the concurrency level before each
2465   // proxy task execution, so that the termination protocol
2466   // and overflow handling in CMTask::do_marking_step() knows
2467   // how many workers to wait for.
2468   _cm->set_concurrency(_active_workers);
2469   _g1h->set_par_threads(_active_workers);
2470   _workers->run_task(&enq_task_proxy);
2471   _g1h->set_par_threads(0);
2472 }
2473 
2474 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
2475   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2476 }
2477 
2478 // Helper class to get rid of some boilerplate code.
2479 class G1RemarkGCTraceTime : public GCTraceTime {
2480   static bool doit_and_prepend(bool doit) {
2481     if (doit) {
2482       gclog_or_tty->put(' ');
2483     }
2484     return doit;
2485   }
2486 
2487  public:
2488   G1RemarkGCTraceTime(const char* title, bool doit)
2489     : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
2490         G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
2491   }
2492 };
2493 
2494 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2495   if (has_overflown()) {
2496     // Skip processing the discovered references if we have
2497     // overflown the global marking stack. Reference objects
2498     // only get discovered once so it is OK to not
2499     // de-populate the discovered reference lists. We could have,
2500     // but the only benefit would be that, when marking restarts,
2501     // less reference objects are discovered.
2502     return;
2503   }
2504 
2505   ResourceMark rm;
2506   HandleMark   hm;
2507 
2508   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2509 
2510   // Is alive closure.
2511   G1CMIsAliveClosure g1_is_alive(g1h);
2512 
2513   // Inner scope to exclude the cleaning of the string and symbol
2514   // tables from the displayed time.
2515   {
2516     if (G1Log::finer()) {
2517       gclog_or_tty->put(' ');
2518     }
2519     GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm(), concurrent_gc_id());
2520 
2521     ReferenceProcessor* rp = g1h->ref_processor_cm();
2522 
2523     // See the comment in G1CollectedHeap::ref_processing_init()
2524     // about how reference processing currently works in G1.
2525 
2526     // Set the soft reference policy
2527     rp->setup_policy(clear_all_soft_refs);
2528     assert(_markStack.isEmpty(), "mark stack should be empty");
2529 
2530     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2531     // in serial reference processing. Note these closures are also
2532     // used for serially processing (by the current thread) the
2533     // JNI references during parallel reference processing.
2534     //
2535     // These closures do not need to synchronize with the worker
2536     // threads involved in parallel reference processing as these
2537     // instances are executed serially by the current thread (e.g.
2538     // reference processing is not multi-threaded and is thus
2539     // performed by the current thread instead of a gang worker).
2540     //
2541     // The gang tasks involved in parallel reference processing create
2542     // their own instances of these closures, which do their own
2543     // synchronization among themselves.
2544     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2545     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2546 
2547     // We need at least one active thread. If reference processing
2548     // is not multi-threaded we use the current (VMThread) thread,
2549     // otherwise we use the work gang from the G1CollectedHeap and
2550     // we utilize all the worker threads we can.
2551     bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
2552     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
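    // Use at least one worker and no more than _max_worker_id workers.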
2553     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2554 
2555     // Parallel processing task executor.
2556     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2557                                               g1h->workers(), active_workers);
2558     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2559 
2560     // Set the concurrency level. The phase was already set prior to
2561     // executing the remark task.
2562     set_concurrency(active_workers);
2563 
2564     // Set the degree of MT processing here.  If the discovery was done MT,
2565     // the number of threads involved during discovery could differ from
2566     // the number of active workers.  This is OK as long as the discovered
2567     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2568     rp->set_active_mt_degree(active_workers);
2569 
2570     // Process the weak references.
2571     const ReferenceProcessorStats& stats =
2572         rp->process_discovered_references(&g1_is_alive,
2573                                           &g1_keep_alive,
2574                                           &g1_drain_mark_stack,
2575                                           executor,
2576                                           g1h->gc_timer_cm(),
2577                                           concurrent_gc_id());
2578     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2579 
2580     // The do_oop work routines of the keep_alive and drain_marking_stack
2581     // oop closures will set the has_overflown flag if we overflow the
2582     // global marking stack.
2583 
2584     assert(_markStack.overflow() || _markStack.isEmpty(),
2585             "mark stack should be empty (unless it overflowed)");
2586 
2587     if (_markStack.overflow()) {
2588       // This should have been done already when we tried to push an
2589       // entry on to the global mark stack. But let's do it again.
2590       set_has_overflown();
2591     }
2592 
2593     assert(rp->num_q() == active_workers, "why not");
2594 
2595     rp->enqueue_discovered_references(executor);
2596 
2597     rp->verify_no_references_recorded();
2598     assert(!rp->discovery_enabled(), "Post condition");
2599   }
2600 
2601   if (has_overflown()) {
2602     // We cannot trust g1_is_alive if the marking stack overflowed
2603     return;
2604   }
2605 
2606   assert(_markStack.isEmpty(), "Marking should have completed");
2607 
2608   // Unload Klasses, String, Symbols, Code Cache, etc.
2609   {
2610     G1RemarkGCTraceTime trace("Unloading", G1Log::finer());
2611 
2612     if (ClassUnloadingWithConcurrentMark) {
2613       bool purged_classes;
2614 
2615       {
2616         G1RemarkGCTraceTime trace("System Dictionary Unloading", G1Log::finest());
2617         purged_classes = SystemDictionary::do_unloading(&g1_is_alive);
2618       }
2619 
2620       {
2621         G1RemarkGCTraceTime trace("Parallel Unloading", G1Log::finest());
2622         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2623       }
2624     }
2625 
2626     if (G1StringDedup::is_enabled()) {
2627       G1RemarkGCTraceTime trace("String Deduplication Unlink", G1Log::finest());
2628       G1StringDedup::unlink(&g1_is_alive);
2629     }
2630   }
2631 }
2632 
2633 void ConcurrentMark::swapMarkBitMaps() {
2634   CMBitMapRO* temp = _prevMarkBitMap;
2635   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2636   _nextMarkBitMap  = (CMBitMap*)  temp;
2637 }
2638 
2639 class CMObjectClosure;
2640 
2641 // Closure for iterating over objects, currently only used for
2642 // processing SATB buffers.
2643 class CMObjectClosure : public ObjectClosure {
2644 private:
2645   CMTask* _task;
2646 
2647 public:
2648   void do_object(oop obj) {
2649     _task->deal_with_reference(obj);
2650   }
2651 
2652   CMObjectClosure(CMTask* task) : _task(task) { }
2653 };
2654 
2655 class G1RemarkThreadsClosure : public ThreadClosure {
2656   CMObjectClosure _cm_obj;
2657   G1CMOopClosure _cm_cl;
2658   MarkingCodeBlobClosure _code_cl;
2659   int _thread_parity;
2660   bool _is_par;
2661 
2662  public:
2663   G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task, bool is_par) :
2664     _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2665     _thread_parity(SharedHeap::heap()->strong_roots_parity()), _is_par(is_par) {}
2666 
2667   void do_thread(Thread* thread) {
2668     if (thread->is_Java_thread()) {
2669       if (thread->claim_oops_do(_is_par, _thread_parity)) {
2670         JavaThread* jt = (JavaThread*)thread;
2671 
2672         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking;
2673         // however, oops reachable from nmethods have very complex lifecycles:
2674         // * Alive if on the stack of an executing method
2675         // * Weakly reachable otherwise
2676         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2677         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2678         jt->nmethods_do(&_code_cl);
2679 
2680         jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj);
2681       }
2682     } else if (thread->is_VM_thread()) {
2683       if (thread->claim_oops_do(_is_par, _thread_parity)) {
2684         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj);
2685       }
2686     }
2687   }
2688 };
2689 
2690 class CMRemarkTask: public AbstractGangTask {
2691 private:
2692   ConcurrentMark* _cm;
2693   bool            _is_serial;
2694 public:
2695   void work(uint worker_id) {
2696     // Since all available tasks are actually started, we should
2697     // only proceed if we're supposed to be active.
2698     if (worker_id < _cm->active_tasks()) {
2699       CMTask* task = _cm->task(worker_id);
2700       task->record_start_time();
2701       {
2702         ResourceMark rm;
2703         HandleMark hm;
2704 
2705         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task, !_is_serial);
2706         Threads::threads_do(&threads_f);
2707       }
2708 
2709       do {
2710         task->do_marking_step(1000000000.0 /* something very large */,
2711                               true         /* do_termination       */,
2712                               _is_serial);
2713       } while (task->has_aborted() && !_cm->has_overflown());
2714       // If we overflow, then we do not want to restart. We instead
2715       // want to abort remark and do concurrent marking again.
2716       task->record_end_time();
2717     }
2718   }
2719 
2720   CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
2721     AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
2722     _cm->terminator()->reset_for_reuse(active_workers);
2723   }
2724 };
2725 
2726 void ConcurrentMark::checkpointRootsFinalWork() {
2727   ResourceMark rm;
2728   HandleMark   hm;
2729   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2730 
2731   G1RemarkGCTraceTime trace("Finalize Marking", G1Log::finer());
2732 
2733   g1h->ensure_parsability(false);
2734 
2735   if (G1CollectedHeap::use_parallel_gc_threads()) {
2736     G1CollectedHeap::StrongRootsScope srs(g1h);
2737     // this is remark, so we'll use up all active threads
2738     uint active_workers = g1h->workers()->active_workers();
2739     if (active_workers == 0) {
2740       assert(active_workers > 0, "Should have been set earlier");
2741       active_workers = (uint) ParallelGCThreads;
2742       g1h->workers()->set_active_workers(active_workers);
2743     }
2744     set_concurrency_and_phase(active_workers, false /* concurrent */);
2745     // Leave _parallel_marking_threads at its
2746     // value originally calculated in the ConcurrentMark
2747     // constructor and pass values of the active workers
2748     // through the gang in the task.
2749 
2750     CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
2751     // We will start all available threads, even if we decide that the
2752     // active_workers will be fewer. The extra ones will just bail out
2753     // immediately.
2754     g1h->set_par_threads(active_workers);
2755     g1h->workers()->run_task(&remarkTask);
2756     g1h->set_par_threads(0);
2757   } else {
2758     G1CollectedHeap::StrongRootsScope srs(g1h);
2759     uint active_workers = 1;
2760     set_concurrency_and_phase(active_workers, false /* concurrent */);
2761 
2762     // Note - if there's no work gang then the VMThread will be
2763     // the thread to execute the remark - serially. We have
2764     // to pass true for the is_serial parameter so that
2765     // CMTask::do_marking_step() doesn't enter the sync
2766     // barriers in the event of an overflow. Doing so will
2767     // cause an assert that the current thread is not a
2768     // concurrent GC thread.
2769     CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/);
2770     remarkTask.work(0);
2771   }
2772   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2773   guarantee(has_overflown() ||
2774             satb_mq_set.completed_buffers_num() == 0,
2775             err_msg("Invariant: has_overflown = %s, num buffers = %d",
2776                     BOOL_TO_STR(has_overflown()),
2777                     satb_mq_set.completed_buffers_num()));
2778 
2779   print_stats();
2780 }
2781 
2782 #ifndef PRODUCT
2783 
2784 class PrintReachableOopClosure: public OopClosure {
2785 private:
2786   G1CollectedHeap* _g1h;
2787   outputStream*    _out;
2788   VerifyOption     _vo;
2789   bool             _all;
2790 
2791 public:
2792   PrintReachableOopClosure(outputStream* out,
2793                            VerifyOption  vo,
2794                            bool          all) :
2795     _g1h(G1CollectedHeap::heap()),
2796     _out(out), _vo(vo), _all(all) { }
2797 
2798   void do_oop(narrowOop* p) { do_oop_work(p); }
2799   void do_oop(      oop* p) { do_oop_work(p); }
2800 
2801   template <class T> void do_oop_work(T* p) {
2802     oop         obj = oopDesc::load_decode_heap_oop(p);
2803     const char* str = NULL;
2804     const char* str2 = "";
2805 
2806     if (obj == NULL) {
2807       str = "";
2808     } else if (!_g1h->is_in_g1_reserved(obj)) {
2809       str = " O";
2810     } else {
2811       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2812       bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2813       bool marked = _g1h->is_marked(obj, _vo);
2814 
2815       if (over_tams) {
2816         str = " >";
2817         if (marked) {
2818           str2 = " AND MARKED";
2819         }
2820       } else if (marked) {
2821         str = " M";
2822       } else {
2823         str = " NOT";
2824       }
2825     }
2826 
2827     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2828                    p2i(p), p2i((void*) obj), str, str2);
2829   }
2830 };
2831 
2832 class PrintReachableObjectClosure : public ObjectClosure {
2833 private:
2834   G1CollectedHeap* _g1h;
2835   outputStream*    _out;
2836   VerifyOption     _vo;
2837   bool             _all;
2838   HeapRegion*      _hr;
2839 
2840 public:
2841   PrintReachableObjectClosure(outputStream* out,
2842                               VerifyOption  vo,
2843                               bool          all,
2844                               HeapRegion*   hr) :
2845     _g1h(G1CollectedHeap::heap()),
2846     _out(out), _vo(vo), _all(all), _hr(hr) { }
2847 
2848   void do_object(oop o) {
2849     bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2850     bool marked = _g1h->is_marked(o, _vo);
2851     bool print_it = _all || over_tams || marked;
2852 
2853     if (print_it) {
2854       _out->print_cr(" "PTR_FORMAT"%s",
2855                      p2i((void *)o), (over_tams) ? " >" : (marked) ? " M" : "");
2856       PrintReachableOopClosure oopCl(_out, _vo, _all);
2857       o->oop_iterate_no_header(&oopCl);
2858     }
2859   }
2860 };
2861 
2862 class PrintReachableRegionClosure : public HeapRegionClosure {
2863 private:
2864   G1CollectedHeap* _g1h;
2865   outputStream*    _out;
2866   VerifyOption     _vo;
2867   bool             _all;
2868 
2869 public:
2870   bool doHeapRegion(HeapRegion* hr) {
2871     HeapWord* b = hr->bottom();
2872     HeapWord* e = hr->end();
2873     HeapWord* t = hr->top();
2874     HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2875     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2876                    "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p));
2877     _out->cr();
2878 
2879     HeapWord* from = b;
2880     HeapWord* to   = t;
2881 
2882     if (to > from) {
2883       _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to));
2884       _out->cr();
2885       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2886       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2887       _out->cr();
2888     }
2889 
2890     return false;
2891   }
2892 
2893   PrintReachableRegionClosure(outputStream* out,
2894                               VerifyOption  vo,
2895                               bool          all) :
2896     _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2897 };
2898 
2899 void ConcurrentMark::print_reachable(const char* str,
2900                                      VerifyOption vo,
2901                                      bool all) {
2902   gclog_or_tty->cr();
2903   gclog_or_tty->print_cr("== Doing heap dump... ");
2904 
2905   if (G1PrintReachableBaseFile == NULL) {
2906     gclog_or_tty->print_cr("  #### error: no base file defined");
2907     return;
2908   }
2909 
2910   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2911       (JVM_MAXPATHLEN - 1)) {
2912     gclog_or_tty->print_cr("  #### error: file name too long");
2913     return;
2914   }
2915 
2916   char file_name[JVM_MAXPATHLEN];
2917   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2918   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2919 
2920   fileStream fout(file_name);
2921   if (!fout.is_open()) {
2922     gclog_or_tty->print_cr("  #### error: could not open file");
2923     return;
2924   }
2925 
2926   outputStream* out = &fout;
2927   out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2928   out->cr();
2929 
2930   out->print_cr("--- ITERATING OVER REGIONS");
2931   out->cr();
2932   PrintReachableRegionClosure rcl(out, vo, all);
2933   _g1h->heap_region_iterate(&rcl);
2934   out->cr();
2935 
2936   gclog_or_tty->print_cr("  done");
2937   gclog_or_tty->flush();
2938 }
2939 
2940 #endif // PRODUCT
2941 
2942 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2943   // Note we are overriding the read-only view of the prev map here, via
2944   // the cast.
2945   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2946 }
2947 
2948 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2949   _nextMarkBitMap->clearRange(mr);
2950 }
2951 
2952 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
2953   clearRangePrevBitmap(mr);
2954   clearRangeNextBitmap(mr);
2955 }
2956 
2957 HeapRegion*
2958 ConcurrentMark::claim_region(uint worker_id) {
2959   // "checkpoint" the finger
2960   HeapWord* finger = _finger;
2961 
2962   // _heap_end will not change underneath our feet; it only changes at
2963   // yield points.
2964   while (finger < _heap_end) {
2965     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2966 
2967     // Note on how this code handles humongous regions. In the
2968     // normal case the finger will reach the start of a "starts
2969     // humongous" (SH) region. Its end will either be the end of the
2970     // last "continues humongous" (CH) region in the sequence, or the
2971     // standard end of the SH region (if the SH is the only region in
2972     // the sequence). That way claim_region() will skip over the CH
2973     // regions. However, there is a subtle race between a CM thread
2974     // executing this method and a mutator thread doing a humongous
2975     // object allocation. The two are not mutually exclusive as the CM
2976     // thread does not need to hold the Heap_lock when it gets
2977     // here. So there is a chance that claim_region() will come across
2978     // a free region that's in the process of becoming a SH or a CH
2979     // region. In the former case, it will either
2980     //   a) Miss the update to the region's end, in which case it will
2981     //      visit every subsequent CH region, find their bitmaps
2982     //      empty, and do nothing, or
2983     //   b) Observe the update of the region's end (in which case
2984     //      it will skip the subsequent CH regions).
2985     // If it comes across a region that suddenly becomes CH, the
2986     // scenario will be similar to b). So, the race between
2987     // claim_region() and a humongous object allocation might force us
2988     // to do a bit of unnecessary work (due to some unnecessary bitmap
2989     // iterations) but it should not introduce any correctness issues.
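         // Summary of the claim protocol below (descriptive only): we try to
         // CAS the global _finger from the value we sampled to the end of the
         // current region (or one GrainWords step if the region is
         // uncommitted). Winning the CAS claims that range; losing it simply
         // means another worker moved the finger first, so we re-read it and
         // retry.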
2990     HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2991 
2992     // heap_region_containing_raw() above may return NULL, as we always scan and
2993     // claim up to the end of the heap. In this case, just jump to the next region.
2994     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2995 
2996     // Is the gap between reading the finger and doing the CAS too long?
2997     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2998     if (res == finger && curr_region != NULL) {
2999       // we succeeded
3000       HeapWord*   bottom        = curr_region->bottom();
3001       HeapWord*   limit         = curr_region->next_top_at_mark_start();
3002 
3003       if (verbose_low()) {
3004         gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" "
3005                                "["PTR_FORMAT", "PTR_FORMAT"), "
3006                                "limit = "PTR_FORMAT,
3007                                worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit));
3008       }
3009 
3010       // Notice that _finger == end cannot be guaranteed here since
3011       // someone else might have moved the finger even further.
3012       assert(_finger >= end, "the finger should have moved forward");
3013 
3014       if (verbose_low()) {
3015         gclog_or_tty->print_cr("[%u] we were successful with region = "
3016                                PTR_FORMAT, worker_id, p2i(curr_region));
3017       }
3018 
3019       if (limit > bottom) {
3020         if (verbose_low()) {
3021           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, "
3022                                  "returning it ", worker_id, p2i(curr_region));
3023         }
3024         return curr_region;
3025       } else {
3026         assert(limit == bottom,
3027                "the region limit should be at bottom");
3028         if (verbose_low()) {
3029           gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, "
3030                                  "returning NULL", worker_id, p2i(curr_region));
3031         }
3032         // we return NULL and the caller should try calling
3033         // claim_region() again.
3034         return NULL;
3035       }
3036     } else {
3037       assert(_finger > finger, "the finger should have moved forward");
3038       if (verbose_low()) {
3039         if (curr_region == NULL) {
3040           gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, "
3041                                  "global finger = "PTR_FORMAT", "
3042                                  "our finger = "PTR_FORMAT,
3043                                  worker_id, p2i(_finger), p2i(finger));
3044         } else {
3045           gclog_or_tty->print_cr("[%u] somebody else moved the finger, "
3046                                  "global finger = "PTR_FORMAT", "
3047                                  "our finger = "PTR_FORMAT,
3048                                  worker_id, p2i(_finger), p2i(finger));
3049         }
3050       }
3051 
3052       // read it again
3053       finger = _finger;
3054     }
3055   }
3056 
3057   return NULL;
3058 }
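
     // Illustrative usage sketch only (a hypothetical caller; the real loop
     // lives in CMTask::do_marking_step() further below and also checks for
     // aborts):
     //
     //   HeapRegion* hr = cm->claim_region(worker_id);
     //   while (hr == NULL && cm->finger() < heap_end) {
     //     // NULL here means an empty region was claimed; just try again
     //     hr = cm->claim_region(worker_id);
     //   }
     //   if (hr != NULL) {
     //     // scan hr's part of the next mark bitmap and process its objects
     //   }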
3059 
3060 #ifndef PRODUCT
3061 enum VerifyNoCSetOopsPhase {
3062   VerifyNoCSetOopsStack,
3063   VerifyNoCSetOopsQueues,
3064   VerifyNoCSetOopsSATBCompleted,
3065   VerifyNoCSetOopsSATBThread
3066 };
3067 
3068 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
3069 private:
3070   G1CollectedHeap* _g1h;
3071   VerifyNoCSetOopsPhase _phase;
3072   int _info;
3073 
3074   const char* phase_str() {
3075     switch (_phase) {
3076     case VerifyNoCSetOopsStack:         return "Stack";
3077     case VerifyNoCSetOopsQueues:        return "Queue";
3078     case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
3079     case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
3080     default:                            ShouldNotReachHere();
3081     }
3082     return NULL;
3083   }
3084 
3085   void do_object_work(oop obj) {
3086     guarantee(!_g1h->obj_in_cs(obj),
3087               err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
3088                       p2i((void*) obj), phase_str(), _info));
3089   }
3090 
3091 public:
3092   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
3093 
3094   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
3095     _phase = phase;
3096     _info = info;
3097   }
3098 
3099   virtual void do_oop(oop* p) {
3100     oop obj = oopDesc::load_decode_heap_oop(p);
3101     do_object_work(obj);
3102   }
3103 
3104   virtual void do_oop(narrowOop* p) {
3105     // We should not come across narrow oops while scanning marking
3106     // stacks and SATB buffers.
3107     ShouldNotReachHere();
3108   }
3109 
3110   virtual void do_object(oop obj) {
3111     do_object_work(obj);
3112   }
3113 };
3114 
3115 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
3116                                          bool verify_enqueued_buffers,
3117                                          bool verify_thread_buffers,
3118                                          bool verify_fingers) {
3119   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
3120   if (!G1CollectedHeap::heap()->mark_in_progress()) {
3121     return;
3122   }
3123 
3124   VerifyNoCSetOopsClosure cl;
3125 
3126   if (verify_stacks) {
3127     // Verify entries on the global mark stack
3128     cl.set_phase(VerifyNoCSetOopsStack);
3129     _markStack.oops_do(&cl);
3130 
3131     // Verify entries on the task queues
3132     for (uint i = 0; i < _max_worker_id; i += 1) {
3133       cl.set_phase(VerifyNoCSetOopsQueues, i);
3134       CMTaskQueue* queue = _task_queues->queue(i);
3135       queue->oops_do(&cl);
3136     }
3137   }
3138 
3139   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
3140 
3141   // Verify entries on the enqueued SATB buffers
3142   if (verify_enqueued_buffers) {
3143     cl.set_phase(VerifyNoCSetOopsSATBCompleted);
3144     satb_qs.iterate_completed_buffers_read_only(&cl);
3145   }
3146 
3147   // Verify entries on the per-thread SATB buffers
3148   if (verify_thread_buffers) {
3149     cl.set_phase(VerifyNoCSetOopsSATBThread);
3150     satb_qs.iterate_thread_buffers_read_only(&cl);
3151   }
3152 
3153   if (verify_fingers) {
3154     // Verify the global finger
3155     HeapWord* global_finger = finger();
3156     if (global_finger != NULL && global_finger < _heap_end) {
3157       // The global finger always points to a heap region boundary. We
3158       // use heap_region_containing_raw() to get the containing region
3159       // given that the global finger could be pointing to a free region
3160       // which subsequently becomes a continues humongous region. If that
3161       // happens, heap_region_containing() will return the bottom of the
3162       // corresponding starts humongous region and the check below will
3163       // not hold any more.
3164       // Since we always iterate over all regions, we might get a NULL HeapRegion
3165       // here.
3166       HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
3167       guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
3168                 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
3169                         p2i(global_finger), HR_FORMAT_PARAMS(global_hr)));
3170     }
3171 
3172     // Verify the task fingers
3173     assert(parallel_marking_threads() <= _max_worker_id, "sanity");
3174     for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
3175       CMTask* task = _tasks[i];
3176       HeapWord* task_finger = task->finger();
3177       if (task_finger != NULL && task_finger < _heap_end) {
3178         // See above note on the global finger verification.
3179         HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
3180         guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
3181                   !task_hr->in_collection_set(),
3182                   err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
3183                           p2i(task_finger), HR_FORMAT_PARAMS(task_hr)));
3184       }
3185     }
3186   }
3187 }
3188 #endif // PRODUCT
3189 
3190 // Aggregate the counting data that was constructed concurrently
3191 // with marking.
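     // (Descriptive summary, not a change in behavior: during marking each
     // worker accumulates, per region, a private marked-bytes counter and a
     // private card bitmap. The closure below sums those counters into the
     // region's marked bytes and ORs the per-worker card bitmaps into the
     // global card bitmap.)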
3192 class AggregateCountDataHRClosure: public HeapRegionClosure {
3193   G1CollectedHeap* _g1h;
3194   ConcurrentMark* _cm;
3195   CardTableModRefBS* _ct_bs;
3196   BitMap* _cm_card_bm;
3197   uint _max_worker_id;
3198 
3199  public:
3200   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
3201                               BitMap* cm_card_bm,
3202                               uint max_worker_id) :
3203     _g1h(g1h), _cm(g1h->concurrent_mark()),
3204     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
3205     _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { }
3206 
3207   bool doHeapRegion(HeapRegion* hr) {
3208     if (hr->continuesHumongous()) {
3209       // We will ignore these here and process them when their
3210       // associated "starts humongous" region is processed.
3211       // Note that we cannot rely on their associated
3212       // "starts humongous" region to have its bit set to 1
3213       // since, due to the region chunking in the parallel region
3214       // iteration, a "continues humongous" region might be visited
3215       // before its associated "starts humongous".
3216       return false;
3217     }
3218 
3219     HeapWord* start = hr->bottom();
3220     HeapWord* limit = hr->next_top_at_mark_start();
3221     HeapWord* end = hr->end();
3222 
3223     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
3224            err_msg("Preconditions not met - "
3225                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
3226                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
3227                    p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end())));
3228 
3229     assert(hr->next_marked_bytes() == 0, "Precondition");
3230 
3231     if (start == limit) {
3232       // NTAMS of this region has not been set so nothing to do.
3233       return false;
3234     }
3235 
3236     // 'start' should be in the heap.
3237     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
3238     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
3239     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
3240 
3241     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
3242     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
3243     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
3244 
3245     // If ntams is not card aligned then we bump the card bitmap index
3246     // for limit so that we get all the cards spanned by
3247     // the object ending at ntams.
3248     // Note: if this is the last region in the heap then ntams
3249     // could actually be just beyond the end of the heap;
3250     // limit_idx will then correspond to a (non-existent) card
3251     // that is also outside the heap.
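         // Illustrative example (the addresses and the 512-byte card size are
         // assumptions made only for this example): if limit (ntams) is
         // 0x12345678, it lies inside the card starting at 0x12345600, so
         // card_bitmap_index_for(limit) points at that card; bumping limit_idx
         // by one makes the half-open range [start_idx, limit_idx) cover it.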
3252     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
3253       limit_idx += 1;
3254     }
3255 
3256     assert(limit_idx <= end_idx, "or else use atomics");
3257 
3258     // Aggregate the "stripe" in the count data associated with hr.
3259     uint hrm_index = hr->hrm_index();
3260     size_t marked_bytes = 0;
3261 
3262     for (uint i = 0; i < _max_worker_id; i += 1) {
3263       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
3264       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
3265 
3266       // Fetch the marked_bytes in this region for task i and
3267       // add it to the running total for this region.
3268       marked_bytes += marked_bytes_array[hrm_index];
3269 
3270       // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx)
3271       // into the global card bitmap.
3272       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3273 
3274       while (scan_idx < limit_idx) {
3275         assert(task_card_bm->at(scan_idx) == true, "should be");
3276         _cm_card_bm->set_bit(scan_idx);
3277         assert(_cm_card_bm->at(scan_idx) == true, "should be");
3278 
3279         // BitMap::get_next_one_offset() can handle the case when
3280         // its left_offset parameter is greater than its right_offset
3281         // parameter. It does, however, have an early exit if
3282         // left_offset == right_offset. So let's limit the value
3283         // passed in for left offset here.
3284         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3285         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3286       }
3287     }
3288 
3289     // Update the marked bytes for this region.
3290     hr->add_to_marked_bytes(marked_bytes);
3291 
3292     // Next heap region
3293     return false;
3294   }
3295 };
3296 
3297 class G1AggregateCountDataTask: public AbstractGangTask {
3298 protected:
3299   G1CollectedHeap* _g1h;
3300   ConcurrentMark* _cm;
3301   BitMap* _cm_card_bm;
3302   uint _max_worker_id;
3303   int _active_workers;
3304 
3305 public:
3306   G1AggregateCountDataTask(G1CollectedHeap* g1h,
3307                            ConcurrentMark* cm,
3308                            BitMap* cm_card_bm,
3309                            uint max_worker_id,
3310                            int n_workers) :
3311     AbstractGangTask("Count Aggregation"),
3312     _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3313     _max_worker_id(max_worker_id),
3314     _active_workers(n_workers) { }
3315 
3316   void work(uint worker_id) {
3317     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id);
3318 
3319     if (G1CollectedHeap::use_parallel_gc_threads()) {
3320       _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
3321                                             _active_workers,
3322                                             HeapRegion::AggregateCountClaimValue);
3323     } else {
3324       _g1h->heap_region_iterate(&cl);
3325     }
3326   }
3327 };
3328 
3329 
3330 void ConcurrentMark::aggregate_count_data() {
3331   int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3332                         _g1h->workers()->active_workers() :
3333                         1);
3334 
3335   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3336                                            _max_worker_id, n_workers);
3337 
3338   if (G1CollectedHeap::use_parallel_gc_threads()) {
3339     assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3340            "sanity check");
3341     _g1h->set_par_threads(n_workers);
3342     _g1h->workers()->run_task(&g1_par_agg_task);
3343     _g1h->set_par_threads(0);
3344 
3345     assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3346            "sanity check");
3347     _g1h->reset_heap_region_claim_values();
3348   } else {
3349     g1_par_agg_task.work(0);
3350   }
3351 }
3352 
3353 // Clear the per-worker arrays used to store the per-region counting data
3354 void ConcurrentMark::clear_all_count_data() {
3355   // Clear the global card bitmap - it will be filled during
3356   // liveness count aggregation (during remark) and the
3357   // final counting task.
3358   _card_bm.clear();
3359 
3360   // Clear the global region bitmap - it will be filled as part
3361   // of the final counting task.
3362   _region_bm.clear();
3363 
3364   uint max_regions = _g1h->max_regions();
3365   assert(_max_worker_id > 0, "uninitialized");
3366 
3367   for (uint i = 0; i < _max_worker_id; i += 1) {
3368     BitMap* task_card_bm = count_card_bitmap_for(i);
3369     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3370 
3371     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3372     assert(marked_bytes_array != NULL, "uninitialized");
3373 
3374     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3375     task_card_bm->clear();
3376   }
3377 }
3378 
3379 void ConcurrentMark::print_stats() {
3380   if (verbose_stats()) {
3381     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3382     for (size_t i = 0; i < _active_tasks; ++i) {
3383       _tasks[i]->print_stats();
3384       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3385     }
3386   }
3387 }
3388 
3389 // abandon current marking iteration due to a Full GC
3390 void ConcurrentMark::abort() {
3391   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
3392   // concurrent bitmap clearing.
3393   _nextMarkBitMap->clearAll();
3394 
3395   // Note we cannot clear the previous marking bitmap here
3396   // since VerifyDuringGC verifies the objects marked during
3397   // a full GC against the previous bitmap.
3398 
3399   // Clear the liveness counting data
3400   clear_all_count_data();
3401   // Empty mark stack
3402   reset_marking_state();
3403   for (uint i = 0; i < _max_worker_id; ++i) {
3404     _tasks[i]->clear_region_fields();
3405   }
3406   _first_overflow_barrier_sync.abort();
3407   _second_overflow_barrier_sync.abort();
3408   const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id();
3409   if (!gc_id.is_undefined()) {
3410     // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance
3411     // to detect that it was aborted. Only keep track of the first GC id that we aborted.
3412     _aborted_gc_id = gc_id;
3413   }
3414   _has_aborted = true;
3415 
3416   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3417   satb_mq_set.abandon_partial_marking();
3418   // This can be called either during or outside marking; we'll read
3419   // the expected_active value from the SATB queue set.
3420   satb_mq_set.set_active_all_threads(
3421                                  false, /* new active value */
3422                                  satb_mq_set.is_active() /* expected_active */);
3423 
3424   _g1h->trace_heap_after_concurrent_cycle();
3425   _g1h->register_concurrent_cycle_end();
3426 }
3427 
3428 const GCId& ConcurrentMark::concurrent_gc_id() {
3429   if (has_aborted()) {
3430     return _aborted_gc_id;
3431   }
3432   return _g1h->gc_tracer_cm()->gc_id();
3433 }
3434 
3435 static void print_ms_time_info(const char* prefix, const char* name,
3436                                NumberSeq& ns) {
3437   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3438                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3439   if (ns.num() > 0) {
3440     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3441                            prefix, ns.sd(), ns.maximum());
3442   }
3443 }
3444 
3445 void ConcurrentMark::print_summary_info() {
3446   gclog_or_tty->print_cr(" Concurrent marking:");
3447   print_ms_time_info("  ", "init marks", _init_times);
3448   print_ms_time_info("  ", "remarks", _remark_times);
3449   {
3450     print_ms_time_info("     ", "final marks", _remark_mark_times);
3451     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3452 
3453   }
3454   print_ms_time_info("  ", "cleanups", _cleanup_times);
3455   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3456                          _total_counting_time,
3457                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3458                           (double)_cleanup_times.num()
3459                          : 0.0));
3460   if (G1ScrubRemSets) {
3461     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3462                            _total_rs_scrub_time,
3463                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3464                             (double)_cleanup_times.num()
3465                            : 0.0));
3466   }
3467   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3468                          (_init_times.sum() + _remark_times.sum() +
3469                           _cleanup_times.sum())/1000.0);
3470   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3471                 "(%8.2f s marking).",
3472                 cmThread()->vtime_accum(),
3473                 cmThread()->vtime_mark_accum());
3474 }
3475 
3476 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3477   if (use_parallel_marking_threads()) {
3478     _parallel_workers->print_worker_threads_on(st);
3479   }
3480 }
3481 
3482 void ConcurrentMark::print_on_error(outputStream* st) const {
3483   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
3484       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
3485   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
3486   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
3487 }
3488 
3489 // We take a break if someone is trying to stop the world.
3490 bool ConcurrentMark::do_yield_check(uint worker_id) {
3491   if (SuspendibleThreadSet::should_yield()) {
3492     if (worker_id == 0) {
3493       _g1h->g1_policy()->record_concurrent_pause();
3494     }
3495     SuspendibleThreadSet::yield();
3496     return true;
3497   } else {
3498     return false;
3499   }
3500 }
3501 
3502 bool ConcurrentMark::containing_card_is_marked(void* p) {
3503   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3504   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3505 }
3506 
3507 bool ConcurrentMark::containing_cards_are_marked(void* start,
3508                                                  void* last) {
3509   return containing_card_is_marked(start) &&
3510          containing_card_is_marked(last);
3511 }
3512 
3513 #ifndef PRODUCT
3514 // for debugging purposes
3515 void ConcurrentMark::print_finger() {
3516   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3517                          p2i(_heap_start), p2i(_heap_end), p2i(_finger));
3518   for (uint i = 0; i < _max_worker_id; ++i) {
3519     gclog_or_tty->print("   %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger()));
3520   }
3521   gclog_or_tty->cr();
3522 }
3523 #endif
3524 
3525 void CMTask::scan_object(oop obj) {
3526   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3527 
3528   if (_cm->verbose_high()) {
3529     gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT,
3530                            _worker_id, p2i((void*) obj));
3531   }
3532 
3533   size_t obj_size = obj->size();
3534   _words_scanned += obj_size;
3535 
3536   obj->oop_iterate(_cm_oop_closure);
3537   statsOnly( ++_objs_scanned );
3538   check_limits();
3539 }
3540 
3541 // Closure for iteration over bitmaps
3542 class CMBitMapClosure : public BitMapClosure {
3543 private:
3544   // the bitmap that is being iterated over
3545   CMBitMap*                   _nextMarkBitMap;
3546   ConcurrentMark*             _cm;
3547   CMTask*                     _task;
3548 
3549 public:
3550   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3551     _nextMarkBitMap(nextMarkBitMap), _cm(cm), _task(task) { }
3552 
3553   bool do_bit(size_t offset) {
3554     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3555     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3556     assert( addr < _cm->finger(), "invariant");
3557 
3558     statsOnly( _task->increase_objs_found_on_bitmap() );
3559     assert(addr >= _task->finger(), "invariant");
3560 
3561     // We move this task's local finger along.
3562     _task->move_finger_to(addr);
3563 
3564     _task->scan_object(oop(addr));
3565     // we only partially drain the local queue and global stack
3566     _task->drain_local_queue(true);
3567     _task->drain_global_stack(true);
3568 
3569     // if the has_aborted flag has been raised, we need to bail out of
3570     // the iteration
3571     return !_task->has_aborted();
3572   }
3573 };
3574 
3575 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3576                                ConcurrentMark* cm,
3577                                CMTask* task)
3578   : _g1h(g1h), _cm(cm), _task(task) {
3579   assert(_ref_processor == NULL, "should be initialized to NULL");
3580 
3581   if (G1UseConcMarkReferenceProcessing) {
3582     _ref_processor = g1h->ref_processor_cm();
3583     assert(_ref_processor != NULL, "should not be NULL");
3584   }
3585 }
3586 
3587 void CMTask::setup_for_region(HeapRegion* hr) {
3588   assert(hr != NULL,
3589         "claim_region() should have filtered out NULL regions");
3590   assert(!hr->continuesHumongous(),
3591         "claim_region() should have filtered out continues humongous regions");
3592 
3593   if (_cm->verbose_low()) {
3594     gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3595                            _worker_id, p2i(hr));
3596   }
3597 
3598   _curr_region  = hr;
3599   _finger       = hr->bottom();
3600   update_region_limit();
3601 }
3602 
3603 void CMTask::update_region_limit() {
3604   HeapRegion* hr            = _curr_region;
3605   HeapWord* bottom          = hr->bottom();
3606   HeapWord* limit           = hr->next_top_at_mark_start();
3607 
3608   if (limit == bottom) {
3609     if (_cm->verbose_low()) {
3610       gclog_or_tty->print_cr("[%u] found an empty region "
3611                              "["PTR_FORMAT", "PTR_FORMAT")",
3612                              _worker_id, p2i(bottom), p2i(limit));
3613     }
3614     // The region was collected underneath our feet.
3615     // We set the finger to bottom to ensure that the bitmap
3616     // iteration that will follow this will not do anything.
3617     // (this is not a condition that holds when we set the region up,
3618     // as the region is not supposed to be empty in the first place)
3619     _finger = bottom;
3620   } else if (limit >= _region_limit) {
3621     assert(limit >= _finger, "peace of mind");
3622   } else {
3623     assert(limit < _region_limit, "only way to get here");
3624     // This can happen under some pretty unusual circumstances.  An
3625     // evacuation pause empties the region underneath our feet (NTAMS
3626     // at bottom). We then do some allocation in the region (NTAMS
3627     // stays at bottom), followed by the region being used as a GC
3628     // alloc region (NTAMS will move to top() and the objects
3629     // originally below it will be grayed). All objects now marked in
3630     // the region are explicitly grayed, if below the global finger,
3631     // and in fact we do not need to scan anything else. So, we simply
3632     // set _finger to be limit to ensure that the bitmap iteration
3633     // doesn't do anything.
3634     _finger = limit;
3635   }
3636 
3637   _region_limit = limit;
3638 }
3639 
3640 void CMTask::giveup_current_region() {
3641   assert(_curr_region != NULL, "invariant");
3642   if (_cm->verbose_low()) {
3643     gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3644                            _worker_id, p2i(_curr_region));
3645   }
3646   clear_region_fields();
3647 }
3648 
3649 void CMTask::clear_region_fields() {
3650   // Values for these three fields that indicate that we're not
3651   // holding on to a region.
3652   _curr_region   = NULL;
3653   _finger        = NULL;
3654   _region_limit  = NULL;
3655 }
3656 
3657 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3658   if (cm_oop_closure == NULL) {
3659     assert(_cm_oop_closure != NULL, "invariant");
3660   } else {
3661     assert(_cm_oop_closure == NULL, "invariant");
3662   }
3663   _cm_oop_closure = cm_oop_closure;
3664 }
3665 
3666 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3667   guarantee(nextMarkBitMap != NULL, "invariant");
3668 
3669   if (_cm->verbose_low()) {
3670     gclog_or_tty->print_cr("[%u] resetting", _worker_id);
3671   }
3672 
3673   _nextMarkBitMap                = nextMarkBitMap;
3674   clear_region_fields();
3675 
3676   _calls                         = 0;
3677   _elapsed_time_ms               = 0.0;
3678   _termination_time_ms           = 0.0;
3679   _termination_start_time_ms     = 0.0;
3680 
3681 #if _MARKING_STATS_
3682   _local_pushes                  = 0;
3683   _local_pops                    = 0;
3684   _local_max_size                = 0;
3685   _objs_scanned                  = 0;
3686   _global_pushes                 = 0;
3687   _global_pops                   = 0;
3688   _global_max_size               = 0;
3689   _global_transfers_to           = 0;
3690   _global_transfers_from         = 0;
3691   _regions_claimed               = 0;
3692   _objs_found_on_bitmap          = 0;
3693   _satb_buffers_processed        = 0;
3694   _steal_attempts                = 0;
3695   _steals                        = 0;
3696   _aborted                       = 0;
3697   _aborted_overflow              = 0;
3698   _aborted_cm_aborted            = 0;
3699   _aborted_yield                 = 0;
3700   _aborted_timed_out             = 0;
3701   _aborted_satb                  = 0;
3702   _aborted_termination           = 0;
3703 #endif // _MARKING_STATS_
3704 }
3705 
3706 bool CMTask::should_exit_termination() {
3707   regular_clock_call();
3708   // This is called when we are in the termination protocol. We should
3709   // quit if, for some reason, this task wants to abort or the global
3710   // stack is not empty (this means that we can get work from it).
3711   return !_cm->mark_stack_empty() || has_aborted();
3712 }
3713 
3714 void CMTask::reached_limit() {
3715   assert(_words_scanned >= _words_scanned_limit ||
3716          _refs_reached >= _refs_reached_limit,
3717          "shouldn't have been called otherwise");
3718   regular_clock_call();
3719 }
3720 
3721 void CMTask::regular_clock_call() {
3722   if (has_aborted()) return;
3723 
3724   // First, we need to recalculate the words scanned and refs reached
3725   // limits for the next clock call.
3726   recalculate_limits();
3727 
3728   // During the regular clock call we do the following
3729 
3730   // (1) If an overflow has been flagged, then we abort.
3731   if (_cm->has_overflown()) {
3732     set_has_aborted();
3733     return;
3734   }
3735 
3736   // If we are not concurrent (i.e. we're doing remark) we don't need
3737   // to check anything else. The other steps are only needed during
3738   // the concurrent marking phase.
3739   if (!concurrent()) return;
3740 
3741   // (2) If marking has been aborted for Full GC, then we also abort.
3742   if (_cm->has_aborted()) {
3743     set_has_aborted();
3744     statsOnly( ++_aborted_cm_aborted );
3745     return;
3746   }
3747 
3748   double curr_time_ms = os::elapsedVTime() * 1000.0;
3749 
3750   // (3) If marking stats are enabled, then we update the step history.
3751 #if _MARKING_STATS_
3752   if (_words_scanned >= _words_scanned_limit) {
3753     ++_clock_due_to_scanning;
3754   }
3755   if (_refs_reached >= _refs_reached_limit) {
3756     ++_clock_due_to_marking;
3757   }
3758 
3759   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3760   _interval_start_time_ms = curr_time_ms;
3761   _all_clock_intervals_ms.add(last_interval_ms);
3762 
3763   if (_cm->verbose_medium()) {
3764       gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3765                         "scanned = %d%s, refs reached = %d%s",
3766                         _worker_id, last_interval_ms,
3767                         _words_scanned,
3768                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3769                         _refs_reached,
3770                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3771   }
3772 #endif // _MARKING_STATS_
3773 
3774   // (4) We check whether we should yield. If we have to, then we abort.
3775   if (SuspendibleThreadSet::should_yield()) {
3776     // We should yield. To do this we abort the task. The caller is
3777     // responsible for yielding.
3778     set_has_aborted();
3779     statsOnly( ++_aborted_yield );
3780     return;
3781   }
3782 
3783   // (5) We check whether we've reached our time quota. If we have,
3784   // then we abort.
3785   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3786   if (elapsed_time_ms > _time_target_ms) {
3787     set_has_aborted();
3788     _has_timed_out = true;
3789     statsOnly( ++_aborted_timed_out );
3790     return;
3791   }
3792 
3793   // (6) Finally, we check whether there are enough completed SATB
3794   // buffers available for processing. If there are, we abort.
3795   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3796   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3797     if (_cm->verbose_low()) {
3798       gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3799                              _worker_id);
3800     }
3801     // we do need to process SATB buffers; we'll abort and restart
3802     // the marking task to do so
3803     set_has_aborted();
3804     statsOnly( ++_aborted_satb );
3805     return;
3806   }
3807 }
3808 
3809 void CMTask::recalculate_limits() {
3810   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3811   _words_scanned_limit      = _real_words_scanned_limit;
3812 
3813   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3814   _refs_reached_limit       = _real_refs_reached_limit;
3815 }
3816 
3817 void CMTask::decrease_limits() {
3818   // This is called when we believe that we're going to do an infrequent
3819   // operation which will increase the per byte scanned cost (i.e. move
3820   // entries to/from the global stack). It basically tries to decrease the
3821   // scanning limit so that the clock is called earlier.
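       // Concretely (derived from the assignments below): after this call at
       // most a quarter of a full period (words_scanned_period / 4, and
       // similarly refs_reached_period / 4) remains before reached_limit()
       // triggers the next regular_clock_call().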
3822 
3823   if (_cm->verbose_medium()) {
3824     gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3825   }
3826 
3827   _words_scanned_limit = _real_words_scanned_limit -
3828     3 * words_scanned_period / 4;
3829   _refs_reached_limit  = _real_refs_reached_limit -
3830     3 * refs_reached_period / 4;
3831 }
3832 
3833 void CMTask::move_entries_to_global_stack() {
3834   // local array where we'll store the entries that will be popped
3835   // from the local queue
3836   oop buffer[global_stack_transfer_size];
3837 
3838   int n = 0;
3839   oop obj;
3840   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3841     buffer[n] = obj;
3842     ++n;
3843   }
3844 
3845   if (n > 0) {
3846     // we popped at least one entry from the local queue
3847 
3848     statsOnly( ++_global_transfers_to; _local_pops += n );
3849 
3850     if (!_cm->mark_stack_push(buffer, n)) {
3851       if (_cm->verbose_low()) {
3852         gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3853                                _worker_id);
3854       }
3855       set_has_aborted();
3856     } else {
3857       // the transfer was successful
3858 
3859       if (_cm->verbose_medium()) {
3860         gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3861                                _worker_id, n);
3862       }
3863       statsOnly( int tmp_size = _cm->mark_stack_size();
3864                  if (tmp_size > _global_max_size) {
3865                    _global_max_size = tmp_size;
3866                  }
3867                  _global_pushes += n );
3868     }
3869   }
3870 
3871   // this operation was quite expensive, so decrease the limits
3872   decrease_limits();
3873 }
3874 
3875 void CMTask::get_entries_from_global_stack() {
3876   // local array where we'll store the entries that will be popped
3877   // from the global stack.
3878   oop buffer[global_stack_transfer_size];
3879   int n;
3880   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3881   assert(n <= global_stack_transfer_size,
3882          "we should not pop more than the given limit");
3883   if (n > 0) {
3884     // yes, we did actually pop at least one entry
3885 
3886     statsOnly( ++_global_transfers_from; _global_pops += n );
3887     if (_cm->verbose_medium()) {
3888       gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3889                              _worker_id, n);
3890     }
3891     for (int i = 0; i < n; ++i) {
3892       bool success = _task_queue->push(buffer[i]);
3893       // We only call this when the local queue is empty or under a
3894       // given target limit. So, we do not expect this push to fail.
3895       assert(success, "invariant");
3896     }
3897 
3898     statsOnly( int tmp_size = _task_queue->size();
3899                if (tmp_size > _local_max_size) {
3900                  _local_max_size = tmp_size;
3901                }
3902                _local_pushes += n );
3903   }
3904 
3905   // this operation was quite expensive, so decrease the limits
3906   decrease_limits();
3907 }
3908 
3909 void CMTask::drain_local_queue(bool partially) {
3910   if (has_aborted()) return;
3911 
3912   // Decide what the target size is, depending on whether we're going to
3913   // drain it partially (so that other tasks can steal if they run out
3914   // of things to do) or totally (at the very end).
3915   size_t target_size;
3916   if (partially) {
3917     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3918   } else {
3919     target_size = 0;
3920   }
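       // Net effect: a partial drain only empties the queue down to roughly a
       // third of its capacity (capped at GCDrainStackTargetSize), so entries
       // remain available for other tasks to steal; a full drain goes to zero.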
3921 
3922   if (_task_queue->size() > target_size) {
3923     if (_cm->verbose_high()) {
3924       gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3925                              _worker_id, target_size);
3926     }
3927 
3928     oop obj;
3929     bool ret = _task_queue->pop_local(obj);
3930     while (ret) {
3931       statsOnly( ++_local_pops );
3932 
3933       if (_cm->verbose_high()) {
3934         gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3935                                p2i((void*) obj));
3936       }
3937 
3938       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3939       assert(!_g1h->is_on_master_free_list(
3940                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3941 
3942       scan_object(obj);
3943 
3944       if (_task_queue->size() <= target_size || has_aborted()) {
3945         ret = false;
3946       } else {
3947         ret = _task_queue->pop_local(obj);
3948       }
3949     }
3950 
3951     if (_cm->verbose_high()) {
3952       gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3953                              _worker_id, _task_queue->size());
3954     }
3955   }
3956 }
3957 
3958 void CMTask::drain_global_stack(bool partially) {
3959   if (has_aborted()) return;
3960 
3961   // We have a policy to drain the local queue before we attempt to
3962   // drain the global stack.
3963   assert(partially || _task_queue->size() == 0, "invariant");
3964 
3965   // Decide what the target size is, depending on whether we're going to
3966   // drain it partially (so that other tasks can steal if they run out
3967   // of things to do) or totally (at the very end).  Notice that,
3968   // because we move entries from the global stack in chunks or
3969   // because another task might be doing the same, we might in fact
3970   // drop below the target. But, this is not a problem.
3971   size_t target_size;
3972   if (partially) {
3973     target_size = _cm->partial_mark_stack_size_target();
3974   } else {
3975     target_size = 0;
3976   }
3977 
3978   if (_cm->mark_stack_size() > target_size) {
3979     if (_cm->verbose_low()) {
3980       gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
3981                              _worker_id, target_size);
3982     }
3983 
3984     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3985       get_entries_from_global_stack();
3986       drain_local_queue(partially);
3987     }
3988 
3989     if (_cm->verbose_low()) {
3990       gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
3991                              _worker_id, _cm->mark_stack_size());
3992     }
3993   }
3994 }
3995 
3996 // SATB Queue has several assumptions on whether to call the par or
3997 // non-par versions of the methods. This is why some of the code is
3998 // replicated. We should really get rid of the single-threaded version
3999 // of the code to simplify things.
4000 void CMTask::drain_satb_buffers() {
4001   if (has_aborted()) return;
4002 
4003   // We set this so that the regular clock knows that we're in the
4004   // middle of draining buffers and doesn't set the abort flag when it
4005   // notices that SATB buffers are available for draining. It'd be
4006   // very counterproductive if it did that. :-)
4007   _draining_satb_buffers = true;
4008 
4009   CMObjectClosure oc(this);
4010   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
4011   if (G1CollectedHeap::use_parallel_gc_threads()) {
4012     satb_mq_set.set_par_closure(_worker_id, &oc);
4013   } else {
4014     satb_mq_set.set_closure(&oc);
4015   }
4016 
4017   // This keeps claiming and applying the closure to completed buffers
4018   // until we run out of buffers or we need to abort.
4019   if (G1CollectedHeap::use_parallel_gc_threads()) {
4020     while (!has_aborted() &&
4021            satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) {
4022       if (_cm->verbose_medium()) {
4023         gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
4024       }
4025       statsOnly( ++_satb_buffers_processed );
4026       regular_clock_call();
4027     }
4028   } else {
4029     while (!has_aborted() &&
4030            satb_mq_set.apply_closure_to_completed_buffer()) {
4031       if (_cm->verbose_medium()) {
4032         gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id);
4033       }
4034       statsOnly( ++_satb_buffers_processed );
4035       regular_clock_call();
4036     }
4037   }
4038 
4039   _draining_satb_buffers = false;
4040 
4041   assert(has_aborted() ||
4042          concurrent() ||
4043          satb_mq_set.completed_buffers_num() == 0, "invariant");
4044 
4045   if (G1CollectedHeap::use_parallel_gc_threads()) {
4046     satb_mq_set.set_par_closure(_worker_id, NULL);
4047   } else {
4048     satb_mq_set.set_closure(NULL);
4049   }
4050 
4051   // again, this was a potentially expensive operation, decrease the
4052   // limits to get the regular clock call early
4053   decrease_limits();
4054 }
4055 
4056 void CMTask::print_stats() {
4057   gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d",
4058                          _worker_id, _calls);
4059   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
4060                          _elapsed_time_ms, _termination_time_ms);
4061   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4062                          _step_times_ms.num(), _step_times_ms.avg(),
4063                          _step_times_ms.sd());
4064   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
4065                          _step_times_ms.maximum(), _step_times_ms.sum());
4066 
4067 #if _MARKING_STATS_
4068   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
4069                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
4070                          _all_clock_intervals_ms.sd());
4071   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
4072                          _all_clock_intervals_ms.maximum(),
4073                          _all_clock_intervals_ms.sum());
4074   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
4075                          _clock_due_to_scanning, _clock_due_to_marking);
4076   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
4077                          _objs_scanned, _objs_found_on_bitmap);
4078   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
4079                          _local_pushes, _local_pops, _local_max_size);
4080   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
4081                          _global_pushes, _global_pops, _global_max_size);
4082   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
4083                          _global_transfers_to,_global_transfers_from);
4084   gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);
4085   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
4086   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
4087                          _steal_attempts, _steals);
4088   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
4089   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
4090                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
4091   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
4092                          _aborted_timed_out, _aborted_satb, _aborted_termination);
4093 #endif // _MARKING_STATS_
4094 }
4095 
4096 /*****************************************************************************
4097 
4098     The do_marking_step(time_target_ms, ...) method is the building
4099     block of the parallel marking framework. It can be called in parallel
4100     with other invocations of do_marking_step() on different tasks
4101     (but only one per task, obviously) and concurrently with the
4102     mutator threads, or during remark, hence it eliminates the need
4103     for two versions of the code. When called during remark, it will
4104     pick up from where the task left off during the concurrent marking
4105     phase. Interestingly, tasks are also claimable during evacuation
4106     pauses, since do_marking_step() ensures that it aborts before
4107     it needs to yield.
4108 
4109     The data structures that it uses to do marking work are the
4110     following:
4111 
4112       (1) Marking Bitmap. If there are gray objects that appear only
4113       on the bitmap (this happens either when dealing with an overflow
4114       or when the initial marking phase has simply marked the roots
4115       and didn't push them on the stack), then tasks claim heap
4116       regions whose bitmap they then scan to find gray objects. A
4117       global finger indicates where the end of the last claimed region
4118       is. A local finger indicates how far into the region a task has
4119       scanned. The two fingers are used to determine how to gray an
4120       object (i.e. whether simply marking it is OK, as it will be
4121       visited by a task in the future, or whether it also needs to be
4122       pushed on a stack).
4123 
4124       (2) Local Queue. The local queue of the task which is accessed
4125       reasonably efficiently by the task. Other tasks can steal from
4126       it when they run out of work. Throughout the marking phase, a
4127       task attempts to keep its local queue short but not totally
4128       empty, so that entries are available for stealing by other
4129       tasks. Only when there is no more work, a task will totally
4130       drain its local queue.
4131 
4132       (3) Global Mark Stack. This handles local queue overflow. During
4133       marking only sets of entries are moved between it and the local
4134       queues, as access to it requires a mutex and more fine-grained
4135       interaction with it might cause contention. If it
4136       overflows, then the marking phase should restart and iterate
4137       over the bitmap to identify gray objects. Throughout the marking
4138       phase, tasks attempt to keep the global mark stack at a small
4139       length but not totally empty, so that entries are available for
4140       popping by other tasks. Only when there is no more work, tasks
4141       will totally drain the global mark stack.
4142 
4143       (4) SATB Buffer Queue. This is where completed SATB buffers are
4144       made available. Buffers are regularly removed from this queue
4145       and scanned for roots, so that the queue doesn't get too
4146       long. During remark, all completed buffers are processed, as
4147       well as the filled in parts of any uncompleted buffers.
4148 
4149     The do_marking_step() method tries to abort when the time target
4150     has been reached. There are a few other cases when the
4151     do_marking_step() method also aborts:
4152 
4153       (1) When the marking phase has been aborted (after a Full GC).
4154 
4155       (2) When a global overflow (on the global stack) has been
4156       triggered. Before the task aborts, it will actually sync up with
4157       the other tasks to ensure that all the marking data structures
4158       (local queues, stacks, fingers, etc.) are re-initialized so that
4159       when do_marking_step() completes, the marking phase can
4160       immediately restart.
4161 
4162       (3) When enough completed SATB buffers are available. The
4163       do_marking_step() method only tries to drain SATB buffers right
4164       at the beginning. So, if enough buffers are available, the
4165       marking step aborts and the SATB buffers are processed at
4166       the beginning of the next invocation.
4167 
4168       (4) To yield. When we have to yield, we abort and yield
4169       right at the end of do_marking_step(). This saves us a lot
4170       of hassle as, by yielding, we might allow a Full GC. If this
4171       happens, objects will be compacted underneath our feet, the
4172       heap might shrink, etc. We avoid having to check for all this
4173       by simply aborting and doing the yield right at the end.
4174 
4175     From the above it follows that do_marking_step() should be called in
4176     a loop, or otherwise regularly, until it completes (see sketch below).
4177 
4178     If a marking step completes without its has_aborted() flag being
4179     true, it means it has completed the current marking phase (and
4180     also all other marking tasks have done so and have all synced up).
4181 
4182     A method called regular_clock_call() is invoked "regularly" (in
4183     sub-ms intervals) throughout marking. It is this clock method that
4184     checks all the abort conditions mentioned above and decides when
4185     the task should abort. A work-based scheme is used to trigger this
4186     clock method: it is called when the number of object words the
4187     marking phase has scanned or the number of references the marking
4188     phase has visited reaches a given limit. Additional invocations of
4189     the clock method have been planted in a few other strategic places
4190     too. The initial reason for the clock method was to avoid calling
4191     vtime too regularly, as it is quite expensive. So, once it was in
4192     place, it was natural to piggy-back all the other conditions on it
4193     too, rather than constantly checking them throughout the code.
4194 
4195     If do_termination is true then do_marking_step will enter its
4196     termination protocol.
4197 
4198     The value of is_serial must be true when do_marking_step is being
4199     called serially (i.e. by the VMThread); in that case do_marking_step
4200     skips any synchronization in the termination and overflow code.
4201     Examples include the serial remark code and the serial reference
4202     processing closures.
4203 
4204     The value of is_serial must be false when do_marking_step is
4205     being called by any of the worker threads in a work gang.
4206     Examples include the concurrent marking code (CMMarkingTask),
4207     the MT remark code, and the MT reference processing closures.
4208 
4209  *****************************************************************************/
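
// Illustrative sketch (an editor's addition, not part of the collector):
// the caller-loop pattern described above, under simplifying assumptions.
// The real drivers (e.g. the concurrent marking code in CMMarkingTask and
// the remark / reference processing closures) additionally handle yielding,
// clearing of per-step state and checking whether marking as a whole has
// been aborted; the driver function and the time target below are made up
// purely for illustration.
//
//   void example_marking_driver(CMTask* task) {
//     const double target_ms = 10.0;   // hypothetical time slice
//     do {
//       // A parallel worker would pass is_serial = false; the VMThread,
//       // e.g. during serial remark, would pass is_serial = true.
//       task->do_marking_step(target_ms,
//                             true  /* do_termination */,
//                             false /* is_serial      */);
//       // If the step aborted (time target reached, global overflow,
//       // enough SATB buffers queued, or a yield request), yield if
//       // required and then simply call do_marking_step() again.
//     } while (task->has_aborted());
//     // Completing without has_aborted() set means this task has finished
//     // the current marking phase (and, via the termination protocol, so
//     // have all other tasks).
//   }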
4210 
4211 void CMTask::do_marking_step(double time_target_ms,
4212                              bool do_termination,
4213                              bool is_serial) {
4214   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4215   assert(concurrent() == _cm->concurrent(), "they should be the same");
4216 
4217   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4218   assert(_task_queues != NULL, "invariant");
4219   assert(_task_queue != NULL, "invariant");
4220   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4221 
4222   assert(!_claimed,
4223          "only one thread should claim this task at any one time");
4224 
4225   // OK, this doesn't safeguard against all possible scenarios, as it is
4226   // possible for two threads to set the _claimed flag at the same
4227   // time. But it is only for debugging purposes anyway and it will
4228   // catch most problems.
4229   _claimed = true;
4230 
4231   _start_time_ms = os::elapsedVTime() * 1000.0;
4232   statsOnly( _interval_start_time_ms = _start_time_ms );
4233 
4234   // If do_stealing is true then do_marking_step will attempt to
4235   // steal work from the other CMTasks. It only makes sense to
4236   // enable stealing when the termination protocol is enabled
4237   // and do_marking_step() is not being called serially.
4238   bool do_stealing = do_termination && !is_serial;
4239 
4240   double diff_prediction_ms =
4241     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4242   _time_target_ms = time_target_ms - diff_prediction_ms;
4243 
4244   // set up the variables that are used in the work-based scheme to
4245   // call the regular clock method
4246   _words_scanned = 0;
4247   _refs_reached  = 0;
4248   recalculate_limits();
4249 
4250   // clear all flags
4251   clear_has_aborted();
4252   _has_timed_out = false;
4253   _draining_satb_buffers = false;
4254 
4255   ++_calls;
4256 
4257   if (_cm->verbose_low()) {
4258     gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4259                            "target = %1.2lfms >>>>>>>>>>",
4260                            _worker_id, _calls, _time_target_ms);
4261   }
4262 
4263   // Set up the bitmap and oop closures. Anything that uses them is
4264   // eventually called from this method, so it is OK to allocate these
4265   // on the stack in this frame.
4266   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4267   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
4268   set_cm_oop_closure(&cm_oop_closure);
4269 
4270   if (_cm->has_overflown()) {
4271     // This can happen if the mark stack overflows during a GC pause
4272     // and this task, after a yield point, restarts. We have to abort
4273     // as we need to get into the overflow protocol which happens
4274     // right at the end of this task.
4275     set_has_aborted();
4276   }
4277 
4278   // First drain any available SATB buffers. After this, we will not
4279   // look at SATB buffers before the next invocation of this method.
4280   // If enough completed SATB buffers are queued up, the regular clock
4281   // will abort this task so that it restarts.
4282   drain_satb_buffers();
4283   // ...then partially drain the local queue and the global stack
4284   drain_local_queue(true);
4285   drain_global_stack(true);
4286 
4287   do {
4288     if (!has_aborted() && _curr_region != NULL) {
4289       // This means that we're already holding on to a region.
4290       assert(_finger != NULL, "if region is not NULL, then the finger "
4291              "should not be NULL either");
4292 
4293       // We might have restarted this task after an evacuation pause
4294       // which might have evacuated the region we're holding on to
4295       // underneath our feet. Let's read its limit again to make sure
4296       // that we do not iterate over a region of the heap that
4297       // contains garbage (update_region_limit() will also move
4298       // _finger to the start of the region if it is found empty).
4299       update_region_limit();
4300       // We will start from _finger not from the start of the region,
4301       // as we might be restarting this task after aborting half-way
4302       // through scanning this region. In this case, _finger points to
4303       // the address where we last found a marked object. If this is a
4304       // fresh region, _finger points to start().
4305       MemRegion mr = MemRegion(_finger, _region_limit);
4306 
4307       if (_cm->verbose_low()) {
4308         gclog_or_tty->print_cr("[%u] we're scanning part "
4309                                "["PTR_FORMAT", "PTR_FORMAT") "
4310                                "of region "HR_FORMAT,
4311                                _worker_id, p2i(_finger), p2i(_region_limit),
4312                                HR_FORMAT_PARAMS(_curr_region));
4313       }
4314 
4315       assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(),
4316              "humongous regions should go around loop once only");
4317 
4318       // Some special cases:
4319       // If the memory region is empty, we can just give up the region.
4320       // If the current region is humongous then we only need to check
4321       // the bitmap for the bit associated with the start of the object,
4322       // scan the object if it's live, and give up the region.
4323       // Otherwise, let's iterate over the bitmap of the part of the region
4324       // that is left.
4325       // If the iteration is successful, give up the region.
4326       if (mr.is_empty()) {
4327         giveup_current_region();
4328         regular_clock_call();
4329       } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) {
4330         if (_nextMarkBitMap->isMarked(mr.start())) {
4331           // The object is marked - apply the closure
4332           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
4333           bitmap_closure.do_bit(offset);
4334         }
4335         // Even if this task aborted while scanning the humongous object
4336         // we can (and should) give up the current region.
4337         giveup_current_region();
4338         regular_clock_call();
4339       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4340         giveup_current_region();
4341         regular_clock_call();
4342       } else {
4343         assert(has_aborted(), "currently the only way to do so");
4344         // The only way to abort the bitmap iteration is to return
4345         // false from the do_bit() method. However, inside the
4346         // do_bit() method we move the _finger to point to the
4347         // object currently being looked at. So, if we bail out, we
4348         // have definitely set _finger to something non-null.
4349         assert(_finger != NULL, "invariant");
4350 
4351         // Region iteration was actually aborted. So now _finger
4352         // points to the address of the object we last scanned. If we
4353         // leave it there, when we restart this task, we will rescan
4354         // the object. It is easy to avoid this. We move the finger by
4355         // enough to point to the next possible object header (the
4356         // bitmap knows by how much we need to move it as it knows its
4357         // granularity).
4358         assert(_finger < _region_limit, "invariant");
4359         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
4360         // Check if bitmap iteration was aborted while scanning the last object
4361         if (new_finger >= _region_limit) {
4362           giveup_current_region();
4363         } else {
4364           move_finger_to(new_finger);
4365         }
4366       }
4367     }
4368     // At this point we have either completed iterating over the
4369     // region we were holding on to, or we have aborted.
4370 
4371     // We then partially drain the local queue and the global stack.
4372     // (Do we really need this?)
4373     drain_local_queue(true);
4374     drain_global_stack(true);
4375 
4376     // Read the note on the claim_region() method on why it might
4377     // return NULL with potentially more regions available for
4378     // claiming and why we have to check out_of_regions() to determine
4379     // whether we're done or not.
4380     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4381       // We are going to try to claim a new region. We should have
4382       // given up on the previous one.
4383       // Separated the asserts so that we know which one fires.
4384       assert(_curr_region  == NULL, "invariant");
4385       assert(_finger       == NULL, "invariant");
4386       assert(_region_limit == NULL, "invariant");
4387       if (_cm->verbose_low()) {
4388         gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4389       }
4390       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4391       if (claimed_region != NULL) {
4392         // Yes, we managed to claim one
4393         statsOnly( ++_regions_claimed );
4394 
4395         if (_cm->verbose_low()) {
4396           gclog_or_tty->print_cr("[%u] we successfully claimed "
4397                                  "region "PTR_FORMAT,
4398                                  _worker_id, p2i(claimed_region));
4399         }
4400 
4401         setup_for_region(claimed_region);
4402         assert(_curr_region == claimed_region, "invariant");
4403       }
4404       // It is important to call the regular clock here. It might take
4405       // a while to claim a region if, for example, we hit a large
4406       // block of empty regions. So we need to call the regular clock
4407       // method once round the loop to make sure it's called
4408       // frequently enough.
4409       regular_clock_call();
4410     }
4411 
4412     if (!has_aborted() && _curr_region == NULL) {
4413       assert(_cm->out_of_regions(),
4414              "at this point we should be out of regions");
4415     }
4416   } while (_curr_region != NULL && !has_aborted());
4417 
4418   if (!has_aborted()) {
4419     // We cannot check whether the global stack is empty, since other
4420     // tasks might be pushing objects to it concurrently.
4421     assert(_cm->out_of_regions(),
4422            "at this point we should be out of regions");
4423 
4424     if (_cm->verbose_low()) {
4425       gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4426     }
4427 
4428     // Try to reduce the number of available SATB buffers so that
4429     // remark has less work to do.
4430     drain_satb_buffers();
4431   }
4432 
4433   // Since we've done everything else, we can now totally drain the
4434   // local queue and global stack.
4435   drain_local_queue(false);
4436   drain_global_stack(false);
4437 
4438   // Attempt to steal work from other tasks' queues.
4439   if (do_stealing && !has_aborted()) {
4440     // We have not aborted. This means that we have finished all that
4441     // we could. Let's try to do some stealing...
4442 
4443     // We cannot check whether the global stack is empty, since other
4444     // tasks might be pushing objects to it concurrently.
4445     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4446            "only way to reach here");
4447 
4448     if (_cm->verbose_low()) {
4449       gclog_or_tty->print_cr("[%u] starting to steal", _worker_id);
4450     }
4451 
4452     while (!has_aborted()) {
4453       oop obj;
4454       statsOnly( ++_steal_attempts );
4455 
4456       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
4457         if (_cm->verbose_medium()) {
4458           gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully",
4459                                  _worker_id, p2i((void*) obj));
4460         }
4461 
4462         statsOnly( ++_steals );
4463 
4464         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4465                "any stolen object should be marked");
4466         scan_object(obj);
4467 
4468         // And since we're towards the end, let's totally drain the
4469         // local queue and global stack.
4470         drain_local_queue(false);
4471         drain_global_stack(false);
4472       } else {
4473         break;
4474       }
4475     }
4476   }
4477 
4478   // If we are about to wrap up and go into termination, check if we
4479   // should raise the overflow flag.
4480   if (do_termination && !has_aborted()) {
4481     if (_cm->force_overflow()->should_force()) {
4482       _cm->set_has_overflown();
4483       regular_clock_call();
4484     }
4485   }
4486 
4487   // We still haven't aborted. Now, let's try to get into the
4488   // termination protocol.
4489   if (do_termination && !has_aborted()) {
4490     // We cannot check whether the global stack is empty, since other
4491     // tasks might be concurrently pushing objects on it.
4492     // Separated the asserts so that we know which one fires.
4493     assert(_cm->out_of_regions(), "only way to reach here");
4494     assert(_task_queue->size() == 0, "only way to reach here");
4495 
4496     if (_cm->verbose_low()) {
4497       gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id);
4498     }
4499 
4500     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4501 
4502     // The CMTask class also extends the TerminatorTerminator class,
4503     // hence its should_exit_termination() method will also decide
4504     // whether to exit the termination protocol or not.
4505     bool finished = (is_serial ||
4506                      _cm->terminator()->offer_termination(this));
4507     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4508     _termination_time_ms +=
4509       termination_end_time_ms - _termination_start_time_ms;
4510 
4511     if (finished) {
4512       // We're all done.
4513 
4514       if (_worker_id == 0) {
4515         // let's allow task 0 to do this
4516         if (concurrent()) {
4517           assert(_cm->concurrent_marking_in_progress(), "invariant");
4518           // we need to set this to false before the next
4519           // safepoint. This way we ensure that the marking phase
4520           // doesn't observe any more heap expansions.
4521           _cm->clear_concurrent_marking_in_progress();
4522         }
4523       }
4524 
4525       // We can now guarantee that the global stack is empty, since
4526       // all other tasks have finished. We separated the guarantees so
4527       // that, if a condition is false, we can immediately find out
4528       // which one.
4529       guarantee(_cm->out_of_regions(), "only way to reach here");
4530       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4531       guarantee(_task_queue->size() == 0, "only way to reach here");
4532       guarantee(!_cm->has_overflown(), "only way to reach here");
4533       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4534 
4535       if (_cm->verbose_low()) {
4536         gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id);
4537       }
4538     } else {
4539       // Apparently there's more work to do. Let's abort this task. The
4540       // caller will restart it and we can hopefully find more things to do.
4541 
4542       if (_cm->verbose_low()) {
4543         gclog_or_tty->print_cr("[%u] apparently there is more work to do",
4544                                _worker_id);
4545       }
4546 
4547       set_has_aborted();
4548       statsOnly( ++_aborted_termination );
4549     }
4550   }
4551 
4552   // Mainly for debugging purposes: make sure that the pointer to the
4553   // closure that was allocated on the stack in this frame doesn't
4554   // escape it by accident.
4555   set_cm_oop_closure(NULL);
4556   double end_time_ms = os::elapsedVTime() * 1000.0;
4557   double elapsed_time_ms = end_time_ms - _start_time_ms;
4558   // Update the step history.
4559   _step_times_ms.add(elapsed_time_ms);
4560 
4561   if (has_aborted()) {
4562     // The task was aborted for some reason.
4563 
4564     statsOnly( ++_aborted );
4565 
4566     if (_has_timed_out) {
4567       double diff_ms = elapsed_time_ms - _time_target_ms;
4568       // Keep statistics of how well we did with respect to hitting
4569       // our target only if we actually timed out (if we aborted for
4570       // other reasons, then the results might get skewed).
4571       _marking_step_diffs_ms.add(diff_ms);
4572     }
4573 
4574     if (_cm->has_overflown()) {
4575       // This is the interesting one. We aborted because a global
4576       // overflow was raised. This means we have to restart the
4577       // marking phase and start iterating over regions. However, in
4578       // order to do this we have to make sure that all tasks stop
4579       // what they are doing and re-initialize in a safe manner. We
4580       // will achieve this with the use of two barrier sync points.
4581 
4582       if (_cm->verbose_low()) {
4583         gclog_or_tty->print_cr("[%u] detected overflow", _worker_id);
4584       }
4585 
4586       if (!is_serial) {
4587         // We only need to enter the sync barrier if being called
4588         // from a parallel context
4589         _cm->enter_first_sync_barrier(_worker_id);
4590 
4591         // When we exit this sync barrier we know that all tasks have
4592         // stopped doing marking work. So, it's now safe to
4593         // re-initialize our data structures. At the end of this method,
4594         // task 0 will clear the global data structures.
4595       }
4596 
4597       statsOnly( ++_aborted_overflow );
4598 
4599       // We clear the local state of this task...
4600       clear_region_fields();
4601 
4602       if (!is_serial) {
4603         // ...and enter the second barrier.
4604         _cm->enter_second_sync_barrier(_worker_id);
4605       }
4606       // At this point, if we're in the concurrent phase of
4607       // marking, everything has been re-initialized and we're
4608       // ready to restart.
4609     }
4610 
4611     if (_cm->verbose_low()) {
4612       gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4613                              "elapsed = %1.2lfms <<<<<<<<<<",
4614                              _worker_id, _time_target_ms, elapsed_time_ms);
4615       if (_cm->has_aborted()) {
4616         gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========",
4617                                _worker_id);
4618       }
4619     }
4620   } else {
4621     if (_cm->verbose_low()) {
4622       gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4623                              "elapsed = %1.2lfms <<<<<<<<<<",
4624                              _worker_id, _time_target_ms, elapsed_time_ms);
4625     }
4626   }
4627 
4628   _claimed = false;
4629 }
4630 
4631 CMTask::CMTask(uint worker_id,
4632                ConcurrentMark* cm,
4633                size_t* marked_bytes,
4634                BitMap* card_bm,
4635                CMTaskQueue* task_queue,
4636                CMTaskQueueSet* task_queues)
4637   : _g1h(G1CollectedHeap::heap()),
4638     _worker_id(worker_id), _cm(cm),
4639     _claimed(false),
4640     _nextMarkBitMap(NULL), _hash_seed(17),
4641     _task_queue(task_queue),
4642     _task_queues(task_queues),
4643     _cm_oop_closure(NULL),
4644     _marked_bytes_array(marked_bytes),
4645     _card_bm(card_bm) {
4646   guarantee(task_queue != NULL, "invariant");
4647   guarantee(task_queues != NULL, "invariant");
4648 
4649   statsOnly( _clock_due_to_scanning = 0;
4650              _clock_due_to_marking  = 0 );
4651 
4652   _marking_step_diffs_ms.add(0.5);
4653 }
4654 
4655 // These are formatting macros that are used below to ensure
4656 // consistent formatting. The *_H_* versions are used to format the
4657 // header for a particular value and should be kept consistent with
4658 // the corresponding value macro. Also note that most of the macros
4659 // add the necessary white space as a prefix, which makes them easier
4660 // to compose (see the composition example after the definitions).
4661 
4662 // All the output lines are prefixed with this string so that they can
4663 // be easily identified in a large log file.
4664 #define G1PPRL_LINE_PREFIX            "###"
4665 
4666 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
4667 #ifdef _LP64
4668 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4669 #else // _LP64
4670 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4671 #endif // _LP64
4672 
4673 // For per-region info
4674 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4675 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4676 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
4677 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4678 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4679 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4680 
4681 // For summary info
4682 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
4683 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
4684 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
4685 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
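
// A composition example (an editor's addition): a per-region output line is
// produced by concatenating the macros into one format string, with each
// macro contributing its own leading white space, e.g.
//
//   _out->print_cr(G1PPRL_LINE_PREFIX
//                  G1PPRL_TYPE_FORMAT
//                  G1PPRL_ADDR_BASE_FORMAT
//                  G1PPRL_BYTE_FORMAT,
//                  type, p2i(bottom), p2i(end), used_bytes);
//
// which is equivalent to passing the single format string
// "###   %-4s "PTR_FORMAT"-"PTR_FORMAT"  "SIZE_FORMAT_W(9) to print_cr().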
4686 
4687 G1PrintRegionLivenessInfoClosure::
4688 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4689   : _out(out),
4690     _total_used_bytes(0), _total_capacity_bytes(0),
4691     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4692     _hum_used_bytes(0), _hum_capacity_bytes(0),
4693     _hum_prev_live_bytes(0), _hum_next_live_bytes(0),
4694     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
4695   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4696   MemRegion g1_reserved = g1h->g1_reserved();
4697   double now = os::elapsedTime();
4698 
4699   // Print the header of the output.
4700   _out->cr();
4701   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4702   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4703                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4704                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4705                  p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4706                  HeapRegion::GrainBytes);
4707   _out->print_cr(G1PPRL_LINE_PREFIX);
4708   _out->print_cr(G1PPRL_LINE_PREFIX
4709                 G1PPRL_TYPE_H_FORMAT
4710                 G1PPRL_ADDR_BASE_H_FORMAT
4711                 G1PPRL_BYTE_H_FORMAT
4712                 G1PPRL_BYTE_H_FORMAT
4713                 G1PPRL_BYTE_H_FORMAT
4714                 G1PPRL_DOUBLE_H_FORMAT
4715                 G1PPRL_BYTE_H_FORMAT
4716                 G1PPRL_BYTE_H_FORMAT,
4717                 "type", "address-range",
4718                 "used", "prev-live", "next-live", "gc-eff",
4719                 "remset", "code-roots");
4720   _out->print_cr(G1PPRL_LINE_PREFIX
4721                 G1PPRL_TYPE_H_FORMAT
4722                 G1PPRL_ADDR_BASE_H_FORMAT
4723                 G1PPRL_BYTE_H_FORMAT
4724                 G1PPRL_BYTE_H_FORMAT
4725                 G1PPRL_BYTE_H_FORMAT
4726                 G1PPRL_DOUBLE_H_FORMAT
4727                 G1PPRL_BYTE_H_FORMAT
4728                 G1PPRL_BYTE_H_FORMAT,
4729                 "", "",
4730                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4731                 "(bytes)", "(bytes)");
4732 }
4733 
4734 // Takes a reference to one of the _hum_* fields, deduces the
4735 // corresponding value for a region in a humongous region series (either
4736 // the region size, or what's left if the _hum_* field is < the region
4737 // size), and updates the _hum_* field accordingly (example below).
4738 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4739   size_t bytes = 0;
4740   // The > 0 check is to deal with the prev and next live bytes which
4741   // could be 0.
4742   if (*hum_bytes > 0) {
4743     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4744     *hum_bytes -= bytes;
4745   }
4746   return bytes;
4747 }
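
// A worked example (an editor's addition, with made-up numbers): if the
// "starts humongous" region recorded _hum_used_bytes == 5*M and
// HeapRegion::GrainBytes == 2*M, then three successive calls to
// get_hum_bytes(&_hum_used_bytes) return 2*M, 2*M and 1*M and leave the
// field at 0, i.e. each region in the series is attributed at most one
// region's worth of the total, in address order.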
4748 
4749 // It deduces the values for a region in a humongous region series
4750 // from the _hum_* fields and updates those accordingly. It assumes
4751 // that the _hum_* fields have already been set up from the "starts
4752 // humongous" region and that we visit the regions in address order.
4753 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4754                                                      size_t* capacity_bytes,
4755                                                      size_t* prev_live_bytes,
4756                                                      size_t* next_live_bytes) {
4757   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4758   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4759   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4760   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4761   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4762 }
4763 
4764 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4765   const char* type = "";
4766   HeapWord* bottom       = r->bottom();
4767   HeapWord* end          = r->end();
4768   size_t capacity_bytes  = r->capacity();
4769   size_t used_bytes      = r->used();
4770   size_t prev_live_bytes = r->live_bytes();
4771   size_t next_live_bytes = r->next_live_bytes();
4772   double gc_eff          = r->gc_efficiency();
4773   size_t remset_bytes    = r->rem_set()->mem_size();
4774   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4775 
4776   if (r->used() == 0) {
4777     type = "FREE";
4778   } else if (r->is_survivor()) {
4779     type = "SURV";
4780   } else if (r->is_young()) {
4781     type = "EDEN";
4782   } else if (r->startsHumongous()) {
4783     type = "HUMS";
4784 
4785     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4786            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4787            "they should have been zeroed after the last time we used them");
4788     // Set up the _hum_* fields.
4789     _hum_capacity_bytes  = capacity_bytes;
4790     _hum_used_bytes      = used_bytes;
4791     _hum_prev_live_bytes = prev_live_bytes;
4792     _hum_next_live_bytes = next_live_bytes;
4793     get_hum_bytes(&used_bytes, &capacity_bytes,
4794                   &prev_live_bytes, &next_live_bytes);
4795     end = bottom + HeapRegion::GrainWords;
4796   } else if (r->continuesHumongous()) {
4797     type = "HUMC";
4798     get_hum_bytes(&used_bytes, &capacity_bytes,
4799                   &prev_live_bytes, &next_live_bytes);
4800     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4801   } else {
4802     type = "OLD";
4803   }
4804 
4805   _total_used_bytes      += used_bytes;
4806   _total_capacity_bytes  += capacity_bytes;
4807   _total_prev_live_bytes += prev_live_bytes;
4808   _total_next_live_bytes += next_live_bytes;
4809   _total_remset_bytes    += remset_bytes;
4810   _total_strong_code_roots_bytes += strong_code_roots_bytes;
4811 
4812   // Print a line for this particular region.
4813   _out->print_cr(G1PPRL_LINE_PREFIX
4814                  G1PPRL_TYPE_FORMAT
4815                  G1PPRL_ADDR_BASE_FORMAT
4816                  G1PPRL_BYTE_FORMAT
4817                  G1PPRL_BYTE_FORMAT
4818                  G1PPRL_BYTE_FORMAT
4819                  G1PPRL_DOUBLE_FORMAT
4820                  G1PPRL_BYTE_FORMAT
4821                  G1PPRL_BYTE_FORMAT,
4822                  type, p2i(bottom), p2i(end),
4823                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
4824                  remset_bytes, strong_code_roots_bytes);
4825 
4826   return false;
4827 }
4828 
4829 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4830   // Add static memory usage to the remembered set sizes.
4831   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
4832   // Print the footer of the output.
4833   _out->print_cr(G1PPRL_LINE_PREFIX);
4834   _out->print_cr(G1PPRL_LINE_PREFIX
4835                  " SUMMARY"
4836                  G1PPRL_SUM_MB_FORMAT("capacity")
4837                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4838                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4839                  G1PPRL_SUM_MB_PERC_FORMAT("next-live")
4840                  G1PPRL_SUM_MB_FORMAT("remset")
4841                  G1PPRL_SUM_MB_FORMAT("code-roots"),
4842                  bytes_to_mb(_total_capacity_bytes),
4843                  bytes_to_mb(_total_used_bytes),
4844                  perc(_total_used_bytes, _total_capacity_bytes),
4845                  bytes_to_mb(_total_prev_live_bytes),
4846                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4847                  bytes_to_mb(_total_next_live_bytes),
4848                  perc(_total_next_live_bytes, _total_capacity_bytes),
4849                  bytes_to_mb(_total_remset_bytes),
4850                  bytes_to_mb(_total_strong_code_roots_bytes));
4851   _out->cr();
4852 }