/*
 * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "code/codeCache.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/g1/heapRegionSet.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "gc_implementation/shared/gcTimer.hpp"
#include "gc_implementation/shared/gcTrace.hpp"
#include "gc_implementation/shared/gcTraceTime.hpp"
#include "memory/allocation.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "runtime/atomic.inline.hpp"
#include "runtime/prefetch.inline.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(int shifter) :
  _bm(),
  _shifter(shifter) {
  _bmStartWord = 0;
  _bmWordSize  = 0;
}
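
// Note: one bitmap bit covers (HeapWordSize << _shifter) bytes of heap, so
// the address <-> bit-offset conversions used below are plain shifts by
// _shifter.
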
HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
                                               const HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
                                                 const HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(MemRegion heap_rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
         _bmWordSize  == heap_rs.word_size();
}
#endif

void CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  _bm.print_on_error(st, prefix);
}

size_t CMBitMap::compute_size(size_t heap_size) {
  return heap_size / mark_distance();
}

// The number of heap bytes covered by a single byte of the mark bitmap:
// one bit per MinObjAlignmentInBytes-sized chunk of the heap.
size_t CMBitMap::mark_distance() {
  return MinObjAlignmentInBytes * BitsPerByte;
}

void CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
  _bmStartWord = heap.start();
  _bmWordSize = heap.word_size();

  _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
  _bm.set_size(_bmWordSize >> _shifter);

  storage->set_mapping_changed_listener(&_listener);
}

void CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions) {
  // We need to clear the bitmap on commit, removing any existing information.
  MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
  _bm->clearRange(mr);
}
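
// The closure below clears a region's part of the bitmap in chunks that each
// correspond to 1M of heap, so that a concurrent caller can yield to a
// pending safepoint between chunks.
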
// Closure used for clearing the given mark bitmap.
class ClearBitmapHRClosure : public HeapRegionClosure {
 private:
  ConcurrentMark* _cm;
  CMBitMap* _bitmap;
  bool _may_yield;      // The closure may yield during iteration. If yielded, abort the iteration.
 public:
  ClearBitmapHRClosure(ConcurrentMark* cm, CMBitMap* bitmap, bool may_yield) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap), _may_yield(may_yield) {
    assert(!may_yield || cm != NULL, "CM must be non-NULL if this closure is expected to yield.");
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    size_t const chunk_size_in_words = M / HeapWordSize;

    HeapWord* cur = r->bottom();
    HeapWord* const end = r->end();

    while (cur < end) {
      MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
      _bitmap->clearRange(mr);

      cur += chunk_size_in_words;

      // Abort iteration if after yielding the marking has been aborted.
      if (_may_yield && _cm->do_yield_check() && _cm->has_aborted()) {
        return true;
      }
      // Repeat the asserts from before the start of the closure. We will do them
      // as asserts here to minimize their overhead on the product. However, we
      // will have them as guarantees at the beginning / end of the bitmap
      // clearing to get some checking in the product.
      assert(!_may_yield || _cm->cmThread()->during_cycle(), "invariant");
      assert(!_may_yield || !G1CollectedHeap::heap()->mark_in_progress(), "invariant");
    }

    return false;
  }
};

class ParClearNextMarkBitmapTask : public AbstractGangTask {
  ClearBitmapHRClosure* _cl;
  uint _workload;
  bool _suspendible; // If the task is suspendible, workers must join the STS.
 public:
  ParClearNextMarkBitmapTask(ClearBitmapHRClosure *cl, uint n_workers, bool suspendible) :
      AbstractGangTask("Parallel Clear Bitmap Task"), _cl(cl), _suspendible(suspendible) {
    assert(n_workers > 0, "Must have at least one worker.");
    uint max_regions = G1CollectedHeap::heap()->max_regions();
    // Split the regions evenly among the workers, rounding up.
    _workload = (max_regions + n_workers - 1) / n_workers;
    assert(n_workers * _workload >= max_regions, "Workloads should cover all regions.");
  }

  void work(uint worker_id) {
    if (_suspendible) {
      SuspendibleThreadSet::join();
    }
    uint start = worker_id * _workload;
    uint end = MIN2(start + _workload, G1CollectedHeap::heap()->max_regions());
    G1CollectedHeap::heap()->heap_region_iterate_range(_cl, start, end);
    if (_suspendible) {
      SuspendibleThreadSet::leave();
    }
  }
};

void CMBitMap::clearAll() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  ClearBitmapHRClosure cl(NULL, this, false /* may_yield */);
  if (g1h->use_parallel_gc_threads()) {
    uint n_workers = g1h->workers()->active_workers();
    ParClearNextMarkBitmapTask task(&cl, n_workers, false);
    g1h->workers()->run_task(&task);
  } else {
    g1h->heap_region_iterate(&cl);
  }
  guarantee(cl.complete(), "Must have completed iteration.");
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}
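
// Returns (and clears) the first contiguous run of marked words in
// [addr, end_addr), clipped to that interval. The returned MemRegion is
// empty if no marked word remains in the interval.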
MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

bool CMMarkStack::allocate(size_t capacity) {
  // allocate a stack of the requisite depth
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
  if (!rs.is_reserved()) {
    warning("ConcurrentMark MarkStack allocation failure");
    return false;
  }
  MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
  if (!_virtual_space.initialize(rs, rs.size())) {
    warning("ConcurrentMark MarkStack backing store failure");
    // Release the virtual memory reserved for the marking stack
    rs.release();
    return false;
  }
  assert(_virtual_space.committed_size() == rs.size(),
         "Didn't reserve backing store for all of ConcurrentMark stack?");
  _base = (oop*) _virtual_space.low();
  setEmpty();
  _capacity = (jint) capacity;
  _saved_index = -1;
  _should_expand = false;
  NOT_PRODUCT(_max_depth = 0);
  return true;
}

void CMMarkStack::expand() {
  // Called, during remark, if we've overflown the marking stack during marking.
  assert(isEmpty(), "stack should have been emptied while handling overflow");
  assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
  // Clear expansion flag
  _should_expand = false;
  if (_capacity == (jint) MarkStackSizeMax) {
    if (PrintGCDetails && Verbose) {
      gclog_or_tty->print_cr(" (benign) Can't expand marking stack capacity, at max size limit");
    }
    return;
  }
  // Double capacity if possible
  jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
  // Do not give up existing stack until we have managed to
  // get the double capacity that we desired.
  ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
                                                           sizeof(oop)));
  if (rs.is_reserved()) {
    // Release the backing store associated with old stack
    _virtual_space.release();
    // Reinitialize virtual space for new stack
    if (!_virtual_space.initialize(rs, rs.size())) {
      fatal("Not enough swap for expanded marking stack capacity");
    }
    _base = (oop*)(_virtual_space.low());
    _index = 0;
    _capacity = new_capacity;
  } else {
    if (PrintGCDetails && Verbose) {
      // Failed to double capacity; continue with the current capacity.
      gclog_or_tty->print(" (benign) Failed to expand marking stack capacity from "
                          SIZE_FORMAT "K to " SIZE_FORMAT "K",
                          _capacity / K, new_capacity / K);
    }
  }
}

void CMMarkStack::set_should_expand() {
  // If we're resetting the marking state because of a
  // marking stack overflow, record that we should, if
  // possible, expand the stack.
  _should_expand = _cm->has_overflown();
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    _base = NULL;
    _virtual_space.release();
  }
}
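
// Lock-free push: reserve a slot by advancing _index with a CAS (retrying on
// contention) and only then store into the reserved slot.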
void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
  NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}
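
// note_start_of_gc()/note_end_of_gc() bracket each evacuation pause so that
// the guarantee in note_end_of_gc() catches any push onto the mark stack
// that incorrectly happens during the pause.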
void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return _g1h->is_obj_ill(obj);
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}
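
// scan_finished() and wait_until_scan_finished() cooperate through
// RootRegionScan_lock: the scanner clears _scan_in_progress and notifies,
// while threads that must not proceed until root region scanning completes
// block on the same lock.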
void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

// Use roughly a quarter of the parallel GC threads, and at least one.
uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
  _g1h(g1h),
  _markBitMap1(),
  _markBitMap2(),
  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t)(g1h->max_regions()), false /* in_resource_area*/),
  _card_bm((g1h->reserved_region().byte_size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
  _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _aborted_gc_id(GCId::undefined()),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL),
  _completed_initialization(false) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = " PTR_FORMAT ", "
                           "heap end = " PTR_FORMAT, p2i(_heap_start), p2i(_heap_end));
  }

  _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
  _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
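
  // _markBitMap1 and _markBitMap2 alternate between the roles of "previous"
  // (completed) and "next" (in-progress) marking bitmap; the two roles are
  // swapped at the end of each marking cycle.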

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
  if (_cmThread->osthread() == NULL) {
    vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
  }

  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  if (ConcGCThreads > ParallelGCThreads) {
    warning("Can't have more ConcGCThreads (" UINTX_FORMAT ") "
            "than ParallelGCThreads (" UINTX_FORMAT ").",
            ConcGCThreads, ParallelGCThreads);
    return;
  }
  if (ParallelGCThreads == 0) {
    // If we are not running with any parallel GC threads we will not
    // spawn any marking threads either.
    _parallel_marking_threads = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor = 0.0;
    _marking_task_overhead = 1.0;
  } else {
    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
      // if both are set
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // We will calculate the number of parallel marking threads based
      // on a target overhead with respect to the soft real-time goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
      _sleep_factor = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
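
      // Worked example (illustrative numbers, not defaults): with
      // MaxGCPauseMillis=200, GCPauseIntervalMillis=1000,
      // G1MarkingOverheadPercent=10 and 8 CPUs,
      // overall_cm_overhead = 200 * 0.10 / 1000 = 0.02 of total CPU time,
      // which is 0.16 of one CPU, so marking_thread_num = ceil(0.16) = 1,
      // and that one thread runs at a ~16% duty cycle (sleep_factor = 5.25).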
    } else {
      // Calculate the number of parallel marking threads by scaling
      // the number of parallel GC threads.
      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
      _sleep_factor = 0.0;
      _marking_task_overhead = 1.0;
    }

    assert(ConcGCThreads > 0, "Should have been set");
    _parallel_marking_threads = (uint) ConcGCThreads;
    _max_parallel_marking_threads = _parallel_marking_threads;

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  if (FLAG_IS_DEFAULT(MarkStackSize)) {
    uintx mark_stack_size =
      MIN2(MarkStackSizeMax,
           MAX2(MarkStackSize, (uintx) (parallel_marking_threads() * TASKQUEUE_SIZE)));
    // Verify that the calculated value for MarkStackSize is in range.
    // It would be nice to use the private utility routine from Arguments.
    if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
      warning("Invalid value calculated for MarkStackSize (" UINTX_FORMAT "): "
              "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
              mark_stack_size, (uintx) 1, MarkStackSizeMax);
      return;
    }
    FLAG_SET_ERGO(uintx, MarkStackSize, mark_stack_size);
  } else {
    // Verify MarkStackSize is in range.
    if (FLAG_IS_CMDLINE(MarkStackSize)) {
      if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT "): "
                  "must be between " UINTX_FORMAT " and " UINTX_FORMAT,
                  MarkStackSize, (uintx) 1, MarkStackSizeMax);
          return;
        }
      } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
        if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
          warning("Invalid value specified for MarkStackSize (" UINTX_FORMAT ")"
                  " or for MarkStackSizeMax (" UINTX_FORMAT ")",
                  MarkStackSize, MarkStackSizeMax);
          return;
        }
      }
    }
  }

  if (!_markStack.allocate(MarkStackSize)) {
    warning("Failed to allocate CM marking stack");
    return;
  }

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_worker_id, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap, _max_worker_id, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_worker_id, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_worker_id;

  size_t max_regions = (size_t) _g1h->max_regions();
  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
             CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  // so that the call below can read a sensible value
  _heap_start = g1h->reserved_region().start();
  set_non_marking_state();
  _completed_initialization = true;
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase.
  MemRegion reserved = _g1h->g1_reserved();
  _heap_start = reserved.start();
  _heap_end   = reserved.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL,   "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // We need this to make sure that the flag is on during the evacuation
  // pause with initial mark piggy-backed.
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.set_should_expand();
  _markStack.setEmpty();        // Also clears the _markStack overflow flag
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_worker_id; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_worker_id, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (uint i = 0; i < _max_worker_id; ++i) {
    _tasks[i]->set_concurrent(concurrent);
  }

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(out_of_regions(),
           err_msg("only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
                   p2i(_finger), p2i(_heap_end)));
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  ClearBitmapHRClosure cl(this, _nextMarkBitMap, true /* may_yield */);
  if (use_parallel_marking_threads()) {
    ParClearNextMarkBitmapTask task(&cl, parallel_marking_threads(), true);
    _parallel_workers->run_task(&task);
  } else {
    SuspendibleThreadSetJoiner sts;
    g1h->heap_region_iterate(&cl);
  }

  // Clear the liveness counting data. If the marking has been aborted, the abort()
  // call already did that.
  if (cl.complete()) {
    clear_all_count_data();
  }

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class CheckBitmapClearHRClosure : public HeapRegionClosure {
  CMBitMap* _bitmap;
  bool _error;
 public:
  CheckBitmapClearHRClosure(CMBitMap* bitmap) : _bitmap(bitmap) {
  }

  virtual bool doHeapRegion(HeapRegion* r) {
    return _bitmap->getNextMarkedWordAddress(r->bottom(), r->end()) != r->end();
  }
};

bool ConcurrentMark::nextMarkBitmapIsClear() {
  CheckBitmapClearHRClosure cl(_nextMarkBitMap);
  _g1h->heap_region_iterate(&cl);
  return cl.complete();
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialize marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible for it to be suspended for a Full GC, or for an
 * evacuation pause to occur. This is actually safe, since entering
 * the sync barrier is one of the last things do_marking_step() does,
 * and it doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering first barrier", worker_id);
  }

  if (concurrent()) {
    SuspendibleThreadSet::leave();
  }

  bool barrier_aborted = !_first_overflow_barrier_sync.enter();

  if (concurrent()) {
    SuspendibleThreadSet::join();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted first barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving first barrier", worker_id);
    }
  }

  if (barrier_aborted) {
    // If the barrier aborted we ignore the overflow condition and
    // just abort the whole marking phase as quickly as possible.
    return;
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let the task associated with worker 0 do this
    if (worker_id == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->gclog_stamp(concurrent_gc_id());
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures then
  // go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%u] entering second barrier", worker_id);
  }

  if (concurrent()) {
    SuspendibleThreadSet::leave();
  }

  bool barrier_aborted = !_second_overflow_barrier_sync.enter();

  if (concurrent()) {
    SuspendibleThreadSet::join();
  }
  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    if (barrier_aborted) {
      gclog_or_tty->print_cr("[%u] aborted second barrier", worker_id);
    } else {
      gclog_or_tty->print_cr("[%u] leaving second barrier", worker_id);
    }
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
 private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

 public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    SuspendibleThreadSet::join();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true  /* do_termination */,
                                  false /* is_serial*/);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
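        // If only this task has aborted (e.g. its time slice elapsed) while
        // marking as a whole continues, sleep in proportion to the virtual
        // time just spent marking so that the target marking overhead is
        // respected.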
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          SuspendibleThreadSet::leave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          SuspendibleThreadSet::join();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    SuspendibleThreadSet::leave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
 private:
  ConcurrentMark* _cm;

 public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // Start of concurrent marking.
  ClassLoaderDataGraph::clear_claimed_marks();
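
  // Root regions (currently only the survivor regions of the initial-mark
  // pause) contain objects that are live by definition, so they are scanned
  // up front to mark everything they reference. This scan has to complete
  // before the next evacuation pause is allowed to start.
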
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    Universe::heap()->prepare_for_verify();
    Universe::verify(VerifyOption_G1UsePrevMarking,
                     " VerifyDuringGC:(before)");
  }
  g1h->check_bitmaps("Remark Start");

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UsePrevMarking,
                       " VerifyDuringGC:(overflow)");
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      Universe::heap()->prepare_for_verify();
      Universe::verify(VerifyOption_G1UseNextMarking,
                       " VerifyDuringGC:(after)");
    }
    g1h->check_bitmaps("Remark End");
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

  // Expand the marking stack, if we have to and if we can.
  if (_markStack.should_expand()) {
    _markStack.expand();
  }

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
class CMCountDataClosureBase: public HeapRegionClosure {
 protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

 public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};
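
// The counting data consists of one bit per region in the region bitmap (set
// when the region contains any live data) and one bit per card in the card
// bitmap (set when the card is spanned by a live object or by the
// allocated-since-marking portion of its region).
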
// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

 public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
                   p2i(start), p2i(ntams), p2i(hr->end())));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

 public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting for some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
1647 BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index(); 1648 1649 bool expected = _exp_region_bm->at(index); 1650 bool actual = _region_bm->at(index); 1651 if (expected && !actual) { 1652 if (_verbose) { 1653 gclog_or_tty->print_cr("Region %u: region bitmap mismatch: " 1654 "expected: %s, actual: %s", 1655 hr->hrs_index(), 1656 BOOL_TO_STR(expected), BOOL_TO_STR(actual)); 1657 } 1658 failures += 1; 1659 } 1660 1661 // Verify that the card bit maps for the cards spanned by the current 1662 // region match. We have an error if we have a set bit in the expected 1663 // bit map and the corresponding bit in the actual bitmap is not set. 1664 1665 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom()); 1666 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top()); 1667 1668 for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) { 1669 expected = _exp_card_bm->at(i); 1670 actual = _card_bm->at(i); 1671 1672 if (expected && !actual) { 1673 if (_verbose) { 1674 gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": " 1675 "expected: %s, actual: %s", 1676 hr->hrs_index(), i, 1677 BOOL_TO_STR(expected), BOOL_TO_STR(actual)); 1678 } 1679 failures += 1; 1680 } 1681 } 1682 1683 if (failures > 0 && _verbose) { 1684 gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", " 1685 "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT, 1686 HR_FORMAT_PARAMS(hr), p2i(hr->next_top_at_mark_start()), 1687 _calc_cl.region_marked_bytes(), hr->next_marked_bytes()); 1688 } 1689 1690 _failures += failures; 1691 1692 // We could stop iteration over the heap when we 1693 // find the first violating region by returning true. 1694 return false; 1695 } 1696 }; 1697 1698 class G1ParVerifyFinalCountTask: public AbstractGangTask { 1699 protected: 1700 G1CollectedHeap* _g1h; 1701 ConcurrentMark* _cm; 1702 BitMap* _actual_region_bm; 1703 BitMap* _actual_card_bm; 1704 1705 uint _n_workers; 1706 1707 BitMap* _expected_region_bm; 1708 BitMap* _expected_card_bm; 1709 1710 int _failures; 1711 bool _verbose; 1712 1713 public: 1714 G1ParVerifyFinalCountTask(G1CollectedHeap* g1h, 1715 BitMap* region_bm, BitMap* card_bm, 1716 BitMap* expected_region_bm, BitMap* expected_card_bm) 1717 : AbstractGangTask("G1 verify final counting"), 1718 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1719 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1720 _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm), 1721 _failures(0), _verbose(false), 1722 _n_workers(0) { 1723 assert(VerifyDuringGC, "don't call this otherwise"); 1724 1725 // Use the value already set as the number of active threads 1726 // in the call to run_task(). 
1727 if (G1CollectedHeap::use_parallel_gc_threads()) { 1728 assert( _g1h->workers()->active_workers() > 0, 1729 "Should have been previously set"); 1730 _n_workers = _g1h->workers()->active_workers(); 1731 } else { 1732 _n_workers = 1; 1733 } 1734 1735 assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity"); 1736 assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity"); 1737 1738 _verbose = _cm->verbose_medium(); 1739 } 1740 1741 void work(uint worker_id) { 1742 assert(worker_id < _n_workers, "invariant"); 1743 1744 VerifyLiveObjectDataHRClosure verify_cl(_g1h, 1745 _actual_region_bm, _actual_card_bm, 1746 _expected_region_bm, 1747 _expected_card_bm, 1748 _verbose); 1749 1750 if (G1CollectedHeap::use_parallel_gc_threads()) { 1751 _g1h->heap_region_par_iterate_chunked(&verify_cl, 1752 worker_id, 1753 _n_workers, 1754 HeapRegion::VerifyCountClaimValue); 1755 } else { 1756 _g1h->heap_region_iterate(&verify_cl); 1757 } 1758 1759 Atomic::add(verify_cl.failures(), &_failures); 1760 } 1761 1762 int failures() const { return _failures; } 1763 }; 1764 1765 // Closure that finalizes the liveness counting data. 1766 // Used during the cleanup pause. 1767 // Sets the bits corresponding to the interval [NTAMS, top] 1768 // (which contains the implicitly live objects) in the 1769 // card liveness bitmap. Also sets the bit for each region, 1770 // containing live data, in the region liveness bitmap. 1771 1772 class FinalCountDataUpdateClosure: public CMCountDataClosureBase { 1773 public: 1774 FinalCountDataUpdateClosure(G1CollectedHeap* g1h, 1775 BitMap* region_bm, 1776 BitMap* card_bm) : 1777 CMCountDataClosureBase(g1h, region_bm, card_bm) { } 1778 1779 bool doHeapRegion(HeapRegion* hr) { 1780 1781 if (hr->continuesHumongous()) { 1782 // We will ignore these here and process them when their 1783 // associated "starts humongous" region is processed (see 1784 // set_bit_for_heap_region()). Note that we cannot rely on their 1785 // associated "starts humongous" region to have their bit set to 1786 // 1 since, due to the region chunking in the parallel region 1787 // iteration, a "continues humongous" region might be visited 1788 // before its associated "starts humongous". 1789 return false; 1790 } 1791 1792 HeapWord* ntams = hr->next_top_at_mark_start(); 1793 HeapWord* top = hr->top(); 1794 1795 assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions."); 1796 1797 // Mark the allocated-since-marking portion... 1798 if (ntams < top) { 1799 // This definitely means the region has live objects. 1800 set_bit_for_region(hr); 1801 1802 // Now set the bits in the card bitmap for [ntams, top) 1803 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1804 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1805 1806 // Note: if we're looking at the last region in heap - top 1807 // could be actually just beyond the end of the heap; end_idx 1808 // will then correspond to a (non-existent) card that is also 1809 // just beyond the heap. 
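    // A small worked sketch of the rounding, assuming the usual 512-byte
    // cards: card_bitmap_index_for() effectively computes
    //   idx = (addr - heap_start) >> card_shift;   // rounds *down*
    // so if top falls mid-card, the partially covered card would be lost
    // from the exclusive range [start_idx, end_idx). The increment below
    // widens the range by one card to keep it covered.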
1810 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1811 // end of object is not card aligned - increment to cover 1812 // all the cards spanned by the object 1813 end_idx += 1; 1814 } 1815 1816 assert(end_idx <= _card_bm->size(), 1817 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1818 end_idx, _card_bm->size())); 1819 assert(start_idx < _card_bm->size(), 1820 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1821 start_idx, _card_bm->size())); 1822 1823 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1824 } 1825 1826 // Set the bit for the region if it contains live data 1827 if (hr->next_marked_bytes() > 0) { 1828 set_bit_for_region(hr); 1829 } 1830 1831 return false; 1832 } 1833 }; 1834 1835 class G1ParFinalCountTask: public AbstractGangTask { 1836 protected: 1837 G1CollectedHeap* _g1h; 1838 ConcurrentMark* _cm; 1839 BitMap* _actual_region_bm; 1840 BitMap* _actual_card_bm; 1841 1842 uint _n_workers; 1843 1844 public: 1845 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1846 : AbstractGangTask("G1 final counting"), 1847 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1848 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1849 _n_workers(0) { 1850 // Use the value already set as the number of active threads 1851 // in the call to run_task(). 1852 if (G1CollectedHeap::use_parallel_gc_threads()) { 1853 assert( _g1h->workers()->active_workers() > 0, 1854 "Should have been previously set"); 1855 _n_workers = _g1h->workers()->active_workers(); 1856 } else { 1857 _n_workers = 1; 1858 } 1859 } 1860 1861 void work(uint worker_id) { 1862 assert(worker_id < _n_workers, "invariant"); 1863 1864 FinalCountDataUpdateClosure final_update_cl(_g1h, 1865 _actual_region_bm, 1866 _actual_card_bm); 1867 1868 if (G1CollectedHeap::use_parallel_gc_threads()) { 1869 _g1h->heap_region_par_iterate_chunked(&final_update_cl, 1870 worker_id, 1871 _n_workers, 1872 HeapRegion::FinalCountClaimValue); 1873 } else { 1874 _g1h->heap_region_iterate(&final_update_cl); 1875 } 1876 } 1877 }; 1878 1879 class G1ParNoteEndTask; 1880 1881 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1882 G1CollectedHeap* _g1; 1883 size_t _max_live_bytes; 1884 uint _regions_claimed; 1885 size_t _freed_bytes; 1886 FreeRegionList* _local_cleanup_list; 1887 HeapRegionSetCount _old_regions_removed; 1888 HeapRegionSetCount _humongous_regions_removed; 1889 HRRSCleanupTask* _hrrs_cleanup_task; 1890 double _claimed_region_time; 1891 double _max_region_time; 1892 1893 public: 1894 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1895 FreeRegionList* local_cleanup_list, 1896 HRRSCleanupTask* hrrs_cleanup_task) : 1897 _g1(g1), 1898 _max_live_bytes(0), _regions_claimed(0), 1899 _freed_bytes(0), 1900 _claimed_region_time(0.0), _max_region_time(0.0), 1901 _local_cleanup_list(local_cleanup_list), 1902 _old_regions_removed(), 1903 _humongous_regions_removed(), 1904 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1905 1906 size_t freed_bytes() { return _freed_bytes; } 1907 const HeapRegionSetCount& old_regions_removed() { return _old_regions_removed; } 1908 const HeapRegionSetCount& humongous_regions_removed() { return _humongous_regions_removed; } 1909 1910 bool doHeapRegion(HeapRegion *hr) { 1911 if (hr->continuesHumongous()) { 1912 return false; 1913 } 1914 // We use a claim value of zero here because all regions 1915 // were claimed with value 1 in the FinalCount task. 
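    // (A region is reclaimed outright below only when it is non-empty,
    // completely dead - max_live_bytes() == 0 - and not young; regions
    // that survive merely have their remembered sets scrubbed via
    // do_cleanup_work().)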
1916 _g1->reset_gc_time_stamps(hr); 1917 double start = os::elapsedTime(); 1918 _regions_claimed++; 1919 hr->note_end_of_marking(); 1920 _max_live_bytes += hr->max_live_bytes(); 1921 1922 if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) { 1923 _freed_bytes += hr->used(); 1924 hr->set_containing_set(NULL); 1925 if (hr->isHumongous()) { 1926 assert(hr->startsHumongous(), "we should only see starts humongous"); 1927 _humongous_regions_removed.increment(1u, hr->capacity()); 1928 _g1->free_humongous_region(hr, _local_cleanup_list, true); 1929 } else { 1930 _old_regions_removed.increment(1u, hr->capacity()); 1931 _g1->free_region(hr, _local_cleanup_list, true); 1932 } 1933 } else { 1934 hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task); 1935 } 1936 1937 double region_time = (os::elapsedTime() - start); 1938 _claimed_region_time += region_time; 1939 if (region_time > _max_region_time) { 1940 _max_region_time = region_time; 1941 } 1942 return false; 1943 } 1944 1945 size_t max_live_bytes() { return _max_live_bytes; } 1946 uint regions_claimed() { return _regions_claimed; } 1947 double claimed_region_time_sec() { return _claimed_region_time; } 1948 double max_region_time_sec() { return _max_region_time; } 1949 }; 1950 1951 class G1ParNoteEndTask: public AbstractGangTask { 1952 friend class G1NoteEndOfConcMarkClosure; 1953 1954 protected: 1955 G1CollectedHeap* _g1h; 1956 size_t _max_live_bytes; 1957 size_t _freed_bytes; 1958 FreeRegionList* _cleanup_list; 1959 1960 public: 1961 G1ParNoteEndTask(G1CollectedHeap* g1h, 1962 FreeRegionList* cleanup_list) : 1963 AbstractGangTask("G1 note end"), _g1h(g1h), 1964 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { } 1965 1966 void work(uint worker_id) { 1967 double start = os::elapsedTime(); 1968 FreeRegionList local_cleanup_list("Local Cleanup List"); 1969 HRRSCleanupTask hrrs_cleanup_task; 1970 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list, 1971 &hrrs_cleanup_task); 1972 if (G1CollectedHeap::use_parallel_gc_threads()) { 1973 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id, 1974 _g1h->workers()->active_workers(), 1975 HeapRegion::NoteEndClaimValue); 1976 } else { 1977 _g1h->heap_region_iterate(&g1_note_end); 1978 } 1979 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1980 1981 // Now update the lists 1982 _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed()); 1983 { 1984 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1985 _g1h->decrement_summary_bytes(g1_note_end.freed_bytes()); 1986 _max_live_bytes += g1_note_end.max_live_bytes(); 1987 _freed_bytes += g1_note_end.freed_bytes(); 1988 1989 // If we iterate over the global cleanup list at the end of 1990 // cleanup to do this printing we will not guarantee to only 1991 // generate output for the newly-reclaimed regions (the list 1992 // might not be empty at the beginning of cleanup; we might 1993 // still be working on its previous contents). So we do the 1994 // printing here, before we append the new regions to the global 1995 // cleanup list. 
1996 1997 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1998 if (hr_printer->is_active()) { 1999 FreeRegionListIterator iter(&local_cleanup_list); 2000 while (iter.more_available()) { 2001 HeapRegion* hr = iter.get_next(); 2002 hr_printer->cleanup(hr); 2003 } 2004 } 2005 2006 _cleanup_list->add_ordered(&local_cleanup_list); 2007 assert(local_cleanup_list.is_empty(), "post-condition"); 2008 2009 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 2010 } 2011 } 2012 size_t max_live_bytes() { return _max_live_bytes; } 2013 size_t freed_bytes() { return _freed_bytes; } 2014 }; 2015 2016 class G1ParScrubRemSetTask: public AbstractGangTask { 2017 protected: 2018 G1RemSet* _g1rs; 2019 BitMap* _region_bm; 2020 BitMap* _card_bm; 2021 public: 2022 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 2023 BitMap* region_bm, BitMap* card_bm) : 2024 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 2025 _region_bm(region_bm), _card_bm(card_bm) { } 2026 2027 void work(uint worker_id) { 2028 if (G1CollectedHeap::use_parallel_gc_threads()) { 2029 _g1rs->scrub_par(_region_bm, _card_bm, worker_id, 2030 HeapRegion::ScrubRemSetClaimValue); 2031 } else { 2032 _g1rs->scrub(_region_bm, _card_bm); 2033 } 2034 } 2035 2036 }; 2037 2038 void ConcurrentMark::cleanup() { 2039 // world is stopped at this checkpoint 2040 assert(SafepointSynchronize::is_at_safepoint(), 2041 "world should be stopped"); 2042 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2043 2044 // If a full collection has happened, we shouldn't do this. 2045 if (has_aborted()) { 2046 g1h->set_marking_complete(); // So bitmap clearing isn't confused 2047 return; 2048 } 2049 2050 g1h->verify_region_sets_optional(); 2051 2052 if (VerifyDuringGC) { 2053 HandleMark hm; // handle scope 2054 Universe::heap()->prepare_for_verify(); 2055 Universe::verify(VerifyOption_G1UsePrevMarking, 2056 " VerifyDuringGC:(before)"); 2057 } 2058 g1h->check_bitmaps("Cleanup Start"); 2059 2060 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 2061 g1p->record_concurrent_mark_cleanup_start(); 2062 2063 double start = os::elapsedTime(); 2064 2065 HeapRegionRemSet::reset_for_cleanup_tasks(); 2066 2067 uint n_workers; 2068 2069 // Do counting once more with the world stopped for good measure. 2070 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 2071 2072 if (G1CollectedHeap::use_parallel_gc_threads()) { 2073 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 2074 "sanity check"); 2075 2076 g1h->set_par_threads(); 2077 n_workers = g1h->n_par_threads(); 2078 assert(g1h->n_par_threads() == n_workers, 2079 "Should not have been reset"); 2080 g1h->workers()->run_task(&g1_par_count_task); 2081 // Done with the parallel phase so reset to 0. 2082 g1h->set_par_threads(0); 2083 2084 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue), 2085 "sanity check"); 2086 } else { 2087 n_workers = 1; 2088 g1_par_count_task.work(0); 2089 } 2090 2091 if (VerifyDuringGC) { 2092 // Verify that the counting data accumulated during marking matches 2093 // that calculated by walking the marking bitmap. 
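    // In outline: recount liveness from the final mark bitmap into fresh
    // "expected" bitmaps, then require that every bit present in the
    // expected data is also present in the actual (concurrently
    // accumulated) data.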
2094 2095 // Bitmaps to hold expected values 2096 BitMap expected_region_bm(_region_bm.size(), true); 2097 BitMap expected_card_bm(_card_bm.size(), true); 2098 2099 G1ParVerifyFinalCountTask g1_par_verify_task(g1h, 2100 &_region_bm, 2101 &_card_bm, 2102 &expected_region_bm, 2103 &expected_card_bm); 2104 2105 if (G1CollectedHeap::use_parallel_gc_threads()) { 2106 g1h->set_par_threads((int)n_workers); 2107 g1h->workers()->run_task(&g1_par_verify_task); 2108 // Done with the parallel phase so reset to 0. 2109 g1h->set_par_threads(0); 2110 2111 assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue), 2112 "sanity check"); 2113 } else { 2114 g1_par_verify_task.work(0); 2115 } 2116 2117 guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures"); 2118 } 2119 2120 size_t start_used_bytes = g1h->used(); 2121 g1h->set_marking_complete(); 2122 2123 double count_end = os::elapsedTime(); 2124 double this_final_counting_time = (count_end - start); 2125 _total_counting_time += this_final_counting_time; 2126 2127 if (G1PrintRegionLivenessInfo) { 2128 G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking"); 2129 _g1h->heap_region_iterate(&cl); 2130 } 2131 2132 // Install newly created mark bitmap as "prev". 2133 swapMarkBitMaps(); 2134 2135 g1h->reset_gc_time_stamp(); 2136 2137 // Note end of marking in all heap regions. 2138 G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list); 2139 if (G1CollectedHeap::use_parallel_gc_threads()) { 2140 g1h->set_par_threads((int)n_workers); 2141 g1h->workers()->run_task(&g1_par_note_end_task); 2142 g1h->set_par_threads(0); 2143 2144 assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue), 2145 "sanity check"); 2146 } else { 2147 g1_par_note_end_task.work(0); 2148 } 2149 g1h->check_gc_time_stamps(); 2150 2151 if (!cleanup_list_is_empty()) { 2152 // The cleanup list is not empty, so we'll have to process it 2153 // concurrently. Notify anyone else that might be wanting free 2154 // regions that there will be more free regions coming soon. 2155 g1h->set_free_regions_coming(); 2156 } 2157 2158 // Scrub the remembered sets before the record_concurrent_mark_cleanup_end() 2159 // call below, since it affects the metric by which we sort the heap regions. 2160 if (G1ScrubRemSets) { 2161 double rs_scrub_start = os::elapsedTime(); 2162 G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm); 2163 if (G1CollectedHeap::use_parallel_gc_threads()) { 2164 g1h->set_par_threads((int)n_workers); 2165 g1h->workers()->run_task(&g1_par_scrub_rs_task); 2166 g1h->set_par_threads(0); 2167 2168 assert(g1h->check_heap_region_claim_values( 2169 HeapRegion::ScrubRemSetClaimValue), 2170 "sanity check"); 2171 } else { 2172 g1_par_scrub_rs_task.work(0); 2173 } 2174 2175 double rs_scrub_end = os::elapsedTime(); 2176 double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start); 2177 _total_rs_scrub_time += this_rs_scrub_time; 2178 } 2179 2180 // this will also free any regions totally full of garbage objects, 2181 // and sort the regions. 2182 g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers); 2183 2184 // Statistics. 2185 double end = os::elapsedTime(); 2186 _cleanup_times.add((end - start) * 1000.0); 2187 2188 if (G1Log::fine()) { 2189 g1h->print_size_transition(gclog_or_tty, 2190 start_used_bytes, 2191 g1h->used(), 2192 g1h->capacity()); 2193 } 2194 2195 // Clean up will have freed any regions completely full of garbage. 2196 // Update the soft reference policy with the new heap occupancy.
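  // (The soft reference clearing policy - e.g. LRUCurrentHeapPolicy in
  // referencePolicy.hpp - paces clearing by the amount of free heap,
  // hence the refresh below.)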
2197 Universe::update_heap_info_at_gc(); 2198 2199 if (VerifyDuringGC) { 2200 HandleMark hm; // handle scope 2201 Universe::heap()->prepare_for_verify(); 2202 Universe::verify(VerifyOption_G1UsePrevMarking, 2203 " VerifyDuringGC:(after)"); 2204 } 2205 2206 g1h->check_bitmaps("Cleanup End"); 2207 2208 g1h->verify_region_sets_optional(); 2209 2210 // We need to make this be a "collection" so any collection pause that 2211 // races with it goes around and waits for completeCleanup to finish. 2212 g1h->increment_total_collections(); 2213 2214 // Clean out dead classes and update Metaspace sizes. 2215 if (ClassUnloadingWithConcurrentMark) { 2216 ClassLoaderDataGraph::purge(); 2217 } 2218 MetaspaceGC::compute_new_size(); 2219 2220 // We reclaimed old regions so we should calculate the sizes to make 2221 // sure we update the old gen/space data. 2222 g1h->g1mm()->update_sizes(); 2223 2224 g1h->trace_heap_after_concurrent_cycle(); 2225 } 2226 2227 void ConcurrentMark::completeCleanup() { 2228 if (has_aborted()) return; 2229 2230 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2231 2232 _cleanup_list.verify_optional(); 2233 FreeRegionList tmp_free_list("Tmp Free List"); 2234 2235 if (G1ConcRegionFreeingVerbose) { 2236 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 2237 "cleanup list has %u entries", 2238 _cleanup_list.length()); 2239 } 2240 2241 // No one else should be accessing the _cleanup_list at this point, 2242 // so it is not necessary to take any locks 2243 while (!_cleanup_list.is_empty()) { 2244 HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */); 2245 assert(hr != NULL, "Got NULL from a non-empty list"); 2246 hr->par_clear(); 2247 tmp_free_list.add_ordered(hr); 2248 2249 // Instead of adding one region at a time to the secondary_free_list, 2250 // we accumulate them in the local list and move them a few at a 2251 // time. This also cuts down on the number of notify_all() calls 2252 // we do during this process. We'll also append the local list when 2253 // _cleanup_list is empty (which means we just removed the last 2254 // region from the _cleanup_list). 2255 if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) || 2256 _cleanup_list.is_empty()) { 2257 if (G1ConcRegionFreeingVerbose) { 2258 gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : " 2259 "appending %u entries to the secondary_free_list, " 2260 "cleanup list still has %u entries", 2261 tmp_free_list.length(), 2262 _cleanup_list.length()); 2263 } 2264 2265 { 2266 MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); 2267 g1h->secondary_free_list_add(&tmp_free_list); 2268 SecondaryFreeList_lock->notify_all(); 2269 } 2270 2271 if (G1StressConcRegionFreeing) { 2272 for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) { 2273 os::sleep(Thread::current(), (jlong) 1, false); 2274 } 2275 } 2276 } 2277 } 2278 assert(tmp_free_list.is_empty(), "post-condition"); 2279 } 2280 2281 // Supporting Object and Oop closures for reference discovery 2282 // and processing during marking 2283 2284 bool G1CMIsAliveClosure::do_object_b(oop obj) { 2285 HeapWord* addr = (HeapWord*)obj; 2286 return addr != NULL && 2287 (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj)); 2288 } 2289 2290 // 'Keep Alive' oop closure used by both serial and parallel reference processing. 2291 // Uses the CMTask associated with a worker thread (for serial reference 2292 // processing the CMTask for worker 0 is used) to preserve (mark) and 2293 // trace referent objects.
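// (Instances of this closure and of the drain closure further below are
// passed to ReferenceProcessor::process_discovered_references() in
// weakRefsWork(), later in this file.)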
2294 // 2295 // Using the CMTask and embedded local queues avoids having the worker 2296 // threads operating on the global mark stack. This reduces the risk 2297 // of overflowing the stack - which we would rather avoid at this late 2298 // state. Also using the tasks' local queues removes the potential 2299 // of the workers interfering with each other that could occur if 2300 // operating on the global stack. 2301 2302 class G1CMKeepAliveAndDrainClosure: public OopClosure { 2303 ConcurrentMark* _cm; 2304 CMTask* _task; 2305 int _ref_counter_limit; 2306 int _ref_counter; 2307 bool _is_serial; 2308 public: 2309 G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2310 _cm(cm), _task(task), _is_serial(is_serial), 2311 _ref_counter_limit(G1RefProcDrainInterval) { 2312 assert(_ref_counter_limit > 0, "sanity"); 2313 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 2314 _ref_counter = _ref_counter_limit; 2315 } 2316 2317 virtual void do_oop(narrowOop* p) { do_oop_work(p); } 2318 virtual void do_oop( oop* p) { do_oop_work(p); } 2319 2320 template <class T> void do_oop_work(T* p) { 2321 if (!_cm->has_overflown()) { 2322 oop obj = oopDesc::load_decode_heap_oop(p); 2323 if (_cm->verbose_high()) { 2324 gclog_or_tty->print_cr("\t[%u] we're looking at location " 2325 "*"PTR_FORMAT" = "PTR_FORMAT, 2326 _task->worker_id(), p2i(p), p2i((void*) obj)); 2327 } 2328 2329 _task->deal_with_reference(obj); 2330 _ref_counter--; 2331 2332 if (_ref_counter == 0) { 2333 // We have dealt with _ref_counter_limit references, pushing them 2334 // and objects reachable from them on to the local stack (and 2335 // possibly the global stack). Call CMTask::do_marking_step() to 2336 // process these entries. 2337 // 2338 // We call CMTask::do_marking_step() in a loop, which we'll exit if 2339 // there's nothing more to do (i.e. we're done with the entries that 2340 // were pushed as a result of the CMTask::deal_with_reference() calls 2341 // above) or we overflow. 2342 // 2343 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2344 // flag while there may still be some work to do. (See the comment at 2345 // the beginning of CMTask::do_marking_step() for those conditions - 2346 // one of which is reaching the specified time target.) It is only 2347 // when CMTask::do_marking_step() returns without setting the 2348 // has_aborted() flag that the marking step has completed. 2349 do { 2350 double mark_step_duration_ms = G1ConcMarkStepDurationMillis; 2351 _task->do_marking_step(mark_step_duration_ms, 2352 false /* do_termination */, 2353 _is_serial); 2354 } while (_task->has_aborted() && !_cm->has_overflown()); 2355 _ref_counter = _ref_counter_limit; 2356 } 2357 } else { 2358 if (_cm->verbose_high()) { 2359 gclog_or_tty->print_cr("\t[%u] CM Overflow", _task->worker_id()); 2360 } 2361 } 2362 } 2363 }; 2364 2365 // 'Drain' oop closure used by both serial and parallel reference processing. 2366 // Uses the CMTask associated with a given worker thread (for serial 2367 // reference processing the CMtask for worker 0 is used). Calls the 2368 // do_marking_step routine, with an unbelievably large timeout value, 2369 // to drain the marking data structures of the remaining entries 2370 // added by the 'keep alive' oop closure above. 
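// (For scale: the "unbelievably large" timeout passed below is
// 1000000000.0 ms, i.e. roughly 11.5 days - in effect, run to
// completion.)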
2371 2372 class G1CMDrainMarkingStackClosure: public VoidClosure { 2373 ConcurrentMark* _cm; 2374 CMTask* _task; 2375 bool _is_serial; 2376 public: 2377 G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) : 2378 _cm(cm), _task(task), _is_serial(is_serial) { 2379 assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code"); 2380 } 2381 2382 void do_void() { 2383 do { 2384 if (_cm->verbose_high()) { 2385 gclog_or_tty->print_cr("\t[%u] Drain: Calling do_marking_step - serial: %s", 2386 _task->worker_id(), BOOL_TO_STR(_is_serial)); 2387 } 2388 2389 // We call CMTask::do_marking_step() to completely drain the local 2390 // and global marking stacks of entries pushed by the 'keep alive' 2391 // oop closure (an instance of G1CMKeepAliveAndDrainClosure above). 2392 // 2393 // CMTask::do_marking_step() is called in a loop, which we'll exit 2394 // if there's nothing more to do (i.e. we've completely drained the 2395 // entries that were pushed as a result of applying the 'keep alive' 2396 // closure to the entries on the discovered ref lists) or we overflow 2397 // the global marking stack. 2398 // 2399 // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() 2400 // flag while there may still be some work to do. (See the comment at 2401 // the beginning of CMTask::do_marking_step() for those conditions - 2402 // one of which is reaching the specified time target.) It is only 2403 // when CMTask::do_marking_step() returns without setting the 2404 // has_aborted() flag that the marking step has completed. 2405 2406 _task->do_marking_step(1000000000.0 /* something very large */, 2407 true /* do_termination */, 2408 _is_serial); 2409 } while (_task->has_aborted() && !_cm->has_overflown()); 2410 } 2411 }; 2412 2413 // Implementation of AbstractRefProcTaskExecutor for parallel 2414 // reference processing at the end of G1 concurrent marking 2415 2416 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor { 2417 private: 2418 G1CollectedHeap* _g1h; 2419 ConcurrentMark* _cm; 2420 WorkGang* _workers; 2421 int _active_workers; 2422 2423 public: 2424 G1CMRefProcTaskExecutor(G1CollectedHeap* g1h, 2425 ConcurrentMark* cm, 2426 WorkGang* workers, 2427 int n_workers) : 2428 _g1h(g1h), _cm(cm), 2429 _workers(workers), _active_workers(n_workers) { } 2430 2431 // Executes the given task using concurrent marking worker threads.
2432 virtual void execute(ProcessTask& task); 2433 virtual void execute(EnqueueTask& task); 2434 }; 2435 2436 class G1CMRefProcTaskProxy: public AbstractGangTask { 2437 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2438 ProcessTask& _proc_task; 2439 G1CollectedHeap* _g1h; 2440 ConcurrentMark* _cm; 2441 2442 public: 2443 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2444 G1CollectedHeap* g1h, 2445 ConcurrentMark* cm) : 2446 AbstractGangTask("Process reference objects in parallel"), 2447 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 2448 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 2449 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 2450 } 2451 2452 virtual void work(uint worker_id) { 2453 ResourceMark rm; 2454 HandleMark hm; 2455 CMTask* task = _cm->task(worker_id); 2456 G1CMIsAliveClosure g1_is_alive(_g1h); 2457 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 2458 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 2459 2460 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2461 } 2462 }; 2463 2464 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2465 assert(_workers != NULL, "Need parallel worker threads."); 2466 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2467 2468 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2469 2470 // We need to reset the concurrency level before each 2471 // proxy task execution, so that the termination protocol 2472 // and overflow handling in CMTask::do_marking_step() knows 2473 // how many workers to wait for. 2474 _cm->set_concurrency(_active_workers); 2475 _g1h->set_par_threads(_active_workers); 2476 _workers->run_task(&proc_task_proxy); 2477 _g1h->set_par_threads(0); 2478 } 2479 2480 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2481 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2482 EnqueueTask& _enq_task; 2483 2484 public: 2485 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2486 AbstractGangTask("Enqueue reference objects in parallel"), 2487 _enq_task(enq_task) { } 2488 2489 virtual void work(uint worker_id) { 2490 _enq_task.work(worker_id); 2491 } 2492 }; 2493 2494 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2495 assert(_workers != NULL, "Need parallel worker threads."); 2496 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2497 2498 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2499 2500 // Not strictly necessary but... 2501 // 2502 // We need to reset the concurrency level before each 2503 // proxy task execution, so that the termination protocol 2504 // and overflow handling in CMTask::do_marking_step() knows 2505 // how many workers to wait for. 2506 _cm->set_concurrency(_active_workers); 2507 _g1h->set_par_threads(_active_workers); 2508 _workers->run_task(&enq_task_proxy); 2509 _g1h->set_par_threads(0); 2510 } 2511 2512 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) { 2513 G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes); 2514 } 2515 2516 // Helper class to get rid of some boilerplate code. 
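// (The boilerplate in question: prepending the space that G1's log
// layout expects before a timed phase and wiring in the concurrent-mark
// GC timer and GC id, so that call sites reduce to
// G1RemarkGCTraceTime trace("Phase", doit).)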
2517 class G1RemarkGCTraceTime : public GCTraceTime { 2518 static bool doit_and_prepend(bool doit) { 2519 if (doit) { 2520 gclog_or_tty->put(' '); 2521 } 2522 return doit; 2523 } 2524 2525 public: 2526 G1RemarkGCTraceTime(const char* title, bool doit) 2527 : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(), 2528 G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) { 2529 } 2530 }; 2531 2532 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2533 if (has_overflown()) { 2534 // Skip processing the discovered references if we have 2535 // overflown the global marking stack. Reference objects 2536 // only get discovered once so it is OK to not 2537 // de-populate the discovered reference lists. We could have, 2538 // but the only benefit would be that, when marking restarts, 2539 // fewer reference objects are discovered. 2540 return; 2541 } 2542 2543 ResourceMark rm; 2544 HandleMark hm; 2545 2546 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2547 2548 // Is alive closure. 2549 G1CMIsAliveClosure g1_is_alive(g1h); 2550 2551 // Inner scope to exclude the cleaning of the string and symbol 2552 // tables from the displayed time. 2553 { 2554 if (G1Log::finer()) { 2555 gclog_or_tty->put(' '); 2556 } 2557 GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm(), concurrent_gc_id()); 2558 2559 ReferenceProcessor* rp = g1h->ref_processor_cm(); 2560 2561 // See the comment in G1CollectedHeap::ref_processing_init() 2562 // about how reference processing currently works in G1. 2563 2564 // Set the soft reference policy 2565 rp->setup_policy(clear_all_soft_refs); 2566 assert(_markStack.isEmpty(), "mark stack should be empty"); 2567 2568 // Instances of the 'Keep Alive' and 'Complete GC' closures used 2569 // in serial reference processing. Note these closures are also 2570 // used for serially processing (by the current thread) the 2571 // JNI references during parallel reference processing. 2572 // 2573 // These closures do not need to synchronize with the worker 2574 // threads involved in parallel reference processing as these 2575 // instances are executed serially by the current thread (e.g. 2576 // reference processing is not multi-threaded and is thus 2577 // performed by the current thread instead of a gang worker). 2578 // 2579 // The gang tasks involved in parallel reference processing create 2580 // their own instances of these closures, which do their own 2581 // synchronization among themselves. 2582 G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */); 2583 G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */); 2584 2585 // We need at least one active thread. If reference processing 2586 // is not multi-threaded we use the current (VMThread) thread, 2587 // otherwise we use the work gang from the G1CollectedHeap and 2588 // we utilize all the worker threads we can. 2589 bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL; 2590 uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U); 2591 active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U); 2592 2593 // Parallel processing task executor. 2594 G1CMRefProcTaskExecutor par_task_executor(g1h, this, 2595 g1h->workers(), active_workers); 2596 AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL); 2597 2598 // Set the concurrency level. The phase was already set prior to 2599 // executing the remark task.
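    // (If the concurrency level disagreed with the number of workers
    // that actually run, the termination protocol in
    // CMTask::do_marking_step() could wait on the wrong thread count.)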
2600 set_concurrency(active_workers); 2601 2602 // Set the degree of MT processing here. If the discovery was done MT, 2603 // the number of threads involved during discovery could differ from 2604 // the number of active workers. This is OK as long as the discovered 2605 // Reference lists are balanced (see balance_all_queues() and balance_queues()). 2606 rp->set_active_mt_degree(active_workers); 2607 2608 // Process the weak references. 2609 const ReferenceProcessorStats& stats = 2610 rp->process_discovered_references(&g1_is_alive, 2611 &g1_keep_alive, 2612 &g1_drain_mark_stack, 2613 executor, 2614 g1h->gc_timer_cm(), 2615 concurrent_gc_id()); 2616 g1h->gc_tracer_cm()->report_gc_reference_stats(stats); 2617 2618 // The do_oop work routines of the keep_alive and drain_marking_stack 2619 // oop closures will set the has_overflown flag if we overflow the 2620 // global marking stack. 2621 2622 assert(_markStack.overflow() || _markStack.isEmpty(), 2623 "mark stack should be empty (unless it overflowed)"); 2624 2625 if (_markStack.overflow()) { 2626 // This should have been done already when we tried to push an 2627 // entry on to the global mark stack. But let's do it again. 2628 set_has_overflown(); 2629 } 2630 2631 assert(rp->num_q() == active_workers, "why not"); 2632 2633 rp->enqueue_discovered_references(executor); 2634 2635 rp->verify_no_references_recorded(); 2636 assert(!rp->discovery_enabled(), "Post condition"); 2637 } 2638 2639 if (has_overflown()) { 2640 // We can not trust g1_is_alive if the marking stack overflowed 2641 return; 2642 } 2643 2644 assert(_markStack.isEmpty(), "Marking should have completed"); 2645 2646 // Unload Klasses, String, Symbols, Code Cache, etc. 2647 { 2648 G1RemarkGCTraceTime trace("Unloading", G1Log::finer()); 2649 2650 if (ClassUnloadingWithConcurrentMark) { 2651 bool purged_classes; 2652 2653 { 2654 G1RemarkGCTraceTime trace("System Dictionary Unloading", G1Log::finest()); 2655 purged_classes = SystemDictionary::do_unloading(&g1_is_alive); 2656 } 2657 2658 { 2659 G1RemarkGCTraceTime trace("Parallel Unloading", G1Log::finest()); 2660 weakRefsWorkParallelPart(&g1_is_alive, purged_classes); 2661 } 2662 } 2663 2664 if (G1StringDedup::is_enabled()) { 2665 G1RemarkGCTraceTime trace("String Deduplication Unlink", G1Log::finest()); 2666 G1StringDedup::unlink(&g1_is_alive); 2667 } 2668 } 2669 } 2670 2671 void ConcurrentMark::swapMarkBitMaps() { 2672 CMBitMapRO* temp = _prevMarkBitMap; 2673 _prevMarkBitMap = (CMBitMapRO*)_nextMarkBitMap; 2674 _nextMarkBitMap = (CMBitMap*) temp; 2675 } 2676 2677 class CMObjectClosure; 2678 2679 // Closure for iterating over objects, currently only used for 2680 // processing SATB buffers. 
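// (SATB buffers record the pre-write values of overwritten reference
// fields; replaying them through this closure preserves the
// snapshot-at-the-beginning invariant for objects mutated while marking
// was in progress.)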
2681 class CMObjectClosure : public ObjectClosure { 2682 private: 2683 CMTask* _task; 2684 2685 public: 2686 void do_object(oop obj) { 2687 _task->deal_with_reference(obj); 2688 } 2689 2690 CMObjectClosure(CMTask* task) : _task(task) { } 2691 }; 2692 2693 class G1RemarkThreadsClosure : public ThreadClosure { 2694 CMObjectClosure _cm_obj; 2695 G1CMOopClosure _cm_cl; 2696 MarkingCodeBlobClosure _code_cl; 2697 int _thread_parity; 2698 bool _is_par; 2699 2700 public: 2701 G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task, bool is_par) : 2702 _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations), 2703 _thread_parity(SharedHeap::heap()->strong_roots_parity()), _is_par(is_par) {} 2704 2705 void do_thread(Thread* thread) { 2706 if (thread->is_Java_thread()) { 2707 if (thread->claim_oops_do(_is_par, _thread_parity)) { 2708 JavaThread* jt = (JavaThread*)thread; 2709 2710 // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking; 2711 // however, oops reachable from nmethods have very complex lifecycles: 2712 // * Alive if on the stack of an executing method 2713 // * Weakly reachable otherwise 2714 // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be 2715 // live by the SATB invariant, but other oops recorded in nmethods may behave differently. 2716 jt->nmethods_do(&_code_cl); 2717 2718 jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj); 2719 } 2720 } else if (thread->is_VM_thread()) { 2721 if (thread->claim_oops_do(_is_par, _thread_parity)) { 2722 JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj); 2723 } 2724 } 2725 } 2726 }; 2727 2728 class CMRemarkTask: public AbstractGangTask { 2729 private: 2730 ConcurrentMark* _cm; 2731 bool _is_serial; 2732 public: 2733 void work(uint worker_id) { 2734 // Since all available tasks are actually started, we should 2735 // only proceed if we're supposed to be active. 2736 if (worker_id < _cm->active_tasks()) { 2737 CMTask* task = _cm->task(worker_id); 2738 task->record_start_time(); 2739 { 2740 ResourceMark rm; 2741 HandleMark hm; 2742 2743 G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task, !_is_serial); 2744 Threads::threads_do(&threads_f); 2745 } 2746 2747 do { 2748 task->do_marking_step(1000000000.0 /* something very large */, 2749 true /* do_termination */, 2750 _is_serial); 2751 } while (task->has_aborted() && !_cm->has_overflown()); 2752 // If we overflow, then we do not want to restart. We instead 2753 // want to abort remark and do concurrent marking again.
2754 task->record_end_time(); 2755 } 2756 } 2757 2758 CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) : 2759 AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) { 2760 _cm->terminator()->reset_for_reuse(active_workers); 2761 } 2762 }; 2763 2764 void ConcurrentMark::checkpointRootsFinalWork() { 2765 ResourceMark rm; 2766 HandleMark hm; 2767 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2768 2769 G1RemarkGCTraceTime trace("Finalize Marking", G1Log::finer()); 2770 2771 g1h->ensure_parsability(false); 2772 2773 if (G1CollectedHeap::use_parallel_gc_threads()) { 2774 G1CollectedHeap::StrongRootsScope srs(g1h); 2775 // this is remark, so we'll use up all active threads 2776 uint active_workers = g1h->workers()->active_workers(); 2777 if (active_workers == 0) { 2778 assert(active_workers > 0, "Should have been set earlier"); 2779 active_workers = (uint) ParallelGCThreads; 2780 g1h->workers()->set_active_workers(active_workers); 2781 } 2782 set_concurrency_and_phase(active_workers, false /* concurrent */); 2783 // Leave _parallel_marking_threads at its 2784 // value originally calculated in the ConcurrentMark 2785 // constructor and pass values of the active workers 2786 // through the gang in the task. 2787 2788 CMRemarkTask remarkTask(this, active_workers, false /* is_serial */); 2789 // We will start all available threads, even if we decide that the 2790 // active_workers will be fewer. The extra ones will just bail out 2791 // immediately. 2792 g1h->set_par_threads(active_workers); 2793 g1h->workers()->run_task(&remarkTask); 2794 g1h->set_par_threads(0); 2795 } else { 2796 G1CollectedHeap::StrongRootsScope srs(g1h); 2797 uint active_workers = 1; 2798 set_concurrency_and_phase(active_workers, false /* concurrent */); 2799 2800 // Note - if there's no work gang then the VMThread will be 2801 // the thread to execute the remark - serially. We have 2802 // to pass true for the is_serial parameter so that 2803 // CMTask::do_marking_step() doesn't enter the sync 2804 // barriers in the event of an overflow. Doing so will 2805 // cause an assert that the current thread is not a 2806 // concurrent GC thread.
2807 CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/); 2808 remarkTask.work(0); 2809 } 2810 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2811 guarantee(has_overflown() || 2812 satb_mq_set.completed_buffers_num() == 0, 2813 err_msg("Invariant: has_overflown = %s, num buffers = %d", 2814 BOOL_TO_STR(has_overflown()), 2815 satb_mq_set.completed_buffers_num())); 2816 2817 print_stats(); 2818 } 2819 2820 #ifndef PRODUCT 2821 2822 class PrintReachableOopClosure: public OopClosure { 2823 private: 2824 G1CollectedHeap* _g1h; 2825 outputStream* _out; 2826 VerifyOption _vo; 2827 bool _all; 2828 2829 public: 2830 PrintReachableOopClosure(outputStream* out, 2831 VerifyOption vo, 2832 bool all) : 2833 _g1h(G1CollectedHeap::heap()), 2834 _out(out), _vo(vo), _all(all) { } 2835 2836 void do_oop(narrowOop* p) { do_oop_work(p); } 2837 void do_oop( oop* p) { do_oop_work(p); } 2838 2839 template <class T> void do_oop_work(T* p) { 2840 oop obj = oopDesc::load_decode_heap_oop(p); 2841 const char* str = NULL; 2842 const char* str2 = ""; 2843 2844 if (obj == NULL) { 2845 str = ""; 2846 } else if (!_g1h->is_in_g1_reserved(obj)) { 2847 str = " O"; 2848 } else { 2849 HeapRegion* hr = _g1h->heap_region_containing(obj); 2850 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo); 2851 bool marked = _g1h->is_marked(obj, _vo); 2852 2853 if (over_tams) { 2854 str = " >"; 2855 if (marked) { 2856 str2 = " AND MARKED"; 2857 } 2858 } else if (marked) { 2859 str = " M"; 2860 } else { 2861 str = " NOT"; 2862 } 2863 } 2864 2865 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2866 p2i(p), p2i((void*) obj), str, str2); 2867 } 2868 }; 2869 2870 class PrintReachableObjectClosure : public ObjectClosure { 2871 private: 2872 G1CollectedHeap* _g1h; 2873 outputStream* _out; 2874 VerifyOption _vo; 2875 bool _all; 2876 HeapRegion* _hr; 2877 2878 public: 2879 PrintReachableObjectClosure(outputStream* out, 2880 VerifyOption vo, 2881 bool all, 2882 HeapRegion* hr) : 2883 _g1h(G1CollectedHeap::heap()), 2884 _out(out), _vo(vo), _all(all), _hr(hr) { } 2885 2886 void do_object(oop o) { 2887 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo); 2888 bool marked = _g1h->is_marked(o, _vo); 2889 bool print_it = _all || over_tams || marked; 2890 2891 if (print_it) { 2892 _out->print_cr(" "PTR_FORMAT"%s", 2893 p2i((void *)o), (over_tams) ? " >" : (marked) ? 
" M" : ""); 2894 PrintReachableOopClosure oopCl(_out, _vo, _all); 2895 o->oop_iterate_no_header(&oopCl); 2896 } 2897 } 2898 }; 2899 2900 class PrintReachableRegionClosure : public HeapRegionClosure { 2901 private: 2902 G1CollectedHeap* _g1h; 2903 outputStream* _out; 2904 VerifyOption _vo; 2905 bool _all; 2906 2907 public: 2908 bool doHeapRegion(HeapRegion* hr) { 2909 HeapWord* b = hr->bottom(); 2910 HeapWord* e = hr->end(); 2911 HeapWord* t = hr->top(); 2912 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2913 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2914 "TAMS: " PTR_FORMAT, p2i(b), p2i(e), p2i(t), p2i(p)); 2915 _out->cr(); 2916 2917 HeapWord* from = b; 2918 HeapWord* to = t; 2919 2920 if (to > from) { 2921 _out->print_cr("Objects in [" PTR_FORMAT ", " PTR_FORMAT "]", p2i(from), p2i(to)); 2922 _out->cr(); 2923 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2924 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2925 _out->cr(); 2926 } 2927 2928 return false; 2929 } 2930 2931 PrintReachableRegionClosure(outputStream* out, 2932 VerifyOption vo, 2933 bool all) : 2934 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2935 }; 2936 2937 void ConcurrentMark::print_reachable(const char* str, 2938 VerifyOption vo, 2939 bool all) { 2940 gclog_or_tty->cr(); 2941 gclog_or_tty->print_cr("== Doing heap dump... "); 2942 2943 if (G1PrintReachableBaseFile == NULL) { 2944 gclog_or_tty->print_cr(" #### error: no base file defined"); 2945 return; 2946 } 2947 2948 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2949 (JVM_MAXPATHLEN - 1)) { 2950 gclog_or_tty->print_cr(" #### error: file name too long"); 2951 return; 2952 } 2953 2954 char file_name[JVM_MAXPATHLEN]; 2955 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2956 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2957 2958 fileStream fout(file_name); 2959 if (!fout.is_open()) { 2960 gclog_or_tty->print_cr(" #### error: could not open file"); 2961 return; 2962 } 2963 2964 outputStream* out = &fout; 2965 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2966 out->cr(); 2967 2968 out->print_cr("--- ITERATING OVER REGIONS"); 2969 out->cr(); 2970 PrintReachableRegionClosure rcl(out, vo, all); 2971 _g1h->heap_region_iterate(&rcl); 2972 out->cr(); 2973 2974 gclog_or_tty->print_cr(" done"); 2975 gclog_or_tty->flush(); 2976 } 2977 2978 #endif // PRODUCT 2979 2980 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2981 // Note we are overriding the read-only view of the prev map here, via 2982 // the cast. 2983 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2984 } 2985 2986 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2987 _nextMarkBitMap->clearRange(mr); 2988 } 2989 2990 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2991 clearRangePrevBitmap(mr); 2992 clearRangeNextBitmap(mr); 2993 } 2994 2995 HeapRegion* 2996 ConcurrentMark::claim_region(uint worker_id) { 2997 // "checkpoint" the finger 2998 HeapWord* finger = _finger; 2999 3000 // _heap_end will not change underneath our feet; it only changes at 3001 // yield points. 3002 while (finger < _heap_end) { 3003 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 3004 3005 // Note on how this code handles humongous regions. In the 3006 // normal case the finger will reach the start of a "starts 3007 // humongous" (SH) region. 
Its end will either be the end of the 3008 // last "continues humongous" (CH) region in the sequence, or the 3009 // standard end of the SH region (if the SH is the only region in 3010 // the sequence). That way claim_region() will skip over the CH 3011 // regions. However, there is a subtle race between a CM thread 3012 // executing this method and a mutator thread doing a humongous 3013 // object allocation. The two are not mutually exclusive as the CM 3014 // thread does not need to hold the Heap_lock when it gets 3015 // here. So there is a chance that claim_region() will come across 3016 // a free region that's in the process of becoming a SH or a CH 3017 // region. In the former case, it will either 3018 // a) Miss the update to the region's end, in which case it will 3019 // visit every subsequent CH region, will find their bitmaps 3020 // empty, and do nothing, or 3021 // b) Will observe the update of the region's end (in which case 3022 // it will skip the subsequent CH regions). 3023 // If it comes across a region that suddenly becomes CH, the 3024 // scenario will be similar to b). So, the race between 3025 // claim_region() and a humongous object allocation might force us 3026 // to do a bit of unnecessary work (due to some unnecessary bitmap 3027 // iterations) but it should not introduce any correctness issues. 3028 HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger); 3029 3030 // Above heap_region_containing_raw may return NULL as we always scan and claim 3031 // until the end of the heap. In this case, just jump to the next region. 3032 HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords; 3033 3034 // Is the gap between reading the finger and doing the CAS too long? 3035 HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger); 3036 if (res == finger && curr_region != NULL) { 3037 // we succeeded 3038 HeapWord* bottom = curr_region->bottom(); 3039 HeapWord* limit = curr_region->next_top_at_mark_start(); 3040 3041 if (verbose_low()) { 3042 gclog_or_tty->print_cr("[%u] curr_region = "PTR_FORMAT" " 3043 "["PTR_FORMAT", "PTR_FORMAT"), " 3044 "limit = "PTR_FORMAT, 3045 worker_id, p2i(curr_region), p2i(bottom), p2i(end), p2i(limit)); 3046 } 3047 3048 // notice that _finger == end cannot be guaranteed here since 3049 // someone else might have moved the finger even further 3050 assert(_finger >= end, "the finger should have moved forward"); 3051 3052 if (verbose_low()) { 3053 gclog_or_tty->print_cr("[%u] we were successful with region = " 3054 PTR_FORMAT, worker_id, p2i(curr_region)); 3055 } 3056 3057 if (limit > bottom) { 3058 if (verbose_low()) { 3059 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is not empty, " 3060 "returning it ", worker_id, p2i(curr_region)); 3061 } 3062 return curr_region; 3063 } else { 3064 assert(limit == bottom, 3065 "the region limit should be at bottom"); 3066 if (verbose_low()) { 3067 gclog_or_tty->print_cr("[%u] region "PTR_FORMAT" is empty, " 3068 "returning NULL", worker_id, p2i(curr_region)); 3069 } 3070 // we return NULL and the caller should try calling 3071 // claim_region() again.
3072 return NULL; 3073 } 3074 } else { 3075 assert(_finger > finger, "the finger should have moved forward"); 3076 if (verbose_low()) { 3077 if (curr_region == NULL) { 3078 gclog_or_tty->print_cr("[%u] found uncommitted region, moving finger, " 3079 "global finger = "PTR_FORMAT", " 3080 "our finger = "PTR_FORMAT, 3081 worker_id, p2i(_finger), p2i(finger)); 3082 } else { 3083 gclog_or_tty->print_cr("[%u] somebody else moved the finger, " 3084 "global finger = "PTR_FORMAT", " 3085 "our finger = "PTR_FORMAT, 3086 worker_id, p2i(_finger), p2i(finger)); 3087 } 3088 } 3089 3090 // read it again 3091 finger = _finger; 3092 } 3093 } 3094 3095 return NULL; 3096 } 3097 3098 #ifndef PRODUCT 3099 enum VerifyNoCSetOopsPhase { 3100 VerifyNoCSetOopsStack, 3101 VerifyNoCSetOopsQueues, 3102 VerifyNoCSetOopsSATBCompleted, 3103 VerifyNoCSetOopsSATBThread 3104 }; 3105 3106 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure { 3107 private: 3108 G1CollectedHeap* _g1h; 3109 VerifyNoCSetOopsPhase _phase; 3110 int _info; 3111 3112 const char* phase_str() { 3113 switch (_phase) { 3114 case VerifyNoCSetOopsStack: return "Stack"; 3115 case VerifyNoCSetOopsQueues: return "Queue"; 3116 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers"; 3117 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers"; 3118 default: ShouldNotReachHere(); 3119 } 3120 return NULL; 3121 } 3122 3123 void do_object_work(oop obj) { 3124 guarantee(!_g1h->obj_in_cs(obj), 3125 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d", 3126 p2i((void*) obj), phase_str(), _info)); 3127 } 3128 3129 public: 3130 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { } 3131 3132 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) { 3133 _phase = phase; 3134 _info = info; 3135 } 3136 3137 virtual void do_oop(oop* p) { 3138 oop obj = oopDesc::load_decode_heap_oop(p); 3139 do_object_work(obj); 3140 } 3141 3142 virtual void do_oop(narrowOop* p) { 3143 // We should not come across narrow oops while scanning marking 3144 // stacks and SATB buffers. 
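    // (Entries on the mark stacks and SATB buffers are full-width oops;
    // compressed narrowOops only occur embedded in heap object fields.)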
3145 ShouldNotReachHere(); 3146 } 3147 3148 virtual void do_object(oop obj) { 3149 do_object_work(obj); 3150 } 3151 }; 3152 3153 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, 3154 bool verify_enqueued_buffers, 3155 bool verify_thread_buffers, 3156 bool verify_fingers) { 3157 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 3158 if (!G1CollectedHeap::heap()->mark_in_progress()) { 3159 return; 3160 } 3161 3162 VerifyNoCSetOopsClosure cl; 3163 3164 if (verify_stacks) { 3165 // Verify entries on the global mark stack 3166 cl.set_phase(VerifyNoCSetOopsStack); 3167 _markStack.oops_do(&cl); 3168 3169 // Verify entries on the task queues 3170 for (uint i = 0; i < _max_worker_id; i += 1) { 3171 cl.set_phase(VerifyNoCSetOopsQueues, i); 3172 CMTaskQueue* queue = _task_queues->queue(i); 3173 queue->oops_do(&cl); 3174 } 3175 } 3176 3177 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 3178 3179 // Verify entries on the enqueued SATB buffers 3180 if (verify_enqueued_buffers) { 3181 cl.set_phase(VerifyNoCSetOopsSATBCompleted); 3182 satb_qs.iterate_completed_buffers_read_only(&cl); 3183 } 3184 3185 // Verify entries on the per-thread SATB buffers 3186 if (verify_thread_buffers) { 3187 cl.set_phase(VerifyNoCSetOopsSATBThread); 3188 satb_qs.iterate_thread_buffers_read_only(&cl); 3189 } 3190 3191 if (verify_fingers) { 3192 // Verify the global finger 3193 HeapWord* global_finger = finger(); 3194 if (global_finger != NULL && global_finger < _heap_end) { 3195 // The global finger always points to a heap region boundary. We 3196 // use heap_region_containing_raw() to get the containing region 3197 // given that the global finger could be pointing to a free region 3198 // which subsequently becomes continues humongous. If that 3199 // happens, heap_region_containing() will return the bottom of the 3200 // corresponding starts humongous region and the check below will 3201 // not hold any more. 3202 // Since we always iterate over all regions, we might get a NULL HeapRegion 3203 // here. 3204 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 3205 guarantee(global_hr == NULL || global_finger == global_hr->bottom(), 3206 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT, 3207 p2i(global_finger), HR_FORMAT_PARAMS(global_hr))); 3208 } 3209 3210 // Verify the task fingers 3211 assert(parallel_marking_threads() <= _max_worker_id, "sanity"); 3212 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) { 3213 CMTask* task = _tasks[i]; 3214 HeapWord* task_finger = task->finger(); 3215 if (task_finger != NULL && task_finger < _heap_end) { 3216 // See above note on the global finger verification. 3217 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); 3218 guarantee(task_hr == NULL || task_finger == task_hr->bottom() || 3219 !task_hr->in_collection_set(), 3220 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT, 3221 p2i(task_finger), HR_FORMAT_PARAMS(task_hr))); 3222 } 3223 } 3224 } 3225 } 3226 #endif // PRODUCT 3227 3228 // Aggregate the counting data that was constructed concurrently 3229 // with marking. 
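// In outline, for each region R and each worker w, the closure below
// does (pseudo-code):
//   R.marked_bytes += marked_bytes[w][R.index];
//   global_card_bm |= task_card_bm[w];  // restricted to R's card range
// folding each worker's private "stripe" of the counting data into the
// global totals.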
3230 class AggregateCountDataHRClosure: public HeapRegionClosure { 3231 G1CollectedHeap* _g1h; 3232 ConcurrentMark* _cm; 3233 CardTableModRefBS* _ct_bs; 3234 BitMap* _cm_card_bm; 3235 uint _max_worker_id; 3236 3237 public: 3238 AggregateCountDataHRClosure(G1CollectedHeap* g1h, 3239 BitMap* cm_card_bm, 3240 uint max_worker_id) : 3241 _g1h(g1h), _cm(g1h->concurrent_mark()), 3242 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())), 3243 _cm_card_bm(cm_card_bm), _max_worker_id(max_worker_id) { } 3244 3245 bool doHeapRegion(HeapRegion* hr) { 3246 if (hr->continuesHumongous()) { 3247 // We will ignore these here and process them when their 3248 // associated "starts humongous" region is processed. 3249 // Note that we cannot rely on their associated 3250 // "starts humongous" region to have their bit set to 1 3251 // since, due to the region chunking in the parallel region 3252 // iteration, a "continues humongous" region might be visited 3253 // before its associated "starts humongous". 3254 return false; 3255 } 3256 3257 HeapWord* start = hr->bottom(); 3258 HeapWord* limit = hr->next_top_at_mark_start(); 3259 HeapWord* end = hr->end(); 3260 3261 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 3262 err_msg("Preconditions not met - " 3263 "start: "PTR_FORMAT", limit: "PTR_FORMAT", " 3264 "top: "PTR_FORMAT", end: "PTR_FORMAT, 3265 p2i(start), p2i(limit), p2i(hr->top()), p2i(hr->end()))); 3266 3267 assert(hr->next_marked_bytes() == 0, "Precondition"); 3268 3269 if (start == limit) { 3270 // NTAMS of this region has not been set so nothing to do. 3271 return false; 3272 } 3273 3274 // 'start' should be in the heap. 3275 assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity"); 3276 // 'end' *may* be just beyond the end of the heap (if hr is the last region) 3277 assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity"); 3278 3279 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start); 3280 BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit); 3281 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end); 3282 3283 // If ntams is not card aligned then we bump the card bitmap index 3284 // for limit so that we get all the cards spanned by 3285 // the object ending at ntams. 3286 // Note: if this is the last region in the heap then ntams 3287 // could be actually just beyond the end of the heap; 3288 // limit_idx will then correspond to a (non-existent) card 3289 // that is also outside the heap. 3290 if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) { 3291 limit_idx += 1; 3292 } 3293 3294 assert(limit_idx <= end_idx, "or else use atomics"); 3295 3296 // Aggregate the "stripe" in the count data associated with hr. 3297 uint hrs_index = hr->hrs_index(); 3298 size_t marked_bytes = 0; 3299 3300 for (uint i = 0; i < _max_worker_id; i += 1) { 3301 size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i); 3302 BitMap* task_card_bm = _cm->count_card_bitmap_for(i); 3303 3304 // Fetch the marked_bytes in this region for task i and 3305 // add it to the running total for this region. 3306 marked_bytes += marked_bytes_array[hrs_index]; 3307 3308 // Now union the bitmaps[0,max_worker_id)[start_idx..limit_idx) 3309 // into the global card bitmap.
3310 BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx); 3311 3312 while (scan_idx < limit_idx) { 3313 assert(task_card_bm->at(scan_idx) == true, "should be"); 3314 _cm_card_bm->set_bit(scan_idx); 3315 assert(_cm_card_bm->at(scan_idx) == true, "should be"); 3316 3317 // BitMap::get_next_one_offset() can handle the case when 3318 // its left_offset parameter is greater than its right_offset 3319 // parameter. It does, however, have an early exit if 3320 // left_offset == right_offset. So let's limit the value 3321 // passed in for left offset here. 3322 BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx); 3323 scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx); 3324 } 3325 } 3326 3327 // Update the marked bytes for this region. 3328 hr->add_to_marked_bytes(marked_bytes); 3329 3330 // Next heap region 3331 return false; 3332 } 3333 }; 3334 3335 class G1AggregateCountDataTask: public AbstractGangTask { 3336 protected: 3337 G1CollectedHeap* _g1h; 3338 ConcurrentMark* _cm; 3339 BitMap* _cm_card_bm; 3340 uint _max_worker_id; 3341 int _active_workers; 3342 3343 public: 3344 G1AggregateCountDataTask(G1CollectedHeap* g1h, 3345 ConcurrentMark* cm, 3346 BitMap* cm_card_bm, 3347 uint max_worker_id, 3348 int n_workers) : 3349 AbstractGangTask("Count Aggregation"), 3350 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 3351 _max_worker_id(max_worker_id), 3352 _active_workers(n_workers) { } 3353 3354 void work(uint worker_id) { 3355 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_worker_id); 3356 3357 if (G1CollectedHeap::use_parallel_gc_threads()) { 3358 _g1h->heap_region_par_iterate_chunked(&cl, worker_id, 3359 _active_workers, 3360 HeapRegion::AggregateCountClaimValue); 3361 } else { 3362 _g1h->heap_region_iterate(&cl); 3363 } 3364 } 3365 }; 3366 3367 3368 void ConcurrentMark::aggregate_count_data() { 3369 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 3370 _g1h->workers()->active_workers() : 3371 1); 3372 3373 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 3374 _max_worker_id, n_workers); 3375 3376 if (G1CollectedHeap::use_parallel_gc_threads()) { 3377 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 3378 "sanity check"); 3379 _g1h->set_par_threads(n_workers); 3380 _g1h->workers()->run_task(&g1_par_agg_task); 3381 _g1h->set_par_threads(0); 3382 3383 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue), 3384 "sanity check"); 3385 _g1h->reset_heap_region_claim_values(); 3386 } else { 3387 g1_par_agg_task.work(0); 3388 } 3389 } 3390 3391 // Clear the per-worker arrays used to store the per-region counting data 3392 void ConcurrentMark::clear_all_count_data() { 3393 // Clear the global card bitmap - it will be filled during 3394 // liveness count aggregation (during remark) and the 3395 // final counting task. 3396 _card_bm.clear(); 3397 3398 // Clear the global region bitmap - it will be filled as part 3399 // of the final counting task. 
3400 _region_bm.clear(); 3401 3402 uint max_regions = _g1h->max_regions(); 3403 assert(_max_worker_id > 0, "uninitialized"); 3404 3405 for (uint i = 0; i < _max_worker_id; i += 1) { 3406 BitMap* task_card_bm = count_card_bitmap_for(i); 3407 size_t* marked_bytes_array = count_marked_bytes_array_for(i); 3408 3409 assert(task_card_bm->size() == _card_bm.size(), "size mismatch"); 3410 assert(marked_bytes_array != NULL, "uninitialized"); 3411 3412 memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t)); 3413 task_card_bm->clear(); 3414 } 3415 } 3416 3417 void ConcurrentMark::print_stats() { 3418 if (verbose_stats()) { 3419 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3420 for (size_t i = 0; i < _active_tasks; ++i) { 3421 _tasks[i]->print_stats(); 3422 gclog_or_tty->print_cr("---------------------------------------------------------------------"); 3423 } 3424 } 3425 } 3426 3427 // abandon current marking iteration due to a Full GC 3428 void ConcurrentMark::abort() { 3429 // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next 3430 // concurrent bitmap clearing. 3431 _nextMarkBitMap->clearAll(); 3432 3433 // Note we cannot clear the previous marking bitmap here 3434 // since VerifyDuringGC verifies the objects marked during 3435 // a full GC against the previous bitmap. 3436 3437 // Clear the liveness counting data 3438 clear_all_count_data(); 3439 // Empty mark stack 3440 reset_marking_state(); 3441 for (uint i = 0; i < _max_worker_id; ++i) { 3442 _tasks[i]->clear_region_fields(); 3443 } 3444 _first_overflow_barrier_sync.abort(); 3445 _second_overflow_barrier_sync.abort(); 3446 const GCId& gc_id = _g1h->gc_tracer_cm()->gc_id(); 3447 if (!gc_id.is_undefined()) { 3448 // We can do multiple full GCs before ConcurrentMarkThread::run() gets a chance 3449 // to detect that it was aborted. Only keep track of the first GC id that we aborted. 3450 _aborted_gc_id = gc_id; 3451 } 3452 _has_aborted = true; 3453 3454 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 3455 satb_mq_set.abandon_partial_marking(); 3456 // This can be called either during or outside marking, we'll read 3457 // the expected_active value from the SATB queue set. 3458 satb_mq_set.set_active_all_threads( 3459 false, /* new active value */ 3460 satb_mq_set.is_active() /* expected_active */); 3461 3462 _g1h->trace_heap_after_concurrent_cycle(); 3463 _g1h->register_concurrent_cycle_end(); 3464 } 3465 3466 const GCId& ConcurrentMark::concurrent_gc_id() { 3467 if (has_aborted()) { 3468 return _aborted_gc_id; 3469 } 3470 return _g1h->gc_tracer_cm()->gc_id(); 3471 } 3472 3473 static void print_ms_time_info(const char* prefix, const char* name, 3474 NumberSeq& ns) { 3475 gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).", 3476 prefix, ns.num(), name, ns.sum()/1000.0, ns.avg()); 3477 if (ns.num() > 0) { 3478 gclog_or_tty->print_cr("%s [std. 
dev = %8.2f ms, max = %8.2f ms]", 3479 prefix, ns.sd(), ns.maximum()); 3480 } 3481 } 3482 3483 void ConcurrentMark::print_summary_info() { 3484 gclog_or_tty->print_cr(" Concurrent marking:"); 3485 print_ms_time_info(" ", "init marks", _init_times); 3486 print_ms_time_info(" ", "remarks", _remark_times); 3487 { 3488 print_ms_time_info(" ", "final marks", _remark_mark_times); 3489 print_ms_time_info(" ", "weak refs", _remark_weak_ref_times); 3490 3491 } 3492 print_ms_time_info(" ", "cleanups", _cleanup_times); 3493 gclog_or_tty->print_cr(" Final counting total time = %8.2f s (avg = %8.2f ms).", 3494 _total_counting_time, 3495 (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / 3496 (double)_cleanup_times.num() 3497 : 0.0)); 3498 if (G1ScrubRemSets) { 3499 gclog_or_tty->print_cr(" RS scrub total time = %8.2f s (avg = %8.2f ms).", 3500 _total_rs_scrub_time, 3501 (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / 3502 (double)_cleanup_times.num() 3503 : 0.0)); 3504 } 3505 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3506 (_init_times.sum() + _remark_times.sum() + 3507 _cleanup_times.sum())/1000.0); 3508 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3509 "(%8.2f s marking).", 3510 cmThread()->vtime_accum(), 3511 cmThread()->vtime_mark_accum()); 3512 } 3513 3514 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3515 if (use_parallel_marking_threads()) { 3516 _parallel_workers->print_worker_threads_on(st); 3517 } 3518 } 3519 3520 void ConcurrentMark::print_on_error(outputStream* st) const { 3521 st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT, 3522 p2i(_prevMarkBitMap), p2i(_nextMarkBitMap)); 3523 _prevMarkBitMap->print_on_error(st, " Prev Bits: "); 3524 _nextMarkBitMap->print_on_error(st, " Next Bits: "); 3525 } 3526 3527 // We take a break if someone is trying to stop the world. 
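// Returns true if we actually yielded (so a safepoint, and possibly a
// pause, may have occurred before we return), false otherwise.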
3528 bool ConcurrentMark::do_yield_check(uint worker_id) { 3529 if (SuspendibleThreadSet::should_yield()) { 3530 if (worker_id == 0) { 3531 _g1h->g1_policy()->record_concurrent_pause(); 3532 } 3533 SuspendibleThreadSet::yield(); 3534 return true; 3535 } else { 3536 return false; 3537 } 3538 } 3539 3540 bool ConcurrentMark::containing_card_is_marked(void* p) { 3541 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3542 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3543 } 3544 3545 bool ConcurrentMark::containing_cards_are_marked(void* start, 3546 void* last) { 3547 return containing_card_is_marked(start) && 3548 containing_card_is_marked(last); 3549 } 3550 3551 #ifndef PRODUCT 3552 // for debugging purposes 3553 void ConcurrentMark::print_finger() { 3554 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3555 p2i(_heap_start), p2i(_heap_end), p2i(_finger)); 3556 for (uint i = 0; i < _max_worker_id; ++i) { 3557 gclog_or_tty->print(" %u: " PTR_FORMAT, i, p2i(_tasks[i]->finger())); 3558 } 3559 gclog_or_tty->cr(); 3560 } 3561 #endif 3562 3563 void CMTask::scan_object(oop obj) { 3564 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3565 3566 if (_cm->verbose_high()) { 3567 gclog_or_tty->print_cr("[%u] we're scanning object "PTR_FORMAT, 3568 _worker_id, p2i((void*) obj)); 3569 } 3570 3571 size_t obj_size = obj->size(); 3572 _words_scanned += obj_size; 3573 3574 obj->oop_iterate(_cm_oop_closure); 3575 statsOnly( ++_objs_scanned ); 3576 check_limits(); 3577 } 3578 3579 // Closure for iteration over bitmaps 3580 class CMBitMapClosure : public BitMapClosure { 3581 private: 3582 // the bitmap that is being iterated over 3583 CMBitMap* _nextMarkBitMap; 3584 ConcurrentMark* _cm; 3585 CMTask* _task; 3586 3587 public: 3588 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3589 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3590 3591 bool do_bit(size_t offset) { 3592 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3593 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3594 assert( addr < _cm->finger(), "invariant"); 3595 3596 statsOnly( _task->increase_objs_found_on_bitmap() ); 3597 assert(addr >= _task->finger(), "invariant"); 3598 3599 // We move that task's local finger along. 
3600 _task->move_finger_to(addr);
3601
3602 _task->scan_object(oop(addr));
3603 // we only partially drain the local queue and global stack
3604 _task->drain_local_queue(true);
3605 _task->drain_global_stack(true);
3606
3607 // if the has_aborted flag has been raised, we need to bail out of
3608 // the iteration
3609 return !_task->has_aborted();
3610 }
3611 };
3612
3613 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3614 ConcurrentMark* cm,
3615 CMTask* task)
3616 : _g1h(g1h), _cm(cm), _task(task) {
3617 assert(_ref_processor == NULL, "should be initialized to NULL");
3618
3619 if (G1UseConcMarkReferenceProcessing) {
3620 _ref_processor = g1h->ref_processor_cm();
3621 assert(_ref_processor != NULL, "should not be NULL");
3622 }
3623 }
3624
3625 void CMTask::setup_for_region(HeapRegion* hr) {
3626 assert(hr != NULL,
3627 "claim_region() should have filtered out NULL regions");
3628 assert(!hr->continuesHumongous(),
3629 "claim_region() should have filtered out continues humongous regions");
3630
3631 if (_cm->verbose_low()) {
3632 gclog_or_tty->print_cr("[%u] setting up for region "PTR_FORMAT,
3633 _worker_id, p2i(hr));
3634 }
3635
3636 _curr_region = hr;
3637 _finger = hr->bottom();
3638 update_region_limit();
3639 }
3640
3641 void CMTask::update_region_limit() {
3642 HeapRegion* hr = _curr_region;
3643 HeapWord* bottom = hr->bottom();
3644 HeapWord* limit = hr->next_top_at_mark_start();
3645
3646 if (limit == bottom) {
3647 if (_cm->verbose_low()) {
3648 gclog_or_tty->print_cr("[%u] found an empty region "
3649 "["PTR_FORMAT", "PTR_FORMAT")",
3650 _worker_id, p2i(bottom), p2i(limit));
3651 }
3652 // The region was collected underneath our feet.
3653 // We set the finger to bottom to ensure that the bitmap
3654 // iteration that will follow this will not do anything.
3655 // (this is not a condition that holds when we set the region up,
3656 // as the region is not supposed to be empty in the first place)
3657 _finger = bottom;
3658 } else if (limit >= _region_limit) {
3659 assert(limit >= _finger, "peace of mind");
3660 } else {
3661 assert(limit < _region_limit, "only way to get here");
3662 // This can happen under some pretty unusual circumstances. An
3663 // evacuation pause empties the region underneath our feet (NTAMS
3664 // at bottom). We then do some allocation in the region (NTAMS
3665 // stays at bottom), followed by the region being used as a GC
3666 // alloc region (NTAMS will move to top() and the objects
3667 // originally below it will be grayed). All objects now marked in
3668 // the region are explicitly grayed, if below the global finger,
3669 // and so in fact we do not need to scan anything else. So, we simply
3670 // set _finger to be limit to ensure that the bitmap iteration
3671 // doesn't do anything.
3672 _finger = limit;
3673 }
3674
3675 _region_limit = limit;
3676 }
3677
3678 void CMTask::giveup_current_region() {
3679 assert(_curr_region != NULL, "invariant");
3680 if (_cm->verbose_low()) {
3681 gclog_or_tty->print_cr("[%u] giving up region "PTR_FORMAT,
3682 _worker_id, p2i(_curr_region));
3683 }
3684 clear_region_fields();
3685 }
3686
3687 void CMTask::clear_region_fields() {
3688 // Values for these three fields that indicate that we're not
3689 // holding on to a region.
3690 _curr_region = NULL; 3691 _finger = NULL; 3692 _region_limit = NULL; 3693 } 3694 3695 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3696 if (cm_oop_closure == NULL) { 3697 assert(_cm_oop_closure != NULL, "invariant"); 3698 } else { 3699 assert(_cm_oop_closure == NULL, "invariant"); 3700 } 3701 _cm_oop_closure = cm_oop_closure; 3702 } 3703 3704 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3705 guarantee(nextMarkBitMap != NULL, "invariant"); 3706 3707 if (_cm->verbose_low()) { 3708 gclog_or_tty->print_cr("[%u] resetting", _worker_id); 3709 } 3710 3711 _nextMarkBitMap = nextMarkBitMap; 3712 clear_region_fields(); 3713 3714 _calls = 0; 3715 _elapsed_time_ms = 0.0; 3716 _termination_time_ms = 0.0; 3717 _termination_start_time_ms = 0.0; 3718 3719 #if _MARKING_STATS_ 3720 _local_pushes = 0; 3721 _local_pops = 0; 3722 _local_max_size = 0; 3723 _objs_scanned = 0; 3724 _global_pushes = 0; 3725 _global_pops = 0; 3726 _global_max_size = 0; 3727 _global_transfers_to = 0; 3728 _global_transfers_from = 0; 3729 _regions_claimed = 0; 3730 _objs_found_on_bitmap = 0; 3731 _satb_buffers_processed = 0; 3732 _steal_attempts = 0; 3733 _steals = 0; 3734 _aborted = 0; 3735 _aborted_overflow = 0; 3736 _aborted_cm_aborted = 0; 3737 _aborted_yield = 0; 3738 _aborted_timed_out = 0; 3739 _aborted_satb = 0; 3740 _aborted_termination = 0; 3741 #endif // _MARKING_STATS_ 3742 } 3743 3744 bool CMTask::should_exit_termination() { 3745 regular_clock_call(); 3746 // This is called when we are in the termination protocol. We should 3747 // quit if, for some reason, this task wants to abort or the global 3748 // stack is not empty (this means that we can get work from it). 3749 return !_cm->mark_stack_empty() || has_aborted(); 3750 } 3751 3752 void CMTask::reached_limit() { 3753 assert(_words_scanned >= _words_scanned_limit || 3754 _refs_reached >= _refs_reached_limit , 3755 "shouldn't have been called otherwise"); 3756 regular_clock_call(); 3757 } 3758 3759 void CMTask::regular_clock_call() { 3760 if (has_aborted()) return; 3761 3762 // First, we need to recalculate the words scanned and refs reached 3763 // limits for the next clock call. 3764 recalculate_limits(); 3765 3766 // During the regular clock call we do the following 3767 3768 // (1) If an overflow has been flagged, then we abort. 3769 if (_cm->has_overflown()) { 3770 set_has_aborted(); 3771 return; 3772 } 3773 3774 // If we are not concurrent (i.e. we're doing remark) we don't need 3775 // to check anything else. The other steps are only needed during 3776 // the concurrent marking phase. 3777 if (!concurrent()) return; 3778 3779 // (2) If marking has been aborted for Full GC, then we also abort. 3780 if (_cm->has_aborted()) { 3781 set_has_aborted(); 3782 statsOnly( ++_aborted_cm_aborted ); 3783 return; 3784 } 3785 3786 double curr_time_ms = os::elapsedVTime() * 1000.0; 3787 3788 // (3) If marking stats are enabled, then we update the step history. 
3789 #if _MARKING_STATS_
3790 if (_words_scanned >= _words_scanned_limit) {
3791 ++_clock_due_to_scanning;
3792 }
3793 if (_refs_reached >= _refs_reached_limit) {
3794 ++_clock_due_to_marking;
3795 }
3796
3797 double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3798 _interval_start_time_ms = curr_time_ms;
3799 _all_clock_intervals_ms.add(last_interval_ms);
3800
3801 if (_cm->verbose_medium()) {
3802 gclog_or_tty->print_cr("[%u] regular clock, interval = %1.2lfms, "
3803 "scanned = %d%s, refs reached = %d%s",
3804 _worker_id, last_interval_ms,
3805 _words_scanned,
3806 (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3807 _refs_reached,
3808 (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3809 }
3810 #endif // _MARKING_STATS_
3811
3812 // (4) We check whether we should yield. If we have to, then we abort.
3813 if (SuspendibleThreadSet::should_yield()) {
3814 // We should yield. To do this we abort the task. The caller is
3815 // responsible for yielding.
3816 set_has_aborted();
3817 statsOnly( ++_aborted_yield );
3818 return;
3819 }
3820
3821 // (5) We check whether we've reached our time quota. If we have,
3822 // then we abort.
3823 double elapsed_time_ms = curr_time_ms - _start_time_ms;
3824 if (elapsed_time_ms > _time_target_ms) {
3825 set_has_aborted();
3826 _has_timed_out = true;
3827 statsOnly( ++_aborted_timed_out );
3828 return;
3829 }
3830
3831 // (6) Finally, we check whether there are enough completed SATB
3832 // buffers available for processing. If there are, we abort.
3833 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3834 if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3835 if (_cm->verbose_low()) {
3836 gclog_or_tty->print_cr("[%u] aborting to deal with pending SATB buffers",
3837 _worker_id);
3838 }
3839 // we do need to process SATB buffers, so we'll abort and restart
3840 // the marking task to do so
3841 set_has_aborted();
3842 statsOnly( ++_aborted_satb );
3843 return;
3844 }
3845 }
3846
3847 void CMTask::recalculate_limits() {
3848 _real_words_scanned_limit = _words_scanned + words_scanned_period;
3849 _words_scanned_limit = _real_words_scanned_limit;
3850
3851 _real_refs_reached_limit = _refs_reached + refs_reached_period;
3852 _refs_reached_limit = _real_refs_reached_limit;
3853 }
3854
3855 void CMTask::decrease_limits() {
3856 // This is called when we believe that we're going to do an infrequent
3857 // operation which will increase the per-byte scanned cost (i.e. move
3858 // entries to/from the global stack). It basically tries to decrease the
3859 // scanning limit so that the clock is called earlier.
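// For example (an illustrative calculation only): recalculate_limits()
// sets _words_scanned_limit to _words_scanned + words_scanned_period, so
// subtracting three quarters of the period below leaves at most about a
// quarter of the normal scanning period before the next clock call.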
3860
3861 if (_cm->verbose_medium()) {
3862 gclog_or_tty->print_cr("[%u] decreasing limits", _worker_id);
3863 }
3864
3865 _words_scanned_limit = _real_words_scanned_limit -
3866 3 * words_scanned_period / 4;
3867 _refs_reached_limit = _real_refs_reached_limit -
3868 3 * refs_reached_period / 4;
3869 }
3870
3871 void CMTask::move_entries_to_global_stack() {
3872 // local array where we'll store the entries that will be popped
3873 // from the local queue
3874 oop buffer[global_stack_transfer_size];
3875
3876 int n = 0;
3877 oop obj;
3878 while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3879 buffer[n] = obj;
3880 ++n;
3881 }
3882
3883 if (n > 0) {
3884 // we popped at least one entry from the local queue
3885
3886 statsOnly( ++_global_transfers_to; _local_pops += n );
3887
3888 if (!_cm->mark_stack_push(buffer, n)) {
3889 if (_cm->verbose_low()) {
3890 gclog_or_tty->print_cr("[%u] aborting due to global stack overflow",
3891 _worker_id);
3892 }
3893 set_has_aborted();
3894 } else {
3895 // the transfer was successful
3896
3897 if (_cm->verbose_medium()) {
3898 gclog_or_tty->print_cr("[%u] pushed %d entries to the global stack",
3899 _worker_id, n);
3900 }
3901 statsOnly( int tmp_size = _cm->mark_stack_size();
3902 if (tmp_size > _global_max_size) {
3903 _global_max_size = tmp_size;
3904 }
3905 _global_pushes += n );
3906 }
3907 }
3908
3909 // this operation was quite expensive, so decrease the limits
3910 decrease_limits();
3911 }
3912
3913 void CMTask::get_entries_from_global_stack() {
3914 // local array where we'll store the entries that will be popped
3915 // from the global stack.
3916 oop buffer[global_stack_transfer_size];
3917 int n;
3918 _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3919 assert(n <= global_stack_transfer_size,
3920 "we should not pop more than the given limit");
3921 if (n > 0) {
3922 // yes, we did actually pop at least one entry
3923
3924 statsOnly( ++_global_transfers_from; _global_pops += n );
3925 if (_cm->verbose_medium()) {
3926 gclog_or_tty->print_cr("[%u] popped %d entries from the global stack",
3927 _worker_id, n);
3928 }
3929 for (int i = 0; i < n; ++i) {
3930 bool success = _task_queue->push(buffer[i]);
3931 // We only call this when the local queue is empty or under a
3932 // given target limit. So, we do not expect this push to fail.
3933 assert(success, "invariant");
3934 }
3935
3936 statsOnly( int tmp_size = _task_queue->size();
3937 if (tmp_size > _local_max_size) {
3938 _local_max_size = tmp_size;
3939 }
3940 _local_pushes += n );
3941 }
3942
3943 // this operation was quite expensive, so decrease the limits
3944 decrease_limits();
3945 }
3946
3947 void CMTask::drain_local_queue(bool partially) {
3948 if (has_aborted()) return;
3949
3950 // Decide what the target size is, depending on whether we're going to
3951 // drain it partially (so that other tasks can steal if they run out
3952 // of things to do) or totally (at the very end).
3953 size_t target_size;
3954 if (partially) {
3955 target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3956 } else {
3957 target_size = 0;
3958 }
3959
3960 if (_task_queue->size() > target_size) {
3961 if (_cm->verbose_high()) {
3962 gclog_or_tty->print_cr("[%u] draining local queue, target size = " SIZE_FORMAT,
3963 _worker_id, target_size);
3964 }
3965
3966 oop obj;
3967 bool ret = _task_queue->pop_local(obj);
3968 while (ret) {
3969 statsOnly( ++_local_pops );
3970
3971 if (_cm->verbose_high()) {
3972 gclog_or_tty->print_cr("[%u] popped "PTR_FORMAT, _worker_id,
3973 p2i((void*) obj));
3974 }
3975
3976 assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3977 assert(!_g1h->is_on_master_free_list(
3978 _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3979
3980 scan_object(obj);
3981
3982 if (_task_queue->size() <= target_size || has_aborted()) {
3983 ret = false;
3984 } else {
3985 ret = _task_queue->pop_local(obj);
3986 }
3987 }
3988
3989 if (_cm->verbose_high()) {
3990 gclog_or_tty->print_cr("[%u] drained local queue, size = %u",
3991 _worker_id, _task_queue->size());
3992 }
3993 }
3994 }
3995
3996 void CMTask::drain_global_stack(bool partially) {
3997 if (has_aborted()) return;
3998
3999 // We have a policy to drain the local queue before we attempt to
4000 // drain the global stack.
4001 assert(partially || _task_queue->size() == 0, "invariant");
4002
4003 // Decide what the target size is, depending on whether we're going to
4004 // drain it partially (so that other tasks can steal if they run out
4005 // of things to do) or totally (at the very end). Notice that,
4006 // because we move entries from the global stack in chunks or
4007 // because another task might be doing the same, we might in fact
4008 // drop below the target. But this is not a problem.
4009 size_t target_size;
4010 if (partially) {
4011 target_size = _cm->partial_mark_stack_size_target();
4012 } else {
4013 target_size = 0;
4014 }
4015
4016 if (_cm->mark_stack_size() > target_size) {
4017 if (_cm->verbose_low()) {
4018 gclog_or_tty->print_cr("[%u] draining global_stack, target size " SIZE_FORMAT,
4019 _worker_id, target_size);
4020 }
4021
4022 while (!has_aborted() && _cm->mark_stack_size() > target_size) {
4023 get_entries_from_global_stack();
4024 drain_local_queue(partially);
4025 }
4026
4027 if (_cm->verbose_low()) {
4028 gclog_or_tty->print_cr("[%u] drained global stack, size = " SIZE_FORMAT,
4029 _worker_id, _cm->mark_stack_size());
4030 }
4031 }
4032 }
4033
4034 // SATB Queue has several assumptions on whether to call the par or
4035 // non-par versions of the methods. This is why some of the code is
4036 // replicated. We should really get rid of the single-threaded version
4037 // of the code to simplify things.
4038 void CMTask::drain_satb_buffers() {
4039 if (has_aborted()) return;
4040
4041 // We set this so that the regular clock knows that we're in the
4042 // middle of draining buffers and doesn't set the abort flag when it
4043 // notices that SATB buffers are available for draining. It'd be
4044 // very counterproductive if it did that.
:-) 4045 _draining_satb_buffers = true; 4046 4047 CMObjectClosure oc(this); 4048 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 4049 if (G1CollectedHeap::use_parallel_gc_threads()) { 4050 satb_mq_set.set_par_closure(_worker_id, &oc); 4051 } else { 4052 satb_mq_set.set_closure(&oc); 4053 } 4054 4055 // This keeps claiming and applying the closure to completed buffers 4056 // until we run out of buffers or we need to abort. 4057 if (G1CollectedHeap::use_parallel_gc_threads()) { 4058 while (!has_aborted() && 4059 satb_mq_set.par_apply_closure_to_completed_buffer(_worker_id)) { 4060 if (_cm->verbose_medium()) { 4061 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 4062 } 4063 statsOnly( ++_satb_buffers_processed ); 4064 regular_clock_call(); 4065 } 4066 } else { 4067 while (!has_aborted() && 4068 satb_mq_set.apply_closure_to_completed_buffer()) { 4069 if (_cm->verbose_medium()) { 4070 gclog_or_tty->print_cr("[%u] processed an SATB buffer", _worker_id); 4071 } 4072 statsOnly( ++_satb_buffers_processed ); 4073 regular_clock_call(); 4074 } 4075 } 4076 4077 _draining_satb_buffers = false; 4078 4079 assert(has_aborted() || 4080 concurrent() || 4081 satb_mq_set.completed_buffers_num() == 0, "invariant"); 4082 4083 if (G1CollectedHeap::use_parallel_gc_threads()) { 4084 satb_mq_set.set_par_closure(_worker_id, NULL); 4085 } else { 4086 satb_mq_set.set_closure(NULL); 4087 } 4088 4089 // again, this was a potentially expensive operation, decrease the 4090 // limits to get the regular clock call early 4091 decrease_limits(); 4092 } 4093 4094 void CMTask::print_stats() { 4095 gclog_or_tty->print_cr("Marking Stats, task = %u, calls = %d", 4096 _worker_id, _calls); 4097 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 4098 _elapsed_time_ms, _termination_time_ms); 4099 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 4100 _step_times_ms.num(), _step_times_ms.avg(), 4101 _step_times_ms.sd()); 4102 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 4103 _step_times_ms.maximum(), _step_times_ms.sum()); 4104 4105 #if _MARKING_STATS_ 4106 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 4107 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 4108 _all_clock_intervals_ms.sd()); 4109 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 4110 _all_clock_intervals_ms.maximum(), 4111 _all_clock_intervals_ms.sum()); 4112 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 4113 _clock_due_to_scanning, _clock_due_to_marking); 4114 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 4115 _objs_scanned, _objs_found_on_bitmap); 4116 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 4117 _local_pushes, _local_pops, _local_max_size); 4118 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 4119 _global_pushes, _global_pops, _global_max_size); 4120 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 4121 _global_transfers_to,_global_transfers_from); 4122 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed); 4123 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); 4124 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", 4125 _steal_attempts, _steals); 4126 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); 4127 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", 
4128 _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
4129 gclog_or_tty->print_cr("           time out: %d, SATB: %d, termination: %d",
4130 _aborted_timed_out, _aborted_satb, _aborted_termination);
4131 #endif // _MARKING_STATS_
4132 }
4133
4134 /*****************************************************************************
4135
4136 The do_marking_step(time_target_ms, ...) method is the building
4137 block of the parallel marking framework. It can be called in parallel
4138 with other invocations of do_marking_step() on different tasks
4139 (but only one per task, obviously) and concurrently with the
4140 mutator threads, or during remark; hence it eliminates the need
4141 for two versions of the code. When called during remark, it will
4142 pick up from where the task left off during the concurrent marking
4143 phase. Interestingly, tasks are also claimable during evacuation
4144 pauses, since do_marking_step() ensures that it aborts before
4145 it needs to yield.
4146
4147 The data structures that it uses to do marking work are the
4148 following:
4149
4150 (1) Marking Bitmap. If there are gray objects that appear only
4151 on the bitmap (this happens either when dealing with an overflow
4152 or when the initial marking phase has simply marked the roots
4153 and didn't push them on the stack), then tasks claim heap
4154 regions whose bitmap they then scan to find gray objects. A
4155 global finger indicates where the end of the last claimed region
4156 is. A local finger indicates how far into the region a task has
4157 scanned. The two fingers are used to determine how to gray an
4158 object (i.e. whether simply marking it is OK, as it will be
4159 visited by a task in the future, or whether it also needs to be
4160 pushed on a stack).
4161
4162 (2) Local Queue. The task's local queue, which it can access
4163 reasonably efficiently. Other tasks can steal from
4164 it when they run out of work. Throughout the marking phase, a
4165 task attempts to keep its local queue short but not totally
4166 empty, so that entries are available for stealing by other
4167 tasks. Only when there is no more work will a task totally
4168 drain its local queue.
4169
4170 (3) Global Mark Stack. This handles local queue overflow. During
4171 marking only sets of entries are moved between it and the local
4172 queues, as access to it requires a mutex and more fine-grained
4173 interaction with it might cause contention. If it
4174 overflows, then the marking phase should restart and iterate
4175 over the bitmap to identify gray objects. Throughout the marking
4176 phase, tasks attempt to keep the global mark stack at a small
4177 length but not totally empty, so that entries are available for
4178 popping by other tasks. Only when there is no more work will tasks
4179 totally drain the global mark stack.
4180
4181 (4) SATB Buffer Queue. This is where completed SATB buffers are
4182 made available. Buffers are regularly removed from this queue
4183 and scanned for roots, so that the queue doesn't get too
4184 long. During remark, all completed buffers are processed, as
4185 well as the filled-in parts of any uncompleted buffers.
4186
4187 The do_marking_step() method tries to abort when the time target
4188 has been reached. There are a few other cases when the
4189 do_marking_step() method also aborts:
4190
4191 (1) When the marking phase has been aborted (after a Full GC).
4192
4193 (2) When a global overflow (on the global stack) has been
4194 triggered.
Before the task aborts, it will actually sync up with
4195 the other tasks to ensure that all the marking data structures
4196 (local queues, stacks, fingers etc.) are re-initialized so that
4197 when do_marking_step() completes, the marking phase can
4198 immediately restart.
4199
4200 (3) When enough completed SATB buffers are available. The
4201 do_marking_step() method only tries to drain SATB buffers right
4202 at the beginning. So, if enough buffers are available, the
4203 marking step aborts and the SATB buffers are processed at
4204 the beginning of the next invocation.
4205
4206 (4) To yield. When we have to yield, we abort and yield
4207 right at the end of do_marking_step(). This saves us a lot
4208 of hassle, as by yielding we might allow a Full GC. If that
4209 happens, objects will be compacted underneath our feet, the
4210 heap might shrink, etc. We avoid having to check for this by just
4211 aborting and doing the yield right at the end.
4212
4213 From the above it follows that the do_marking_step() method should
4214 be called in a loop (or, at least, regularly) until it completes.
4215
4216 If a marking step completes without its has_aborted() flag being
4217 true, it means it has completed the current marking phase (and
4218 also all other marking tasks have done so and have all synced up).
4219
4220 A method called regular_clock_call() is invoked "regularly" (in
4221 sub-ms intervals) throughout marking. It is this clock method that
4222 checks all the abort conditions which were mentioned above and
4223 decides when the task should abort. A work-based scheme is used to
4224 trigger this clock method: when the number of object words the
4225 marking phase has scanned or the number of references the marking
4226 phase has visited reaches a given limit. Additional invocations of
4227 the clock method have been planted in a few other strategic places
4228 too. The initial reason for the clock method was to avoid calling
4229 vtime too regularly, as it is quite expensive. So, once it was in
4230 place, it was natural to piggy-back all the other conditions on it
4231 and not constantly check them throughout the code.
4232
4233 If do_termination is true then do_marking_step will enter its
4234 termination protocol.
4235
4236 The value of is_serial must be true when do_marking_step is being
4237 called serially (i.e. by the VMThread) and do_marking_step should
4238 skip any synchronization in the termination and overflow code.
4239 Examples include the serial remark code and the serial reference
4240 processing closures.
4241
4242 The value of is_serial must be false when do_marking_step is
4243 being called by any of the worker threads in a work gang.
4244 Examples include the concurrent marking code (CMMarkingTask),
4245 the MT remark code, and the MT reference processing closures.
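As a concrete illustration (a minimal sketch, not a verbatim copy of
any caller in this file; the real callers, such as the concurrent
marking task, also do vtime accounting and yield handling around each
step), a parallel caller is expected to drive it roughly like this:

  CMTask* the_task = _cm->task(worker_id);
  do {
    double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
    the_task->do_marking_step(mark_step_duration_ms,
                              true  /* do_termination */,
                              false /* is_serial */);
    // yield to the suspendible thread set here, if requested
  } while (!_cm->has_aborted() && the_task->has_aborted());

That is, the step is re-invoked for as long as this particular task
aborted but marking as a whole has not.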
4246
4247 *****************************************************************************/
4248
4249 void CMTask::do_marking_step(double time_target_ms,
4250 bool do_termination,
4251 bool is_serial) {
4252 assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4253 assert(concurrent() == _cm->concurrent(), "they should be the same");
4254
4255 G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4256 assert(_task_queues != NULL, "invariant");
4257 assert(_task_queue != NULL, "invariant");
4258 assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
4259
4260 assert(!_claimed,
4261 "only one thread should claim this task at any one time");
4262
4263 // OK, this doesn't safeguard against all possible scenarios, as it is
4264 // possible for two threads to set the _claimed flag at the same
4265 // time. But it is only for debugging purposes anyway and it will
4266 // catch most problems.
4267 _claimed = true;
4268
4269 _start_time_ms = os::elapsedVTime() * 1000.0;
4270 statsOnly( _interval_start_time_ms = _start_time_ms );
4271
4272 // If do_stealing is true then do_marking_step will attempt to
4273 // steal work from the other CMTasks. It only makes sense to
4274 // enable stealing when the termination protocol is enabled
4275 // and do_marking_step() is not being called serially.
4276 bool do_stealing = do_termination && !is_serial;
4277
4278 double diff_prediction_ms =
4279 g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4280 _time_target_ms = time_target_ms - diff_prediction_ms;
4281
4282 // set up the variables that are used in the work-based scheme to
4283 // call the regular clock method
4284 _words_scanned = 0;
4285 _refs_reached = 0;
4286 recalculate_limits();
4287
4288 // clear all flags
4289 clear_has_aborted();
4290 _has_timed_out = false;
4291 _draining_satb_buffers = false;
4292
4293 ++_calls;
4294
4295 if (_cm->verbose_low()) {
4296 gclog_or_tty->print_cr("[%u] >>>>>>>>>> START, call = %d, "
4297 "target = %1.2lfms >>>>>>>>>>",
4298 _worker_id, _calls, _time_target_ms);
4299 }
4300
4301 // Set up the bitmap and oop closures. Anything that uses them is
4302 // eventually called from this method, so it is OK to allocate these
4303 // statically.
4304 CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4305 G1CMOopClosure cm_oop_closure(_g1h, _cm, this);
4306 set_cm_oop_closure(&cm_oop_closure);
4307
4308 if (_cm->has_overflown()) {
4309 // This can happen if the mark stack overflows during a GC pause
4310 // and this task, after a yield point, restarts. We have to abort
4311 // as we need to get into the overflow protocol which happens
4312 // right at the end of this task.
4313 set_has_aborted();
4314 }
4315
4316 // First drain any available SATB buffers. After this, we will not
4317 // look at SATB buffers before the next invocation of this method.
4318 // If enough completed SATB buffers are queued up, the regular clock
4319 // will abort this task so that it restarts.
4320 drain_satb_buffers();
4321 // ...then partially drain the local queue and the global stack
4322 drain_local_queue(true);
4323 drain_global_stack(true);
4324
4325 do {
4326 if (!has_aborted() && _curr_region != NULL) {
4327 // This means that we're already holding on to a region.
4328 assert(_finger != NULL, "if region is not NULL, then the finger "
4329 "should not be NULL either");
4330
4331 // We might have restarted this task after an evacuation pause
4332 // which might have evacuated the region we're holding on to
4333 // underneath our feet.
Let's read its limit again to make sure 4334 // that we do not iterate over a region of the heap that 4335 // contains garbage (update_region_limit() will also move 4336 // _finger to the start of the region if it is found empty). 4337 update_region_limit(); 4338 // We will start from _finger not from the start of the region, 4339 // as we might be restarting this task after aborting half-way 4340 // through scanning this region. In this case, _finger points to 4341 // the address where we last found a marked object. If this is a 4342 // fresh region, _finger points to start(). 4343 MemRegion mr = MemRegion(_finger, _region_limit); 4344 4345 if (_cm->verbose_low()) { 4346 gclog_or_tty->print_cr("[%u] we're scanning part " 4347 "["PTR_FORMAT", "PTR_FORMAT") " 4348 "of region "HR_FORMAT, 4349 _worker_id, p2i(_finger), p2i(_region_limit), 4350 HR_FORMAT_PARAMS(_curr_region)); 4351 } 4352 4353 assert(!_curr_region->isHumongous() || mr.start() == _curr_region->bottom(), 4354 "humongous regions should go around loop once only"); 4355 4356 // Some special cases: 4357 // If the memory region is empty, we can just give up the region. 4358 // If the current region is humongous then we only need to check 4359 // the bitmap for the bit associated with the start of the object, 4360 // scan the object if it's live, and give up the region. 4361 // Otherwise, let's iterate over the bitmap of the part of the region 4362 // that is left. 4363 // If the iteration is successful, give up the region. 4364 if (mr.is_empty()) { 4365 giveup_current_region(); 4366 regular_clock_call(); 4367 } else if (_curr_region->isHumongous() && mr.start() == _curr_region->bottom()) { 4368 if (_nextMarkBitMap->isMarked(mr.start())) { 4369 // The object is marked - apply the closure 4370 BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start()); 4371 bitmap_closure.do_bit(offset); 4372 } 4373 // Even if this task aborted while scanning the humongous object 4374 // we can (and should) give up the current region. 4375 giveup_current_region(); 4376 regular_clock_call(); 4377 } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) { 4378 giveup_current_region(); 4379 regular_clock_call(); 4380 } else { 4381 assert(has_aborted(), "currently the only way to do so"); 4382 // The only way to abort the bitmap iteration is to return 4383 // false from the do_bit() method. However, inside the 4384 // do_bit() method we move the _finger to point to the 4385 // object currently being looked at. So, if we bail out, we 4386 // have definitely set _finger to something non-null. 4387 assert(_finger != NULL, "invariant"); 4388 4389 // Region iteration was actually aborted. So now _finger 4390 // points to the address of the object we last scanned. If we 4391 // leave it there, when we restart this task, we will rescan 4392 // the object. It is easy to avoid this. We move the finger by 4393 // enough to point to the next possible object header (the 4394 // bitmap knows by how much we need to move it as it knows its 4395 // granularity). 4396 assert(_finger < _region_limit, "invariant"); 4397 HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger); 4398 // Check if bitmap iteration was aborted while scanning the last object 4399 if (new_finger >= _region_limit) { 4400 giveup_current_region(); 4401 } else { 4402 move_finger_to(new_finger); 4403 } 4404 } 4405 } 4406 // At this point we have either completed iterating over the 4407 // region we were holding on to, or we have aborted. 
4408
4409 // We then partially drain the local queue and the global stack.
4410 // (Do we really need this?)
4411 drain_local_queue(true);
4412 drain_global_stack(true);
4413
4414 // Read the note on the claim_region() method on why it might
4415 // return NULL with potentially more regions available for
4416 // claiming and why we have to check out_of_regions() to determine
4417 // whether we're done or not.
4418 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4419 // We are going to try to claim a new region. We should have
4420 // given up on the previous one.
4421 // Separated the asserts so that we know which one fires.
4422 assert(_curr_region == NULL, "invariant");
4423 assert(_finger == NULL, "invariant");
4424 assert(_region_limit == NULL, "invariant");
4425 if (_cm->verbose_low()) {
4426 gclog_or_tty->print_cr("[%u] trying to claim a new region", _worker_id);
4427 }
4428 HeapRegion* claimed_region = _cm->claim_region(_worker_id);
4429 if (claimed_region != NULL) {
4430 // Yes, we managed to claim one
4431 statsOnly( ++_regions_claimed );
4432
4433 if (_cm->verbose_low()) {
4434 gclog_or_tty->print_cr("[%u] we successfully claimed "
4435 "region "PTR_FORMAT,
4436 _worker_id, p2i(claimed_region));
4437 }
4438
4439 setup_for_region(claimed_region);
4440 assert(_curr_region == claimed_region, "invariant");
4441 }
4442 // It is important to call the regular clock here. It might take
4443 // a while to claim a region if, for example, we hit a large
4444 // block of empty regions. So we need to call the regular clock
4445 // method once round the loop to make sure it's called
4446 // frequently enough.
4447 regular_clock_call();
4448 }
4449
4450 if (!has_aborted() && _curr_region == NULL) {
4451 assert(_cm->out_of_regions(),
4452 "at this point we should be out of regions");
4453 }
4454 } while (_curr_region != NULL && !has_aborted());
4455
4456 if (!has_aborted()) {
4457 // We cannot check whether the global stack is empty, since other
4458 // tasks might be pushing objects to it concurrently.
4459 assert(_cm->out_of_regions(),
4460 "at this point we should be out of regions");
4461
4462 if (_cm->verbose_low()) {
4463 gclog_or_tty->print_cr("[%u] all regions claimed", _worker_id);
4464 }
4465
4466 // Try to reduce the number of available SATB buffers so that
4467 // remark has less work to do.
4468 drain_satb_buffers();
4469 }
4470
4471 // Since we've done everything else, we can now totally drain the
4472 // local queue and global stack.
4473 drain_local_queue(false);
4474 drain_global_stack(false);
4475
4476 // Attempt at work stealing from other tasks' queues.
4477 if (do_stealing && !has_aborted()) {
4478 // We have not aborted. This means that we have finished all that
4479 // we could. Let's try to do some stealing...
4480
4481 // We cannot check whether the global stack is empty, since other
4482 // tasks might be pushing objects to it concurrently.
4483 assert(_cm->out_of_regions() && _task_queue->size() == 0, 4484 "only way to reach here"); 4485 4486 if (_cm->verbose_low()) { 4487 gclog_or_tty->print_cr("[%u] starting to steal", _worker_id); 4488 } 4489 4490 while (!has_aborted()) { 4491 oop obj; 4492 statsOnly( ++_steal_attempts ); 4493 4494 if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) { 4495 if (_cm->verbose_medium()) { 4496 gclog_or_tty->print_cr("[%u] stolen "PTR_FORMAT" successfully", 4497 _worker_id, p2i((void*) obj)); 4498 } 4499 4500 statsOnly( ++_steals ); 4501 4502 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), 4503 "any stolen object should be marked"); 4504 scan_object(obj); 4505 4506 // And since we're towards the end, let's totally drain the 4507 // local queue and global stack. 4508 drain_local_queue(false); 4509 drain_global_stack(false); 4510 } else { 4511 break; 4512 } 4513 } 4514 } 4515 4516 // If we are about to wrap up and go into termination, check if we 4517 // should raise the overflow flag. 4518 if (do_termination && !has_aborted()) { 4519 if (_cm->force_overflow()->should_force()) { 4520 _cm->set_has_overflown(); 4521 regular_clock_call(); 4522 } 4523 } 4524 4525 // We still haven't aborted. Now, let's try to get into the 4526 // termination protocol. 4527 if (do_termination && !has_aborted()) { 4528 // We cannot check whether the global stack is empty, since other 4529 // tasks might be concurrently pushing objects on it. 4530 // Separated the asserts so that we know which one fires. 4531 assert(_cm->out_of_regions(), "only way to reach here"); 4532 assert(_task_queue->size() == 0, "only way to reach here"); 4533 4534 if (_cm->verbose_low()) { 4535 gclog_or_tty->print_cr("[%u] starting termination protocol", _worker_id); 4536 } 4537 4538 _termination_start_time_ms = os::elapsedVTime() * 1000.0; 4539 4540 // The CMTask class also extends the TerminatorTerminator class, 4541 // hence its should_exit_termination() method will also decide 4542 // whether to exit the termination protocol or not. 4543 bool finished = (is_serial || 4544 _cm->terminator()->offer_termination(this)); 4545 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 4546 _termination_time_ms += 4547 termination_end_time_ms - _termination_start_time_ms; 4548 4549 if (finished) { 4550 // We're all done. 4551 4552 if (_worker_id == 0) { 4553 // let's allow task 0 to do this 4554 if (concurrent()) { 4555 assert(_cm->concurrent_marking_in_progress(), "invariant"); 4556 // we need to set this to false before the next 4557 // safepoint. This way we ensure that the marking phase 4558 // doesn't observe any more heap expansions. 4559 _cm->clear_concurrent_marking_in_progress(); 4560 } 4561 } 4562 4563 // We can now guarantee that the global stack is empty, since 4564 // all other tasks have finished. We separated the guarantees so 4565 // that, if a condition is false, we can immediately find out 4566 // which one. 4567 guarantee(_cm->out_of_regions(), "only way to reach here"); 4568 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 4569 guarantee(_task_queue->size() == 0, "only way to reach here"); 4570 guarantee(!_cm->has_overflown(), "only way to reach here"); 4571 guarantee(!_cm->mark_stack_overflow(), "only way to reach here"); 4572 4573 if (_cm->verbose_low()) { 4574 gclog_or_tty->print_cr("[%u] all tasks terminated", _worker_id); 4575 } 4576 } else { 4577 // Apparently there's more work to do. Let's abort this task. It 4578 // will restart it and we can hopefully find more things to do. 
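// (The restart is done by our caller; e.g. the concurrent marking
// task's work() method loops and re-invokes do_marking_step() while
// this task has aborted but marking as a whole has not.)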
4579 4580 if (_cm->verbose_low()) { 4581 gclog_or_tty->print_cr("[%u] apparently there is more work to do", 4582 _worker_id); 4583 } 4584 4585 set_has_aborted(); 4586 statsOnly( ++_aborted_termination ); 4587 } 4588 } 4589 4590 // Mainly for debugging purposes to make sure that a pointer to the 4591 // closure which was statically allocated in this frame doesn't 4592 // escape it by accident. 4593 set_cm_oop_closure(NULL); 4594 double end_time_ms = os::elapsedVTime() * 1000.0; 4595 double elapsed_time_ms = end_time_ms - _start_time_ms; 4596 // Update the step history. 4597 _step_times_ms.add(elapsed_time_ms); 4598 4599 if (has_aborted()) { 4600 // The task was aborted for some reason. 4601 4602 statsOnly( ++_aborted ); 4603 4604 if (_has_timed_out) { 4605 double diff_ms = elapsed_time_ms - _time_target_ms; 4606 // Keep statistics of how well we did with respect to hitting 4607 // our target only if we actually timed out (if we aborted for 4608 // other reasons, then the results might get skewed). 4609 _marking_step_diffs_ms.add(diff_ms); 4610 } 4611 4612 if (_cm->has_overflown()) { 4613 // This is the interesting one. We aborted because a global 4614 // overflow was raised. This means we have to restart the 4615 // marking phase and start iterating over regions. However, in 4616 // order to do this we have to make sure that all tasks stop 4617 // what they are doing and re-initialize in a safe manner. We 4618 // will achieve this with the use of two barrier sync points. 4619 4620 if (_cm->verbose_low()) { 4621 gclog_or_tty->print_cr("[%u] detected overflow", _worker_id); 4622 } 4623 4624 if (!is_serial) { 4625 // We only need to enter the sync barrier if being called 4626 // from a parallel context 4627 _cm->enter_first_sync_barrier(_worker_id); 4628 4629 // When we exit this sync barrier we know that all tasks have 4630 // stopped doing marking work. So, it's now safe to 4631 // re-initialize our data structures. At the end of this method, 4632 // task 0 will clear the global data structures. 4633 } 4634 4635 statsOnly( ++_aborted_overflow ); 4636 4637 // We clear the local state of this task... 4638 clear_region_fields(); 4639 4640 if (!is_serial) { 4641 // ...and enter the second barrier. 4642 _cm->enter_second_sync_barrier(_worker_id); 4643 } 4644 // At this point, if we're during the concurrent phase of 4645 // marking, everything has been re-initialized and we're 4646 // ready to restart. 
4647 } 4648 4649 if (_cm->verbose_low()) { 4650 gclog_or_tty->print_cr("[%u] <<<<<<<<<< ABORTING, target = %1.2lfms, " 4651 "elapsed = %1.2lfms <<<<<<<<<<", 4652 _worker_id, _time_target_ms, elapsed_time_ms); 4653 if (_cm->has_aborted()) { 4654 gclog_or_tty->print_cr("[%u] ========== MARKING ABORTED ==========", 4655 _worker_id); 4656 } 4657 } 4658 } else { 4659 if (_cm->verbose_low()) { 4660 gclog_or_tty->print_cr("[%u] <<<<<<<<<< FINISHED, target = %1.2lfms, " 4661 "elapsed = %1.2lfms <<<<<<<<<<", 4662 _worker_id, _time_target_ms, elapsed_time_ms); 4663 } 4664 } 4665 4666 _claimed = false; 4667 } 4668 4669 CMTask::CMTask(uint worker_id, 4670 ConcurrentMark* cm, 4671 size_t* marked_bytes, 4672 BitMap* card_bm, 4673 CMTaskQueue* task_queue, 4674 CMTaskQueueSet* task_queues) 4675 : _g1h(G1CollectedHeap::heap()), 4676 _worker_id(worker_id), _cm(cm), 4677 _claimed(false), 4678 _nextMarkBitMap(NULL), _hash_seed(17), 4679 _task_queue(task_queue), 4680 _task_queues(task_queues), 4681 _cm_oop_closure(NULL), 4682 _marked_bytes_array(marked_bytes), 4683 _card_bm(card_bm) { 4684 guarantee(task_queue != NULL, "invariant"); 4685 guarantee(task_queues != NULL, "invariant"); 4686 4687 statsOnly( _clock_due_to_scanning = 0; 4688 _clock_due_to_marking = 0 ); 4689 4690 _marking_step_diffs_ms.add(0.5); 4691 } 4692 4693 // These are formatting macros that are used below to ensure 4694 // consistent formatting. The *_H_* versions are used to format the 4695 // header for a particular value and they should be kept consistent 4696 // with the corresponding macro. Also note that most of the macros add 4697 // the necessary white space (as a prefix) which makes them a bit 4698 // easier to compose. 4699 4700 // All the output lines are prefixed with this string to be able to 4701 // identify them easily in a large log file. 4702 #define G1PPRL_LINE_PREFIX "###" 4703 4704 #define G1PPRL_ADDR_BASE_FORMAT " "PTR_FORMAT"-"PTR_FORMAT 4705 #ifdef _LP64 4706 #define G1PPRL_ADDR_BASE_H_FORMAT " %37s" 4707 #else // _LP64 4708 #define G1PPRL_ADDR_BASE_H_FORMAT " %21s" 4709 #endif // _LP64 4710 4711 // For per-region info 4712 #define G1PPRL_TYPE_FORMAT " %-4s" 4713 #define G1PPRL_TYPE_H_FORMAT " %4s" 4714 #define G1PPRL_BYTE_FORMAT " "SIZE_FORMAT_W(9) 4715 #define G1PPRL_BYTE_H_FORMAT " %9s" 4716 #define G1PPRL_DOUBLE_FORMAT " %14.1f" 4717 #define G1PPRL_DOUBLE_H_FORMAT " %14s" 4718 4719 // For summary info 4720 #define G1PPRL_SUM_ADDR_FORMAT(tag) " "tag":"G1PPRL_ADDR_BASE_FORMAT 4721 #define G1PPRL_SUM_BYTE_FORMAT(tag) " "tag": "SIZE_FORMAT 4722 #define G1PPRL_SUM_MB_FORMAT(tag) " "tag": %1.2f MB" 4723 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%" 4724 4725 G1PrintRegionLivenessInfoClosure:: 4726 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name) 4727 : _out(out), 4728 _total_used_bytes(0), _total_capacity_bytes(0), 4729 _total_prev_live_bytes(0), _total_next_live_bytes(0), 4730 _hum_used_bytes(0), _hum_capacity_bytes(0), 4731 _hum_prev_live_bytes(0), _hum_next_live_bytes(0), 4732 _total_remset_bytes(0), _total_strong_code_roots_bytes(0) { 4733 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 4734 MemRegion g1_reserved = g1h->g1_reserved(); 4735 double now = os::elapsedTime(); 4736 4737 // Print the header of the output. 
4738 _out->cr();
4739 _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4740 _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4741 G1PPRL_SUM_ADDR_FORMAT("reserved")
4742 G1PPRL_SUM_BYTE_FORMAT("region-size"),
4743 p2i(g1_reserved.start()), p2i(g1_reserved.end()),
4744 HeapRegion::GrainBytes);
4745 _out->print_cr(G1PPRL_LINE_PREFIX);
4746 _out->print_cr(G1PPRL_LINE_PREFIX
4747 G1PPRL_TYPE_H_FORMAT
4748 G1PPRL_ADDR_BASE_H_FORMAT
4749 G1PPRL_BYTE_H_FORMAT
4750 G1PPRL_BYTE_H_FORMAT
4751 G1PPRL_BYTE_H_FORMAT
4752 G1PPRL_DOUBLE_H_FORMAT
4753 G1PPRL_BYTE_H_FORMAT
4754 G1PPRL_BYTE_H_FORMAT,
4755 "type", "address-range",
4756 "used", "prev-live", "next-live", "gc-eff",
4757 "remset", "code-roots");
4758 _out->print_cr(G1PPRL_LINE_PREFIX
4759 G1PPRL_TYPE_H_FORMAT
4760 G1PPRL_ADDR_BASE_H_FORMAT
4761 G1PPRL_BYTE_H_FORMAT
4762 G1PPRL_BYTE_H_FORMAT
4763 G1PPRL_BYTE_H_FORMAT
4764 G1PPRL_DOUBLE_H_FORMAT
4765 G1PPRL_BYTE_H_FORMAT
4766 G1PPRL_BYTE_H_FORMAT,
4767 "", "",
4768 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
4769 "(bytes)", "(bytes)");
4770 }
4771
4772 // It takes as a parameter a reference to one of the _hum_* fields, deduces
4773 // the corresponding value for a region in a humongous region
4774 // series (either the region size, or what's left if the _hum_* field
4775 // is < the region size), and updates the _hum_* field accordingly.
// (For example, with 1MB regions and a field initially holding 2.5MB,
// three successive calls return 1MB, 1MB and 0.5MB.)
4776 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4777 size_t bytes = 0;
4778 // The > 0 check is to deal with the prev and next live bytes which
4779 // could be 0.
4780 if (*hum_bytes > 0) {
4781 bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4782 *hum_bytes -= bytes;
4783 }
4784 return bytes;
4785 }
4786
4787 // It deduces the values for a region in a humongous region series
4788 // from the _hum_* fields and updates those accordingly. It assumes
4789 // that the _hum_* fields have already been set up from the "starts
4790 // humongous" region and we visit the regions in address order.
4791 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4792 size_t* capacity_bytes,
4793 size_t* prev_live_bytes,
4794 size_t* next_live_bytes) {
4795 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4796 *used_bytes = get_hum_bytes(&_hum_used_bytes);
4797 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes);
4798 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4799 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4800 }
4801
4802 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4803 const char* type = "";
4804 HeapWord* bottom = r->bottom();
4805 HeapWord* end = r->end();
4806 size_t capacity_bytes = r->capacity();
4807 size_t used_bytes = r->used();
4808 size_t prev_live_bytes = r->live_bytes();
4809 size_t next_live_bytes = r->next_live_bytes();
4810 double gc_eff = r->gc_efficiency();
4811 size_t remset_bytes = r->rem_set()->mem_size();
4812 size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
4813
4814 if (r->used() == 0) {
4815 type = "FREE";
4816 } else if (r->is_survivor()) {
4817 type = "SURV";
4818 } else if (r->is_young()) {
4819 type = "EDEN";
4820 } else if (r->startsHumongous()) {
4821 type = "HUMS";
4822
4823 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4824 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4825 "they should have been zeroed after the last time we used them");
4826 // Set up the _hum_* fields.
4827 _hum_capacity_bytes = capacity_bytes; 4828 _hum_used_bytes = used_bytes; 4829 _hum_prev_live_bytes = prev_live_bytes; 4830 _hum_next_live_bytes = next_live_bytes; 4831 get_hum_bytes(&used_bytes, &capacity_bytes, 4832 &prev_live_bytes, &next_live_bytes); 4833 end = bottom + HeapRegion::GrainWords; 4834 } else if (r->continuesHumongous()) { 4835 type = "HUMC"; 4836 get_hum_bytes(&used_bytes, &capacity_bytes, 4837 &prev_live_bytes, &next_live_bytes); 4838 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 4839 } else { 4840 type = "OLD"; 4841 } 4842 4843 _total_used_bytes += used_bytes; 4844 _total_capacity_bytes += capacity_bytes; 4845 _total_prev_live_bytes += prev_live_bytes; 4846 _total_next_live_bytes += next_live_bytes; 4847 _total_remset_bytes += remset_bytes; 4848 _total_strong_code_roots_bytes += strong_code_roots_bytes; 4849 4850 // Print a line for this particular region. 4851 _out->print_cr(G1PPRL_LINE_PREFIX 4852 G1PPRL_TYPE_FORMAT 4853 G1PPRL_ADDR_BASE_FORMAT 4854 G1PPRL_BYTE_FORMAT 4855 G1PPRL_BYTE_FORMAT 4856 G1PPRL_BYTE_FORMAT 4857 G1PPRL_DOUBLE_FORMAT 4858 G1PPRL_BYTE_FORMAT 4859 G1PPRL_BYTE_FORMAT, 4860 type, p2i(bottom), p2i(end), 4861 used_bytes, prev_live_bytes, next_live_bytes, gc_eff, 4862 remset_bytes, strong_code_roots_bytes); 4863 4864 return false; 4865 } 4866 4867 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 4868 // add static memory usages to remembered set sizes 4869 _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size(); 4870 // Print the footer of the output. 4871 _out->print_cr(G1PPRL_LINE_PREFIX); 4872 _out->print_cr(G1PPRL_LINE_PREFIX 4873 " SUMMARY" 4874 G1PPRL_SUM_MB_FORMAT("capacity") 4875 G1PPRL_SUM_MB_PERC_FORMAT("used") 4876 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 4877 G1PPRL_SUM_MB_PERC_FORMAT("next-live") 4878 G1PPRL_SUM_MB_FORMAT("remset") 4879 G1PPRL_SUM_MB_FORMAT("code-roots"), 4880 bytes_to_mb(_total_capacity_bytes), 4881 bytes_to_mb(_total_used_bytes), 4882 perc(_total_used_bytes, _total_capacity_bytes), 4883 bytes_to_mb(_total_prev_live_bytes), 4884 perc(_total_prev_live_bytes, _total_capacity_bytes), 4885 bytes_to_mb(_total_next_live_bytes), 4886 perc(_total_next_live_bytes, _total_capacity_bytes), 4887 bytes_to_mb(_total_remset_bytes), 4888 bytes_to_mb(_total_strong_code_roots_bytes)); 4889 _out->cr(); 4890 }
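// A typical way to drive G1PrintRegionLivenessInfoClosure (a minimal
// sketch for illustration; the actual call sites live elsewhere, e.g.
// in the cleanup pause code): the constructor prints the header,
// iterating over the heap prints one line per region, and the
// destructor prints the summary footer.
//
//   G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);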