1 /*
   2  * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/metadataOnStackMark.hpp"
  27 #include "classfile/symbolTable.hpp"
  28 #include "code/codeCache.hpp"
  29 #include "gc/g1/concurrentMarkThread.inline.hpp"
  30 #include "gc/g1/g1CollectedHeap.inline.hpp"
  31 #include "gc/g1/g1CollectorPolicy.hpp"
  32 #include "gc/g1/g1CollectorState.hpp"
  33 #include "gc/g1/g1ConcurrentMark.inline.hpp"
  34 #include "gc/g1/g1HeapVerifier.hpp"
  35 #include "gc/g1/g1OopClosures.inline.hpp"
  36 #include "gc/g1/g1StringDedup.hpp"
  37 #include "gc/g1/heapRegion.inline.hpp"
  38 #include "gc/g1/heapRegionRemSet.hpp"
  39 #include "gc/g1/heapRegionSet.inline.hpp"
  40 #include "gc/g1/suspendibleThreadSet.hpp"
  41 #include "gc/shared/gcId.hpp"
  42 #include "gc/shared/gcTimer.hpp"
  43 #include "gc/shared/gcTrace.hpp"
  44 #include "gc/shared/gcTraceTime.inline.hpp"
  45 #include "gc/shared/genOopClosures.inline.hpp"
  46 #include "gc/shared/referencePolicy.hpp"
  47 #include "gc/shared/strongRootsScope.hpp"
  48 #include "gc/shared/taskqueue.inline.hpp"
  49 #include "gc/shared/vmGCOperations.hpp"
  50 #include "logging/log.hpp"
  51 #include "logging/logTag.hpp"
  52 #include "memory/allocation.hpp"
  53 #include "memory/resourceArea.hpp"
  54 #include "oops/oop.inline.hpp"
  55 #include "runtime/atomic.inline.hpp"
  56 #include "runtime/handles.inline.hpp"
  57 #include "runtime/java.hpp"
  58 #include "runtime/prefetch.inline.hpp"
  59 #include "services/memTracker.hpp"
  60 
  61 // Concurrent marking bit map wrapper
  62 
  63 G1CMBitMapRO::G1CMBitMapRO(int shifter) :
  64   _bm(),
  65   _shifter(shifter) {
  66   _bmStartWord = 0;
  67   _bmWordSize = 0;
  68 }
  69 
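     // Returns the address of the first marked word in [addr, limit), or
     // limit if no bit is set in that range. The addr argument is first
     // rounded up to a possible object boundary.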
  70 HeapWord* G1CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
  71                                                  const HeapWord* limit) const {
  72   // First we must round addr *up* to a possible object boundary.
  73   addr = (HeapWord*)align_size_up((intptr_t)addr,
  74                                   HeapWordSize << _shifter);
  75   size_t addrOffset = heapWordToOffset(addr);
  76   assert(limit != NULL, "limit must not be NULL");
  77   size_t limitOffset = heapWordToOffset(limit);
  78   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  79   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  80   assert(nextAddr >= addr, "get_next_one postcondition");
  81   assert(nextAddr == limit || isMarked(nextAddr),
  82          "get_next_one postcondition");
  83   return nextAddr;
  84 }
  85 
  86 #ifndef PRODUCT
  87 bool G1CMBitMapRO::covers(MemRegion heap_rs) const {
  88   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  89   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
  90          "size inconsistency");
  91   return _bmStartWord == (HeapWord*)(heap_rs.start()) &&
  92          _bmWordSize  == heap_rs.word_size();
  93 }
  94 #endif
  95 
  96 void G1CMBitMapRO::print_on_error(outputStream* st, const char* prefix) const {
  97   _bm.print_on_error(st, prefix);
  98 }
  99 
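     // Returns the number of bytes of backing storage needed for a mark
     // bitmap covering a heap of the given size: one bitmap byte per
     // mark_distance() bytes of heap, rounded up to the reservation
     // granularity.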
 100 size_t G1CMBitMap::compute_size(size_t heap_size) {
 101   return ReservedSpace::allocation_align_size_up(heap_size / mark_distance());
 102 }
 103 
 104 size_t G1CMBitMap::mark_distance() {
 105   return MinObjAlignmentInBytes * BitsPerByte;
 106 }
 107 
 108 void G1CMBitMap::initialize(MemRegion heap, G1RegionToSpaceMapper* storage) {
 109   _bmStartWord = heap.start();
 110   _bmWordSize = heap.word_size();
 111 
 112   _bm.set_map((BitMap::bm_word_t*) storage->reserved().start());
 113   _bm.set_size(_bmWordSize >> _shifter);
 114 
 115   storage->set_mapping_changed_listener(&_listener);
 116 }
 117 
 118 void G1CMBitMapMappingChangedListener::on_commit(uint start_region, size_t num_regions, bool zero_filled) {
 119   if (zero_filled) {
 120     return;
 121   }
 122   // We need to clear the bitmap on commit, removing any existing information.
 123   MemRegion mr(G1CollectedHeap::heap()->bottom_addr_for_region(start_region), num_regions * HeapRegion::GrainWords);
 124   _bm->clear_range(mr);
 125 }
 126 
 127 void G1CMBitMap::clear_range(MemRegion mr) {
 128   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 129   assert(!mr.is_empty(), "unexpected empty region");
 130   // convert address range into offset range
 131   _bm.at_put_range(heapWordToOffset(mr.start()),
 132                    heapWordToOffset(mr.end()), false);
 133 }
 134 
 135 G1CMMarkStack::G1CMMarkStack(G1ConcurrentMark* cm) :
 136   _base(NULL), _cm(cm)
 137 {}
 138 
 139 bool G1CMMarkStack::allocate(size_t capacity) {
 140   // allocate a stack of the requisite depth
 141   ReservedSpace rs(ReservedSpace::allocation_align_size_up(capacity * sizeof(oop)));
 142   if (!rs.is_reserved()) {
 143     log_warning(gc)("ConcurrentMark MarkStack allocation failure");
 144     return false;
 145   }
 146   MemTracker::record_virtual_memory_type((address)rs.base(), mtGC);
 147   if (!_virtual_space.initialize(rs, rs.size())) {
 148     log_warning(gc)("ConcurrentMark MarkStack backing store failure");
 149     // Release the virtual memory reserved for the marking stack
 150     rs.release();
 151     return false;
 152   }
 153   assert(_virtual_space.committed_size() == rs.size(),
 154          "Didn't reserve backing store for all of G1ConcurrentMark stack?");
 155   _base = (oop*) _virtual_space.low();
 156   setEmpty();
 157   _capacity = (jint) capacity;
 158   _saved_index = -1;
 159   _should_expand = false;
 160   return true;
 161 }
 162 
 163 void G1CMMarkStack::expand() {
 164   // Called during remark if we've overflowed the marking stack during marking.
 165   assert(isEmpty(), "stack should have been emptied while handling overflow");
 166   assert(_capacity <= (jint) MarkStackSizeMax, "stack bigger than permitted");
 167   // Clear expansion flag
 168   _should_expand = false;
 169   if (_capacity == (jint) MarkStackSizeMax) {
 170     log_trace(gc)("(benign) Can't expand marking stack capacity, at max size limit");
 171     return;
 172   }
 173   // Double capacity if possible
 174   jint new_capacity = MIN2(_capacity*2, (jint) MarkStackSizeMax);
 175   // Do not give up the existing stack until we have managed to
 176   // reserve the doubled capacity that we want.
 177   ReservedSpace rs(ReservedSpace::allocation_align_size_up(new_capacity *
 178                                                            sizeof(oop)));
 179   if (rs.is_reserved()) {
 180     // Release the backing store associated with old stack
 181     _virtual_space.release();
 182     // Reinitialize virtual space for new stack
 183     if (!_virtual_space.initialize(rs, rs.size())) {
 184       fatal("Not enough swap for expanded marking stack capacity");
 185     }
 186     _base = (oop*)(_virtual_space.low());
 187     _index = 0;
 188     _capacity = new_capacity;
 189   } else {
 190     // Failed to double the capacity; continue with the existing stack.
 191     log_trace(gc)("(benign) Failed to expand marking stack capacity from " SIZE_FORMAT "K to " SIZE_FORMAT "K",
 192                   _capacity / K, new_capacity / K);
 193   }
 194 }
 195 
 196 void G1CMMarkStack::set_should_expand() {
 197   // If we're resetting the marking state because of a
 198   // marking stack overflow, record that we should, if
 199   // possible, expand the stack.
 200   _should_expand = _cm->has_overflown();
 201 }
 202 
 203 G1CMMarkStack::~G1CMMarkStack() {
 204   if (_base != NULL) {
 205     _base = NULL;
 206     _virtual_space.release();
 207   }
 208 }
 209 
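     // Pushes the n entries in ptr_arr onto the global mark stack under
     // ParGCRareEvent_lock. If there is not enough room for all entries,
     // nothing is pushed and the _overflow flag is set instead.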
 210 void G1CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 211   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 212   jint start = _index;
 213   jint next_index = start + n;
 214   if (next_index > _capacity) {
 215     _overflow = true;
 216     return;
 217   }
 218   // Otherwise.
 219   _index = next_index;
 220   for (int i = 0; i < n; i++) {
 221     int ind = start + i;
 222     assert(ind < _capacity, "By overflow test above.");
 223     _base[ind] = ptr_arr[i];
 224   }
 225 }
 226 
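     // Pops up to max entries from the global mark stack into ptr_arr under
     // ParGCRareEvent_lock. Returns false (with *n set to 0) if the stack is
     // empty, otherwise returns true and stores the number of entries popped
     // in *n.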
 227 bool G1CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 228   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 229   jint index = _index;
 230   if (index == 0) {
 231     *n = 0;
 232     return false;
 233   } else {
 234     int k = MIN2(max, index);
 235     jint  new_ind = index - k;
 236     for (int j = 0; j < k; j++) {
 237       ptr_arr[j] = _base[new_ind + j];
 238     }
 239     _index = new_ind;
 240     *n = k;
 241     return true;
 242   }
 243 }
 244 
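     // Records the current stack index at the start of a GC pause so that
     // note_end_of_gc() can verify that nothing was pushed onto the mark
     // stack during the pause.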
 245 void G1CMMarkStack::note_start_of_gc() {
 246   assert(_saved_index == -1,
 247          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 248   _saved_index = _index;
 249 }
 250 
 251 void G1CMMarkStack::note_end_of_gc() {
 252   // This is intentionally a guarantee, instead of an assert. If we
 253   // accidentally add something to the mark stack during GC, it
 254   // will be a correctness issue, so it's better if we crash. We'll
 255   // only check this once per GC anyway, so it won't be a performance
 256   // issue in any way.
 257   guarantee(_saved_index == _index,
 258             "saved index: %d index: %d", _saved_index, _index);
 259   _saved_index = -1;
 260 }
 261 
 262 G1CMRootRegions::G1CMRootRegions() :
 263   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 264   _should_abort(false),  _next_survivor(NULL) { }
 265 
 266 void G1CMRootRegions::init(G1CollectedHeap* g1h, G1ConcurrentMark* cm) {
 267   _young_list = g1h->young_list();
 268   _cm = cm;
 269 }
 270 
 271 void G1CMRootRegions::prepare_for_scan() {
 272   assert(!scan_in_progress(), "pre-condition");
 273 
 274   // Currently, only survivors can be root regions.
 275   assert(_next_survivor == NULL, "pre-condition");
 276   _next_survivor = _young_list->first_survivor_region();
 277   _scan_in_progress = (_next_survivor != NULL);
 278   _should_abort = false;
 279 }
 280 
 281 HeapRegion* G1CMRootRegions::claim_next() {
 282   if (_should_abort) {
 283     // If someone has set the should_abort flag, we return NULL to
 284     // force the caller to bail out of their loop.
 285     return NULL;
 286   }
 287 
 288   // Currently, only survivors can be root regions.
 289   HeapRegion* res = _next_survivor;
 290   if (res != NULL) {
 291     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 292     // Read it again in case it changed while we were waiting for the lock.
 293     res = _next_survivor;
 294     if (res != NULL) {
 295       if (res == _young_list->last_survivor_region()) {
 296         // We just claimed the last survivor so store NULL to indicate
 297         // that we're done.
 298         _next_survivor = NULL;
 299       } else {
 300         _next_survivor = res->get_next_young_region();
 301       }
 302     } else {
 303       // Someone else claimed the last survivor while we were trying
 304       // to take the lock, so there is nothing else to do.
 305     }
 306   }
 307   assert(res == NULL || res->is_survivor(), "post-condition");
 308 
 309   return res;
 310 }
 311 
 312 void G1CMRootRegions::notify_scan_done() {
 313   MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 314   _scan_in_progress = false;
 315   RootRegionScan_lock->notify_all();
 316 }
 317 
 318 void G1CMRootRegions::cancel_scan() {
 319   notify_scan_done();
 320 }
 321 
 322 void G1CMRootRegions::scan_finished() {
 323   assert(scan_in_progress(), "pre-condition");
 324 
 325   // Currently, only survivors can be root regions.
 326   if (!_should_abort) {
 327     assert(_next_survivor == NULL, "we should have claimed all survivors");
 328   }
 329   _next_survivor = NULL;
 330 
 331   notify_scan_done();
 332 }
 333 
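     // Waits until the concurrent root region scan has completed. Returns
     // false immediately if no scan is in progress, true otherwise.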
 334 bool G1CMRootRegions::wait_until_scan_finished() {
 335   if (!scan_in_progress()) return false;
 336 
 337   {
 338     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 339     while (scan_in_progress()) {
 340       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 341     }
 342   }
 343   return true;
 344 }
 345 
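     // Scales the number of parallel GC threads down to a number of
     // concurrent marking threads: roughly one quarter, but at least one.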
 346 uint G1ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 347   return MAX2((n_par_threads + 2) / 4, 1U);
 348 }
 349 
 350 G1ConcurrentMark::G1ConcurrentMark(G1CollectedHeap* g1h, G1RegionToSpaceMapper* prev_bitmap_storage, G1RegionToSpaceMapper* next_bitmap_storage) :
 351   _g1h(g1h),
 352   _markBitMap1(),
 353   _markBitMap2(),
 354   _parallel_marking_threads(0),
 355   _max_parallel_marking_threads(0),
 356   _sleep_factor(0.0),
 357   _marking_task_overhead(1.0),
 358   _cleanup_list("Cleanup List"),
 359   _region_live_bm(),
 360   _card_live_bm(),
 361 
 362   _prevMarkBitMap(&_markBitMap1),
 363   _nextMarkBitMap(&_markBitMap2),
 364 
 365   _markStack(this),
 366   // _finger set in set_non_marking_state
 367 
 368   _max_worker_id(ParallelGCThreads),
 369   // _active_tasks set in set_non_marking_state
 370   // _tasks set inside the constructor
 371   _task_queues(new G1CMTaskQueueSet((int) _max_worker_id)),
 372   _terminator(ParallelTaskTerminator((int) _max_worker_id, _task_queues)),
 373 
 374   _has_overflown(false),
 375   _concurrent(false),
 376   _has_aborted(false),
 377   _restart_for_overflow(false),
 378   _concurrent_marking_in_progress(false),
 379   _gc_timer_cm(new (ResourceObj::C_HEAP, mtGC) ConcurrentGCTimer()),
 380   _gc_tracer_cm(new (ResourceObj::C_HEAP, mtGC) G1OldTracer()),
 381 
 382   // _verbose_level set below
 383 
 384   _init_times(),
 385   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 386   _cleanup_times(),
 387   _total_counting_time(0.0),
 388   _total_rs_scrub_time(0.0),
 389 
 390   _parallel_workers(NULL),
 391 
 392   _completed_initialization(false) {
 393 
 394   _markBitMap1.initialize(g1h->reserved_region(), prev_bitmap_storage);
 395   _markBitMap2.initialize(g1h->reserved_region(), next_bitmap_storage);
 396 
 397   // Create & start a ConcurrentMark thread.
 398   _cmThread = new ConcurrentMarkThread(this);
 399   assert(cmThread() != NULL, "CM Thread should have been created");
 400   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 401   if (_cmThread->osthread() == NULL) {
 402       vm_shutdown_during_initialization("Could not create ConcurrentMarkThread");
 403   }
 404 
 405   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 406   assert(_markBitMap1.covers(g1h->reserved_region()), "_markBitMap1 inconsistency");
 407   assert(_markBitMap2.covers(g1h->reserved_region()), "_markBitMap2 inconsistency");
 408 
 409   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 410   satb_qs.set_buffer_size(G1SATBBufferSize);
 411 
 412   _root_regions.init(_g1h, this);
 413 
 414   if (ConcGCThreads > ParallelGCThreads) {
 415     log_warning(gc)("Can't have more ConcGCThreads (%u) than ParallelGCThreads (%u).",
 416                     ConcGCThreads, ParallelGCThreads);
 417     return;
 418   }
 419   if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
 420     // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
 421     // if both are set
 422     _sleep_factor             = 0.0;
 423     _marking_task_overhead    = 1.0;
 424   } else if (G1MarkingOverheadPercent > 0) {
 425     // We will calculate the number of parallel marking threads based
 426     // on a target overhead with respect to the soft real-time goal
 427     double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 428     double overall_cm_overhead =
 429       (double) MaxGCPauseMillis * marking_overhead /
 430       (double) GCPauseIntervalMillis;
 431     double cpu_ratio = 1.0 / (double) os::processor_count();
 432     double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 433     double marking_task_overhead =
 434       overall_cm_overhead / marking_thread_num *
 435                                               (double) os::processor_count();
 436     double sleep_factor =
 437                        (1.0 - marking_task_overhead) / marking_task_overhead;
 438 
 439     FLAG_SET_ERGO(uint, ConcGCThreads, (uint) marking_thread_num);
 440     _sleep_factor             = sleep_factor;
 441     _marking_task_overhead    = marking_task_overhead;
 442   } else {
 443     // Calculate the number of parallel marking threads by scaling
 444     // the number of parallel GC threads.
 445     uint marking_thread_num = scale_parallel_threads(ParallelGCThreads);
 446     FLAG_SET_ERGO(uint, ConcGCThreads, marking_thread_num);
 447     _sleep_factor             = 0.0;
 448     _marking_task_overhead    = 1.0;
 449   }
 450 
 451   assert(ConcGCThreads > 0, "Should have been set");
 452   _parallel_marking_threads = ConcGCThreads;
 453   _max_parallel_marking_threads = _parallel_marking_threads;
 454 
 455   _parallel_workers = new WorkGang("G1 Marker",
 456        _max_parallel_marking_threads, false, true);
 457   if (_parallel_workers == NULL) {
 458     vm_exit_during_initialization("Failed necessary allocation.");
 459   } else {
 460     _parallel_workers->initialize_workers();
 461   }
 462 
 463   if (FLAG_IS_DEFAULT(MarkStackSize)) {
 464     size_t mark_stack_size =
 465       MIN2(MarkStackSizeMax,
 466           MAX2(MarkStackSize, (size_t) (parallel_marking_threads() * TASKQUEUE_SIZE)));
 467     // Verify that the calculated value for MarkStackSize is in range.
 468     // It would be nice to use the private utility routine from Arguments.
 469     if (!(mark_stack_size >= 1 && mark_stack_size <= MarkStackSizeMax)) {
 470       log_warning(gc)("Invalid value calculated for MarkStackSize (" SIZE_FORMAT "): "
 471                       "must be between 1 and " SIZE_FORMAT,
 472                       mark_stack_size, MarkStackSizeMax);
 473       return;
 474     }
 475     FLAG_SET_ERGO(size_t, MarkStackSize, mark_stack_size);
 476   } else {
 477     // Verify MarkStackSize is in range.
 478     if (FLAG_IS_CMDLINE(MarkStackSize)) {
 479       if (FLAG_IS_DEFAULT(MarkStackSizeMax)) {
 480         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 481           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT "): "
 482                           "must be between 1 and " SIZE_FORMAT,
 483                           MarkStackSize, MarkStackSizeMax);
 484           return;
 485         }
 486       } else if (FLAG_IS_CMDLINE(MarkStackSizeMax)) {
 487         if (!(MarkStackSize >= 1 && MarkStackSize <= MarkStackSizeMax)) {
 488           log_warning(gc)("Invalid value specified for MarkStackSize (" SIZE_FORMAT ")"
 489                           " or for MarkStackSizeMax (" SIZE_FORMAT ")",
 490                           MarkStackSize, MarkStackSizeMax);
 491           return;
 492         }
 493       }
 494     }
 495   }
 496 
 497   if (!_markStack.allocate(MarkStackSize)) {
 498     log_warning(gc)("Failed to allocate CM marking stack");
 499     return;
 500   }
 501 
 502   allocate_internal_bitmaps();
 503 
 504   if (G1PretouchAuxiliaryMemory) {
 505     pretouch_internal_bitmaps();
 506   }
 507 
 508   _tasks = NEW_C_HEAP_ARRAY(G1CMTask*, _max_worker_id, mtGC);
 509   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_worker_id, mtGC);
 510 
 511   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 512   _active_tasks = _max_worker_id;
 513 
 514   for (uint i = 0; i < _max_worker_id; ++i) {
 515     G1CMTaskQueue* task_queue = new G1CMTaskQueue();
 516     task_queue->initialize();
 517     _task_queues->register_queue(i, task_queue);
 518 
 519     _tasks[i] = new G1CMTask(i, this, task_queue, _task_queues);
 520 
 521     _accum_task_vtime[i] = 0.0;
 522   }
 523 
 524   // Calculate the card number for the bottom of the heap. Used
 525   // in biasing indexes into the accounting card bitmaps.
 526   _heap_bottom_card_num =
 527     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 528                                 CardTableModRefBS::card_shift);
 529 
 530   // so that the call below can read a sensible value
 531   _heap_start = g1h->reserved_region().start();
 532   set_non_marking_state();
 533   _completed_initialization = true;
 534 }
 535 
 536 void G1ConcurrentMark::reset() {
 537   // Starting values for these two. This should be called in a STW
 538   // phase.
 539   MemRegion reserved = _g1h->g1_reserved();
 540   _heap_start = reserved.start();
 541   _heap_end   = reserved.end();
 542 
 543   // Separated the asserts so that we know which one fires.
 544   assert(_heap_start != NULL, "heap bounds should look ok");
 545   assert(_heap_end != NULL, "heap bounds should look ok");
 546   assert(_heap_start < _heap_end, "heap bounds should look ok");
 547 
 548   // Reset all the marking data structures and any necessary flags
 549   reset_marking_state();
 550 
 551   // We do reset all of them, since different phases will use a
 552   // different number of active threads. So, it's easiest to have all
 553   // of them ready.
 554   for (uint i = 0; i < _max_worker_id; ++i) {
 555     _tasks[i]->reset(_nextMarkBitMap);
 556   }
 557 
 558   // We need this to make sure that the flag is on during the evacuation
 559   // pause that has the initial mark piggy-backed on it.
 560   set_concurrent_marking_in_progress();
 561 }
 562 
 563 
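     // Resets the global marking data structures: empties the global mark
     // stack and all task queues and moves the global finger back to the
     // start of the heap. The overflow flag is only cleared if
     // clear_overflow is true; otherwise it must already be set.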
 564 void G1ConcurrentMark::reset_marking_state(bool clear_overflow) {
 565   _markStack.set_should_expand();
 566   _markStack.setEmpty();        // Also clears the _markStack overflow flag
 567   if (clear_overflow) {
 568     clear_has_overflown();
 569   } else {
 570     assert(has_overflown(), "pre-condition");
 571   }
 572   _finger = _heap_start;
 573 
 574   for (uint i = 0; i < _max_worker_id; ++i) {
 575     G1CMTaskQueue* queue = _task_queues->queue(i);
 576     queue->set_empty();
 577   }
 578 }
 579 
 580 void G1ConcurrentMark::set_concurrency(uint active_tasks) {
 581   assert(active_tasks <= _max_worker_id, "we should not have more");
 582 
 583   _active_tasks = active_tasks;
 584   // Need to update the three data structures below according to the
 585   // number of active threads for this phase.
 586   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 587   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 588   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 589 }
 590 
 591 void G1ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
 592   set_concurrency(active_tasks);
 593 
 594   _concurrent = concurrent;
 595   // We propagate this to all tasks, not just the active ones.
 596   for (uint i = 0; i < _max_worker_id; ++i)
 597     _tasks[i]->set_concurrent(concurrent);
 598 
 599   if (concurrent) {
 600     set_concurrent_marking_in_progress();
 601   } else {
 602     // We currently assume that the concurrent flag has been set to
 603     // false before we start remark. At this point we should also be
 604     // in a STW phase.
 605     assert(!concurrent_marking_in_progress(), "invariant");
 606     assert(out_of_regions(),
 607            "only way to get here: _finger: " PTR_FORMAT ", _heap_end: " PTR_FORMAT,
 608            p2i(_finger), p2i(_heap_end));
 609   }
 610 }
 611 
 612 void G1ConcurrentMark::set_non_marking_state() {
 613   // We set the global marking state to some default values when we're
 614   // not doing marking.
 615   reset_marking_state();
 616   _active_tasks = 0;
 617   clear_concurrent_marking_in_progress();
 618 }
 619 
 620 G1ConcurrentMark::~G1ConcurrentMark() {
 621   // The G1ConcurrentMark instance is never freed.
 622   ShouldNotReachHere();
 623 }
 624 
 625 class G1ClearBitMapTask : public AbstractGangTask {
 626   // Heap region closure used for clearing the given mark bitmap.
 627   class G1ClearBitmapHRClosure : public HeapRegionClosure {
 628   private:
 629     G1CMBitMap* _bitmap;
 630     G1ConcurrentMark* _cm;
 631   public:
 632     G1ClearBitmapHRClosure(G1CMBitMap* bitmap, G1ConcurrentMark* cm) : HeapRegionClosure(), _cm(cm), _bitmap(bitmap) {
 633     }
 634 
 635     virtual bool doHeapRegion(HeapRegion* r) {
 636       size_t const chunk_size_in_words = M / HeapWordSize;
 637 
 638       HeapWord* cur = r->bottom();
 639       HeapWord* const end = r->end();
 640 
 641       while (cur < end) {
 642         MemRegion mr(cur, MIN2(cur + chunk_size_in_words, end));
 643         _bitmap->clear_range(mr);
 644 
 645         cur += chunk_size_in_words;
 646 
 647         // Abort iteration if after yielding the marking has been aborted.
 648         if (_cm != NULL && _cm->do_yield_check() && _cm->has_aborted()) {
 649           return true;
 650         }
 651         // Repeat the asserts from before the start of the closure. We will do them
 652         // as asserts here to minimize their overhead in product builds. However,
 653         // we will have them as guarantees at the beginning / end of the bitmap
 654         // clearing to get some checking in product builds.
 655         assert(_cm == NULL || _cm->cmThread()->during_cycle(), "invariant");
 656         assert(_cm == NULL || !G1CollectedHeap::heap()->collector_state()->mark_in_progress(), "invariant");
 657       }
 658       assert(cur == end, "Must have completed iteration over the bitmap for region %u.", r->hrm_index());
 659 
 660       return false;
 661     }
 662   };
 663 
 664   G1ClearBitmapHRClosure _cl;
 665   HeapRegionClaimer _hr_claimer;
 666   bool _suspendible; // If the task is suspendible, workers must join the STS.
 667 
 668 public:
 669   G1ClearBitMapTask(G1CMBitMap* bitmap, G1ConcurrentMark* cm, uint n_workers, bool suspendible) :
 670     AbstractGangTask("Parallel Clear Bitmap Task"),
 671     _cl(bitmap, suspendible ? cm : NULL),
 672     _hr_claimer(n_workers),
 673     _suspendible(suspendible)
 674   { }
 675 
 676   void work(uint worker_id) {
 677     SuspendibleThreadSetJoiner sts_join(_suspendible);
 678     G1CollectedHeap::heap()->heap_region_par_iterate(&_cl, worker_id, &_hr_claimer, true);
 679   }
 680 
 681   bool is_complete() {
 682     return _cl.complete();
 683   }
 684 };
 685 
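     // Clears the given bitmap in parallel using the given work gang. If
     // may_yield is true, the clearing work periodically yields to the
     // suspendible thread set and may be aborted; otherwise it must run at a
     // safepoint and is guaranteed to complete.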
 686 void G1ConcurrentMark::clear_bitmap(G1CMBitMap* bitmap, WorkGang* workers, bool may_yield) {
 687   assert(may_yield || SafepointSynchronize::is_at_safepoint(), "Non-yielding bitmap clear only allowed at safepoint.");
 688 
 689   G1ClearBitMapTask task(bitmap, this, workers->active_workers(), may_yield);
 690   workers->run_task(&task);
 691   guarantee(!may_yield || task.is_complete(), "Must have completed iteration when not yielding.");
 692 }
 693 
 694 void G1ConcurrentMark::cleanup_for_next_mark() {
 695   // Make sure that the concurrent mark thread still appears to be in
 696   // the current cycle.
 697   guarantee(cmThread()->during_cycle(), "invariant");
 698 
 699   // We are finishing up the current cycle by clearing the next
 700   // marking bitmap and getting it ready for the next cycle. During
 701   // this time no other cycle can start. So, let's make sure that this
 702   // is the case.
 703   guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
 704 
 705   clear_bitmap(_nextMarkBitMap, _parallel_workers, true);
 706 
 707   // Clear the live count data. If the marking has been aborted, the abort()
 708   // call already did that.
 709   if (!has_aborted()) {
 710     clear_all_live_data(_parallel_workers);
 711     DEBUG_ONLY(verify_all_live_data());
 712   }
 713 
 714   // Repeat the asserts from above.
 715   guarantee(cmThread()->during_cycle(), "invariant");
 716   guarantee(!_g1h->collector_state()->mark_in_progress(), "invariant");
 717 }
 718 
 719 void G1ConcurrentMark::clear_prev_bitmap(WorkGang* workers) {
 720   assert(SafepointSynchronize::is_at_safepoint(), "Should only clear the entire prev bitmap at a safepoint.");
 721   clear_bitmap((G1CMBitMap*)_prevMarkBitMap, workers, false);
 722 }
 723 
 724 class CheckBitmapClearHRClosure : public HeapRegionClosure {
 725   G1CMBitMap* _bitmap;
 726   bool _error;
 727  public:
 728   CheckBitmapClearHRClosure(G1CMBitMap* bitmap) : _bitmap(bitmap) {
 729   }
 730 
 731   virtual bool doHeapRegion(HeapRegion* r) {
 732     // This closure can be called concurrently with the mutator, so we must make sure
 733     // that the result of the getNextMarkedWordAddress() call is compared to the
 734     // value passed to it as limit to detect any found bits.
 735     // end never changes in G1.
 736     HeapWord* end = r->end();
 737     return _bitmap->getNextMarkedWordAddress(r->bottom(), end) != end;
 738   }
 739 };
 740 
 741 bool G1ConcurrentMark::nextMarkBitmapIsClear() {
 742   CheckBitmapClearHRClosure cl(_nextMarkBitMap);
 743   _g1h->heap_region_iterate(&cl);
 744   return cl.complete();
 745 }
 746 
 747 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 748 public:
 749   bool doHeapRegion(HeapRegion* r) {
 750     r->note_start_of_marking();
 751     return false;
 752   }
 753 };
 754 
 755 void G1ConcurrentMark::checkpointRootsInitialPre() {
 756   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 757   G1CollectorPolicy* g1p = g1h->g1_policy();
 758 
 759   _has_aborted = false;
 760 
 761   // Initialize marking structures. This has to be done in a STW phase.
 762   reset();
 763 
 764   // For each region note start of marking.
 765   NoteStartOfMarkHRClosure startcl;
 766   g1h->heap_region_iterate(&startcl);
 767 }
 768 
 769 
 770 void G1ConcurrentMark::checkpointRootsInitialPost() {
 771   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 772 
 773   // Start Concurrent Marking weak-reference discovery.
 774   ReferenceProcessor* rp = g1h->ref_processor_cm();
 775   // enable ("weak") refs discovery
 776   rp->enable_discovery();
 777   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 778 
 779   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 780   // This is the start of the marking cycle; we expect all
 781   // threads to have SATB queues with active set to false.
 782   satb_mq_set.set_active_all_threads(true, /* new active value */
 783                                      false /* expected_active */);
 784 
 785   _root_regions.prepare_for_scan();
 786 
 787   // update_g1_committed() will be called at the end of an evac pause
 788   // when marking is on. So, it's also called at the end of the
 789   // initial-mark pause to update the heap end, if the heap expands
 790   // during it. No need to call it here.
 791 }
 792 
 793 /*
 794  * Notice that in the next two methods, we actually leave the STS
 795  * during the barrier sync and join it immediately afterwards. If we
 796  * do not do this, the following deadlock can occur: one thread could
 797  * be in the barrier sync code, waiting for the other thread to also
 798  * sync up, whereas another one could be trying to yield while
 799  * waiting for the other threads to sync up too.
 800  *
 801  * Note, however, that this code is also used during remark and in
 802  * this case we should not attempt to leave / enter the STS, otherwise
 803  * we'll either hit an assert (debug / fastdebug) or deadlock
 804  * (product). So we should only leave / enter the STS if we are
 805  * operating concurrently.
 806  *
 807  * Because the thread that does the sync barrier has left the STS, it
 808  * is possible for it to be suspended for a Full GC, or for an
 809  * evacuation pause to occur. This is actually safe, since entering
 810  * the sync barrier is one of the last things do_marking_step() does, and it
 811  * doesn't manipulate any data structures afterwards.
 812  */
 813 
 814 void G1ConcurrentMark::enter_first_sync_barrier(uint worker_id) {
 815   bool barrier_aborted;
 816   {
 817     SuspendibleThreadSetLeaver sts_leave(concurrent());
 818     barrier_aborted = !_first_overflow_barrier_sync.enter();
 819   }
 820 
 821   // at this point everyone should have synced up and not be doing any
 822   // more work
 823 
 824   if (barrier_aborted) {
 825     // If the barrier aborted we ignore the overflow condition and
 826     // just abort the whole marking phase as quickly as possible.
 827     return;
 828   }
 829 
 830   // If we're executing the concurrent phase of marking, reset the marking
 831   // state; otherwise the marking state is reset after reference processing,
 832   // during the remark pause.
 833   // If we reset here as a result of an overflow during the remark we will
 834   // see assertion failures from any subsequent set_concurrency_and_phase()
 835   // calls.
 836   if (concurrent()) {
 837       // let the task associated with worker 0 do this
 838     if (worker_id == 0) {
 839       // task 0 is responsible for clearing the global data structures
 840       // We should be here because of an overflow. During STW we should
 841       // not clear the overflow flag since we rely on it being true when
 842       // we exit this method to abort the pause and restart concurrent
 843       // marking.
 844       reset_marking_state(true /* clear_overflow */);
 845 
 846       log_info(gc, marking)("Concurrent Mark reset for overflow");
 847     }
 848   }
 849 
 850   // after this, each task should reset its own data structures and
 851   // then go into the second barrier
 852 }
 853 
 854 void G1ConcurrentMark::enter_second_sync_barrier(uint worker_id) {
 855   SuspendibleThreadSetLeaver sts_leave(concurrent());
 856   _second_overflow_barrier_sync.enter();
 857 
 858   // at this point everything should be re-initialized and ready to go
 859 }
 860 
 861 class G1CMConcurrentMarkingTask: public AbstractGangTask {
 862 private:
 863   G1ConcurrentMark*     _cm;
 864   ConcurrentMarkThread* _cmt;
 865 
 866 public:
 867   void work(uint worker_id) {
 868     assert(Thread::current()->is_ConcurrentGC_thread(),
 869            "this should only be done by a conc GC thread");
 870     ResourceMark rm;
 871 
 872     double start_vtime = os::elapsedVTime();
 873 
 874     {
 875       SuspendibleThreadSetJoiner sts_join;
 876 
 877       assert(worker_id < _cm->active_tasks(), "invariant");
 878       G1CMTask* the_task = _cm->task(worker_id);
 879       the_task->record_start_time();
 880       if (!_cm->has_aborted()) {
 881         do {
 882           double start_vtime_sec = os::elapsedVTime();
 883           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 884 
 885           the_task->do_marking_step(mark_step_duration_ms,
 886                                     true  /* do_termination */,
 887                                     false /* is_serial*/);
 888 
 889           double end_vtime_sec = os::elapsedVTime();
 890           double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
 891           _cm->clear_has_overflown();
 892 
 893           _cm->do_yield_check(worker_id);
 894 
 895           jlong sleep_time_ms;
 896           if (!_cm->has_aborted() && the_task->has_aborted()) {
 897             sleep_time_ms =
 898               (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
 899             {
 900               SuspendibleThreadSetLeaver sts_leave;
 901               os::sleep(Thread::current(), sleep_time_ms, false);
 902             }
 903           }
 904         } while (!_cm->has_aborted() && the_task->has_aborted());
 905       }
 906       the_task->record_end_time();
 907       guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
 908     }
 909 
 910     double end_vtime = os::elapsedVTime();
 911     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 912   }
 913 
 914   G1CMConcurrentMarkingTask(G1ConcurrentMark* cm,
 915                             ConcurrentMarkThread* cmt) :
 916       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
 917 
 918   ~G1CMConcurrentMarkingTask() { }
 919 };
 920 
 921 // Calculates the number of active workers for a concurrent
 922 // phase.
 923 uint G1ConcurrentMark::calc_parallel_marking_threads() {
 924   uint n_conc_workers = 0;
 925   if (!UseDynamicNumberOfGCThreads ||
 926       (!FLAG_IS_DEFAULT(ConcGCThreads) &&
 927        !ForceDynamicNumberOfGCThreads)) {
 928     n_conc_workers = max_parallel_marking_threads();
 929   } else {
 930     n_conc_workers =
 931       AdaptiveSizePolicy::calc_default_active_workers(
 932                                    max_parallel_marking_threads(),
 933                                    1, /* Minimum workers */
 934                                    parallel_marking_threads(),
 935                                    Threads::number_of_non_daemon_threads());
 936     // Don't scale down "n_conc_workers" by scale_parallel_threads() because
 937     // that scaling has already gone into "_max_parallel_marking_threads".
 938   }
 939   assert(n_conc_workers > 0, "Always need at least 1");
 940   return n_conc_workers;
 941 }
 942 
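     // Scans a single root region (currently always a survivor region) by
     // iterating over all objects between its bottom() and top() and
     // applying a G1RootRegionScanClosure to each of them.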
 943 void G1ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
 944   // Currently, only survivors can be root regions.
 945   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
 946   G1RootRegionScanClosure cl(_g1h, this, worker_id);
 947 
 948   const uintx interval = PrefetchScanIntervalInBytes;
 949   HeapWord* curr = hr->bottom();
 950   const HeapWord* end = hr->top();
 951   while (curr < end) {
 952     Prefetch::read(curr, interval);
 953     oop obj = oop(curr);
 954     int size = obj->oop_iterate_size(&cl);
 955     assert(size == obj->size(), "sanity");
 956     curr += size;
 957   }
 958 }
 959 
 960 class G1CMRootRegionScanTask : public AbstractGangTask {
 961 private:
 962   G1ConcurrentMark* _cm;
 963 
 964 public:
 965   G1CMRootRegionScanTask(G1ConcurrentMark* cm) :
 966     AbstractGangTask("Root Region Scan"), _cm(cm) { }
 967 
 968   void work(uint worker_id) {
 969     assert(Thread::current()->is_ConcurrentGC_thread(),
 970            "this should only be done by a conc GC thread");
 971 
 972     G1CMRootRegions* root_regions = _cm->root_regions();
 973     HeapRegion* hr = root_regions->claim_next();
 974     while (hr != NULL) {
 975       _cm->scanRootRegion(hr, worker_id);
 976       hr = root_regions->claim_next();
 977     }
 978   }
 979 };
 980 
 981 void G1ConcurrentMark::scan_root_regions() {
 982   // scan_in_progress() will have been set to true only if there was
 983   // at least one root region to scan. So, if it's false, we
 984   // should not attempt to do any further work.
 985   if (root_regions()->scan_in_progress()) {
 986     assert(!has_aborted(), "Aborting before root region scanning is finished not supported.");
 987 
 988     _parallel_marking_threads = calc_parallel_marking_threads();
 989     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
 990            "Maximum number of marking threads exceeded");
 991     uint active_workers = MAX2(1U, parallel_marking_threads());
 992 
 993     G1CMRootRegionScanTask task(this);
 994     _parallel_workers->set_active_workers(active_workers);
 995     _parallel_workers->run_task(&task);
 996 
 997     // It's possible that has_aborted() is true here without actually
 998     // aborting the survivor scan earlier. This is OK as it's
 999     // mainly used for sanity checking.
1000     root_regions()->scan_finished();
1001   }
1002 }
1003 
1004 void G1ConcurrentMark::concurrent_cycle_start() {
1005   _gc_timer_cm->register_gc_start();
1006 
1007   _gc_tracer_cm->report_gc_start(GCCause::_no_gc /* first parameter is not used */, _gc_timer_cm->gc_start());
1008 
1009   _g1h->trace_heap_before_gc(_gc_tracer_cm);
1010 }
1011 
1012 void G1ConcurrentMark::concurrent_cycle_end() {
1013   _g1h->trace_heap_after_gc(_gc_tracer_cm);
1014 
1015   if (has_aborted()) {
1016     _gc_tracer_cm->report_concurrent_mode_failure();
1017   }
1018 
1019   _gc_timer_cm->register_gc_end();
1020 
1021   _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
1022 }
1023 
1024 void G1ConcurrentMark::mark_from_roots() {
1025   // we might be tempted to assert that:
1026   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1027   //        "inconsistent argument?");
1028   // However that wouldn't be right, because it's possible that
1029   // a safepoint is indeed in progress as a younger generation
1030   // stop-the-world GC happens even as we mark in this generation.
1031 
1032   _restart_for_overflow = false;
1033 
1034   // _g1h has _n_par_threads
1035   _parallel_marking_threads = calc_parallel_marking_threads();
1036   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1037     "Maximum number of marking threads exceeded");
1038 
1039   uint active_workers = MAX2(1U, parallel_marking_threads());
1040   assert(active_workers > 0, "Should have been set");
1041 
1042   // Parallel task terminator is set in "set_concurrency_and_phase()"
1043   set_concurrency_and_phase(active_workers, true /* concurrent */);
1044 
1045   G1CMConcurrentMarkingTask markingTask(this, cmThread());
1046   _parallel_workers->set_active_workers(active_workers);
1047   _parallel_workers->run_task(&markingTask);
1048   print_stats();
1049 }
1050 
1051 void G1ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1052   // world is stopped at this checkpoint
1053   assert(SafepointSynchronize::is_at_safepoint(),
1054          "world should be stopped");
1055 
1056   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1057 
1058   // If a full collection has happened, we shouldn't do this.
1059   if (has_aborted()) {
1060     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1061     return;
1062   }
1063 
1064   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1065 
1066   if (VerifyDuringGC) {
1067     HandleMark hm;  // handle scope
1068     g1h->prepare_for_verify();
1069     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1070   }
1071   g1h->verifier()->check_bitmaps("Remark Start");
1072 
1073   G1CollectorPolicy* g1p = g1h->g1_policy();
1074   g1p->record_concurrent_mark_remark_start();
1075 
1076   double start = os::elapsedTime();
1077 
1078   checkpointRootsFinalWork();
1079 
1080   double mark_work_end = os::elapsedTime();
1081 
1082   weakRefsWork(clear_all_soft_refs);
1083 
1084   if (has_overflown()) {
1085     // Oops.  We overflowed.  Restart concurrent marking.
1086     _restart_for_overflow = true;
1087 
1088     // Verify the heap w.r.t. the previous marking bitmap.
1089     if (VerifyDuringGC) {
1090       HandleMark hm;  // handle scope
1091       g1h->prepare_for_verify();
1092       Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (overflow)");
1093     }
1094 
1095     // Clear the marking state because we will be restarting
1096     // marking due to overflowing the global mark stack.
1097     reset_marking_state();
1098   } else {
1099     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1100     // We're done with marking.
1101     // This is the end of the marking cycle; we expect all
1102     // threads to have SATB queues with active set to true.
1103     satb_mq_set.set_active_all_threads(false, /* new active value */
1104                                        true /* expected_active */);
1105 
1106     if (VerifyDuringGC) {
1107       HandleMark hm;  // handle scope
1108       g1h->prepare_for_verify();
1109       Universe::verify(VerifyOption_G1UseNextMarking, "During GC (after)");
1110     }
1111     g1h->verifier()->check_bitmaps("Remark End");
1112     assert(!restart_for_overflow(), "sanity");
1113     // Completely reset the marking state since marking completed
1114     set_non_marking_state();
1115   }
1116 
1117   // Expand the marking stack, if we have to and if we can.
1118   if (_markStack.should_expand()) {
1119     _markStack.expand();
1120   }
1121 
1122   // Statistics
1123   double now = os::elapsedTime();
1124   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1125   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1126   _remark_times.add((now - start) * 1000.0);
1127 
1128   g1p->record_concurrent_mark_remark_end();
1129 
1130   G1CMIsAliveClosure is_alive(g1h);
1131   _gc_tracer_cm->report_object_count_after_gc(&is_alive);
1132 }
1133 
1134 // Base class of the closures that finalize and verify the
1135 // liveness count data.
1136 class G1LiveDataClosureBase: public HeapRegionClosure {
1137 protected:
1138   G1ConcurrentMark* _cm;
1139 
1140   BitMap* _region_bm;
1141   BitMap* _card_bm;
1142 
1143   // Takes a region that's not empty (i.e., it has at least one
1144   // live object in it) and sets its corresponding bit in the region
1145   // bitmap to 1.
1146   void set_bit_for_region(HeapRegion* hr) {
1147     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1148     _region_bm->par_at_put(index, true);
1149   }
1150 
1151   // Utility routine to set an exclusive range of cards on the given
1152   // bitmap.
1153   inline void set_card_bitmap_range(BitMap* card_bm,
1154                                     BitMap::idx_t start_idx,
1155                                     BitMap::idx_t end_idx) {
1156 
1157     // Set the exclusive bit range [start_idx, end_idx).
1158     assert((end_idx - start_idx) > 0, "at least one card");
1159     assert(end_idx <= card_bm->size(), "sanity");
1160 
1161     // For small ranges use a simple loop; otherwise use set_range or
1162     // use par_at_put_range (if parallel). The range is made up of the
1163     // cards that are spanned by an object/mem region, so 8 cards will
1164     // allow object sizes up to 4K to be handled using the loop.
1165     if ((end_idx - start_idx) <= 8) {
1166       for (BitMap::idx_t i = start_idx; i < end_idx; i += 1) {
1167         card_bm->set_bit(i);
1168       }
1169     } else {
1170       card_bm->set_range(start_idx, end_idx);
1171     }
1172   }
1173 
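       // Marks the cards spanned by [start, end) in the card liveness bitmap,
       // using _last_marked_bit_idx to avoid re-setting a bit that was already
       // set by the previous range.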
1174   void mark_card_bitmap_range(HeapWord* start, HeapWord* end) {
1175     BitMap::idx_t start_idx = _cm->card_live_bitmap_index_for(start);
1176     BitMap::idx_t end_idx = _cm->card_live_bitmap_index_for((HeapWord*)align_ptr_up(end, CardTableModRefBS::card_size));
1177 
1178     assert((end_idx - start_idx) > 0, "Trying to mark zero sized range.");
1179     
1180     if (start_idx == _last_marked_bit_idx) {
1181       start_idx++;
1182     }
1183     if (start_idx == end_idx) {
1184       return;
1185     }
1186     
1187     // Set the bits in the card bitmap for the cards spanned by this object.
1188     set_card_bitmap_range(_card_bm, start_idx, end_idx);
1189     _last_marked_bit_idx = end_idx - 1;
1190   }
1191 
1192   // We cache the last mark set. This avoids setting the same bit multiple times.
1193   // This is particularly interesting for dense bitmaps, as this avoids doing
1194   // any work most of the time.
1195   BitMap::idx_t _last_marked_bit_idx;
1196 
1197   void reset_mark_cache() {
1198     _last_marked_bit_idx = (BitMap::idx_t)-1;
1199   }
1200 
1201   void mark_allocated_since_marking(HeapRegion* hr) {
1202     reset_mark_cache();
1203 
1204     HeapWord* ntams = hr->next_top_at_mark_start();
1205     HeapWord* top   = hr->top();
1206 
1207     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1208 
1209     // Mark the allocated-since-marking portion...
1210     if (ntams < top) {
1211       mark_card_bitmap_range(ntams, top);
1212       // This definitely means the region has live objects.
1213       set_bit_for_region(hr);
1214     }
1215   }
1216 
1217   bool mark_marked_during_marking(HeapRegion* hr, bool may_suspend, size_t* total_bytes_marked) {
1218     reset_mark_cache();
1219 
1220     size_t marked_bytes = 0;
1221 
1222     HeapWord* ntams = hr->next_top_at_mark_start();
1223     HeapWord* start = hr->bottom();
1224 
1225     if (ntams <= start) {
1226       // Empty region (at the start of marking). Nothing to do.
1227       // hr->add_to_marked_bytes(0);
1228       *total_bytes_marked = marked_bytes;
1229       return false;
1230     } else if (hr->is_starts_humongous()) {
1231       // Humongous object: distribute the marked bytes across the humongous object.
1232       do {
1233         mark_card_bitmap_range(start, hr->top());
1234 
1235         marked_bytes += pointer_delta(hr->top(), start, 1);
1236         hr->add_to_marked_bytes(marked_bytes);
1237 
1238         hr = G1CollectedHeap::heap()->next_region_in_humongous(hr);
1239       } while (hr != NULL);
1240       *total_bytes_marked = marked_bytes;
1241       return false;
1242     } else if (hr->is_continues_humongous()) {
1243       // Regions continuing a humongous object were handled during processing of the start region.
1244       *total_bytes_marked = marked_bytes;
1245       return false;
1246     }
1247 
1248     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1249            "Preconditions not met - "
1250            "start: " PTR_FORMAT ", ntams: " PTR_FORMAT ", end: " PTR_FORMAT,
1251            p2i(start), p2i(ntams), p2i(hr->end()));
1252 
1253     G1CMBitMap* bitmap = _cm->nextMarkBitMap();
1254     // Find the first marked object at or after "start".
1255     start = bitmap->getNextMarkedWordAddress(start, ntams);
1256     while (start < ntams) {
1257       oop obj = oop(start);
1258       int obj_sz = obj->size();
1259       HeapWord* obj_end = start + obj_sz;
1260 
1261       assert(obj_end <= hr->end(), "Humongous objects must have been handled elsewhere.");
1262         
1263       mark_card_bitmap_range(start, obj_end);
1264 
1265       // Add the size of this object to the number of marked bytes.
1266       marked_bytes += (size_t)obj_sz * HeapWordSize;
1267 
1268       // Find the next marked object after this one.
1269       start = bitmap->getNextMarkedWordAddress(obj_end, ntams);
1270     }
1271 
1272     // Update the marked bytes for this region.
1273     hr->add_to_marked_bytes(marked_bytes);
1274     *total_bytes_marked = marked_bytes;
1275 
1276     // Abort iteration if after yielding the marking has been aborted.
1277     if (may_suspend && _cm->do_yield_check() && _cm->has_aborted()) {
1278        return true;
1279     }
1280     // Next heap region
1281     return false;
1282   }
1283 
1284 public:
1285   G1LiveDataClosureBase(G1CollectedHeap* g1h,
1286                         BitMap* region_bm,
1287                         BitMap* card_bm):
1288     _cm(g1h->concurrent_mark()),
1289     _region_bm(region_bm),
1290     _card_bm(card_bm) { }
1291 };
1292 
1293 // Heap region closure used for verifying the live count data
1294 // that was created concurrently and finalized during
1295 // the remark pause. This closure is applied to the heap
1296 // regions during the STW cleanup pause.
1297 class G1VerifyLiveDataHRClosure: public HeapRegionClosure {
1298   // Calculates the # live objects per region.
1299   class G1VerifyLiveDataClosure: public G1LiveDataClosureBase {
1300     size_t _region_marked_bytes;
1301 
1302   public:
1303     G1VerifyLiveDataClosure(G1CollectedHeap* g1h,
1304                             BitMap* region_bm,
1305                             BitMap* card_bm) :
1306       G1LiveDataClosureBase(g1h, region_bm, card_bm),
1307       _region_marked_bytes(0) { }
1308 
1309     bool doHeapRegion(HeapRegion* hr) {
1310       mark_marked_during_marking(hr, false, &_region_marked_bytes);
1311       mark_allocated_since_marking(hr);
1312       return false;
1313     }
1314 
1315     size_t region_marked_bytes() const { return _region_marked_bytes; }
1316   };
1317 
1318   G1CollectedHeap* _g1h;
1319   G1ConcurrentMark* _cm;
1320   G1VerifyLiveDataClosure _calc_cl;
1321   BitMap* _act_region_bm;   // Region BM to be verified
1322   BitMap* _act_card_bm;     // Card BM to be verified
1323 
1324   BitMap* _exp_region_bm; // Expected Region BM values
1325   BitMap* _exp_card_bm;   // Expected card BM values
1326 
1327   int _failures;
1328 
1329 public:
1330   G1VerifyLiveDataHRClosure(G1CollectedHeap* g1h,
1331                             BitMap* act_region_bm,
1332                             BitMap* act_card_bm,
1333                             BitMap* exp_region_bm,
1334                             BitMap* exp_card_bm) :
1335     _g1h(g1h),
1336     _cm(g1h->concurrent_mark()),
1337     _calc_cl(g1h, exp_region_bm, exp_card_bm),
1338     _act_region_bm(act_region_bm),
1339     _act_card_bm(act_card_bm),
1340     _exp_region_bm(exp_region_bm),
1341     _exp_card_bm(exp_card_bm),
1342     _failures(0) { }
1343 
1344   int failures() const { return _failures; }
1345 
1346   bool doHeapRegion(HeapRegion* hr) {
1347     int failures = 0;
1348 
1349     // Call the G1VerifyLiveDataClosure to walk the marking bitmap for
1350     // this region and set the corresponding bits in the expected region
1351     // and card bitmaps.
1352     bool res = _calc_cl.doHeapRegion(hr);
1353     assert(!res, "Should be completed.");
1354 
1355     // Verify the marked bytes for this region.
1356     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1357     size_t act_marked_bytes = hr->next_marked_bytes();
1358 
1359     if (exp_marked_bytes > act_marked_bytes) {
1360       if (hr->is_starts_humongous()) {
1361         // For start_humongous regions, the size of the whole object will be
1362         // in exp_marked_bytes.
1363         HeapRegion* region = hr;
1364         int num_regions;
1365         for (num_regions = 0; region != NULL; num_regions++) {
1366           region = _g1h->next_region_in_humongous(region);
1367         }
1368         if ((num_regions-1) * HeapRegion::GrainBytes >= exp_marked_bytes) {
1369           failures += 1;
1370         } else if (num_regions * HeapRegion::GrainBytes < exp_marked_bytes) {
1371           failures += 1;
1372         }
1373       } else {
1374         // We're not OK if expected marked bytes > actual marked bytes. It means
1375         // we have missed accounting for some objects during the actual marking.
1376         failures += 1;
1377       }
1378     }
1379 
1380     // Verify the bit, for this region, in the actual and expected
1381     // (which was just calculated) region bit maps.
1382     // We're not OK if the bit in the calculated expected region
1383     // bitmap is set and the bit in the actual region bitmap is not.
1384     BitMap::idx_t index = (BitMap::idx_t) hr->hrm_index();
1385 
1386     bool expected = _exp_region_bm->at(index);
1387     bool actual = _act_region_bm->at(index);
1388     if (expected && !actual) {
1389       failures += 1;
1390     }
1391 
1392     // Verify that the card bit maps for the cards spanned by the current
1393     // region match. We have an error if we have a set bit in the expected
1394     // bit map and the corresponding bit in the actual bitmap is not set.
1395 
1396     BitMap::idx_t start_idx = _cm->card_live_bitmap_index_for(hr->bottom());
1397     BitMap::idx_t end_idx = _cm->card_live_bitmap_index_for(hr->top());
1398 
1399     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1400       expected = _exp_card_bm->at(i);
1401       actual = _act_card_bm->at(i);
1402 
1403       if (expected && !actual) {
1404         failures += 1;
1405       }
1406     }
1407 
1408     _failures += failures;
1409 
1410     // We could stop iteration over the heap when we
1411     // find the first violating region by returning true.
1412     return false;
1413   }
1414 };
1415 
1416 class G1VerifyLiveDataTask: public AbstractGangTask {
1417 protected:
1418   G1CollectedHeap* _g1h;
1419   BitMap* _actual_region_bm;
1420   BitMap* _actual_card_bm;
1421 
1422   BitMap _expected_region_bm;
1423   BitMap _expected_card_bm;
1424 
1425   int  _failures;
1426 
1427   HeapRegionClaimer _hr_claimer;
1428 
1429 public:
1430   G1VerifyLiveDataTask(G1CollectedHeap* g1h,
1431                        BitMap* region_bm,
1432                        BitMap* card_bm,
1433                        uint n_workers)
1434   : AbstractGangTask("G1 verify final counting"),
1435     _g1h(g1h),
1436     _actual_region_bm(region_bm),
1437     _actual_card_bm(card_bm),
1438     _expected_region_bm(region_bm->size(), true /* in_resource_area */),
1439     _expected_card_bm(card_bm->size(), true /* in_resource_area */),
1440     _failures(0),
1441     _hr_claimer(n_workers) {
1442     assert(VerifyDuringGC, "don't call this otherwise");
1443   }
1444 
1445   void work(uint worker_id) {
1446     G1VerifyLiveDataHRClosure cl(_g1h,
1447                                  _actual_region_bm,
1448                                  _actual_card_bm,
1449                                  &_expected_region_bm,
1450                                  &_expected_card_bm);
1451     _g1h->heap_region_par_iterate(&cl, worker_id, &_hr_claimer);
1452 
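         // Accumulate this worker's failure count into the task-wide total;
         // several workers may update it concurrently, hence the atomic add.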
1453     Atomic::add(cl.failures(), &_failures);
1454   }
1455 
1456   int failures() const { return _failures; }
1457 };
1458 
1459 class G1FinalizeLiveDataTask: public AbstractGangTask {
1460   // Finalizes the liveness counting data.
1461   // Sets the bits corresponding to the interval [NTAMS, top]
1462   // (which contains the implicitly live objects) in the
1463   // card liveness bitmap. Also sets the bit for each region
1464   // containing live data, in the region liveness bitmap.
1465   class G1FinalizeCountDataClosure: public G1LiveDataClosureBase {
1466   public:
1467     G1FinalizeCountDataClosure(G1CollectedHeap* g1h,
1468                                BitMap* region_bm,
1469                                BitMap* card_bm) :
1470       G1LiveDataClosureBase(g1h, region_bm, card_bm) { }
1471 
1472     bool doHeapRegion(HeapRegion* hr) {
1473       mark_allocated_since_marking(hr);
1474       // Set the bit for the region if it contains live data
1475       if (hr->next_marked_bytes() > 0) {
1476         set_bit_for_region(hr);
1477       }
1478 
1479       return false;
1480     }
1481   };
1482 
1483   G1CollectedHeap* _g1h;
1484   BitMap* _actual_region_bm;
1485   BitMap* _actual_card_bm;
1486 
1487   HeapRegionClaimer _hr_claimer;
1488 
1489 public:
1490   G1FinalizeLiveDataTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm, uint n_workers) :
1491     AbstractGangTask("G1 final counting"),
1492     _g1h(g1h),
1493     _actual_region_bm(region_bm),
1494     _actual_card_bm(card_bm),
1495     _hr_claimer(n_workers) {
1496   }
1497 
1498   void work(uint worker_id) {
1499     G1FinalizeCountDataClosure cl(_g1h,
1500                                   _actual_region_bm,
1501                                   _actual_card_bm);
1502 
1503     _g1h->heap_region_par_iterate(&cl, worker_id, &_hr_claimer);
1504   }
1505 };
1506 
1507 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1508   G1CollectedHeap* _g1;
1509   size_t _freed_bytes;
1510   FreeRegionList* _local_cleanup_list;
1511   uint _old_regions_removed;
1512   uint _humongous_regions_removed;
1513   HRRSCleanupTask* _hrrs_cleanup_task;
1514 
1515 public:
1516   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1517                              FreeRegionList* local_cleanup_list,
1518                              HRRSCleanupTask* hrrs_cleanup_task) :
1519     _g1(g1),
1520     _freed_bytes(0),
1521     _local_cleanup_list(local_cleanup_list),
1522     _old_regions_removed(0),
1523     _humongous_regions_removed(0),
1524     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1525 
1526   size_t freed_bytes() { return _freed_bytes; }
1527   uint old_regions_removed() const { return _old_regions_removed; }
1528   uint humongous_regions_removed() const { return _humongous_regions_removed; }
1529 
1530   bool doHeapRegion(HeapRegion *hr) {
1531     if (hr->is_archive()) {
1532       return false;
1533     }
1534     // Regions are claimed via the HeapRegionClaimer passed to
1535     // heap_region_par_iterate() by the enclosing G1ParNoteEndTask.
1536     _g1->reset_gc_time_stamps(hr);
1537     hr->note_end_of_marking();
1538 
1539     if (hr->used() > 0 && hr->max_live_bytes() == 0 && !hr->is_young()) {
1540       _freed_bytes += hr->used();
1541       hr->set_containing_set(NULL);
1542       if (hr->is_humongous()) {
1543         _humongous_regions_removed++;
1544         _g1->free_humongous_region(hr, _local_cleanup_list, true);
1545       } else {
1546         _old_regions_removed++;
1547         _g1->free_region(hr, _local_cleanup_list, true);
1548       }
1549     } else {
1550       hr->rem_set()->do_cleanup_work(_hrrs_cleanup_task);
1551     }
1552 
1553     return false;
1554   }
1555 };
1556 
1557 class G1ParNoteEndTask: public AbstractGangTask {
1558   friend class G1NoteEndOfConcMarkClosure;
1559 
1560 protected:
1561   G1CollectedHeap* _g1h;
1562   FreeRegionList* _cleanup_list;
1563   HeapRegionClaimer _hrclaimer;
1564 
1565 public:
1566   G1ParNoteEndTask(G1CollectedHeap* g1h, FreeRegionList* cleanup_list, uint n_workers) :
1567       AbstractGangTask("G1 note end"), _g1h(g1h), _cleanup_list(cleanup_list), _hrclaimer(n_workers) {
1568   }
1569 
1570   void work(uint worker_id) {
1571     FreeRegionList local_cleanup_list("Local Cleanup List");
1572     HRRSCleanupTask hrrs_cleanup_task;
1573     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, &local_cleanup_list,
1574                                            &hrrs_cleanup_task);
1575     _g1h->heap_region_par_iterate(&g1_note_end, worker_id, &_hrclaimer);
1576     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1577 
1578     // Now update the lists
1579     _g1h->remove_from_old_sets(g1_note_end.old_regions_removed(), g1_note_end.humongous_regions_removed());
1580     {
1581       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1582       _g1h->decrement_summary_bytes(g1_note_end.freed_bytes());
1583 
1584       // If we did this printing by iterating over the global cleanup
1585       // list at the end of cleanup, we could not guarantee that output
1586       // is generated only for the newly-reclaimed regions (the list
1587       // might not be empty at the beginning of cleanup; we might
1588       // still be working on its previous contents). So we do the
1589       // printing here, before we append the new regions to the global
1590       // cleanup list.
1591 
1592       G1HRPrinter* hr_printer = _g1h->hr_printer();
1593       if (hr_printer->is_active()) {
1594         FreeRegionListIterator iter(&local_cleanup_list);
1595         while (iter.more_available()) {
1596           HeapRegion* hr = iter.get_next();
1597           hr_printer->cleanup(hr);
1598         }
1599       }
1600 
1601       _cleanup_list->add_ordered(&local_cleanup_list);
1602       assert(local_cleanup_list.is_empty(), "post-condition");
1603 
1604       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1605     }
1606   }
1607 };
1608 
1609 void G1ConcurrentMark::cleanup() {
1610   // world is stopped at this checkpoint
1611   assert(SafepointSynchronize::is_at_safepoint(),
1612          "world should be stopped");
1613   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1614 
1615   // If a full collection has happened, we shouldn't do this.
1616   if (has_aborted()) {
1617     g1h->collector_state()->set_mark_in_progress(false); // So bitmap clearing isn't confused
1618     return;
1619   }
1620 
1621   g1h->verifier()->verify_region_sets_optional();
1622 
1623   if (VerifyDuringGC) {
1624     HandleMark hm;  // handle scope
1625     g1h->prepare_for_verify();
1626     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (before)");
1627   }
1628   g1h->verifier()->check_bitmaps("Cleanup Start");
1629 
1630   G1CollectorPolicy* g1p = g1h->g1_policy();
1631   g1p->record_concurrent_mark_cleanup_start();
1632 
1633   double start = os::elapsedTime();
1634 
1635   HeapRegionRemSet::reset_for_cleanup_tasks();
1636 
1637   {
1638     // Finalize the live data.
1639     G1FinalizeLiveDataTask cl(g1h,
1640                               &_region_live_bm,
1641                               &_card_live_bm,
1642                               g1h->workers()->active_workers());
1643     g1h->workers()->run_task(&cl);
1644   }
1645 
1646   if (VerifyDuringGC) {
1647     // Verify that the liveness count data created concurrently matches
1648     // the data created during this safepoint.
1649     ResourceMark rm;
1650     G1VerifyLiveDataTask cl(g1h,
1651                             &_region_live_bm,
1652                             &_card_live_bm,
1653                             g1h->workers()->active_workers());
1654     g1h->workers()->run_task(&cl);
1655 
1656     guarantee(cl.failures() == 0, "Unexpected accounting failures");
1657   }
1658 
1659   g1h->collector_state()->set_mark_in_progress(false);
1660 
1661   double count_end = os::elapsedTime();
1662   double this_final_counting_time = (count_end - start);
1663   _total_counting_time += this_final_counting_time;
1664 
1665   if (log_is_enabled(Trace, gc, liveness)) {
1666     G1PrintRegionLivenessInfoClosure cl("Post-Marking");
1667     _g1h->heap_region_iterate(&cl);
1668   }
1669 
1670   // Install the newly created mark bitmap as "prev".
1671   swapMarkBitMaps();
1672 
1673   g1h->reset_gc_time_stamp();
1674 
1675   uint n_workers = _g1h->workers()->active_workers();
1676 
1677   // Note end of marking in all heap regions.
1678   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
1679   g1h->workers()->run_task(&g1_par_note_end_task);
1680   g1h->check_gc_time_stamps();
1681 
1682   if (!cleanup_list_is_empty()) {
1683     // The cleanup list is not empty, so we'll have to process it
1684     // concurrently. Notify anyone else that might be wanting free
1685     // regions that there will be more free regions coming soon.
1686     g1h->set_free_regions_coming();
1687   }
1688 
1689   // Scrub the remembered sets. This must happen before the record_concurrent_mark_cleanup_end()
1690   // call below, since it affects the metric by which we sort the heap regions.
1691   if (G1ScrubRemSets) {
1692     double rs_scrub_start = os::elapsedTime();
1693     g1h->scrub_rem_set(&_region_live_bm, &_card_live_bm);
1694     _total_rs_scrub_time += (os::elapsedTime() - rs_scrub_start);
1695   }
1696 
1697   // this will also free any regions totally full of garbage objects,
1698   // and sort the regions.
1699   g1h->g1_policy()->record_concurrent_mark_cleanup_end();
1700 
1701   // Statistics.
1702   double end = os::elapsedTime();
1703   _cleanup_times.add((end - start) * 1000.0);
1704 
1705   // Clean up will have freed any regions completely full of garbage.
1706   // Update the soft reference policy with the new heap occupancy.
1707   Universe::update_heap_info_at_gc();
1708 
1709   if (VerifyDuringGC) {
1710     HandleMark hm;  // handle scope
1711     g1h->prepare_for_verify();
1712     Universe::verify(VerifyOption_G1UsePrevMarking, "During GC (after)");
1713   }
1714 
1715   g1h->verifier()->check_bitmaps("Cleanup End");
1716 
1717   g1h->verifier()->verify_region_sets_optional();
1718 
1719   // We need to make this be a "collection" so any collection pause that
1720   // races with it goes around and waits for completeCleanup to finish.
1721   g1h->increment_total_collections();
1722 
1723   // Clean out dead classes and update Metaspace sizes.
1724   if (ClassUnloadingWithConcurrentMark) {
1725     ClassLoaderDataGraph::purge();
1726   }
1727   MetaspaceGC::compute_new_size();
1728 
1729   // We reclaimed old regions so we should calculate the sizes to make
1730   // sure we update the old gen/space data.
1731   g1h->g1mm()->update_sizes();
1732   g1h->allocation_context_stats().update_after_mark();
1733 }
1734 
1735 void G1ConcurrentMark::complete_cleanup() {
1736   if (has_aborted()) return;
1737 
1738   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1739 
1740   _cleanup_list.verify_optional();
1741   FreeRegionList tmp_free_list("Tmp Free List");
1742 
1743   log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1744                                   "cleanup list has %u entries",
1745                                   _cleanup_list.length());
1746 
1747   // No one else should be accessing the _cleanup_list at this point,
1748   // so it is not necessary to take any locks
1749   while (!_cleanup_list.is_empty()) {
1750     HeapRegion* hr = _cleanup_list.remove_region(true /* from_head */);
1751     assert(hr != NULL, "Got NULL from a non-empty list");
1752     hr->par_clear();
1753     tmp_free_list.add_ordered(hr);
1754 
1755     // Instead of adding one region at a time to the secondary_free_list,
1756     // we accumulate them in the local list and move them a few at a
1757     // time. This also cuts down on the number of notify_all() calls
1758     // we do during this process. We'll also append the local list when
1759     // _cleanup_list is empty (which means we just removed the last
1760     // region from the _cleanup_list).
1761     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1762         _cleanup_list.is_empty()) {
1763       log_develop_trace(gc, freelist)("G1ConcRegionFreeing [complete cleanup] : "
1764                                       "appending %u entries to the secondary_free_list, "
1765                                       "cleanup list still has %u entries",
1766                                       tmp_free_list.length(),
1767                                       _cleanup_list.length());
1768 
1769       {
1770         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1771         g1h->secondary_free_list_add(&tmp_free_list);
1772         SecondaryFreeList_lock->notify_all();
1773       }
1774 #ifndef PRODUCT
1775       if (G1StressConcRegionFreeing) {
1776         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1777           os::sleep(Thread::current(), (jlong) 1, false);
1778         }
1779       }
1780 #endif
1781     }
1782   }
1783   assert(tmp_free_list.is_empty(), "post-condition");
1784 }
1785 
1786 // Supporting Object and Oop closures for reference discovery
1787 // and processing during marking
1788 
1789 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1790   HeapWord* addr = (HeapWord*)obj;
1791   return addr != NULL &&
1792          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1793 }
1794 
1795 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
1796 // Uses the G1CMTask associated with a worker thread (for serial reference
1797 // processing the G1CMTask for worker 0 is used) to preserve (mark) and
1798 // trace referent objects.
1799 //
1800 // Using the G1CMTask and embedded local queues avoids having the worker
1801 // threads operating on the global mark stack. This reduces the risk
1802 // of overflowing the stack - which we would rather avoid at this late
1803 // stage. Also, using the tasks' local queues removes the potential
1804 // for the workers to interfere with each other, which could occur if
1805 // they operated on the global stack.
1806 
1807 class G1CMKeepAliveAndDrainClosure: public OopClosure {
1808   G1ConcurrentMark* _cm;
1809   G1CMTask*         _task;
1810   int               _ref_counter_limit;
1811   int               _ref_counter;
1812   bool              _is_serial;
1813  public:
1814   G1CMKeepAliveAndDrainClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1815     _cm(cm), _task(task),
1816     _ref_counter_limit(G1RefProcDrainInterval), _is_serial(is_serial) {
1817     assert(_ref_counter_limit > 0, "sanity");
1818     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1819     _ref_counter = _ref_counter_limit;
1820   }
1821 
1822   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1823   virtual void do_oop(      oop* p) { do_oop_work(p); }
1824 
1825   template <class T> void do_oop_work(T* p) {
1826     if (!_cm->has_overflown()) {
1827       oop obj = oopDesc::load_decode_heap_oop(p);
1828       _task->deal_with_reference(obj);
1829       _ref_counter--;
1830 
1831       if (_ref_counter == 0) {
1832         // We have dealt with _ref_counter_limit references, pushing them
1833         // and objects reachable from them on to the local stack (and
1834         // possibly the global stack). Call G1CMTask::do_marking_step() to
1835         // process these entries.
1836         //
1837         // We call G1CMTask::do_marking_step() in a loop, which we'll exit if
1838         // there's nothing more to do (i.e. we're done with the entries that
1839         // were pushed as a result of the G1CMTask::deal_with_reference() calls
1840         // above) or we overflow.
1841         //
1842         // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1843         // flag while there may still be some work to do. (See the comment at
1844         // the beginning of G1CMTask::do_marking_step() for those conditions -
1845         // one of which is reaching the specified time target.) It is only
1846         // when G1CMTask::do_marking_step() returns without setting the
1847         // has_aborted() flag that the marking step has completed.
1848         do {
1849           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1850           _task->do_marking_step(mark_step_duration_ms,
1851                                  false      /* do_termination */,
1852                                  _is_serial);
1853         } while (_task->has_aborted() && !_cm->has_overflown());
1854         _ref_counter = _ref_counter_limit;
1855       }
1856     }
1857   }
1858 };
1859 
1860 // 'Drain' oop closure used by both serial and parallel reference processing.
1861 // Uses the G1CMTask associated with a given worker thread (for serial
1862 // reference processing the G1CMTask for worker 0 is used). Calls the
1863 // do_marking_step routine, with an unbelievably large timeout value,
1864 // to drain the marking data structures of the remaining entries
1865 // added by the 'keep alive' oop closure above.
1866 
1867 class G1CMDrainMarkingStackClosure: public VoidClosure {
1868   G1ConcurrentMark* _cm;
1869   G1CMTask*         _task;
1870   bool              _is_serial;
1871  public:
1872   G1CMDrainMarkingStackClosure(G1ConcurrentMark* cm, G1CMTask* task, bool is_serial) :
1873     _cm(cm), _task(task), _is_serial(is_serial) {
1874     assert(!_is_serial || _task->worker_id() == 0, "only task 0 for serial code");
1875   }
1876 
1877   void do_void() {
1878     do {
1879       // We call G1CMTask::do_marking_step() to completely drain the local
1880       // and global marking stacks of entries pushed by the 'keep alive'
1881       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
1882       //
1883       // G1CMTask::do_marking_step() is called in a loop, which we'll exit
1884       // if there's nothing more to do (i.e. we've completely drained the
1885       // entries that were pushed as a result of applying the 'keep alive'
1886       // closure to the entries on the discovered ref lists) or we overflow
1887       // the global marking stack.
1888       //
1889       // Note: G1CMTask::do_marking_step() can set the G1CMTask::has_aborted()
1890       // flag while there may still be some work to do. (See the comment at
1891       // the beginning of G1CMTask::do_marking_step() for those conditions -
1892       // one of which is reaching the specified time target.) It is only
1893       // when G1CMTask::do_marking_step() returns without setting the
1894       // has_aborted() flag that the marking step has completed.
1895 
1896       _task->do_marking_step(1000000000.0 /* something very large */,
1897                              true         /* do_termination */,
1898                              _is_serial);
1899     } while (_task->has_aborted() && !_cm->has_overflown());
1900   }
1901 };
1902 
1903 // Implementation of AbstractRefProcTaskExecutor for parallel
1904 // reference processing at the end of G1 concurrent marking
1905 
1906 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
1907 private:
1908   G1CollectedHeap*  _g1h;
1909   G1ConcurrentMark* _cm;
1910   WorkGang*         _workers;
1911   uint              _active_workers;
1912 
1913 public:
1914   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
1915                           G1ConcurrentMark* cm,
1916                           WorkGang* workers,
1917                           uint n_workers) :
1918     _g1h(g1h), _cm(cm),
1919     _workers(workers), _active_workers(n_workers) { }
1920 
1921   // Executes the given task using concurrent marking worker threads.
1922   virtual void execute(ProcessTask& task);
1923   virtual void execute(EnqueueTask& task);
1924 };
1925 
1926 class G1CMRefProcTaskProxy: public AbstractGangTask {
1927   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
1928   ProcessTask&      _proc_task;
1929   G1CollectedHeap*  _g1h;
1930   G1ConcurrentMark* _cm;
1931 
1932 public:
1933   G1CMRefProcTaskProxy(ProcessTask& proc_task,
1934                        G1CollectedHeap* g1h,
1935                        G1ConcurrentMark* cm) :
1936     AbstractGangTask("Process reference objects in parallel"),
1937     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
1938     ReferenceProcessor* rp = _g1h->ref_processor_cm();
1939     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
1940   }
1941 
1942   virtual void work(uint worker_id) {
1943     ResourceMark rm;
1944     HandleMark hm;
1945     G1CMTask* task = _cm->task(worker_id);
1946     G1CMIsAliveClosure g1_is_alive(_g1h);
1947     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
1948     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
1949 
1950     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
1951   }
1952 };
1953 
1954 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
1955   assert(_workers != NULL, "Need parallel worker threads.");
1956   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1957 
1958   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
1959 
1960   // We need to reset the concurrency level before each
1961   // proxy task execution, so that the termination protocol
1962   // and overflow handling in G1CMTask::do_marking_step() knows
1963   // how many workers to wait for.
1964   _cm->set_concurrency(_active_workers);
1965   _workers->run_task(&proc_task_proxy);
1966 }
1967 
1968 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
1969   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
1970   EnqueueTask& _enq_task;
1971 
1972 public:
1973   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
1974     AbstractGangTask("Enqueue reference objects in parallel"),
1975     _enq_task(enq_task) { }
1976 
1977   virtual void work(uint worker_id) {
1978     _enq_task.work(worker_id);
1979   }
1980 };
1981 
1982 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
1983   assert(_workers != NULL, "Need parallel worker threads.");
1984   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
1985 
1986   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
1987 
1988   // Not strictly necessary but...
1989   //
1990   // We need to reset the concurrency level before each
1991   // proxy task execution, so that the termination protocol
1992   // and overflow handling in G1CMTask::do_marking_step() knows
1993   // how many workers to wait for.
1994   _cm->set_concurrency(_active_workers);
1995   _workers->run_task(&enq_task_proxy);
1996 }
1997 
1998 void G1ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
1999   G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
2000 }
2001 
2002 void G1ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2003   if (has_overflown()) {
2004     // Skip processing the discovered references if we have
2005     // overflown the global marking stack. Reference objects
2006     // only get discovered once so it is OK to not
2007     // de-populate the discovered reference lists. We could have,
2008     // but the only benefit would be that, when marking restarts,
2009     // fewer reference objects are discovered.
2010     return;
2011   }
2012 
2013   ResourceMark rm;
2014   HandleMark   hm;
2015 
2016   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2017 
2018   // Is alive closure.
2019   G1CMIsAliveClosure g1_is_alive(g1h);
2020 
2021   // Inner scope to exclude the cleaning of the string and symbol
2022   // tables from the displayed time.
2023   {
2024     GCTraceTime(Debug, gc) trace("Reference Processing", _gc_timer_cm);
2025 
2026     ReferenceProcessor* rp = g1h->ref_processor_cm();
2027 
2028     // See the comment in G1CollectedHeap::ref_processing_init()
2029     // about how reference processing currently works in G1.
2030 
2031     // Set the soft reference policy
2032     rp->setup_policy(clear_all_soft_refs);
2033     assert(_markStack.isEmpty(), "mark stack should be empty");
2034 
2035     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2036     // in serial reference processing. Note these closures are also
2037     // used for serially processing (by the current thread) the
2038     // JNI references during parallel reference processing.
2039     //
2040     // These closures do not need to synchronize with the worker
2041     // threads involved in parallel reference processing as these
2042     // instances are executed serially by the current thread (i.e.
2043     // reference processing is not multi-threaded and is thus
2044     // performed by the current thread instead of a gang worker).
2045     //
2046     // The gang tasks involved in parallel reference processing create
2047     // their own instances of these closures, which do their own
2048     // synchronization among themselves.
2049     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2050     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2051 
2052     // We need at least one active thread. If reference processing
2053     // is not multi-threaded we use the current (VMThread) thread,
2054     // otherwise we use the work gang from the G1CollectedHeap and
2055     // we utilize all the worker threads we can.
2056     bool processing_is_mt = rp->processing_is_mt();
2057     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
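         // Constrain the number of reference processing threads to at most
         // the number of marking tasks (_max_worker_id) and to at least one.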
2058     active_workers = MAX2(MIN2(active_workers, _max_worker_id), 1U);
2059 
2060     // Parallel processing task executor.
2061     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2062                                               g1h->workers(), active_workers);
2063     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2064 
2065     // Set the concurrency level. The phase was already set prior to
2066     // executing the remark task.
2067     set_concurrency(active_workers);
2068 
2069     // Set the degree of MT processing here.  If the discovery was done MT,
2070     // the number of threads involved during discovery could differ from
2071     // the number of active workers.  This is OK as long as the discovered
2072     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2073     rp->set_active_mt_degree(active_workers);
2074 
2075     // Process the weak references.
2076     const ReferenceProcessorStats& stats =
2077         rp->process_discovered_references(&g1_is_alive,
2078                                           &g1_keep_alive,
2079                                           &g1_drain_mark_stack,
2080                                           executor,
2081                                           _gc_timer_cm);
2082     _gc_tracer_cm->report_gc_reference_stats(stats);
2083 
2084     // The do_oop work routines of the keep_alive and drain_marking_stack
2085     // oop closures will set the has_overflown flag if we overflow the
2086     // global marking stack.
2087 
2088     assert(_markStack.overflow() || _markStack.isEmpty(),
2089             "mark stack should be empty (unless it overflowed)");
2090 
2091     if (_markStack.overflow()) {
2092       // This should have been done already when we tried to push an
2093       // entry on to the global mark stack. But let's do it again.
2094       set_has_overflown();
2095     }
2096 
2097     assert(rp->num_q() == active_workers, "Unexpected number of reference processing queues");
2098 
2099     rp->enqueue_discovered_references(executor);
2100 
2101     rp->verify_no_references_recorded();
2102     assert(!rp->discovery_enabled(), "Post condition");
2103   }
2104 
2105   if (has_overflown()) {
2106     // We cannot trust g1_is_alive if the marking stack overflowed.
2107     return;
2108   }
2109 
2110   assert(_markStack.isEmpty(), "Marking should have completed");
2111 
2112   // Unload Klasses, String, Symbols, Code Cache, etc.
2113   {
2114     GCTraceTime(Debug, gc) trace("Unloading", _gc_timer_cm);
2115 
2116     if (ClassUnloadingWithConcurrentMark) {
2117       bool purged_classes;
2118 
2119       {
2120         GCTraceTime(Trace, gc) trace("System Dictionary Unloading", _gc_timer_cm);
2121         purged_classes = SystemDictionary::do_unloading(&g1_is_alive, false /* Defer klass cleaning */);
2122       }
2123 
2124       {
2125         GCTraceTime(Trace, gc) trace("Parallel Unloading", _gc_timer_cm);
2126         weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
2127       }
2128     }
2129 
2130     if (G1StringDedup::is_enabled()) {
2131       GCTraceTime(Trace, gc) trace("String Deduplication Unlink", _gc_timer_cm);
2132       G1StringDedup::unlink(&g1_is_alive);
2133     }
2134   }
2135 }
2136 
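     // After marking has completed, the "next" bitmap holds the up-to-date
     // liveness information and becomes the new "prev" bitmap; the old "prev"
     // bitmap will be cleared and reused as "next" in the following cycle.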
2137 void G1ConcurrentMark::swapMarkBitMaps() {
2138   G1CMBitMapRO* temp = _prevMarkBitMap;
2139   _prevMarkBitMap    = (G1CMBitMapRO*)_nextMarkBitMap;
2140   _nextMarkBitMap    = (G1CMBitMap*)  temp;
2141 }
2142 
2143 BitMap G1ConcurrentMark::allocate_large_bitmap(BitMap::idx_t size_in_bits) {
2144   size_t size_in_words = BitMap::size_in_words(size_in_bits);
2145 
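       // Back the bitmap with mmap'ed virtual memory (tracked as mtGC by NMT)
       // rather than C-heap, as these bitmaps can be large for big heaps.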
2146   BitMap::bm_word_t* map = MmapArrayAllocator<BitMap::bm_word_t, mtGC>::allocate(size_in_words);
2147
2148   return BitMap(map, size_in_bits);
2149 }
2150 
2151 void G1ConcurrentMark::allocate_internal_bitmaps() {
2152   double start_time = os::elapsedTime();
2153 
2154   _region_live_bm = allocate_large_bitmap(_g1h->max_regions());
2155 
2156   guarantee(_g1h->max_capacity() % CardTableModRefBS::card_size == 0,
2157             "Heap capacity must be aligned to card size.");
2158   _card_live_bm = allocate_large_bitmap(_g1h->max_capacity() / CardTableModRefBS::card_size);
2159 
2160   log_debug(gc, marking)("Allocating internal bitmaps took %1.2f seconds.", os::elapsedTime() - start_time);
2161 }
2162 
2163 void G1ConcurrentMark::pretouch_internal_bitmaps() {
2164   double start_time = os::elapsedTime();
2165 
2166   _region_live_bm.pretouch();
2167   _card_live_bm.pretouch();
2168
2169   log_debug(gc, marking)("Pre-touching internal bitmaps took %1.2f seconds.", os::elapsedTime() - start_time);
2170 }
2171 
2172 // Closure for marking entries in SATB buffers.
2173 class G1CMSATBBufferClosure : public SATBBufferClosure {
2174 private:
2175   G1CMTask* _task;
2176   G1CollectedHeap* _g1h;
2177 
2178   // This is very similar to G1CMTask::deal_with_reference, but with
2179   // more relaxed requirements for the argument, so this must be more
2180   // circumspect about treating the argument as an object.
2181   void do_entry(void* entry) const {
2182     _task->increment_refs_reached();
2183     HeapRegion* hr = _g1h->heap_region_containing(entry);
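         // Objects at or above NTAMS are implicitly live for this marking
         // cycle, so only entries below NTAMS need to be marked explicitly.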
2184     if (entry < hr->next_top_at_mark_start()) {
2185       // Until we get here, we don't know whether entry refers to a valid
2186       // object; it could instead have been a stale reference.
2187       oop obj = static_cast<oop>(entry);
2188       assert(obj->is_oop(true /* ignore mark word */),
2189              "Invalid oop in SATB buffer: " PTR_FORMAT, p2i(obj));
2190       _task->make_reference_grey(obj);
2191     }
2192   }
2193 
2194 public:
2195   G1CMSATBBufferClosure(G1CMTask* task, G1CollectedHeap* g1h)
2196     : _task(task), _g1h(g1h) { }
2197 
2198   virtual void do_buffer(void** buffer, size_t size) {
2199     for (size_t i = 0; i < size; ++i) {
2200       do_entry(buffer[i]);
2201     }
2202   }
2203 };
2204 
2205 class G1RemarkThreadsClosure : public ThreadClosure {
2206   G1CMSATBBufferClosure _cm_satb_cl;
2207   G1CMOopClosure _cm_cl;
2208   MarkingCodeBlobClosure _code_cl;
2209   int _thread_parity;
2210 
2211  public:
2212   G1RemarkThreadsClosure(G1CollectedHeap* g1h, G1CMTask* task) :
2213     _cm_satb_cl(task, g1h),
2214     _cm_cl(g1h, g1h->concurrent_mark(), task),
2215     _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
2216     _thread_parity(Threads::thread_claim_parity()) {}
2217 
2218   void do_thread(Thread* thread) {
2219     if (thread->is_Java_thread()) {
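           // claim_oops_do() ensures that each thread is processed by exactly
           // one remark worker for the current claim parity.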
2220       if (thread->claim_oops_do(true, _thread_parity)) {
2221         JavaThread* jt = (JavaThread*)thread;
2222 
2223         // In theory it should not be necessary to explicitly walk the nmethods to find roots for concurrent marking.
2224         // However, oops reachable from nmethods have very complex lifecycles:
2225         // * Alive if on the stack of an executing method
2226         // * Weakly reachable otherwise
2227         // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver, should be
2228         // live by the SATB invariant, but other oops recorded in nmethods may behave differently.
2229         jt->nmethods_do(&_code_cl);
2230 
2231         jt->satb_mark_queue().apply_closure_and_empty(&_cm_satb_cl);
2232       }
2233     } else if (thread->is_VM_thread()) {
2234       if (thread->claim_oops_do(true, _thread_parity)) {
2235         JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_satb_cl);
2236       }
2237     }
2238   }
2239 };
2240 
2241 class G1CMRemarkTask: public AbstractGangTask {
2242 private:
2243   G1ConcurrentMark* _cm;
2244 public:
2245   void work(uint worker_id) {
2246     // Since all available tasks are actually started, we should
2247     // only proceed if we're supposed to be active.
2248     if (worker_id < _cm->active_tasks()) {
2249       G1CMTask* task = _cm->task(worker_id);
2250       task->record_start_time();
2251       {
2252         ResourceMark rm;
2253         HandleMark hm;
2254 
2255         G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task);
2256         Threads::threads_do(&threads_f);
2257       }
2258 
2259       do {
2260         task->do_marking_step(1000000000.0 /* something very large */,
2261                               true         /* do_termination       */,
2262                               false        /* is_serial            */);
2263       } while (task->has_aborted() && !_cm->has_overflown());
2264       // If we overflow, then we do not want to restart. We instead
2265       // want to abort remark and do concurrent marking again.
2266       task->record_end_time();
2267     }
2268   }
2269 
2270   G1CMRemarkTask(G1ConcurrentMark* cm, uint active_workers) :
2271     AbstractGangTask("Par Remark"), _cm(cm) {
2272     _cm->terminator()->reset_for_reuse(active_workers);
2273   }
2274 };
2275 
2276 void G1ConcurrentMark::checkpointRootsFinalWork() {
2277   ResourceMark rm;
2278   HandleMark   hm;
2279   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2280 
2281   GCTraceTime(Debug, gc) trace("Finalize Marking", _gc_timer_cm);
2282 
2283   g1h->ensure_parsability(false);
2284 
2285   // this is remark, so we'll use up all active threads
2286   uint active_workers = g1h->workers()->active_workers();
2287   set_concurrency_and_phase(active_workers, false /* concurrent */);
2288   // Leave _parallel_marking_threads at its
2289   // value originally calculated in the G1ConcurrentMark
2290   // constructor and pass values of the active workers
2291   // through the gang in the task.
2292 
2293   {
2294     StrongRootsScope srs(active_workers);
2295 
2296     G1CMRemarkTask remarkTask(this, active_workers);
2297     // We will start all available threads, even if we decide that the
2298     // active_workers will be fewer. The extra ones will just bail out
2299     // immediately.
2300     g1h->workers()->run_task(&remarkTask);
2301   }
2302 
2303   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2304   guarantee(has_overflown() ||
2305             satb_mq_set.completed_buffers_num() == 0,
2306             "Invariant: has_overflown = %s, num buffers = " SIZE_FORMAT,
2307             BOOL_TO_STR(has_overflown()),
2308             satb_mq_set.completed_buffers_num());
2309 
2310   print_stats();
2311 }
2312 
2313 void G1ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2314   // Note we are overriding the read-only view of the prev map here, via
2315   // the cast.
2316   ((G1CMBitMap*)_prevMarkBitMap)->clear_range(mr);
2317 }
2318 
2319 HeapRegion*
2320 G1ConcurrentMark::claim_region(uint worker_id) {
2321   // "checkpoint" the finger
2322   HeapWord* finger = _finger;
2323 
2324   // _heap_end will not change underneath our feet; it only changes at
2325   // yield points.
2326   while (finger < _heap_end) {
2327     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2328 
2329     HeapRegion* curr_region = _g1h->heap_region_containing(finger);
2330 
2331     // heap_region_containing() above may return NULL as we always scan and
2332     // claim regions up to the end of the heap. If so, just jump to the next region.
2333     HeapWord* end = curr_region != NULL ? curr_region->end() : finger + HeapRegion::GrainWords;
2334 
2335     // Is the gap between reading the finger and doing the CAS too long?
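         // Try to claim [finger, end) by atomically advancing the global finger
         // from 'finger' to 'end'. Only the worker whose CAS succeeds owns the
         // claimed range; everyone else re-reads the finger and retries.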
2336     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2337     if (res == finger && curr_region != NULL) {
2338       // we succeeded
2339       HeapWord*   bottom        = curr_region->bottom();
2340       HeapWord*   limit         = curr_region->next_top_at_mark_start();
2341 
2342       // Notice that _finger == end cannot be guaranteed here since
2343       // someone else might have moved the finger even further.
2344       assert(_finger >= end, "the finger should have moved forward");
2345 
2346       if (limit > bottom) {
2347         return curr_region;
2348       } else {
2349         assert(limit == bottom,
2350                "the region limit should be at bottom");
2351         // we return NULL and the caller should try calling
2352         // claim_region() again.
2353         return NULL;
2354       }
2355     } else {
2356       assert(_finger > finger, "the finger should have moved forward");
2357       // read it again
2358       finger = _finger;
2359     }
2360   }
2361 
2362   return NULL;
2363 }
2364 
2365 #ifndef PRODUCT
2366 class VerifyNoCSetOops VALUE_OBJ_CLASS_SPEC {
2367 private:
2368   G1CollectedHeap* _g1h;
2369   const char* _phase;
2370   int _info;
2371 
2372 public:
2373   VerifyNoCSetOops(const char* phase, int info = -1) :
2374     _g1h(G1CollectedHeap::heap()),
2375     _phase(phase),
2376     _info(info)
2377   { }
2378 
2379   void operator()(oop obj) const {
2380     guarantee(obj->is_oop(),
2381               "Non-oop " PTR_FORMAT ", phase: %s, info: %d",
2382               p2i(obj), _phase, _info);
2383     guarantee(!_g1h->obj_in_cs(obj),
2384               "obj: " PTR_FORMAT " in CSet, phase: %s, info: %d",
2385               p2i(obj), _phase, _info);
2386   }
2387 };
2388 
2389 void G1ConcurrentMark::verify_no_cset_oops() {
2390   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2391   if (!G1CollectedHeap::heap()->collector_state()->mark_in_progress()) {
2392     return;
2393   }
2394 
2395   // Verify entries on the global mark stack
2396   _markStack.iterate(VerifyNoCSetOops("Stack"));
2397 
2398   // Verify entries on the task queues
2399   for (uint i = 0; i < _max_worker_id; ++i) {
2400     G1CMTaskQueue* queue = _task_queues->queue(i);
2401     queue->iterate(VerifyNoCSetOops("Queue", i));
2402   }
2403 
2404   // Verify the global finger
2405   HeapWord* global_finger = finger();
2406   if (global_finger != NULL && global_finger < _heap_end) {
2407     // Since we always iterate over all regions, we might get a NULL HeapRegion
2408     // here.
2409     HeapRegion* global_hr = _g1h->heap_region_containing(global_finger);
2410     guarantee(global_hr == NULL || global_finger == global_hr->bottom(),
2411               "global finger: " PTR_FORMAT " region: " HR_FORMAT,
2412               p2i(global_finger), HR_FORMAT_PARAMS(global_hr));
2413   }
2414 
2415   // Verify the task fingers
2416   assert(parallel_marking_threads() <= _max_worker_id, "sanity");
2417   for (uint i = 0; i < parallel_marking_threads(); ++i) {
2418     G1CMTask* task = _tasks[i];
2419     HeapWord* task_finger = task->finger();
2420     if (task_finger != NULL && task_finger < _heap_end) {
2421       // See above note on the global finger verification.
2422       HeapRegion* task_hr = _g1h->heap_region_containing(task_finger);
2423       guarantee(task_hr == NULL || task_finger == task_hr->bottom() ||
2424                 !task_hr->in_collection_set(),
2425                 "task finger: " PTR_FORMAT " region: " HR_FORMAT,
2426                 p2i(task_finger), HR_FORMAT_PARAMS(task_hr));
2427     }
2428   }
2429 }
2430 #endif // PRODUCT
2431 
2432 class G1CreateLiveDataTask: public AbstractGangTask {
2433   // Aggregate the counting data that was constructed concurrently
2434   // with marking.
2435   class G1CreateLiveDataHRClosure: public G1LiveDataClosureBase {
2436   public:
2437     G1CreateLiveDataHRClosure(G1CollectedHeap* g1h,
2438                               BitMap* cm_card_bm)
2439     : G1LiveDataClosureBase(g1h, NULL, cm_card_bm) { }
2440 
2441     bool doHeapRegion(HeapRegion* hr) {
2442       size_t temp;
2443       return mark_marked_during_marking(hr, true, &temp);
2444     }
2445   };
2446 
2447   G1CollectedHeap* _g1h;
2448   G1ConcurrentMark* _cm;
2449   BitMap* _cm_card_bm;
2450   HeapRegionClaimer _hrclaimer;
2451 
2452 public:
2453   G1CreateLiveDataTask(G1CollectedHeap* g1h,
2454                        BitMap* cm_card_bm,
2455                        uint n_workers) :
2456       AbstractGangTask("Create Live Data"),
2457       _g1h(g1h),
2458       _cm_card_bm(cm_card_bm),
2459       _hrclaimer(n_workers) {
2460   }
2461 
2462   void work(uint worker_id) {
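         // Join the suspendible thread set so that this concurrent work yields
         // to safepoint requests while iterating over the regions.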
2463     SuspendibleThreadSetJoiner sts_join;
2464 
2465     G1CreateLiveDataHRClosure cl(_g1h, _cm_card_bm);
2466     _g1h->heap_region_par_iterate(&cl, worker_id, &_hrclaimer);
2467   }
2468 };
2469 
2470 
2471 void G1ConcurrentMark::create_live_data() {
2472   uint n_workers = _parallel_workers->active_workers();
2473 
2474   G1CreateLiveDataTask cl(_g1h,
2475                           &_card_live_bm,
2476                           n_workers);
2477   _parallel_workers->run_task(&cl);
2478 }
2479 
2480 class G1ClearAllLiveDataTask : public AbstractGangTask {
2481   BitMap* _bitmap;
2482   size_t _num_tasks;
2483   size_t _cur_task;
2484 public:
2485   G1ClearAllLiveDataTask(BitMap* bitmap, size_t num_tasks) :
2486     AbstractGangTask("Clear All Live Data"),
2487     _bitmap(bitmap),
2488     _num_tasks(num_tasks),
2489     _cur_task(0) {
2490   }
2491 
2492   virtual void work(uint worker_id) {
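         // Workers repeatedly claim chunks to clear via the shared atomic
         // counter until all _num_tasks chunks have been processed.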
2493     while (true) {
2494       size_t to_process = Atomic::add(1, &_cur_task) - 1;
2495       if (to_process >= _num_tasks) {
2496         break;
2497       }
2498 
2499       BitMap::idx_t start = M * BitsPerByte * to_process;
2500       BitMap::idx_t end = MIN2(start + M * BitsPerByte, _bitmap->size());
2501       _bitmap->clear_range(start, end);
2502     }
2503   }
2504 };
2505 
2506 void G1ConcurrentMark::clear_all_live_data(WorkGang* workers) {
2507   double start_time = os::elapsedTime();
2508
2509   guarantee(Universe::is_fully_initialized(), "Should not call this during initialization.");
2510
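       // Split the card live bitmap into chunks of M bytes (M * BitsPerByte
       // bits) each, matching the chunk size used by G1ClearAllLiveDataTask,
       // so that clearing can be spread across the workers.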
2511   size_t const num_chunks = align_size_up(_card_live_bm.size_in_words() * HeapWordSize, M) / M;
2512 
2513   G1ClearAllLiveDataTask cl(&_card_live_bm, num_chunks);
2514   workers->run_task(&cl);
2515 
2516   // The region live bitmap is always very small, even for huge heaps. Clear
2517   // directly.
2518   _region_live_bm.clear();
2519
2521   log_debug(gc, marking)("Clear Live Data took %.3fms", (os::elapsedTime() - start_time) * 1000.0);
2522 }
2523 
2524 void G1ConcurrentMark::verify_all_live_data() {
2525   assert(_card_live_bm.count_one_bits() == 0, "Master card bitmap not clear");
2526   assert(_region_live_bm.count_one_bits() == 0, "Master region bitmap not clear");
2527 }
2528 
2529 void G1ConcurrentMark::print_stats() {
2530   if (!log_is_enabled(Debug, gc, stats)) {
2531     return;
2532   }
2533   log_debug(gc, stats)("---------------------------------------------------------------------");
2534   for (size_t i = 0; i < _active_tasks; ++i) {
2535     _tasks[i]->print_stats();
2536     log_debug(gc, stats)("---------------------------------------------------------------------");
2537   }
2538 }
2539 
2540 void G1ConcurrentMark::abort() {
2541   if (!cmThread()->during_cycle() || _has_aborted) {
2542     // We haven't started a concurrent cycle or we have already aborted it. No need to do anything.
2543     return;
2544   }
2545 
2546   // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
2547   // concurrent bitmap clearing.
2548   clear_bitmap(_nextMarkBitMap, _g1h->workers(), false);
2549 
2550   // Note we cannot clear the previous marking bitmap here
2551   // since VerifyDuringGC verifies the objects marked during
2552   // a full GC against the previous bitmap.
2553 
2554   clear_all_live_data(_g1h->workers());
2555   DEBUG_ONLY(verify_all_live_data());
2556   // Empty mark stack
2557   reset_marking_state();
2558   for (uint i = 0; i < _max_worker_id; ++i) {
2559     _tasks[i]->clear_region_fields();
2560   }
2561   _first_overflow_barrier_sync.abort();
2562   _second_overflow_barrier_sync.abort();
2563   _has_aborted = true;
2564 
2565   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2566   satb_mq_set.abandon_partial_marking();
2567   // This can be called either during or outside marking, we'll read
2568   // the expected_active value from the SATB queue set.
2569   satb_mq_set.set_active_all_threads(
2570                                  false, /* new active value */
2571                                  satb_mq_set.is_active() /* expected_active */);
2572 }
2573 
2574 static void print_ms_time_info(const char* prefix, const char* name,
2575                                NumberSeq& ns) {
2576   log_trace(gc, marking)("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2577                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2578   if (ns.num() > 0) {
2579     log_trace(gc, marking)("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2580                            prefix, ns.sd(), ns.maximum());
2581   }
2582 }
2583 
2584 void G1ConcurrentMark::print_summary_info() {
2585   LogHandle(gc, marking) log;
2586   if (!log.is_trace()) {
2587     return;
2588   }
2589 
2590   log.trace(" Concurrent marking:");
2591   print_ms_time_info("  ", "init marks", _init_times);
2592   print_ms_time_info("  ", "remarks", _remark_times);
2593   {
2594     print_ms_time_info("     ", "final marks", _remark_mark_times);
2595     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2596 
2597   }
2598   print_ms_time_info("  ", "cleanups", _cleanup_times);
2599   log.trace("    Finalize live data total time = %8.2f s (avg = %8.2f ms).",
2600             _total_counting_time, (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2601   if (G1ScrubRemSets) {
2602     log.trace("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2603               _total_rs_scrub_time, (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 / (double)_cleanup_times.num() : 0.0));
2604   }
2605   log.trace("  Total stop_world time = %8.2f s.",
2606             (_init_times.sum() + _remark_times.sum() + _cleanup_times.sum())/1000.0);
2607   log.trace("  Total concurrent time = %8.2f s (%8.2f s marking).",
2608             cmThread()->vtime_accum(), cmThread()->vtime_mark_accum());
2609 }
2610 
2611 void G1ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2612   _parallel_workers->print_worker_threads_on(st);
2613 }
2614 
2615 void G1ConcurrentMark::print_on_error(outputStream* st) const {
2616   st->print_cr("Marking Bits (Prev, Next): (CMBitMap*) " PTR_FORMAT ", (CMBitMap*) " PTR_FORMAT,
2617       p2i(_prevMarkBitMap), p2i(_nextMarkBitMap));
2618   _prevMarkBitMap->print_on_error(st, " Prev Bits: ");
2619   _nextMarkBitMap->print_on_error(st, " Next Bits: ");
2620 }
2621 
2622 // We take a break if someone is trying to stop the world.
2623 bool G1ConcurrentMark::do_yield_check(uint worker_id) {
2624   if (SuspendibleThreadSet::should_yield()) {
2625     SuspendibleThreadSet::yield();
2626     return true;
2627   } else {
2628     return false;
2629   }
2630 }
2631 
2632 // Closure for iteration over bitmaps
2633 class G1CMBitMapClosure : public BitMapClosure {
2634 private:
2635   // the bitmap that is being iterated over
2636   G1CMBitMap*                 _nextMarkBitMap;
2637   G1ConcurrentMark*           _cm;
2638   G1CMTask*                   _task;
2639 
2640 public:
2641   G1CMBitMapClosure(G1CMTask *task, G1ConcurrentMark* cm, G1CMBitMap* nextMarkBitMap) :
2642     _nextMarkBitMap(nextMarkBitMap), _cm(cm), _task(task) { }
2643 
2644   bool do_bit(size_t offset) {
2645     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2646     assert(_nextMarkBitMap->isMarked(addr), "invariant");
2647     assert(addr < _cm->finger(), "invariant");
2648     assert(addr >= _task->finger(), "invariant");
2649 
2650     // We move that task's local finger along.
2651     _task->move_finger_to(addr);
2652 
2653     _task->scan_object(oop(addr));
2654     // we only partially drain the local queue and global stack
2655     _task->drain_local_queue(true);
2656     _task->drain_global_stack(true);
2657 
2658     // if the has_aborted flag has been raised, we need to bail out of
2659     // the iteration
2660     return !_task->has_aborted();
2661   }
2662 };
2663 
2664 static ReferenceProcessor* get_cm_oop_closure_ref_processor(G1CollectedHeap* g1h) {
2665   ReferenceProcessor* result = g1h->ref_processor_cm();
2666   assert(result != NULL, "CM reference processor should not be NULL");
2667   return result;
2668 }
2669 
2670 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
2671                                G1ConcurrentMark* cm,
2672                                G1CMTask* task)
2673   : MetadataAwareOopClosure(get_cm_oop_closure_ref_processor(g1h)),
2674     _g1h(g1h), _cm(cm), _task(task)
2675 { }
2676 
2677 void G1CMTask::setup_for_region(HeapRegion* hr) {
2678   assert(hr != NULL,
2679         "claim_region() should have filtered out NULL regions");
2680   _curr_region  = hr;
2681   _finger       = hr->bottom();
2682   update_region_limit();
2683 }
2684 
2685 void G1CMTask::update_region_limit() {
2686   HeapRegion* hr            = _curr_region;
2687   HeapWord* bottom          = hr->bottom();
2688   HeapWord* limit           = hr->next_top_at_mark_start();
2689 
2690   if (limit == bottom) {
2691     // The region was collected underneath our feet.
2692     // We set the finger to bottom to ensure that the bitmap
2693     // iteration that will follow this will not do anything.
2694     // (this is not a condition that holds when we set the region up,
2695     // as the region is not supposed to be empty in the first place)
2696     _finger = bottom;
2697   } else if (limit >= _region_limit) {
2698     assert(limit >= _finger, "peace of mind");
2699   } else {
2700     assert(limit < _region_limit, "only way to get here");
2701     // This can happen under some pretty unusual circumstances.  An
2702     // evacuation pause empties the region underneath our feet (NTAMS
2703     // at bottom). We then do some allocation in the region (NTAMS
2704     // stays at bottom), followed by the region being used as a GC
2705     // alloc region (NTAMS will move to top() and the objects
2706     // originally below it will be grayed). All objects now marked in
2707     // the region are explicitly grayed, if below the global finger,
2708     // and we do not need in fact to scan anything else. So, we simply
2709     // set _finger to be limit to ensure that the bitmap iteration
2710     // doesn't do anything.
2711     _finger = limit;
2712   }
2713 
2714   _region_limit = limit;
2715 }
2716 
2717 void G1CMTask::giveup_current_region() {
2718   assert(_curr_region != NULL, "invariant");
2719   clear_region_fields();
2720 }
2721 
2722 void G1CMTask::clear_region_fields() {
2723   // Values for these three fields that indicate that we're not
2724   // holding on to a region.
2725   _curr_region   = NULL;
2726   _finger        = NULL;
2727   _region_limit  = NULL;
2728 }
2729 
2730 void G1CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
2731   if (cm_oop_closure == NULL) {
2732     assert(_cm_oop_closure != NULL, "invariant");
2733   } else {
2734     assert(_cm_oop_closure == NULL, "invariant");
2735   }
2736   _cm_oop_closure = cm_oop_closure;
2737 }
2738 
2739 void G1CMTask::reset(G1CMBitMap* nextMarkBitMap) {
2740   guarantee(nextMarkBitMap != NULL, "invariant");
2741   _nextMarkBitMap                = nextMarkBitMap;
2742   clear_region_fields();
2743 
2744   _calls                         = 0;
2745   _elapsed_time_ms               = 0.0;
2746   _termination_time_ms           = 0.0;
2747   _termination_start_time_ms     = 0.0;
2748 }
2749 
2750 bool G1CMTask::should_exit_termination() {
2751   regular_clock_call();
2752   // This is called when we are in the termination protocol. We should
2753   // quit if, for some reason, this task wants to abort or the global
2754   // stack is not empty (this means that we can get work from it).
2755   return !_cm->mark_stack_empty() || has_aborted();
2756 }
2757 
2758 void G1CMTask::reached_limit() {
2759   assert(_words_scanned >= _words_scanned_limit ||
2760          _refs_reached >= _refs_reached_limit ,
2761          "shouldn't have been called otherwise");
2762   regular_clock_call();
2763 }
2764 
2765 void G1CMTask::regular_clock_call() {
2766   if (has_aborted()) return;
2767 
2768   // First, we need to recalculate the words scanned and refs reached
2769   // limits for the next clock call.
2770   recalculate_limits();
2771 
2772   // During the regular clock call we do the following
2773 
2774   // (1) If an overflow has been flagged, then we abort.
2775   if (_cm->has_overflown()) {
2776     set_has_aborted();
2777     return;
2778   }
2779 
2780   // If we are not concurrent (i.e. we're doing remark) we don't need
2781   // to check anything else. The other steps are only needed during
2782   // the concurrent marking phase.
2783   if (!concurrent()) return;
2784 
2785   // (2) If marking has been aborted for Full GC, then we also abort.
2786   if (_cm->has_aborted()) {
2787     set_has_aborted();
2788     return;
2789   }
2790 
2791   double curr_time_ms = os::elapsedVTime() * 1000.0;
2792 
2793   // (3) We check whether we should yield. If we have to, then we abort.
2794   if (SuspendibleThreadSet::should_yield()) {
2795     // We should yield. To do this we abort the task. The caller is
2796     // responsible for yielding.
2797     set_has_aborted();
2798     return;
2799   }
2800 
2801   // (4) We check whether we've reached our time quota. If we have,
2802   // then we abort.
2803   double elapsed_time_ms = curr_time_ms - _start_time_ms;
2804   if (elapsed_time_ms > _time_target_ms) {
2805     set_has_aborted();
2806     _has_timed_out = true;
2807     return;
2808   }
2809 
2810   // (5) Finally, we check whether there are enough completed SATB
2811   // buffers available for processing. If there are, we abort.
2812   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2813   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
2814     // We do need to process SATB buffers, so we'll abort and restart
2815     // the marking task to do so.
2816     set_has_aborted();
2817     return;
2818   }
2819 }
2820 
2821 void G1CMTask::recalculate_limits() {
2822   _real_words_scanned_limit = _words_scanned + words_scanned_period;
2823   _words_scanned_limit      = _real_words_scanned_limit;
2824 
2825   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
2826   _refs_reached_limit       = _real_refs_reached_limit;
2827 }
2828 
2829 void G1CMTask::decrease_limits() {
2830   // This is called when we believe that we're going to do an infrequent
2831   // operation which will increase the per-byte scanned cost (i.e. move
2832   // entries to/from the global stack). It basically lowers the scanning
2833   // limits so that the clock is called earlier.
2834 
2835   _words_scanned_limit = _real_words_scanned_limit -
2836     3 * words_scanned_period / 4;
2837   _refs_reached_limit  = _real_refs_reached_limit -
2838     3 * refs_reached_period / 4;
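
       // Illustration only (an assumed reading of the arithmetic above): the
       // adjusted limits sit a quarter of a period past the point at which
       // recalculate_limits() was last called, so with an illustrative
       // words_scanned_period of 12*1024 words the clock would be due after
       // at most 3*1024 more words of scanning.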
2839 }
2840 
2841 void G1CMTask::move_entries_to_global_stack() {
2842   // local array where we'll store the entries that will be popped
2843   // from the local queue
2844   oop buffer[global_stack_transfer_size];
2845 
2846   int n = 0;
2847   oop obj;
2848   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
2849     buffer[n] = obj;
2850     ++n;
2851   }
2852 
2853   if (n > 0) {
2854     // we popped at least one entry from the local queue
2855 
2856     if (!_cm->mark_stack_push(buffer, n)) {
2857       set_has_aborted();
2858     }
2859   }
2860 
2861   // this operation was quite expensive, so decrease the limits
2862   decrease_limits();
2863 }
2864 
2865 void G1CMTask::get_entries_from_global_stack() {
2866   // local array where we'll store the entries that will be popped
2867   // from the global stack.
2868   oop buffer[global_stack_transfer_size];
2869   int n;
2870   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
2871   assert(n <= global_stack_transfer_size,
2872          "we should not pop more than the given limit");
2873   if (n > 0) {
2874     // yes, we did actually pop at least one entry
2875     for (int i = 0; i < n; ++i) {
2876       bool success = _task_queue->push(buffer[i]);
2877       // We only call this when the local queue is empty or under a
2878       // given target limit. So, we do not expect this push to fail.
2879       assert(success, "invariant");
2880     }
2881   }
2882 
2883   // this operation was quite expensive, so decrease the limits
2884   decrease_limits();
2885 }
2886 
2887 void G1CMTask::drain_local_queue(bool partially) {
2888   if (has_aborted()) return;
2889 
2890   // Decide what the target size is, depending on whether we're going to
2891   // drain it partially (so that other tasks can steal if they run out
2892   // of things to do) or totally (at the very end).
2893   size_t target_size;
2894   if (partially) {
2895     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
2896   } else {
2897     target_size = 0;
2898   }
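
       // Illustration only (assuming the default GCDrainStackTargetSize of
       // 64): for a partial drain with a queue capacity well above 3 * 64,
       // the loop below stops popping once the local queue is down to the
       // 64-entry target, deliberately leaving entries available for
       // stealing.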
2899 
2900   if (_task_queue->size() > target_size) {
2901     oop obj;
2902     bool ret = _task_queue->pop_local(obj);
2903     while (ret) {
2904       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant");
2905       assert(!_g1h->is_on_master_free_list(
2906                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
2907 
2908       scan_object(obj);
2909 
2910       if (_task_queue->size() <= target_size || has_aborted()) {
2911         ret = false;
2912       } else {
2913         ret = _task_queue->pop_local(obj);
2914       }
2915     }
2916   }
2917 }
2918 
2919 void G1CMTask::drain_global_stack(bool partially) {
2920   if (has_aborted()) return;
2921 
2922   // We have a policy to drain the local queue before we attempt to
2923   // drain the global stack.
2924   assert(partially || _task_queue->size() == 0, "invariant");
2925 
2926   // Decide what the target size is, depending on whether we're going to
2927   // drain it partially (so that other tasks can steal if they run out
2928   // of things to do) or totally (at the very end).  Notice that,
2929   // because we move entries from the global stack in chunks or
2930   // because another task might be doing the same, we might in fact
2931   // drop below the target. But, this is not a problem.
2932   size_t target_size;
2933   if (partially) {
2934     target_size = _cm->partial_mark_stack_size_target();
2935   } else {
2936     target_size = 0;
2937   }
2938 
2939   if (_cm->mark_stack_size() > target_size) {
2940     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
2941       get_entries_from_global_stack();
2942       drain_local_queue(partially);
2943     }
2944   }
2945 }
2946 
2947 // The SATB queue code makes several assumptions about whether to call the
2948 // par or non-par versions of the methods. This is why some of the code is
2949 // replicated. We should really get rid of the single-threaded version
2950 // of the code to simplify things.
2951 void G1CMTask::drain_satb_buffers() {
2952   if (has_aborted()) return;
2953 
2954   // We set this so that the regular clock knows that we're in the
2955   // middle of draining buffers and doesn't set the abort flag when it
2956   // notices that SATB buffers are available for draining. It'd be
2957   // very counterproductive if it did that. :-)
2958   _draining_satb_buffers = true;
2959 
2960   G1CMSATBBufferClosure satb_cl(this, _g1h);
2961   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2962 
2963   // This keeps claiming and applying the closure to completed buffers
2964   // until we run out of buffers or we need to abort.
2965   while (!has_aborted() &&
2966          satb_mq_set.apply_closure_to_completed_buffer(&satb_cl)) {
2967     regular_clock_call();
2968   }
2969 
2970   _draining_satb_buffers = false;
2971 
2972   assert(has_aborted() ||
2973          concurrent() ||
2974          satb_mq_set.completed_buffers_num() == 0, "invariant");
2975 
2976   // Again, this was a potentially expensive operation, so decrease the
2977   // limits to get the regular clock call earlier.
2978   decrease_limits();
2979 }
2980 
2981 void G1CMTask::print_stats() {
2982   log_debug(gc, stats)("Marking Stats, task = %u, calls = %d",
2983                        _worker_id, _calls);
2984   log_debug(gc, stats)("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
2985                        _elapsed_time_ms, _termination_time_ms);
2986   log_debug(gc, stats)("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
2987                        _step_times_ms.num(), _step_times_ms.avg(),
2988                        _step_times_ms.sd());
2989   log_debug(gc, stats)("                    max = %1.2lfms, total = %1.2lfms",
2990                        _step_times_ms.maximum(), _step_times_ms.sum());
2991 }
2992 
2993 bool G1ConcurrentMark::try_stealing(uint worker_id, int* hash_seed, oop& obj) {
2994   return _task_queues->steal(worker_id, hash_seed, obj);
2995 }
2996 
2997 /*****************************************************************************
2998 
2999     The do_marking_step(time_target_ms, ...) method is the building
3000     block of the parallel marking framework. It can be called in parallel
3001     with other invocations of do_marking_step() on different tasks
3002     (but only one per task, obviously) and concurrently with the
3003     mutator threads, or during remark, hence it eliminates the need
3004     for two versions of the code. When called during remark, it will
3005     pick up from where the task left off during the concurrent marking
3006     phase. Interestingly, tasks are also claimable during evacuation
3007     pauses, since do_marking_step() ensures that it aborts before
3008     it needs to yield.
3009 
3010     The data structures that it uses to do marking work are the
3011     following:
3012 
3013       (1) Marking Bitmap. If there are gray objects that appear only
3014       on the bitmap (this happens either when dealing with an overflow
3015       or when the initial marking phase has simply marked the roots
3016       and didn't push them on the stack), then tasks claim heap
3017       regions whose bitmap they then scan to find gray objects. A
3018       global finger indicates where the end of the last claimed region
3019       is. A local finger indicates how far into the region a task has
3020       scanned. The two fingers are used to determine how to gray an
3021       object (i.e. whether simply marking it is OK, as it will be
3022       visited by a task in the future, or whether it also needs to be
3023       pushed on a stack).
3024 
3025       (2) Local Queue. The task's local queue, which the task can access
3026       reasonably efficiently. Other tasks can steal from
3027       it when they run out of work. Throughout the marking phase, a
3028       task attempts to keep its local queue short but not totally
3029       empty, so that entries are available for stealing by other
3030       tasks. Only when there is no more work will a task totally
3031       drain its local queue.
3032 
3033       (3) Global Mark Stack. This handles local queue overflow. During
3034       marking only sets of entries are moved between it and the local
3035       queues, as access to it requires a mutex and finer-grained
3036       interaction with it might cause contention. If it
3037       overflows, then the marking phase should restart and iterate
3038       over the bitmap to identify gray objects. Throughout the marking
3039       phase, tasks attempt to keep the global mark stack at a small
3040       length but not totally empty, so that entries are available for
3041       popping by other tasks. Only when there is no more work will
3042       tasks totally drain the global mark stack.
3043 
3044       (4) SATB Buffer Queue. This is where completed SATB buffers are
3045       made available. Buffers are regularly removed from this queue
3046       and scanned for roots, so that the queue doesn't get too
3047       long. During remark, all completed buffers are processed, as
3048       well as the filled-in parts of any uncompleted buffers.
3049 
3050     The do_marking_step() method tries to abort when the time target
3051     has been reached. There are a few other cases when the
3052     do_marking_step() method also aborts:
3053 
3054       (1) When the marking phase has been aborted (after a Full GC).
3055 
3056       (2) When a global overflow (on the global stack) has been
3057       triggered. Before the task aborts, it will actually sync up with
3058       the other tasks to ensure that all the marking data structures
3059       (local queues, stacks, fingers etc.)  are re-initialized so that
3060       when do_marking_step() completes, the marking phase can
3061       immediately restart.
3062 
3063       (3) When enough completed SATB buffers are available. The
3064       do_marking_step() method only tries to drain SATB buffers right
3065       at the beginning. So, if enough buffers are available, the
3066       marking step aborts and the SATB buffers are processed at
3067       the beginning of the next invocation.
3068 
3069       (4) To yield. When we have to yield, we abort and yield
3070       right at the end of do_marking_step(). This saves us a lot
3071       of hassle as, by yielding, we might allow a Full GC. If this
3072       happens, objects will be compacted underneath our feet, the
3073       heap might shrink, etc. We avoid checking for this by just
3074       aborting and doing the yield right at the end.
3075 
3076     From the above it follows that the do_marking_step() method should
3077     be called in a loop (or, otherwise, regularly) until it completes.
3078 
3079     If a marking step completes without its has_aborted() flag being
3080     true, it means it has completed the current marking phase (and
3081     also all other marking tasks have done so and have all synced up).
3082 
3083     A method called regular_clock_call() is invoked "regularly" (in
3084     sub-ms intervals) throughout marking. It is this clock method that
3085     checks all the abort conditions which were mentioned above and
3086     decides when the task should abort. A work-based scheme is used to
3087     trigger this clock method: when the number of object words the
3088     marking phase has scanned or the number of references the marking
3089     phase has visited reaches a given limit. Additional invocations of
3090     the clock method have been planted in a few other strategic places
3091     too. The initial reason for the clock method was to avoid calling
3092     vtime too regularly, as it is quite expensive. So, once it was in
3093     place, it was natural to piggy-back all the other conditions on it
3094     too and not constantly check them throughout the code.
3095 
3096     If do_termination is true then do_marking_step will enter its
3097     termination protocol.
3098 
3099     The value of is_serial must be true when do_marking_step is being
3100     called serially (i.e. by the VMThread) and do_marking_step should
3101     skip any synchronization in the termination and overflow code.
3102     Examples include the serial remark code and the serial reference
3103     processing closures.
3104 
3105     The value of is_serial must be false when do_marking_step is
3106     being called by any of the worker threads in a work gang.
3107     Examples include the concurrent marking code (CMMarkingTask),
3108     the MT remark code, and the MT reference processing closures.
3109 
3110  *****************************************************************************/
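     // Illustration only: a minimal, hedged sketch of the driver loop implied
     // by the notes above ("should be called in a loop ... until it
     // completes"). The names "task" and "mark_step_duration_ms" are
     // placeholders for this example, not references to actual call sites.
     //
     //   do {
     //     task->do_marking_step(mark_step_duration_ms,
     //                           true  /* do_termination */,
     //                           false /* is_serial */);
     //     // On abort, the caller is expected to yield and/or handle a
     //     // global-stack overflow before calling do_marking_step() again.
     //   } while (task->has_aborted());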
3111 
3112 void G1CMTask::do_marking_step(double time_target_ms,
3113                                bool do_termination,
3114                                bool is_serial) {
3115   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3116   assert(concurrent() == _cm->concurrent(), "they should be the same");
3117 
3118   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3119   assert(_task_queues != NULL, "invariant");
3120   assert(_task_queue != NULL, "invariant");
3121   assert(_task_queues->queue(_worker_id) == _task_queue, "invariant");
3122 
3123   assert(!_claimed,
3124          "only one thread should claim this task at any one time");
3125 
3126   // OK, this doesn't safeguard against all possible scenarios, as it is
3127   // possible for two threads to set the _claimed flag at the same
3128   // time. But it is only for debugging purposes anyway and it will
3129   // catch most problems.
3130   _claimed = true;
3131 
3132   _start_time_ms = os::elapsedVTime() * 1000.0;
3133 
3134   // If do_stealing is true then do_marking_step will attempt to
3135   // steal work from the other G1CMTasks. It only makes sense to
3136   // enable stealing when the termination protocol is enabled
3137   // and do_marking_step() is not being called serially.
3138   bool do_stealing = do_termination && !is_serial;
3139 
3140   double diff_prediction_ms = _g1h->g1_policy()->predictor().get_new_prediction(&_marking_step_diffs_ms);
3141   _time_target_ms = time_target_ms - diff_prediction_ms;
3142 
3143   // set up the variables that are used in the work-based scheme to
3144   // call the regular clock method
3145   _words_scanned = 0;
3146   _refs_reached  = 0;
3147   recalculate_limits();
3148 
3149   // clear all flags
3150   clear_has_aborted();
3151   _has_timed_out = false;
3152   _draining_satb_buffers = false;
3153 
3154   ++_calls;
3155 
3156   // Set up the bitmap and oop closures. Anything that uses them is
3157   // eventually called from this method, so it is OK to allocate these
3158   // on the stack here.
3159   G1CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3160   G1CMOopClosure    cm_oop_closure(_g1h, _cm, this);
3161   set_cm_oop_closure(&cm_oop_closure);
3162 
3163   if (_cm->has_overflown()) {
3164     // This can happen if the mark stack overflows during a GC pause
3165     // and this task, after a yield point, restarts. We have to abort
3166     // as we need to get into the overflow protocol which happens
3167     // right at the end of this task.
3168     set_has_aborted();
3169   }
3170 
3171   // First drain any available SATB buffers. After this, we will not
3172   // look at SATB buffers before the next invocation of this method.
3173   // If enough completed SATB buffers are queued up, the regular clock
3174   // will abort this task so that it restarts.
3175   drain_satb_buffers();
3176   // ...then partially drain the local queue and the global stack
3177   drain_local_queue(true);
3178   drain_global_stack(true);
3179 
3180   do {
3181     if (!has_aborted() && _curr_region != NULL) {
3182       // This means that we're already holding on to a region.
3183       assert(_finger != NULL, "if region is not NULL, then the finger "
3184              "should not be NULL either");
3185 
3186       // We might have restarted this task after an evacuation pause
3187       // which might have evacuated the region we're holding on to
3188       // underneath our feet. Let's read its limit again to make sure
3189       // that we do not iterate over a region of the heap that
3190       // contains garbage (update_region_limit() will also move
3191       // _finger to the start of the region if it is found empty).
3192       update_region_limit();
3193       // We will start from _finger not from the start of the region,
3194       // as we might be restarting this task after aborting half-way
3195       // through scanning this region. In this case, _finger points to
3196       // the address where we last found a marked object. If this is a
3197       // fresh region, _finger points to start().
3198       MemRegion mr = MemRegion(_finger, _region_limit);
3199 
3200       assert(!_curr_region->is_humongous() || mr.start() == _curr_region->bottom(),
3201              "humongous regions should go around loop once only");
3202 
3203       // Some special cases:
3204       // If the memory region is empty, we can just give up the region.
3205       // If the current region is humongous then we only need to check
3206       // the bitmap for the bit associated with the start of the object,
3207       // scan the object if it's live, and give up the region.
3208       // Otherwise, let's iterate over the bitmap of the part of the region
3209       // that is left.
3210       // If the iteration is successful, give up the region.
3211       if (mr.is_empty()) {
3212         giveup_current_region();
3213         regular_clock_call();
3214       } else if (_curr_region->is_humongous() && mr.start() == _curr_region->bottom()) {
3215         if (_nextMarkBitMap->isMarked(mr.start())) {
3216           // The object is marked - apply the closure
3217           BitMap::idx_t offset = _nextMarkBitMap->heapWordToOffset(mr.start());
3218           bitmap_closure.do_bit(offset);
3219         }
3220         // Even if this task aborted while scanning the humongous object
3221         // we can (and should) give up the current region.
3222         giveup_current_region();
3223         regular_clock_call();
3224       } else if (_nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3225         giveup_current_region();
3226         regular_clock_call();
3227       } else {
3228         assert(has_aborted(), "currently the only way to do so");
3229         // The only way to abort the bitmap iteration is to return
3230         // false from the do_bit() method. However, inside the
3231         // do_bit() method we move the _finger to point to the
3232         // object currently being looked at. So, if we bail out, we
3233         // have definitely set _finger to something non-null.
3234         assert(_finger != NULL, "invariant");
3235 
3236         // Region iteration was actually aborted. So now _finger
3237         // points to the address of the object we last scanned. If we
3238         // leave it there, when we restart this task, we will rescan
3239         // the object. It is easy to avoid this. We move the finger by
3240         // enough to point to the next possible object header (the
3241         // bitmap knows by how much we need to move it as it knows its
3242         // granularity).
3243         assert(_finger < _region_limit, "invariant");
3244         HeapWord* new_finger = _nextMarkBitMap->nextObject(_finger);
3245         // Check if bitmap iteration was aborted while scanning the last object
3246         if (new_finger >= _region_limit) {
3247           giveup_current_region();
3248         } else {
3249           move_finger_to(new_finger);
3250         }
3251       }
3252     }
3253     // At this point we have either completed iterating over the
3254     // region we were holding on to, or we have aborted.
3255 
3256     // We then partially drain the local queue and the global stack.
3257     // (Do we really need this?)
3258     drain_local_queue(true);
3259     drain_global_stack(true);
3260 
3261     // Read the note on the claim_region() method on why it might
3262     // return NULL with potentially more regions available for
3263     // claiming and why we have to check out_of_regions() to determine
3264     // whether we're done or not.
3265     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3266       // We are going to try to claim a new region. We should have
3267       // given up on the previous one.
3268       // Separated the asserts so that we know which one fires.
3269       assert(_curr_region  == NULL, "invariant");
3270       assert(_finger       == NULL, "invariant");
3271       assert(_region_limit == NULL, "invariant");
3272       HeapRegion* claimed_region = _cm->claim_region(_worker_id);
3273       if (claimed_region != NULL) {
3274         // Yes, we managed to claim one
3275         setup_for_region(claimed_region);
3276         assert(_curr_region == claimed_region, "invariant");
3277       }
3278       // It is important to call the regular clock here. It might take
3279       // a while to claim a region if, for example, we hit a large
3280       // block of empty regions. So we need to call the regular clock
3281       // method once round the loop to make sure it's called
3282       // frequently enough.
3283       regular_clock_call();
3284     }
3285 
3286     if (!has_aborted() && _curr_region == NULL) {
3287       assert(_cm->out_of_regions(),
3288              "at this point we should be out of regions");
3289     }
3290   } while (_curr_region != NULL && !has_aborted());
3291 
3292   if (!has_aborted()) {
3293     // We cannot check whether the global stack is empty, since other
3294     // tasks might be pushing objects to it concurrently.
3295     assert(_cm->out_of_regions(),
3296            "at this point we should be out of regions");
3297     // Try to reduce the number of available SATB buffers so that
3298     // remark has less work to do.
3299     drain_satb_buffers();
3300   }
3301 
3302   // Since we've done everything else, we can now totally drain the
3303   // local queue and global stack.
3304   drain_local_queue(false);
3305   drain_global_stack(false);
3306 
3307   // Attempt to steal work from other tasks' queues.
3308   if (do_stealing && !has_aborted()) {
3309     // We have not aborted. This means that we have finished all that
3310     // we could. Let's try to do some stealing...
3311 
3312     // We cannot check whether the global stack is empty, since other
3313     // tasks might be pushing objects to it concurrently.
3314     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3315            "only way to reach here");
3316     while (!has_aborted()) {
3317       oop obj;
3318       if (_cm->try_stealing(_worker_id, &_hash_seed, obj)) {
3319         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3320                "any stolen object should be marked");
3321         scan_object(obj);
3322 
3323         // And since we're towards the end, let's totally drain the
3324         // local queue and global stack.
3325         drain_local_queue(false);
3326         drain_global_stack(false);
3327       } else {
3328         break;
3329       }
3330     }
3331   }
3332 
3333   // We still haven't aborted. Now, let's try to get into the
3334   // termination protocol.
3335   if (do_termination && !has_aborted()) {
3336     // We cannot check whether the global stack is empty, since other
3337     // tasks might be concurrently pushing objects on it.
3338     // Separated the asserts so that we know which one fires.
3339     assert(_cm->out_of_regions(), "only way to reach here");
3340     assert(_task_queue->size() == 0, "only way to reach here");
3341     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3342 
3343     // The G1CMTask class also extends the TerminatorTerminator class,
3344     // hence its should_exit_termination() method will also decide
3345     // whether to exit the termination protocol or not.
3346     bool finished = (is_serial ||
3347                      _cm->terminator()->offer_termination(this));
3348     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3349     _termination_time_ms +=
3350       termination_end_time_ms - _termination_start_time_ms;
3351 
3352     if (finished) {
3353       // We're all done.
3354 
3355       if (_worker_id == 0) {
3356         // let's allow task 0 to do this
3357         if (concurrent()) {
3358           assert(_cm->concurrent_marking_in_progress(), "invariant");
3359           // we need to set this to false before the next
3360           // safepoint. This way we ensure that the marking phase
3361           // doesn't observe any more heap expansions.
3362           _cm->clear_concurrent_marking_in_progress();
3363         }
3364       }
3365 
3366       // We can now guarantee that the global stack is empty, since
3367       // all other tasks have finished. We separated the guarantees so
3368       // that, if a condition is false, we can immediately find out
3369       // which one.
3370       guarantee(_cm->out_of_regions(), "only way to reach here");
3371       guarantee(_cm->mark_stack_empty(), "only way to reach here");
3372       guarantee(_task_queue->size() == 0, "only way to reach here");
3373       guarantee(!_cm->has_overflown(), "only way to reach here");
3374       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3375     } else {
3376       // Apparently there's more work to do. Let's abort this task. The
3377       // caller will restart it and we can hopefully find more things to do.
3378       set_has_aborted();
3379     }
3380   }
3381 
3382   // Mainly for debugging purposes to make sure that a pointer to the
3383   // closure which was stack-allocated in this frame doesn't
3384   // escape it by accident.
3385   set_cm_oop_closure(NULL);
3386   double end_time_ms = os::elapsedVTime() * 1000.0;
3387   double elapsed_time_ms = end_time_ms - _start_time_ms;
3388   // Update the step history.
3389   _step_times_ms.add(elapsed_time_ms);
3390 
3391   if (has_aborted()) {
3392     // The task was aborted for some reason.
3393     if (_has_timed_out) {
3394       double diff_ms = elapsed_time_ms - _time_target_ms;
3395       // Keep statistics of how well we did with respect to hitting
3396       // our target only if we actually timed out (if we aborted for
3397       // other reasons, then the results might get skewed).
3398       _marking_step_diffs_ms.add(diff_ms);
3399     }
3400 
3401     if (_cm->has_overflown()) {
3402       // This is the interesting one. We aborted because a global
3403       // overflow was raised. This means we have to restart the
3404       // marking phase and start iterating over regions. However, in
3405       // order to do this we have to make sure that all tasks stop
3406       // what they are doing and re-initialize in a safe manner. We
3407       // will achieve this with the use of two barrier sync points.
3408 
3409       if (!is_serial) {
3410         // We only need to enter the sync barrier if being called
3411         // from a parallel context
3412         _cm->enter_first_sync_barrier(_worker_id);
3413 
3414         // When we exit this sync barrier we know that all tasks have
3415         // stopped doing marking work. So, it's now safe to
3416         // re-initialize our data structures. At the end of this method,
3417         // task 0 will clear the global data structures.
3418       }
3419 
3420       // We clear the local state of this task...
3421       clear_region_fields();
3422 
3423       if (!is_serial) {
3424         // ...and enter the second barrier.
3425         _cm->enter_second_sync_barrier(_worker_id);
3426       }
3427       // At this point, if we're in the concurrent phase of
3428       // marking, everything has been re-initialized and we're
3429       // ready to restart.
3430     }
3431   }
3432 
3433   _claimed = false;
3434 }
3435 
3436 G1CMTask::G1CMTask(uint worker_id,
3437                    G1ConcurrentMark* cm,
3438                    G1CMTaskQueue* task_queue,
3439                    G1CMTaskQueueSet* task_queues)
3440   : _g1h(G1CollectedHeap::heap()),
3441     _worker_id(worker_id), _cm(cm),
3442     _claimed(false),
3443     _nextMarkBitMap(NULL), _hash_seed(17),
3444     _task_queue(task_queue),
3445     _task_queues(task_queues),
3446     _cm_oop_closure(NULL) {
3447   guarantee(task_queue != NULL, "invariant");
3448   guarantee(task_queues != NULL, "invariant");
3449 
3450   _marking_step_diffs_ms.add(0.5);
3451 }
3452 
3453 // These are formatting macros that are used below to ensure
3454 // consistent formatting. The *_H_* versions are used to format the
3455 // header for a particular value and they should be kept consistent
3456 // with the corresponding macro. Also note that most of the macros add
3457 // the necessary white space (as a prefix) which makes them a bit
3458 // easier to compose.
3459 
3460 // All the output lines are prefixed with this string to be able to
3461 // identify them easily in a large log file.
3462 #define G1PPRL_LINE_PREFIX            "###"
3463 
3464 #define G1PPRL_ADDR_BASE_FORMAT    " " PTR_FORMAT "-" PTR_FORMAT
3465 #ifdef _LP64
3466 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
3467 #else // _LP64
3468 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
3469 #endif // _LP64
3470 
3471 // For per-region info
3472 #define G1PPRL_TYPE_FORMAT            "   %-4s"
3473 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
3474 #define G1PPRL_BYTE_FORMAT            "  " SIZE_FORMAT_W(9)
3475 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
3476 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
3477 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
3478 
3479 // For summary info
3480 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  " tag ":" G1PPRL_ADDR_BASE_FORMAT
3481 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  " tag ": " SIZE_FORMAT
3482 #define G1PPRL_SUM_MB_FORMAT(tag)      "  " tag ": %1.2f MB"
3483 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag) " / %1.2f %%"
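
     // Illustration only: the macros above are meant to be concatenated into
     // a single format string. A hedged sketch of such a composition (not an
     // actual call site; "used_bytes" stands in for a size_t value):
     //
     //   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
     //                           G1PPRL_TYPE_FORMAT
     //                           G1PPRL_BYTE_FORMAT,
     //                           "OLD", used_bytes);
     //
     // which prints a "###"-prefixed line with a left-aligned type string
     // followed by a right-aligned byte count.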
3484 
3485 G1PrintRegionLivenessInfoClosure::
3486 G1PrintRegionLivenessInfoClosure(const char* phase_name)
3487   : _total_used_bytes(0), _total_capacity_bytes(0),
3488     _total_prev_live_bytes(0), _total_next_live_bytes(0),
3489     _total_remset_bytes(0), _total_strong_code_roots_bytes(0) {
3490   G1CollectedHeap* g1h = G1CollectedHeap::heap();
3491   MemRegion g1_reserved = g1h->g1_reserved();
3492   double now = os::elapsedTime();
3493 
3494   // Print the header of the output.
3495   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
3496   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX" HEAP"
3497                           G1PPRL_SUM_ADDR_FORMAT("reserved")
3498                           G1PPRL_SUM_BYTE_FORMAT("region-size"),
3499                           p2i(g1_reserved.start()), p2i(g1_reserved.end()),
3500                           HeapRegion::GrainBytes);
3501   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3502   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3503                           G1PPRL_TYPE_H_FORMAT
3504                           G1PPRL_ADDR_BASE_H_FORMAT
3505                           G1PPRL_BYTE_H_FORMAT
3506                           G1PPRL_BYTE_H_FORMAT
3507                           G1PPRL_BYTE_H_FORMAT
3508                           G1PPRL_DOUBLE_H_FORMAT
3509                           G1PPRL_BYTE_H_FORMAT
3510                           G1PPRL_BYTE_H_FORMAT,
3511                           "type", "address-range",
3512                           "used", "prev-live", "next-live", "gc-eff",
3513                           "remset", "code-roots");
3514   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3515                           G1PPRL_TYPE_H_FORMAT
3516                           G1PPRL_ADDR_BASE_H_FORMAT
3517                           G1PPRL_BYTE_H_FORMAT
3518                           G1PPRL_BYTE_H_FORMAT
3519                           G1PPRL_BYTE_H_FORMAT
3520                           G1PPRL_DOUBLE_H_FORMAT
3521                           G1PPRL_BYTE_H_FORMAT
3522                           G1PPRL_BYTE_H_FORMAT,
3523                           "", "",
3524                           "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)",
3525                           "(bytes)", "(bytes)");
3526 }
3527 
3528 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
3529   const char* type       = r->get_type_str();
3530   HeapWord* bottom       = r->bottom();
3531   HeapWord* end          = r->end();
3532   size_t capacity_bytes  = r->capacity();
3533   size_t used_bytes      = r->used();
3534   size_t prev_live_bytes = r->live_bytes();
3535   size_t next_live_bytes = r->next_live_bytes();
3536   double gc_eff          = r->gc_efficiency();
3537   size_t remset_bytes    = r->rem_set()->mem_size();
3538   size_t strong_code_roots_bytes = r->rem_set()->strong_code_roots_mem_size();
3539 
3540   _total_used_bytes      += used_bytes;
3541   _total_capacity_bytes  += capacity_bytes;
3542   _total_prev_live_bytes += prev_live_bytes;
3543   _total_next_live_bytes += next_live_bytes;
3544   _total_remset_bytes    += remset_bytes;
3545   _total_strong_code_roots_bytes += strong_code_roots_bytes;
3546 
3547   // Print a line for this particular region.
3548   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3549                           G1PPRL_TYPE_FORMAT
3550                           G1PPRL_ADDR_BASE_FORMAT
3551                           G1PPRL_BYTE_FORMAT
3552                           G1PPRL_BYTE_FORMAT
3553                           G1PPRL_BYTE_FORMAT
3554                           G1PPRL_DOUBLE_FORMAT
3555                           G1PPRL_BYTE_FORMAT
3556                           G1PPRL_BYTE_FORMAT,
3557                           type, p2i(bottom), p2i(end),
3558                           used_bytes, prev_live_bytes, next_live_bytes, gc_eff,
3559                           remset_bytes, strong_code_roots_bytes);
3560 
3561   return false;
3562 }
3563 
3564 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
3565   // Add static memory usage to the remembered set sizes.
3566   _total_remset_bytes += HeapRegionRemSet::fl_mem_size() + HeapRegionRemSet::static_mem_size();
3567   // Print the footer of the output.
3568   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX);
3569   log_trace(gc, liveness)(G1PPRL_LINE_PREFIX
3570                          " SUMMARY"
3571                          G1PPRL_SUM_MB_FORMAT("capacity")
3572                          G1PPRL_SUM_MB_PERC_FORMAT("used")
3573                          G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
3574                          G1PPRL_SUM_MB_PERC_FORMAT("next-live")
3575                          G1PPRL_SUM_MB_FORMAT("remset")
3576                          G1PPRL_SUM_MB_FORMAT("code-roots"),
3577                          bytes_to_mb(_total_capacity_bytes),
3578                          bytes_to_mb(_total_used_bytes),
3579                          perc(_total_used_bytes, _total_capacity_bytes),
3580                          bytes_to_mb(_total_prev_live_bytes),
3581                          perc(_total_prev_live_bytes, _total_capacity_bytes),
3582                          bytes_to_mb(_total_next_live_bytes),
3583                          perc(_total_next_live_bytes, _total_capacity_bytes),
3584                          bytes_to_mb(_total_remset_bytes),
3585                          bytes_to_mb(_total_strong_code_roots_bytes));
3586 }