/*
 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "gc_implementation/shared/gcTimer.hpp"
#include "gc_implementation/shared/gcTrace.hpp"
#include "gc_implementation/shared/gcTraceTime.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

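// The bit map has one bit per (1 << _shifter) heap words, so the backing
// store needs _bmWordSize >> (_shifter + LogBitsPerByte) bytes (plus one
// for rounding), aligned up to the reservation granularity below.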
CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL,0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                     (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);

  guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for concurrent marking bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size, mtGC);
  if (_base == NULL) {
    vm_exit_during_initialization("Failed to allocate CM region mark stack");
  }
  _index = 0;
  _capacity = (jint) size;
  _saved_index = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    FREE_C_HEAP_ARRAY(oop, _base, mtGC);
  }
}

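// Lock-free push: reserve a slot by advancing _index with a CAS and only
// then store the oop into the reserved slot. On CAS failure another thread
// got the slot first, so retry. If the stack is full we just record the
// fact in _overflow and return.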
void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically.  We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}


void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}


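// Pop up to "max" entries while holding ParGCRareEvent_lock. The number of
// entries actually transferred into ptr_arr is returned through *n; the
// return value is false iff the stack was empty.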
bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

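// Repeatedly pop entries off the stack and apply the closure to each one.
// Every entry is expected to be a grey object, i.e. marked on the bitmap
// (when one is supplied) but not yet fully scanned. If yield_after is true
// we offer to yield after each object and return false if we actually did.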
template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false),  _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

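// Hand out the next unclaimed root region, or NULL if there are none left
// (or if an abort has been requested). The speculative read of _next_survivor
// is re-checked under RootRegionScan_lock so that each survivor region is
// claimed by exactly one worker.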
HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

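// Use roughly one concurrent marking thread per four parallel GC threads,
// but always at least one. For example, ParallelGCThreads = 8 gives
// (8 + 2) / 4 = 2 marking threads.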
uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
  _markBitMap1(rs, MinObjAlignment - 1),
  _markBitMap2(rs, MinObjAlignment - 1),

  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
           CardTableModRefBS::card_shift,
           false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  _markStack.allocate(MarkStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_task_num, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
                                CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads =       0;
    _max_parallel_marking_threads =   0;
    _sleep_factor             =     0.0;
    _marking_task_overhead    =     1.0;
  } else {
    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
      // if both are set
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // We will calculate the number of parallel marking threads based
      // on a target overhead with respect to the soft real-time goal
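      // Illustrative example (the numbers are hypothetical): with
      // MaxGCPauseMillis = 200, GCPauseIntervalMillis = 1000 and
      // G1MarkingOverheadPercent = 10, the overall marking overhead is
      // 200 * 0.10 / 1000 = 0.02, i.e. marking may use 2% of total CPU.
      // On an 8-CPU machine cpu_ratio = 0.125, so we need
      // ceil(0.02 / 0.125) = 1 marking thread with a per-thread overhead
      // of 0.02 * 8 = 0.16 and a sleep factor of (1 - 0.16) / 0.16 = 5.25.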
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
                                                (double) os::processor_count();
      double sleep_factor =
                         (1.0 - marking_task_overhead) / marking_task_overhead;

      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
      _sleep_factor             = sleep_factor;
      _marking_task_overhead    = marking_task_overhead;
    } else {
      // Calculate the number of parallel marking threads by scaling
      // the number of parallel GC threads.
      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
      _sleep_factor             = 0.0;
      _marking_task_overhead    = 1.0;
    }

    assert(ConcGCThreads > 0, "Should have been set");
    _parallel_marking_threads = (uint) ConcGCThreads;
    _max_parallel_marking_threads = _parallel_marking_threads;

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
                     (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
         _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.

    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}


void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.setEmpty();
  _markStack.clear_overflow();
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_task_num; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end,
           err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
                   _finger, _heap_end));
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}


void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle, so we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible that a Full GC or an evacuation pause occurs while it
 * is waiting at the barrier. This is actually safe, since entering
 * the sync barrier is one of the last things do_marking_step() does,
 * and it doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let task 0 do this
    if (task_num == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->date_stamp(PrintGCDateStamps);
        gclog_or_tty->stamp(PrintGCTimeStamps);
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
  }
}

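// Debugging support (non-product builds only): G1ConcMarkForceOverflow
// controls how many times a mark-stack overflow is artificially forced,
// so that the overflow / restart machinery gets exercised.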
#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

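// The gang task that runs the concurrent phase of marking. Each worker
// repeatedly calls do_marking_step() for about G1ConcMarkStepDurationMillis
// at a time and, if a marking overhead target is in effect, sleeps between
// steps in proportion to the sleep factor computed in the constructor above.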
class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true  /* do_termination */,
                                  false /* is_serial*/);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
          gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                                 "overhead %1.4lf",
                                 elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                                 the_task->conc_overhead(os::elapsedTime()) * 8.0);
          gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                                 elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

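// Scan a single root region: apply G1RootRegionScanClosure to every object
// in [bottom, top), prefetching ahead by PrefetchScanIntervalInBytes.
// Currently only survivor regions can be root regions (see CMRootRegions
// above).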
void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
    "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

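// The remark pause: finish the marking work while the world is stopped,
// process weak references, and either declare marking complete or, if the
// global mark stack overflowed, request a restart of concurrent marking.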
1140 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1141   // world is stopped at this checkpoint
1142   assert(SafepointSynchronize::is_at_safepoint(),
1143          "world should be stopped");
1144 
1145   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1146 
1147   // If a full collection has happened, we shouldn't do this.
1148   if (has_aborted()) {
1149     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1150     return;
1151   }
1152 
1153   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1154 
1155   if (VerifyDuringGC) {
1156     HandleMark hm;  // handle scope
1157     gclog_or_tty->print(" VerifyDuringGC:(before)");
1158     Universe::heap()->prepare_for_verify();
1159     Universe::verify(/* silent */ false,
1160                      /* option */ VerifyOption_G1UsePrevMarking);
1161   }
1162 
1163   G1CollectorPolicy* g1p = g1h->g1_policy();
1164   g1p->record_concurrent_mark_remark_start();
1165 
1166   double start = os::elapsedTime();
1167 
1168   checkpointRootsFinalWork();
1169 
1170   double mark_work_end = os::elapsedTime();
1171 
1172   weakRefsWork(clear_all_soft_refs);
1173 
1174   if (has_overflown()) {
1175     // Oops.  We overflowed.  Restart concurrent marking.
1176     _restart_for_overflow = true;
1177     if (G1TraceMarkStackOverflow) {
1178       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1179     }
1180 
1181     // Verify the heap w.r.t. the previous marking bitmap.
1182     if (VerifyDuringGC) {
1183       HandleMark hm;  // handle scope
1184       gclog_or_tty->print(" VerifyDuringGC:(overflow)");
1185       Universe::heap()->prepare_for_verify();
1186       Universe::verify(/* silent */ false,
1187                        /* option */ VerifyOption_G1UsePrevMarking);
1188     }
1189 
1190     // Clear the marking state because we will be restarting
1191     // marking due to overflowing the global mark stack.
1192     reset_marking_state();
1193   } else {
1194     // Aggregate the per-task counting data that we have accumulated
1195     // while marking.
1196     aggregate_count_data();
1197 
1198     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1199     // We're done with marking.
1200     // This is the end of  the marking cycle, we're expected all
1201     // threads to have SATB queues with active set to true.
1202     satb_mq_set.set_active_all_threads(false, /* new active value */
1203                                        true /* expected_active */);
1204 
1205     if (VerifyDuringGC) {
1206       HandleMark hm;  // handle scope
1207       gclog_or_tty->print(" VerifyDuringGC:(after)");
1208       Universe::heap()->prepare_for_verify();
1209       Universe::verify(/* silent */ false,
1210                        /* option */ VerifyOption_G1UseNextMarking);
1211     }
1212     assert(!restart_for_overflow(), "sanity");
1213     // Completely reset the marking state since marking completed
1214     set_non_marking_state();
1215   }
1216 
1217 #if VERIFY_OBJS_PROCESSED
1218   _scan_obj_cl.objs_processed = 0;
1219   ThreadLocalObjQueue::objs_enqueued = 0;
1220 #endif
1221 
1222   // Statistics
1223   double now = os::elapsedTime();
1224   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1225   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1226   _remark_times.add((now - start) * 1000.0);
1227 
1228   g1p->record_concurrent_mark_remark_end();
1229 
1230   G1CMIsAliveClosure is_alive(g1h);
1231   g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
1232 }
1233 
1234 // Base class of the closures that finalize and verify the
1235 // liveness counting data.
1236 class CMCountDataClosureBase: public HeapRegionClosure {
1237 protected:
1238   G1CollectedHeap* _g1h;
1239   ConcurrentMark* _cm;
1240   CardTableModRefBS* _ct_bs;
1241 
1242   BitMap* _region_bm;
1243   BitMap* _card_bm;
1244 
1245   // Takes a region that's not empty (i.e., it has at least one
1246   // live object in it and sets its corresponding bit on the region
1247   // bitmap to 1. If the region is "starts humongous" it will also set
1248   // to 1 the bits on the region bitmap that correspond to its
1249   // associated "continues humongous" regions.
1250   void set_bit_for_region(HeapRegion* hr) {
1251     assert(!hr->continuesHumongous(), "should have filtered those out");
1252 
1253     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1254     if (!hr->startsHumongous()) {
1255       // Normal (non-humongous) case: just set the bit.
1256       _region_bm->par_at_put(index, true);
1257     } else {
1258       // Starts humongous case: calculate how many regions are part of
1259       // this humongous region and then set the bit range.
1260       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1261       _region_bm->par_at_put_range(index, end_index, true);
1262     }
1263   }
1264 
1265 public:
1266   CMCountDataClosureBase(G1CollectedHeap* g1h,
1267                          BitMap* region_bm, BitMap* card_bm):
1268     _g1h(g1h), _cm(g1h->concurrent_mark()),
1269     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
1270     _region_bm(region_bm), _card_bm(card_bm) { }
1271 };
1272 
1273 // Closure that calculates the # live objects per region. Used
1274 // for verification purposes during the cleanup pause.
1275 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1276   CMBitMapRO* _bm;
1277   size_t _region_marked_bytes;
1278 
1279 public:
1280   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1281                          BitMap* region_bm, BitMap* card_bm) :
1282     CMCountDataClosureBase(g1h, region_bm, card_bm),
1283     _bm(bm), _region_marked_bytes(0) { }
1284 
1285   bool doHeapRegion(HeapRegion* hr) {
1286 
1287     if (hr->continuesHumongous()) {
1288       // We will ignore these here and process them when their
1289       // associated "starts humongous" region is processed (see
1290       // set_bit_for_heap_region()). Note that we cannot rely on their
1291       // associated "starts humongous" region to have their bit set to
1292       // 1 since, due to the region chunking in the parallel region
1293       // iteration, a "continues humongous" region might be visited
1294       // before its associated "starts humongous".
1295       return false;
1296     }
1297 
1298     HeapWord* ntams = hr->next_top_at_mark_start();
1299     HeapWord* start = hr->bottom();
1300 
1301     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1302            err_msg("Preconditions not met - "
1303                    "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1304                    start, ntams, hr->end()));
1305 
1306     // Find the first marked object at or after "start".
1307     start = _bm->getNextMarkedWordAddress(start, ntams);
1308 
1309     size_t marked_bytes = 0;
1310 
1311     while (start < ntams) {
1312       oop obj = oop(start);
1313       int obj_sz = obj->size();
1314       HeapWord* obj_end = start + obj_sz;
1315 
1316       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1317       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1318 
1319       // Note: if we're looking at the last region in heap - obj_end
1320       // could be actually just beyond the end of the heap; end_idx
1321       // will then correspond to a (non-existent) card that is also
1322       // just beyond the heap.
1323       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1324         // end of object is not card aligned - increment to cover
1325         // all the cards spanned by the object
1326         end_idx += 1;
1327       }
1328 
1329       // Set the bits in the card BM for the cards spanned by this object.
1330       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1331 
1332       // Add the size of this object to the number of marked bytes.
1333       marked_bytes += (size_t)obj_sz * HeapWordSize;
1334 
1335       // Find the next marked object after this one.
1336       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1337     }
1338 
1339     // Mark the allocated-since-marking portion...
1340     HeapWord* top = hr->top();
1341     if (ntams < top) {
1342       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1343       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1344 
1345       // Note: if we're looking at the last region in heap - top
1346       // could be actually just beyond the end of the heap; end_idx
1347       // will then correspond to a (non-existent) card that is also
1348       // just beyond the heap.
1349       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1350         // end of object is not card aligned - increment to cover
1351         // all the cards spanned by the object
1352         end_idx += 1;
1353       }
1354       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1355 
1356       // This definitely means the region has live objects.
1357       set_bit_for_region(hr);
1358     }
1359 
1360     // Update the live region bitmap.
1361     if (marked_bytes > 0) {
1362       set_bit_for_region(hr);
1363     }
1364 
1365     // Set the marked bytes for the current region so that
1366     // it can be queried by a calling verificiation routine
1367     _region_marked_bytes = marked_bytes;
1368 
1369     return false;
1370   }
1371 
1372   size_t region_marked_bytes() const { return _region_marked_bytes; }
1373 };
1374 
1375 // Heap region closure used for verifying the counting data
1376 // that was accumulated concurrently and aggregated during
1377 // the remark pause. This closure is applied to the heap
1378 // regions during the STW cleanup pause.
1379 
1380 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1381   G1CollectedHeap* _g1h;
1382   ConcurrentMark* _cm;
1383   CalcLiveObjectsClosure _calc_cl;
1384   BitMap* _region_bm;   // Region BM to be verified
1385   BitMap* _card_bm;     // Card BM to be verified
1386   bool _verbose;        // verbose output?
1387 
1388   BitMap* _exp_region_bm; // Expected Region BM values
1389   BitMap* _exp_card_bm;   // Expected card BM values
1390 
1391   int _failures;
1392 
1393 public:
1394   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1395                                 BitMap* region_bm,
1396                                 BitMap* card_bm,
1397                                 BitMap* exp_region_bm,
1398                                 BitMap* exp_card_bm,
1399                                 bool verbose) :
1400     _g1h(g1h), _cm(g1h->concurrent_mark()),
1401     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1402     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1403     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1404     _failures(0) { }
1405 
1406   int failures() const { return _failures; }
1407 
1408   bool doHeapRegion(HeapRegion* hr) {
1409     if (hr->continuesHumongous()) {
1410       // We will ignore these here and process them when their
1411       // associated "starts humongous" region is processed (see
1412       // set_bit_for_heap_region()). Note that we cannot rely on their
1413       // associated "starts humongous" region to have their bit set to
1414       // 1 since, due to the region chunking in the parallel region
1415       // iteration, a "continues humongous" region might be visited
1416       // before its associated "starts humongous".
1417       return false;
1418     }
1419 
1420     int failures = 0;
1421 
1422     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1423     // this region and set the corresponding bits in the expected region
1424     // and card bitmaps.
1425     bool res = _calc_cl.doHeapRegion(hr);
1426     assert(res == false, "should be continuing");
1427 
1428     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1429                     Mutex::_no_safepoint_check_flag);
1430 
1431     // Verify the marked bytes for this region.
1432     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1433     size_t act_marked_bytes = hr->next_marked_bytes();
1434 
1435     // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting for some objects during the actual marking.
1437     if (exp_marked_bytes > act_marked_bytes) {
1438       if (_verbose) {
1439         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1440                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1441                                hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1442       }
1443       failures += 1;
1444     }
1445 
    // Verify the bit for this region in the actual and expected
    // (just calculated) region bitmaps.
1448     // We're not OK if the bit in the calculated expected region
1449     // bitmap is set and the bit in the actual region bitmap is not.
1450     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1451 
1452     bool expected = _exp_region_bm->at(index);
1453     bool actual = _region_bm->at(index);
1454     if (expected && !actual) {
1455       if (_verbose) {
1456         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1457                                "expected: %s, actual: %s",
1458                                hr->hrs_index(),
1459                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1460       }
1461       failures += 1;
1462     }
1463 
1464     // Verify that the card bit maps for the cards spanned by the current
1465     // region match. We have an error if we have a set bit in the expected
1466     // bit map and the corresponding bit in the actual bitmap is not set.
1467 
1468     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1469     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
1470 
1471     for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
1472       expected = _exp_card_bm->at(i);
1473       actual = _card_bm->at(i);
1474 
1475       if (expected && !actual) {
1476         if (_verbose) {
1477           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1478                                  "expected: %s, actual: %s",
1479                                  hr->hrs_index(), i,
1480                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1481         }
1482         failures += 1;
1483       }
1484     }
1485 
    if (failures > 0 && _verbose) {
1487       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1488                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1489                              HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
1490                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1491     }
1492 
1493     _failures += failures;
1494 
1495     // We could stop iteration over the heap when we
1496     // find the first violating region by returning true.
1497     return false;
1498   }
1499 };
1500 
1501 
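// Gang task used, when VerifyDuringGC is enabled, to verify the
// liveness counting data during the STW cleanup pause. Each worker
// applies a VerifyLiveObjectDataHRClosure to its share of the heap
// regions and the per-region failure counts are accumulated into
// _failures.
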
1502 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1503 protected:
1504   G1CollectedHeap* _g1h;
1505   ConcurrentMark* _cm;
1506   BitMap* _actual_region_bm;
1507   BitMap* _actual_card_bm;
1508 
1509   uint    _n_workers;
1510 
1511   BitMap* _expected_region_bm;
1512   BitMap* _expected_card_bm;
1513 
1514   int  _failures;
1515   bool _verbose;
1516 
1517 public:
1518   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1519                             BitMap* region_bm, BitMap* card_bm,
1520                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1521     : AbstractGangTask("G1 verify final counting"),
1522       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1523       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1524       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1525       _failures(0), _verbose(false),
1526       _n_workers(0) {
1527     assert(VerifyDuringGC, "don't call this otherwise");
1528 
1529     // Use the value already set as the number of active threads
1530     // in the call to run_task().
1531     if (G1CollectedHeap::use_parallel_gc_threads()) {
1532       assert( _g1h->workers()->active_workers() > 0,
1533         "Should have been previously set");
1534       _n_workers = _g1h->workers()->active_workers();
1535     } else {
1536       _n_workers = 1;
1537     }
1538 
1539     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1540     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1541 
1542     _verbose = _cm->verbose_medium();
1543   }
1544 
1545   void work(uint worker_id) {
1546     assert(worker_id < _n_workers, "invariant");
1547 
1548     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1549                                             _actual_region_bm, _actual_card_bm,
1550                                             _expected_region_bm,
1551                                             _expected_card_bm,
1552                                             _verbose);
1553 
1554     if (G1CollectedHeap::use_parallel_gc_threads()) {
1555       _g1h->heap_region_par_iterate_chunked(&verify_cl,
1556                                             worker_id,
1557                                             _n_workers,
1558                                             HeapRegion::VerifyCountClaimValue);
1559     } else {
1560       _g1h->heap_region_iterate(&verify_cl);
1561     }
1562 
1563     Atomic::add(verify_cl.failures(), &_failures);
1564   }
1565 
1566   int failures() const { return _failures; }
1567 };
1568 
1569 // Closure that finalizes the liveness counting data.
1570 // Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top)
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit, in the region
// liveness bitmap, for each region that contains live data.
1575 
1576 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1577  public:
1578   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1579                               BitMap* region_bm,
1580                               BitMap* card_bm) :
1581     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1582 
1583   bool doHeapRegion(HeapRegion* hr) {
1584 
1585     if (hr->continuesHumongous()) {
1586       // We will ignore these here and process them when their
1587       // associated "starts humongous" region is processed (see
1588       // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have its bit set to
1590       // 1 since, due to the region chunking in the parallel region
1591       // iteration, a "continues humongous" region might be visited
1592       // before its associated "starts humongous".
1593       return false;
1594     }
1595 
1596     HeapWord* ntams = hr->next_top_at_mark_start();
1597     HeapWord* top   = hr->top();
1598 
1599     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1600 
1601     // Mark the allocated-since-marking portion...
1602     if (ntams < top) {
1603       // This definitely means the region has live objects.
1604       set_bit_for_region(hr);
1605 
1606       // Now set the bits in the card bitmap for [ntams, top)
1607       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1608       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1609 
      // Note: if we're looking at the last region in the heap, top
      // could actually be just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
1614       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1615         // end of object is not card aligned - increment to cover
1616         // all the cards spanned by the object
1617         end_idx += 1;
1618       }
1619 
1620       assert(end_idx <= _card_bm->size(),
1621              err_msg("oob: end_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1622                      end_idx, _card_bm->size()));
1623       assert(start_idx < _card_bm->size(),
1624              err_msg("oob: start_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1625                      start_idx, _card_bm->size()));
1626 
1627       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1628      }
1629 
1630     // Set the bit for the region if it contains live data
1631     if (hr->next_marked_bytes() > 0) {
1632       set_bit_for_region(hr);
1633     }
1634 
1635     return false;
1636   }
1637 };
1638 
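// Gang task used during the STW cleanup pause to finalize the
// liveness counting data. Each worker applies a
// FinalCountDataUpdateClosure to its share of the heap regions.
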
1639 class G1ParFinalCountTask: public AbstractGangTask {
1640 protected:
1641   G1CollectedHeap* _g1h;
1642   ConcurrentMark* _cm;
1643   BitMap* _actual_region_bm;
1644   BitMap* _actual_card_bm;
1645 
1646   uint    _n_workers;
1647 
1648 public:
1649   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1650     : AbstractGangTask("G1 final counting"),
1651       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1652       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1653       _n_workers(0) {
1654     // Use the value already set as the number of active threads
1655     // in the call to run_task().
1656     if (G1CollectedHeap::use_parallel_gc_threads()) {
1657       assert( _g1h->workers()->active_workers() > 0,
1658         "Should have been previously set");
1659       _n_workers = _g1h->workers()->active_workers();
1660     } else {
1661       _n_workers = 1;
1662     }
1663   }
1664 
1665   void work(uint worker_id) {
1666     assert(worker_id < _n_workers, "invariant");
1667 
1668     FinalCountDataUpdateClosure final_update_cl(_g1h,
1669                                                 _actual_region_bm,
1670                                                 _actual_card_bm);
1671 
1672     if (G1CollectedHeap::use_parallel_gc_threads()) {
1673       _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1674                                             worker_id,
1675                                             _n_workers,
1676                                             HeapRegion::FinalCountClaimValue);
1677     } else {
1678       _g1h->heap_region_iterate(&final_update_cl);
1679     }
1680   }
1681 };
1682 
1683 class G1ParNoteEndTask;
1684 
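// Heap region closure applied during the cleanup pause. For each
// claimed region it notes the end of marking, accumulates liveness
// and timing statistics, and asks the heap to free the region (onto
// the worker-local cleanup list) if the region turned out to be
// empty.
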
1685 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1686   G1CollectedHeap* _g1;
1687   int _worker_num;
1688   size_t _max_live_bytes;
1689   uint _regions_claimed;
1690   size_t _freed_bytes;
1691   FreeRegionList* _local_cleanup_list;
1692   OldRegionSet* _old_proxy_set;
1693   HumongousRegionSet* _humongous_proxy_set;
1694   HRRSCleanupTask* _hrrs_cleanup_task;
1695   double _claimed_region_time;
1696   double _max_region_time;
1697 
1698 public:
1699   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1700                              int worker_num,
1701                              FreeRegionList* local_cleanup_list,
1702                              OldRegionSet* old_proxy_set,
1703                              HumongousRegionSet* humongous_proxy_set,
1704                              HRRSCleanupTask* hrrs_cleanup_task) :
1705     _g1(g1), _worker_num(worker_num),
1706     _max_live_bytes(0), _regions_claimed(0),
1707     _freed_bytes(0),
1708     _claimed_region_time(0.0), _max_region_time(0.0),
1709     _local_cleanup_list(local_cleanup_list),
1710     _old_proxy_set(old_proxy_set),
1711     _humongous_proxy_set(humongous_proxy_set),
1712     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1713 
1714   size_t freed_bytes() { return _freed_bytes; }
1715 
1716   bool doHeapRegion(HeapRegion *hr) {
1717     if (hr->continuesHumongous()) {
1718       return false;
1719     }
1720     // We use a claim value of zero here because all regions
1721     // were claimed with value 1 in the FinalCount task.
1722     _g1->reset_gc_time_stamps(hr);
1723     double start = os::elapsedTime();
1724     _regions_claimed++;
1725     hr->note_end_of_marking();
1726     _max_live_bytes += hr->max_live_bytes();
1727     _g1->free_region_if_empty(hr,
1728                               &_freed_bytes,
1729                               _local_cleanup_list,
1730                               _old_proxy_set,
1731                               _humongous_proxy_set,
1732                               _hrrs_cleanup_task,
1733                               true /* par */);
1734     double region_time = (os::elapsedTime() - start);
1735     _claimed_region_time += region_time;
1736     if (region_time > _max_region_time) {
1737       _max_region_time = region_time;
1738     }
1739     return false;
1740   }
1741 
1742   size_t max_live_bytes() { return _max_live_bytes; }
1743   uint regions_claimed() { return _regions_claimed; }
1744   double claimed_region_time_sec() { return _claimed_region_time; }
1745   double max_region_time_sec() { return _max_region_time; }
1746 };
1747 
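// Gang task that applies G1NoteEndOfConcMarkClosure to the heap
// regions in parallel and then, under the ParGCRareEvent_lock,
// merges the worker-local cleanup lists and statistics into the
// global cleanup list.
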
1748 class G1ParNoteEndTask: public AbstractGangTask {
1749   friend class G1NoteEndOfConcMarkClosure;
1750 
1751 protected:
1752   G1CollectedHeap* _g1h;
1753   size_t _max_live_bytes;
1754   size_t _freed_bytes;
1755   FreeRegionList* _cleanup_list;
1756 
1757 public:
1758   G1ParNoteEndTask(G1CollectedHeap* g1h,
1759                    FreeRegionList* cleanup_list) :
1760     AbstractGangTask("G1 note end"), _g1h(g1h),
1761     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1762 
1763   void work(uint worker_id) {
1764     double start = os::elapsedTime();
1765     FreeRegionList local_cleanup_list("Local Cleanup List");
1766     OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
1767     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1768     HRRSCleanupTask hrrs_cleanup_task;
1769     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
1770                                            &old_proxy_set,
1771                                            &humongous_proxy_set,
1772                                            &hrrs_cleanup_task);
1773     if (G1CollectedHeap::use_parallel_gc_threads()) {
1774       _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1775                                             _g1h->workers()->active_workers(),
1776                                             HeapRegion::NoteEndClaimValue);
1777     } else {
1778       _g1h->heap_region_iterate(&g1_note_end);
1779     }
1780     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1781 
1782     // Now update the lists
1783     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1784                                             NULL /* free_list */,
1785                                             &old_proxy_set,
1786                                             &humongous_proxy_set,
1787                                             true /* par */);
1788     {
1789       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1790       _max_live_bytes += g1_note_end.max_live_bytes();
1791       _freed_bytes += g1_note_end.freed_bytes();
1792 
1793       // If we iterate over the global cleanup list at the end of
1794       // cleanup to do this printing we will not guarantee to only
1795       // generate output for the newly-reclaimed regions (the list
1796       // might not be empty at the beginning of cleanup; we might
1797       // still be working on its previous contents). So we do the
1798       // printing here, before we append the new regions to the global
1799       // cleanup list.
1800 
1801       G1HRPrinter* hr_printer = _g1h->hr_printer();
1802       if (hr_printer->is_active()) {
1803         HeapRegionLinkedListIterator iter(&local_cleanup_list);
1804         while (iter.more_available()) {
1805           HeapRegion* hr = iter.get_next();
1806           hr_printer->cleanup(hr);
1807         }
1808       }
1809 
1810       _cleanup_list->add_as_tail(&local_cleanup_list);
1811       assert(local_cleanup_list.is_empty(), "post-condition");
1812 
1813       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1814     }
1815   }
1816   size_t max_live_bytes() { return _max_live_bytes; }
1817   size_t freed_bytes() { return _freed_bytes; }
1818 };
1819 
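// Gang task that scrubs the remembered sets using the region and
// card liveness bitmaps computed during marking.
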
1820 class G1ParScrubRemSetTask: public AbstractGangTask {
1821 protected:
1822   G1RemSet* _g1rs;
1823   BitMap* _region_bm;
1824   BitMap* _card_bm;
1825 public:
1826   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1827                        BitMap* region_bm, BitMap* card_bm) :
1828     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1829     _region_bm(region_bm), _card_bm(card_bm) { }
1830 
1831   void work(uint worker_id) {
1832     if (G1CollectedHeap::use_parallel_gc_threads()) {
1833       _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1834                        HeapRegion::ScrubRemSetClaimValue);
1835     } else {
1836       _g1rs->scrub(_region_bm, _card_bm);
1837     }
1838   }
1839 
1840 };
1841 
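// The STW cleanup pause: finalizes (and, if requested, verifies) the
// liveness counting data, swaps the marking bitmaps, notes the end of
// marking in every region (freeing completely empty ones), optionally
// scrubs the remembered sets, and updates policy and statistics.
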
1842 void ConcurrentMark::cleanup() {
1843   // world is stopped at this checkpoint
1844   assert(SafepointSynchronize::is_at_safepoint(),
1845          "world should be stopped");
1846   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1847 
1848   // If a full collection has happened, we shouldn't do this.
1849   if (has_aborted()) {
1850     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1851     return;
1852   }
1853 
1854   HRSPhaseSetter x(HRSPhaseCleanup);
1855   g1h->verify_region_sets_optional();
1856 
1857   if (VerifyDuringGC) {
1858     HandleMark hm;  // handle scope
1859     gclog_or_tty->print(" VerifyDuringGC:(before)");
1860     Universe::heap()->prepare_for_verify();
1861     Universe::verify(/* silent */ false,
1862                      /* option */ VerifyOption_G1UsePrevMarking);
1863   }
1864 
1865   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1866   g1p->record_concurrent_mark_cleanup_start();
1867 
1868   double start = os::elapsedTime();
1869 
1870   HeapRegionRemSet::reset_for_cleanup_tasks();
1871 
1872   uint n_workers;
1873 
1874   // Do counting once more with the world stopped for good measure.
1875   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1876 
1877   if (G1CollectedHeap::use_parallel_gc_threads()) {
    assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
1879            "sanity check");
1880 
1881     g1h->set_par_threads();
1882     n_workers = g1h->n_par_threads();
1883     assert(g1h->n_par_threads() == n_workers,
1884            "Should not have been reset");
1885     g1h->workers()->run_task(&g1_par_count_task);
1886     // Done with the parallel phase so reset to 0.
1887     g1h->set_par_threads(0);
1888 
1889     assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
1890            "sanity check");
1891   } else {
1892     n_workers = 1;
1893     g1_par_count_task.work(0);
1894   }
1895 
1896   if (VerifyDuringGC) {
1897     // Verify that the counting data accumulated during marking matches
1898     // that calculated by walking the marking bitmap.
1899 
1900     // Bitmaps to hold expected values
1901     BitMap expected_region_bm(_region_bm.size(), false);
1902     BitMap expected_card_bm(_card_bm.size(), false);
1903 
1904     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1905                                                  &_region_bm,
1906                                                  &_card_bm,
1907                                                  &expected_region_bm,
1908                                                  &expected_card_bm);
1909 
1910     if (G1CollectedHeap::use_parallel_gc_threads()) {
1911       g1h->set_par_threads((int)n_workers);
1912       g1h->workers()->run_task(&g1_par_verify_task);
1913       // Done with the parallel phase so reset to 0.
1914       g1h->set_par_threads(0);
1915 
1916       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
1917              "sanity check");
1918     } else {
1919       g1_par_verify_task.work(0);
1920     }
1921 
1922     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1923   }
1924 
1925   size_t start_used_bytes = g1h->used();
1926   g1h->set_marking_complete();
1927 
1928   double count_end = os::elapsedTime();
1929   double this_final_counting_time = (count_end - start);
1930   _total_counting_time += this_final_counting_time;
1931 
1932   if (G1PrintRegionLivenessInfo) {
1933     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1934     _g1h->heap_region_iterate(&cl);
1935   }
1936 
  // Install the (now complete) next mark bitmap as "prev".
1938   swapMarkBitMaps();
1939 
1940   g1h->reset_gc_time_stamp();
1941 
1942   // Note end of marking in all heap regions.
1943   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1944   if (G1CollectedHeap::use_parallel_gc_threads()) {
1945     g1h->set_par_threads((int)n_workers);
1946     g1h->workers()->run_task(&g1_par_note_end_task);
1947     g1h->set_par_threads(0);
1948 
1949     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1950            "sanity check");
1951   } else {
1952     g1_par_note_end_task.work(0);
1953   }
1954   g1h->check_gc_time_stamps();
1955 
1956   if (!cleanup_list_is_empty()) {
1957     // The cleanup list is not empty, so we'll have to process it
1958     // concurrently. Notify anyone else that might be wanting free
1959     // regions that there will be more free regions coming soon.
1960     g1h->set_free_regions_coming();
1961   }
1962 
  // The remembered sets need to be scrubbed before the
  // record_concurrent_mark_cleanup_end() call below, since scrubbing
  // affects the metric by which we sort the heap regions.
1965   if (G1ScrubRemSets) {
1966     double rs_scrub_start = os::elapsedTime();
1967     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1968     if (G1CollectedHeap::use_parallel_gc_threads()) {
1969       g1h->set_par_threads((int)n_workers);
1970       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1971       g1h->set_par_threads(0);
1972 
1973       assert(g1h->check_heap_region_claim_values(
1974                                             HeapRegion::ScrubRemSetClaimValue),
1975              "sanity check");
1976     } else {
1977       g1_par_scrub_rs_task.work(0);
1978     }
1979 
1980     double rs_scrub_end = os::elapsedTime();
1981     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1982     _total_rs_scrub_time += this_rs_scrub_time;
1983   }
1984 
1985   // this will also free any regions totally full of garbage objects,
1986   // and sort the regions.
1987   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
1988 
1989   // Statistics.
1990   double end = os::elapsedTime();
1991   _cleanup_times.add((end - start) * 1000.0);
1992 
1993   if (G1Log::fine()) {
1994     g1h->print_size_transition(gclog_or_tty,
1995                                start_used_bytes,
1996                                g1h->used(),
1997                                g1h->capacity());
1998   }
1999 
2000   // Clean up will have freed any regions completely full of garbage.
2001   // Update the soft reference policy with the new heap occupancy.
2002   Universe::update_heap_info_at_gc();
2003 
2004   // We need to make this be a "collection" so any collection pause that
2005   // races with it goes around and waits for completeCleanup to finish.
2006   g1h->increment_total_collections();
2007 
2008   // We reclaimed old regions so we should calculate the sizes to make
2009   // sure we update the old gen/space data.
2010   g1h->g1mm()->update_sizes();
2011 
2012   if (VerifyDuringGC) {
2013     HandleMark hm;  // handle scope
2014     gclog_or_tty->print(" VerifyDuringGC:(after)");
2015     Universe::heap()->prepare_for_verify();
2016     Universe::verify(/* silent */ false,
2017                      /* option */ VerifyOption_G1UsePrevMarking);
2018   }
2019 
2020   g1h->verify_region_sets_optional();
2021   g1h->trace_heap_after_concurrent_cycle();
2022 }
2023 
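// Concurrent continuation of cleanup(): clears the regions that were
// placed on the _cleanup_list during the cleanup pause and appends
// them, in batches, to the heap's secondary free list.
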
2024 void ConcurrentMark::completeCleanup() {
2025   if (has_aborted()) return;
2026 
2027   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2028 
2029   _cleanup_list.verify_optional();
2030   FreeRegionList tmp_free_list("Tmp Free List");
2031 
2032   if (G1ConcRegionFreeingVerbose) {
2033     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2034                            "cleanup list has %u entries",
2035                            _cleanup_list.length());
2036   }
2037 
  // No one else should be accessing the _cleanup_list at this point,
  // so it's not necessary to take any locks.
2040   while (!_cleanup_list.is_empty()) {
2041     HeapRegion* hr = _cleanup_list.remove_head();
2042     assert(hr != NULL, "the list was not empty");
2043     hr->par_clear();
2044     tmp_free_list.add_as_tail(hr);
2045 
2046     // Instead of adding one region at a time to the secondary_free_list,
2047     // we accumulate them in the local list and move them a few at a
2048     // time. This also cuts down on the number of notify_all() calls
2049     // we do during this process. We'll also append the local list when
2050     // _cleanup_list is empty (which means we just removed the last
2051     // region from the _cleanup_list).
2052     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2053         _cleanup_list.is_empty()) {
2054       if (G1ConcRegionFreeingVerbose) {
2055         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2056                                "appending %u entries to the secondary_free_list, "
2057                                "cleanup list still has %u entries",
2058                                tmp_free_list.length(),
2059                                _cleanup_list.length());
2060       }
2061 
2062       {
2063         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2064         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
2065         SecondaryFreeList_lock->notify_all();
2066       }
2067 
2068       if (G1StressConcRegionFreeing) {
2069         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2070           os::sleep(Thread::current(), (jlong) 1, false);
2071         }
2072       }
2073     }
2074   }
2075   assert(tmp_free_list.is_empty(), "post-condition");
2076 }
2077 
2078 // Supporting Object and Oop closures for reference discovery
// and processing during marking.
2080 
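// An oop is treated as alive if it lies outside the G1 reserved heap
// (we do not track liveness for such objects here) or if is_obj_ill()
// does not report it as dead.
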
2081 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2082   HeapWord* addr = (HeapWord*)obj;
2083   return addr != NULL &&
2084          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2085 }
2086 
// 'Keep Alive' oop closure used by both serial and parallel reference processing.
2088 // Uses the CMTask associated with a worker thread (for serial reference
2089 // processing the CMTask for worker 0 is used) to preserve (mark) and
2090 // trace referent objects.
2091 //
2092 // Using the CMTask and embedded local queues avoids having the worker
2093 // threads operating on the global mark stack. This reduces the risk
2094 // of overflowing the stack - which we would rather avoid at this late
// stage. Also using the tasks' local queues removes the potential
2096 // of the workers interfering with each other that could occur if
2097 // operating on the global stack.
2098 
2099 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2100   ConcurrentMark* _cm;
2101   CMTask*         _task;
2102   int             _ref_counter_limit;
2103   int             _ref_counter;
2104   bool            _is_serial;
2105  public:
2106   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2107     _cm(cm), _task(task), _is_serial(is_serial),
2108     _ref_counter_limit(G1RefProcDrainInterval) {
2109     assert(_ref_counter_limit > 0, "sanity");
2110     assert(!_is_serial || _task->task_id() == 0, "only task 0 for serial code");
2111     _ref_counter = _ref_counter_limit;
2112   }
2113 
2114   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2115   virtual void do_oop(      oop* p) { do_oop_work(p); }
2116 
2117   template <class T> void do_oop_work(T* p) {
2118     if (!_cm->has_overflown()) {
2119       oop obj = oopDesc::load_decode_heap_oop(p);
2120       if (_cm->verbose_high()) {
2121         gclog_or_tty->print_cr("\t[%d] we're looking at location "
2122                                "*"PTR_FORMAT" = "PTR_FORMAT,
2123                                _task->task_id(), p, (void*) obj);
2124       }
2125 
2126       _task->deal_with_reference(obj);
2127       _ref_counter--;
2128 
2129       if (_ref_counter == 0) {
2130         // We have dealt with _ref_counter_limit references, pushing them
2131         // and objects reachable from them on to the local stack (and
2132         // possibly the global stack). Call CMTask::do_marking_step() to
2133         // process these entries.
2134         //
2135         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2136         // there's nothing more to do (i.e. we're done with the entries that
2137         // were pushed as a result of the CMTask::deal_with_reference() calls
2138         // above) or we overflow.
2139         //
2140         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2141         // flag while there may still be some work to do. (See the comment at
2142         // the beginning of CMTask::do_marking_step() for those conditions -
2143         // one of which is reaching the specified time target.) It is only
2144         // when CMTask::do_marking_step() returns without setting the
2145         // has_aborted() flag that the marking step has completed.
2146         do {
2147           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2148           _task->do_marking_step(mark_step_duration_ms,
2149                                  false      /* do_termination */,
2150                                  _is_serial);
2151         } while (_task->has_aborted() && !_cm->has_overflown());
2152         _ref_counter = _ref_counter_limit;
2153       }
2154     } else {
2155       if (_cm->verbose_high()) {
2156          gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2157       }
2158     }
2159   }
2160 };
2161 
2162 // 'Drain' oop closure used by both serial and parallel reference processing.
2163 // Uses the CMTask associated with a given worker thread (for serial
// reference processing the CMTask for worker 0 is used). Calls the
2165 // do_marking_step routine, with an unbelievably large timeout value,
2166 // to drain the marking data structures of the remaining entries
2167 // added by the 'keep alive' oop closure above.
2168 
2169 class G1CMDrainMarkingStackClosure: public VoidClosure {
2170   ConcurrentMark* _cm;
2171   CMTask*         _task;
2172   bool            _is_serial;
2173  public:
2174   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2175     _cm(cm), _task(task), _is_serial(is_serial) {
2176     assert(!_is_serial || _task->task_id() == 0, "only task 0 for serial code");
2177   }
2178 
2179   void do_void() {
2180     do {
2181       if (_cm->verbose_high()) {
2182         gclog_or_tty->print_cr("\t[%d] Drain: Calling do_marking_step - serial: %s",
2183                                _task->task_id(), BOOL_TO_STR(_is_serial));
2184       }
2185 
2186       // We call CMTask::do_marking_step() to completely drain the local
2187       // and global marking stacks of entries pushed by the 'keep alive'
2188       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2189       //
2190       // CMTask::do_marking_step() is called in a loop, which we'll exit
      // if there's nothing more to do (i.e. we've completely drained the
      // entries that were pushed as a result of applying the 'keep alive'
2193       // closure to the entries on the discovered ref lists) or we overflow
2194       // the global marking stack.
2195       //
2196       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2197       // flag while there may still be some work to do. (See the comment at
2198       // the beginning of CMTask::do_marking_step() for those conditions -
2199       // one of which is reaching the specified time target.) It is only
2200       // when CMTask::do_marking_step() returns without setting the
2201       // has_aborted() flag that the marking step has completed.
2202 
2203       _task->do_marking_step(1000000000.0 /* something very large */,
2204                              true         /* do_termination */,
2205                              _is_serial);
2206     } while (_task->has_aborted() && !_cm->has_overflown());
2207   }
2208 };
2209 
2210 // Implementation of AbstractRefProcTaskExecutor for parallel
2211 // reference processing at the end of G1 concurrent marking
2212 
2213 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2214 private:
2215   G1CollectedHeap* _g1h;
2216   ConcurrentMark*  _cm;
2217   WorkGang*        _workers;
2218   int              _active_workers;
2219 
2220 public:
2221   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2222                         ConcurrentMark* cm,
2223                         WorkGang* workers,
2224                         int n_workers) :
2225     _g1h(g1h), _cm(cm),
2226     _workers(workers), _active_workers(n_workers) { }
2227 
2228   // Executes the given task using concurrent marking worker threads.
2229   virtual void execute(ProcessTask& task);
2230   virtual void execute(EnqueueTask& task);
2231 };
2232 
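// Gang task proxy that runs the reference processor's ProcessTask in
// parallel. Each worker uses its own CMTask together with per-worker
// 'keep alive' and 'drain' closures.
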
2233 class G1CMRefProcTaskProxy: public AbstractGangTask {
2234   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2235   ProcessTask&     _proc_task;
2236   G1CollectedHeap* _g1h;
2237   ConcurrentMark*  _cm;
2238 
2239 public:
2240   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2241                      G1CollectedHeap* g1h,
2242                      ConcurrentMark* cm) :
2243     AbstractGangTask("Process reference objects in parallel"),
2244     _proc_task(proc_task), _g1h(g1h), _cm(cm) {
2245     ReferenceProcessor* rp = _g1h->ref_processor_cm();
2246     assert(rp->processing_is_mt(), "shouldn't be here otherwise");
2247   }
2248 
2249   virtual void work(uint worker_id) {
2250     CMTask* task = _cm->task(worker_id);
2251     G1CMIsAliveClosure g1_is_alive(_g1h);
2252     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
2253     G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */);
2254 
2255     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2256   }
2257 };
2258 
2259 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2260   assert(_workers != NULL, "Need parallel worker threads.");
2261   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2262 
2263   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2264 
2265   // We need to reset the concurrency level before each
2266   // proxy task execution, so that the termination protocol
2267   // and overflow handling in CMTask::do_marking_step() knows
2268   // how many workers to wait for.
2269   _cm->set_concurrency(_active_workers);
2270   _g1h->set_par_threads(_active_workers);
2271   _workers->run_task(&proc_task_proxy);
2272   _g1h->set_par_threads(0);
2273 }
2274 
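// Gang task proxy that runs the reference processor's EnqueueTask in
// parallel.
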
2275 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2276   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2277   EnqueueTask& _enq_task;
2278 
2279 public:
2280   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2281     AbstractGangTask("Enqueue reference objects in parallel"),
2282     _enq_task(enq_task) { }
2283 
2284   virtual void work(uint worker_id) {
2285     _enq_task.work(worker_id);
2286   }
2287 };
2288 
2289 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2290   assert(_workers != NULL, "Need parallel worker threads.");
2291   assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT");
2292 
2293   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2294 
2295   // Not strictly necessary but...
2296   //
2297   // We need to reset the concurrency level before each
2298   // proxy task execution, so that the termination protocol
2299   // and overflow handling in CMTask::do_marking_step() knows
2300   // how many workers to wait for.
2301   _cm->set_concurrency(_active_workers);
2302   _g1h->set_par_threads(_active_workers);
2303   _workers->run_task(&enq_task_proxy);
2304   _g1h->set_par_threads(0);
2305 }
2306 
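// Processes and then enqueues the references discovered during
// concurrent marking, using either the serial closures above or the
// parallel task executor, and finally unlinks stale entries from the
// string and symbol tables. Skipped entirely if the global mark stack
// has overflowed.
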
2307 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2308   if (has_overflown()) {
2309     // Skip processing the discovered references if we have
2310     // overflown the global marking stack. Reference objects
2311     // only get discovered once so it is OK to not
2312     // de-populate the discovered reference lists. We could have,
2313     // but the only benefit would be that, when marking restarts,
2314     // less reference objects are discovered.
2315     return;
2316   }
2317 
2318   ResourceMark rm;
2319   HandleMark   hm;
2320 
2321   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2322 
2323   // Is alive closure.
2324   G1CMIsAliveClosure g1_is_alive(g1h);
2325 
2326   // Inner scope to exclude the cleaning of the string and symbol
2327   // tables from the displayed time.
2328   {
2329     if (G1Log::finer()) {
2330       gclog_or_tty->put(' ');
2331     }
2332     GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm());
2333 
2334     ReferenceProcessor* rp = g1h->ref_processor_cm();
2335 
2336     // See the comment in G1CollectedHeap::ref_processing_init()
2337     // about how reference processing currently works in G1.
2338 
2339     // Set the soft reference policy
2340     rp->setup_policy(clear_all_soft_refs);
2341     assert(_markStack.isEmpty(), "mark stack should be empty");
2342 
2343     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2344     // in serial reference processing. Note these closures are also
    // used for serially processing (by the current thread) the
2346     // JNI references during parallel reference processing.
2347     //
2348     // These closures do not need to synchronize with the worker
2349     // threads involved in parallel reference processing as these
2350     // instances are executed serially by the current thread (e.g.
2351     // reference processing is not multi-threaded and is thus
2352     // performed by the current thread instead of a gang worker).
2353     //
    // The gang tasks involved in parallel reference processing create
2355     // their own instances of these closures, which do their own
2356     // synchronization among themselves.
2357     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2358     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2359 
2360     // We need at least one active thread. If reference processing
2361     // is not multi-threaded we use the current (VMThread) thread,
2362     // otherwise we use the work gang from the G1CollectedHeap and
2363     // we utilize all the worker threads we can.
2364     bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
2365     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2366     active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U);
2367 
2368     // Parallel processing task executor.
2369     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2370                                               g1h->workers(), active_workers);
2371     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2372 
2373     ReferenceProcessorStats stats;
2374 
2375     // Set the concurrency level. The phase was already set prior to
2376     // executing the remark task.
2377     set_concurrency(active_workers);
2378 
2379     // Set the degree of MT processing here.  If the discovery was done MT,
2380     // the number of threads involved during discovery could differ from
2381     // the number of active workers.  This is OK as long as the discovered
2382     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2383     rp->set_active_mt_degree(active_workers);
2384 
2385     // Process the weak references.
2386     stats = rp->process_discovered_references(&g1_is_alive,
2387                                               &g1_keep_alive,
2388                                               &g1_drain_mark_stack,
2389                                               executor,
2390                                               g1h->gc_timer_cm());
2391 
2392     // The do_oop work routines of the keep_alive and drain_marking_stack
2393     // oop closures will set the has_overflown flag if we overflow the
2394     // global marking stack.
2395 
2396     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2397 
2398     assert(_markStack.overflow() || _markStack.isEmpty(),
2399             "mark stack should be empty (unless it overflowed)");
2400 
2401     if (_markStack.overflow()) {
2402       // This should have been done already when we tried to push an
2403       // entry on to the global mark stack. But let's do it again.
2404       set_has_overflown();
2405     }
2406 
2407     assert(rp->num_q() == active_workers, "why not");
2408 
2409     rp->enqueue_discovered_references(executor);
2410 
2411     rp->verify_no_references_recorded();
2412     assert(!rp->discovery_enabled(), "Post condition");
2413   }
2414 
2415   // Now clean up stale oops in StringTable
2416   StringTable::unlink(&g1_is_alive);
2417   // Clean up unreferenced symbols in symbol table.
2418   SymbolTable::unlink();
2419 }
2420 
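// Exchange the roles of the two marking bitmaps: the now-complete
// "next" bitmap becomes the "prev" bitmap, and the old "prev" bitmap
// becomes the "next" bitmap for the following marking cycle.
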
2421 void ConcurrentMark::swapMarkBitMaps() {
2422   CMBitMapRO* temp = _prevMarkBitMap;
2423   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2424   _nextMarkBitMap  = (CMBitMap*)  temp;
2425 }
2426 
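// Gang task used to finish marking during the remark pause. Each
// active CMTask repeatedly calls do_marking_step() with a very large
// time target until it either completes or the global mark stack
// overflows (in which case remark is abandoned and concurrent marking
// will be restarted).
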
2427 class CMRemarkTask: public AbstractGangTask {
2428 private:
2429   ConcurrentMark* _cm;
2430   bool            _is_serial;
2431 public:
2432   void work(uint worker_id) {
2433     // Since all available tasks are actually started, we should
    // only proceed if we're supposed to be active.
2435     if (worker_id < _cm->active_tasks()) {
2436       CMTask* task = _cm->task(worker_id);
2437       task->record_start_time();
2438       do {
2439         task->do_marking_step(1000000000.0 /* something very large */,
2440                               true         /* do_termination       */,
2441                               _is_serial);
2442       } while (task->has_aborted() && !_cm->has_overflown());
2443       // If we overflow, then we do not want to restart. We instead
2444       // want to abort remark and do concurrent marking again.
2445       task->record_end_time();
2446     }
2447   }
2448 
2449   CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
2450     AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
2451     _cm->terminator()->reset_for_reuse(active_workers);
2452   }
2453 };
2454 
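// Finishes the marking work during the remark pause, either in
// parallel via the work gang or serially on the VMThread, and then
// checks that all completed SATB buffers have been processed (unless
// the mark stack overflowed).
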
2455 void ConcurrentMark::checkpointRootsFinalWork() {
2456   ResourceMark rm;
2457   HandleMark   hm;
2458   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2459 
2460   g1h->ensure_parsability(false);
2461 
2462   if (G1CollectedHeap::use_parallel_gc_threads()) {
2463     G1CollectedHeap::StrongRootsScope srs(g1h);
2464     // this is remark, so we'll use up all active threads
2465     uint active_workers = g1h->workers()->active_workers();
2466     if (active_workers == 0) {
2467       assert(active_workers > 0, "Should have been set earlier");
2468       active_workers = (uint) ParallelGCThreads;
2469       g1h->workers()->set_active_workers(active_workers);
2470     }
2471     set_concurrency_and_phase(active_workers, false /* concurrent */);
    // Leave _parallel_marking_threads at its
2473     // value originally calculated in the ConcurrentMark
2474     // constructor and pass values of the active workers
2475     // through the gang in the task.
2476 
2477     CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
2478     // We will start all available threads, even if we decide that the
2479     // active_workers will be fewer. The extra ones will just bail out
2480     // immediately.
2481     g1h->set_par_threads(active_workers);
2482     g1h->workers()->run_task(&remarkTask);
2483     g1h->set_par_threads(0);
2484   } else {
2485     G1CollectedHeap::StrongRootsScope srs(g1h);
2486     uint active_workers = 1;
2487     set_concurrency_and_phase(active_workers, false /* concurrent */);
2488 
2489     // Note - if there's no work gang then the VMThread will be
2490     // the thread to execute the remark - serially. We have
2491     // to pass true for the is_serial parameter so that
2492     // CMTask::do_marking_step() doesn't enter the sync
2493     // barriers in the event of an overflow. Doing so will
2494     // cause an assert that the current thread is not a
2495     // concurrent GC thread.
2496     CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/);
2497     remarkTask.work(0);
2498   }
2499   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2500   guarantee(has_overflown() ||
2501             satb_mq_set.completed_buffers_num() == 0,
2502             err_msg("Invariant: has_overflown = %s, num buffers = %d",
2503                     BOOL_TO_STR(has_overflown()),
2504                     satb_mq_set.completed_buffers_num()));
2505 
2506   print_stats();
2507 
2508 #if VERIFY_OBJS_PROCESSED
2509   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2510     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2511                            _scan_obj_cl.objs_processed,
2512                            ThreadLocalObjQueue::objs_enqueued);
2513     guarantee(_scan_obj_cl.objs_processed ==
2514               ThreadLocalObjQueue::objs_enqueued,
2515               "Different number of objs processed and enqueued.");
2516   }
2517 #endif
2518 }
2519 
2520 #ifndef PRODUCT
2521 
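// Debug-only (non-PRODUCT) closures backing
// ConcurrentMark::print_reachable(). They dump, for every heap
// region, the objects it contains and their oop fields, annotating
// each entry with whether it was allocated since marking started
// (over TAMS) and/or is marked.
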
2522 class PrintReachableOopClosure: public OopClosure {
2523 private:
2524   G1CollectedHeap* _g1h;
2525   outputStream*    _out;
2526   VerifyOption     _vo;
2527   bool             _all;
2528 
2529 public:
2530   PrintReachableOopClosure(outputStream* out,
2531                            VerifyOption  vo,
2532                            bool          all) :
2533     _g1h(G1CollectedHeap::heap()),
2534     _out(out), _vo(vo), _all(all) { }
2535 
2536   void do_oop(narrowOop* p) { do_oop_work(p); }
2537   void do_oop(      oop* p) { do_oop_work(p); }
2538 
2539   template <class T> void do_oop_work(T* p) {
2540     oop         obj = oopDesc::load_decode_heap_oop(p);
2541     const char* str = NULL;
2542     const char* str2 = "";
2543 
2544     if (obj == NULL) {
2545       str = "";
2546     } else if (!_g1h->is_in_g1_reserved(obj)) {
2547       str = " O";
2548     } else {
2549       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2550       guarantee(hr != NULL, "invariant");
2551       bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2552       bool marked = _g1h->is_marked(obj, _vo);
2553 
2554       if (over_tams) {
2555         str = " >";
2556         if (marked) {
2557           str2 = " AND MARKED";
2558         }
2559       } else if (marked) {
2560         str = " M";
2561       } else {
2562         str = " NOT";
2563       }
2564     }
2565 
2566     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2567                    p, (void*) obj, str, str2);
2568   }
2569 };
2570 
2571 class PrintReachableObjectClosure : public ObjectClosure {
2572 private:
2573   G1CollectedHeap* _g1h;
2574   outputStream*    _out;
2575   VerifyOption     _vo;
2576   bool             _all;
2577   HeapRegion*      _hr;
2578 
2579 public:
2580   PrintReachableObjectClosure(outputStream* out,
2581                               VerifyOption  vo,
2582                               bool          all,
2583                               HeapRegion*   hr) :
2584     _g1h(G1CollectedHeap::heap()),
2585     _out(out), _vo(vo), _all(all), _hr(hr) { }
2586 
2587   void do_object(oop o) {
2588     bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2589     bool marked = _g1h->is_marked(o, _vo);
2590     bool print_it = _all || over_tams || marked;
2591 
2592     if (print_it) {
2593       _out->print_cr(" "PTR_FORMAT"%s",
2594                      o, (over_tams) ? " >" : (marked) ? " M" : "");
2595       PrintReachableOopClosure oopCl(_out, _vo, _all);
2596       o->oop_iterate(&oopCl);
2597     }
2598   }
2599 };
2600 
2601 class PrintReachableRegionClosure : public HeapRegionClosure {
2602 private:
2603   G1CollectedHeap* _g1h;
2604   outputStream*    _out;
2605   VerifyOption     _vo;
2606   bool             _all;
2607 
2608 public:
2609   bool doHeapRegion(HeapRegion* hr) {
2610     HeapWord* b = hr->bottom();
2611     HeapWord* e = hr->end();
2612     HeapWord* t = hr->top();
2613     HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2614     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2615                    "TAMS: "PTR_FORMAT, b, e, t, p);
2616     _out->cr();
2617 
2618     HeapWord* from = b;
2619     HeapWord* to   = t;
2620 
2621     if (to > from) {
2622       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2623       _out->cr();
2624       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2625       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2626       _out->cr();
2627     }
2628 
2629     return false;
2630   }
2631 
2632   PrintReachableRegionClosure(outputStream* out,
2633                               VerifyOption  vo,
2634                               bool          all) :
2635     _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2636 };
2637 
2638 void ConcurrentMark::print_reachable(const char* str,
2639                                      VerifyOption vo,
2640                                      bool all) {
2641   gclog_or_tty->cr();
2642   gclog_or_tty->print_cr("== Doing heap dump... ");
2643 
2644   if (G1PrintReachableBaseFile == NULL) {
2645     gclog_or_tty->print_cr("  #### error: no base file defined");
2646     return;
2647   }
2648 
2649   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2650       (JVM_MAXPATHLEN - 1)) {
2651     gclog_or_tty->print_cr("  #### error: file name too long");
2652     return;
2653   }
2654 
2655   char file_name[JVM_MAXPATHLEN];
2656   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2657   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2658 
2659   fileStream fout(file_name);
2660   if (!fout.is_open()) {
2661     gclog_or_tty->print_cr("  #### error: could not open file");
2662     return;
2663   }
2664 
2665   outputStream* out = &fout;
2666   out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2667   out->cr();
2668 
2669   out->print_cr("--- ITERATING OVER REGIONS");
2670   out->cr();
2671   PrintReachableRegionClosure rcl(out, vo, all);
2672   _g1h->heap_region_iterate(&rcl);
2673   out->cr();
2674 
2675   gclog_or_tty->print_cr("  done");
2676   gclog_or_tty->flush();
2677 }
2678 
2679 #endif // PRODUCT
2680 
2681 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2682   // Note we are overriding the read-only view of the prev map here, via
2683   // the cast.
2684   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2685 }
2686 
2687 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2688   _nextMarkBitMap->clearRange(mr);
2689 }
2690 
2691 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
2692   clearRangePrevBitmap(mr);
2693   clearRangeNextBitmap(mr);
2694 }
2695 
2696 HeapRegion*
2697 ConcurrentMark::claim_region(int task_num) {
2698   // "checkpoint" the finger
2699   HeapWord* finger = _finger;
2700 
2701   // _heap_end will not change underneath our feet; it only changes at
2702   // yield points.
2703   while (finger < _heap_end) {
2704     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2705 
2706     // Note on how this code handles humongous regions. In the
2707     // normal case the finger will reach the start of a "starts
2708     // humongous" (SH) region. Its end will either be the end of the
2709     // last "continues humongous" (CH) region in the sequence, or the
2710     // standard end of the SH region (if the SH is the only region in
2711     // the sequence). That way claim_region() will skip over the CH
2712     // regions. However, there is a subtle race between a CM thread
2713     // executing this method and a mutator thread doing a humongous
2714     // object allocation. The two are not mutually exclusive as the CM
2715     // thread does not need to hold the Heap_lock when it gets
2716     // here. So there is a chance that claim_region() will come across
    // a free region that's in the process of becoming a SH or a CH
2718     // region. In the former case, it will either
2719     //   a) Miss the update to the region's end, in which case it will
2720     //      visit every subsequent CH region, will find their bitmaps
2721     //      empty, and do nothing, or
2722     //   b) Will observe the update of the region's end (in which case
2723     //      it will skip the subsequent CH regions).
2724     // If it comes across a region that suddenly becomes CH, the
2725     // scenario will be similar to b). So, the race between
2726     // claim_region() and a humongous object allocation might force us
2727     // to do a bit of unnecessary work (due to some unnecessary bitmap
    // iterations) but it should not introduce any correctness issues.
2729     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
2730     HeapWord*   bottom        = curr_region->bottom();
2731     HeapWord*   end           = curr_region->end();
2732     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2733 
2734     if (verbose_low()) {
2735       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2736                              "["PTR_FORMAT", "PTR_FORMAT"), "
2737                              "limit = "PTR_FORMAT,
2738                              task_num, curr_region, bottom, end, limit);
2739     }
2740 
2741     // Is the gap between reading the finger and doing the CAS too long?
2742     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2743     if (res == finger) {
2744       // we succeeded
2745 
      // notice that _finger == end cannot be guaranteed here since
2747       // someone else might have moved the finger even further
2748       assert(_finger >= end, "the finger should have moved forward");
2749 
2750       if (verbose_low()) {
2751         gclog_or_tty->print_cr("[%d] we were successful with region = "
2752                                PTR_FORMAT, task_num, curr_region);
2753       }
2754 
2755       if (limit > bottom) {
2756         if (verbose_low()) {
2757           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2758                                  "returning it ", task_num, curr_region);
2759         }
2760         return curr_region;
2761       } else {
2762         assert(limit == bottom,
2763                "the region limit should be at bottom");
2764         if (verbose_low()) {
2765           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2766                                  "returning NULL", task_num, curr_region);
2767         }
2768         // we return NULL and the caller should try calling
2769         // claim_region() again.
2770         return NULL;
2771       }
2772     } else {
2773       assert(_finger > finger, "the finger should have moved forward");
2774       if (verbose_low()) {
2775         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2776                                "global finger = "PTR_FORMAT", "
2777                                "our finger = "PTR_FORMAT,
2778                                task_num, _finger, finger);
2779       }
2780 
2781       // read it again
2782       finger = _finger;
2783     }
2784   }
2785 
2786   return NULL;
2787 }
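
     // The claiming scheme above boils down to advancing a single shared
     // "finger" with a CAS: the thread that successfully moves it from
     // 'finger' to 'end' owns the region [finger, end). A minimal,
     // self-contained sketch of that pattern (std::atomic stands in for
     // HotSpot's Atomic::cmpxchg_ptr, fixed-size regions are assumed, and
     // all names below are illustrative only):
     //
     //   #include <atomic>
     //   #include <cstddef>
     //
     //   static std::atomic<char*> g_finger;   // shared claiming finger
     //
     //   static char* claim_next(char* heap_end, size_t region_bytes) {
     //     char* finger = g_finger.load();
     //     while (finger < heap_end) {
     //       char* end = finger + region_bytes;
     //       // On success we own [finger, end); on failure 'finger' is
     //       // refreshed with the winner's value and we simply retry.
     //       if (g_finger.compare_exchange_strong(finger, end)) {
     //         return finger;
     //       }
     //     }
     //     return NULL;   // nothing left to claim
     //   }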
2788 
2789 #ifndef PRODUCT
2790 enum VerifyNoCSetOopsPhase {
2791   VerifyNoCSetOopsStack,
2792   VerifyNoCSetOopsQueues,
2793   VerifyNoCSetOopsSATBCompleted,
2794   VerifyNoCSetOopsSATBThread
2795 };
2796 
2797 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
2798 private:
2799   G1CollectedHeap* _g1h;
2800   VerifyNoCSetOopsPhase _phase;
2801   int _info;
2802 
2803   const char* phase_str() {
2804     switch (_phase) {
2805     case VerifyNoCSetOopsStack:         return "Stack";
2806     case VerifyNoCSetOopsQueues:        return "Queue";
2807     case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
2808     case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
2809     default:                            ShouldNotReachHere();
2810     }
2811     return NULL;
2812   }
2813 
2814   void do_object_work(oop obj) {
2815     guarantee(!_g1h->obj_in_cs(obj),
2816               err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2817                       (void*) obj, phase_str(), _info));
2818   }
2819 
2820 public:
2821   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2822 
2823   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2824     _phase = phase;
2825     _info = info;
2826   }
2827 
2828   virtual void do_oop(oop* p) {
2829     oop obj = oopDesc::load_decode_heap_oop(p);
2830     do_object_work(obj);
2831   }
2832 
2833   virtual void do_oop(narrowOop* p) {
2834     // We should not come across narrow oops while scanning marking
2835     // stacks and SATB buffers.
2836     ShouldNotReachHere();
2837   }
2838 
2839   virtual void do_object(oop obj) {
2840     do_object_work(obj);
2841   }
2842 };
2843 
2844 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
2845                                          bool verify_enqueued_buffers,
2846                                          bool verify_thread_buffers,
2847                                          bool verify_fingers) {
2848   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2849   if (!G1CollectedHeap::heap()->mark_in_progress()) {
2850     return;
2851   }
2852 
2853   VerifyNoCSetOopsClosure cl;
2854 
2855   if (verify_stacks) {
2856     // Verify entries on the global mark stack
2857     cl.set_phase(VerifyNoCSetOopsStack);
2858     _markStack.oops_do(&cl);
2859 
2860     // Verify entries on the task queues
2861     for (int i = 0; i < (int) _max_task_num; i += 1) {
2862       cl.set_phase(VerifyNoCSetOopsQueues, i);
2863       OopTaskQueue* queue = _task_queues->queue(i);
2864       queue->oops_do(&cl);
2865     }
2866   }
2867 
2868   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
2869 
2870   // Verify entries on the enqueued SATB buffers
2871   if (verify_enqueued_buffers) {
2872     cl.set_phase(VerifyNoCSetOopsSATBCompleted);
2873     satb_qs.iterate_completed_buffers_read_only(&cl);
2874   }
2875 
2876   // Verify entries on the per-thread SATB buffers
2877   if (verify_thread_buffers) {
2878     cl.set_phase(VerifyNoCSetOopsSATBThread);
2879     satb_qs.iterate_thread_buffers_read_only(&cl);
2880   }
2881 
2882   if (verify_fingers) {
2883     // Verify the global finger
2884     HeapWord* global_finger = finger();
2885     if (global_finger != NULL && global_finger < _heap_end) {
2886       // The global finger always points to a heap region boundary. We
2887       // use heap_region_containing_raw() to get the containing region
2888       // given that the global finger could be pointing to a free region
2889       // which subsequently becomes a continues humongous region. If that
2890       // happens, heap_region_containing() would return the corresponding
2891       // starts humongous region (whose bottom() the finger would not
2892       // match) and the check below would no longer hold.
2893       HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2894       guarantee(global_finger == global_hr->bottom(),
2895                 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
2896                         global_finger, HR_FORMAT_PARAMS(global_hr)));
2897     }
2898 
2899     // Verify the task fingers
2900     assert(parallel_marking_threads() <= _max_task_num, "sanity");
2901     for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
2902       CMTask* task = _tasks[i];
2903       HeapWord* task_finger = task->finger();
2904       if (task_finger != NULL && task_finger < _heap_end) {
2905         // See above note on the global finger verification.
2906         HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2907         guarantee(task_finger == task_hr->bottom() ||
2908                   !task_hr->in_collection_set(),
2909                   err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
2910                           task_finger, HR_FORMAT_PARAMS(task_hr)));
2911       }
2912     }
2913   }
2914 }
2915 #endif // PRODUCT
2916 
2917 // Aggregate the counting data that was constructed concurrently
2918 // with marking.
2919 class AggregateCountDataHRClosure: public HeapRegionClosure {
2920   G1CollectedHeap* _g1h;
2921   ConcurrentMark* _cm;
2922   CardTableModRefBS* _ct_bs;
2923   BitMap* _cm_card_bm;
2924   size_t _max_task_num;
2925 
2926  public:
2927   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2928                               BitMap* cm_card_bm,
2929                               size_t max_task_num) :
2930     _g1h(g1h), _cm(g1h->concurrent_mark()),
2931     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
2932     _cm_card_bm(cm_card_bm), _max_task_num(max_task_num) { }
2933 
2934   bool doHeapRegion(HeapRegion* hr) {
2935     if (hr->continuesHumongous()) {
2936       // We will ignore these here and process them when their
2937       // associated "starts humongous" region is processed.
2938       // Note that we cannot rely on their associated
2939       // "starts humongous" region to have its bit set to 1
2940       // since, due to the region chunking in the parallel region
2941       // iteration, a "continues humongous" region might be visited
2942       // before its associated "starts humongous".
2943       return false;
2944     }
2945 
2946     HeapWord* start = hr->bottom();
2947     HeapWord* limit = hr->next_top_at_mark_start();
2948     HeapWord* end = hr->end();
2949 
2950     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2951            err_msg("Preconditions not met - "
2952                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
2953                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
2954                    start, limit, hr->top(), hr->end()));
2955 
2956     assert(hr->next_marked_bytes() == 0, "Precondition");
2957 
2958     if (start == limit) {
2959       // NTAMS of this region has not been set so nothing to do.
2960       return false;
2961     }
2962 
2963     // 'start' should be in the heap.
2964     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2965     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2966     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2967 
2968     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2969     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2970     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2971 
2972     // If ntams is not card aligned then we bump the card bitmap index
2973     // for limit so that we get all of the cards spanned by
2974     // the object ending at ntams.
2975     // Note: if this is the last region in the heap then ntams
2976     // could actually be just beyond the end of the heap;
2977     // limit_idx will then correspond to a (non-existent) card
2978     // that is also outside the heap.
2979     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2980       limit_idx += 1;
2981     }
2982 
2983     assert(limit_idx <= end_idx, "or else use atomics");
2984 
2985     // Aggregate the "stripe" in the count data associated with hr.
2986     uint hrs_index = hr->hrs_index();
2987     size_t marked_bytes = 0;
2988 
2989     for (int i = 0; (size_t)i < _max_task_num; i += 1) {
2990       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2991       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2992 
2993       // Fetch the marked_bytes in this region for task i and
2994       // add it to the running total for this region.
2995       marked_bytes += marked_bytes_array[hrs_index];
2996 
2997       // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
2998       // into the global card bitmap.
2999       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
3000 
3001       while (scan_idx < limit_idx) {
3002         assert(task_card_bm->at(scan_idx) == true, "should be");
3003         _cm_card_bm->set_bit(scan_idx);
3004         assert(_cm_card_bm->at(scan_idx) == true, "should be");
3005 
3006         // BitMap::get_next_one_offset() can handle the case when
3007         // its left_offset parameter is greater than its right_offset
3008         // parameter. It does, however, have an early exit if
3009         // left_offset == right_offset. So let's limit the value
3010         // passed in for left offset here.
3011         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3012         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3013       }
3014     }
3015 
3016     // Update the marked bytes for this region.
3017     hr->add_to_marked_bytes(marked_bytes);
3018 
3019     // Next heap region
3020     return false;
3021   }
3022 };
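
     // The inner loop above OR-s each task's card bitmap stripe into the
     // global card bitmap one set bit at a time. The same union, reduced to
     // a minimal sketch with std::vector<bool> standing in for HotSpot's
     // BitMap (all names illustrative only):
     //
     //   #include <vector>
     //   #include <cstddef>
     //
     //   static void aggregate_cards(std::vector<bool>& global_bm,
     //                               const std::vector<std::vector<bool> >& task_bms,
     //                               size_t start_idx, size_t limit_idx) {
     //     for (size_t t = 0; t < task_bms.size(); ++t) {
     //       for (size_t idx = start_idx; idx < limit_idx; ++idx) {
     //         if (task_bms[t][idx]) {
     //           // Setting a bit is idempotent, so a region's stripe can be
     //           // aggregated without atomics as long as regions (and hence
     //           // card ranges) are not shared between workers.
     //           global_bm[idx] = true;
     //         }
     //       }
     //     }
     //   }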
3023 
3024 class G1AggregateCountDataTask: public AbstractGangTask {
3025 protected:
3026   G1CollectedHeap* _g1h;
3027   ConcurrentMark* _cm;
3028   BitMap* _cm_card_bm;
3029   size_t _max_task_num;
3030   int _active_workers;
3031 
3032 public:
3033   G1AggregateCountDataTask(G1CollectedHeap* g1h,
3034                            ConcurrentMark* cm,
3035                            BitMap* cm_card_bm,
3036                            size_t max_task_num,
3037                            int n_workers) :
3038     AbstractGangTask("Count Aggregation"),
3039     _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
3040     _max_task_num(max_task_num),
3041     _active_workers(n_workers) { }
3042 
3043   void work(uint worker_id) {
3044     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_task_num);
3045 
3046     if (G1CollectedHeap::use_parallel_gc_threads()) {
3047       _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
3048                                             _active_workers,
3049                                             HeapRegion::AggregateCountClaimValue);
3050     } else {
3051       _g1h->heap_region_iterate(&cl);
3052     }
3053   }
3054 };
3055 
3056 
3057 void ConcurrentMark::aggregate_count_data() {
3058   int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3059                         _g1h->workers()->active_workers() :
3060                         1);
3061 
3062   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3063                                            _max_task_num, n_workers);
3064 
3065   if (G1CollectedHeap::use_parallel_gc_threads()) {
3066     assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3067            "sanity check");
3068     _g1h->set_par_threads(n_workers);
3069     _g1h->workers()->run_task(&g1_par_agg_task);
3070     _g1h->set_par_threads(0);
3071 
3072     assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3073            "sanity check");
3074     _g1h->reset_heap_region_claim_values();
3075   } else {
3076     g1_par_agg_task.work(0);
3077   }
3078 }
3079 
3080 // Clear the per-worker arrays used to store the per-region counting data
3081 void ConcurrentMark::clear_all_count_data() {
3082   // Clear the global card bitmap - it will be filled during
3083   // liveness count aggregation (during remark) and the
3084   // final counting task.
3085   _card_bm.clear();
3086 
3087   // Clear the global region bitmap - it will be filled as part
3088   // of the final counting task.
3089   _region_bm.clear();
3090 
3091   uint max_regions = _g1h->max_regions();
3092   assert(_max_task_num != 0, "uninitialized");
3093 
3094   for (int i = 0; (size_t) i < _max_task_num; i += 1) {
3095     BitMap* task_card_bm = count_card_bitmap_for(i);
3096     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3097 
3098     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3099     assert(marked_bytes_array != NULL, "uninitialized");
3100 
3101     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3102     task_card_bm->clear();
3103   }
3104 }
3105 
3106 void ConcurrentMark::print_stats() {
3107   if (verbose_stats()) {
3108     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3109     for (size_t i = 0; i < _active_tasks; ++i) {
3110       _tasks[i]->print_stats();
3111       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3112     }
3113   }
3114 }
3115 
3116 // abandon current marking iteration due to a Full GC
3117 void ConcurrentMark::abort() {
3118   // Clear all marks to force marking thread to do nothing
3119   _nextMarkBitMap->clearAll();
3120   // Clear the liveness counting data
3121   clear_all_count_data();
3122   // Empty mark stack
3123   reset_marking_state();
3124   for (int i = 0; i < (int)_max_task_num; ++i) {
3125     _tasks[i]->clear_region_fields();
3126   }
3127   _has_aborted = true;
3128 
3129   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3130   satb_mq_set.abandon_partial_marking();
3131   // This can be called either during or outside marking; we'll read
3132   // the expected_active value from the SATB queue set.
3133   satb_mq_set.set_active_all_threads(
3134                                  false, /* new active value */
3135                                  satb_mq_set.is_active() /* expected_active */);
3136 
3137   _g1h->trace_heap_after_concurrent_cycle();
3138   _g1h->register_concurrent_cycle_end();
3139 }
3140 
3141 static void print_ms_time_info(const char* prefix, const char* name,
3142                                NumberSeq& ns) {
3143   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3144                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3145   if (ns.num() > 0) {
3146     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3147                            prefix, ns.sd(), ns.maximum());
3148   }
3149 }
3150 
3151 void ConcurrentMark::print_summary_info() {
3152   gclog_or_tty->print_cr(" Concurrent marking:");
3153   print_ms_time_info("  ", "init marks", _init_times);
3154   print_ms_time_info("  ", "remarks", _remark_times);
3155   {
3156     print_ms_time_info("     ", "final marks", _remark_mark_times);
3157     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3158 
3159   }
3160   print_ms_time_info("  ", "cleanups", _cleanup_times);
3161   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3162                          _total_counting_time,
3163                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3164                           (double)_cleanup_times.num()
3165                          : 0.0));
3166   if (G1ScrubRemSets) {
3167     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3168                            _total_rs_scrub_time,
3169                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3170                             (double)_cleanup_times.num()
3171                            : 0.0));
3172   }
3173   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3174                          (_init_times.sum() + _remark_times.sum() +
3175                           _cleanup_times.sum())/1000.0);
3176   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3177                 "(%8.2f s marking).",
3178                 cmThread()->vtime_accum(),
3179                 cmThread()->vtime_mark_accum());
3180 }
3181 
3182 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3183   if (use_parallel_marking_threads()) {
3184     _parallel_workers->print_worker_threads_on(st);
3185   }
3186 }
3187 
3188 // We take a break if someone is trying to stop the world.
3189 bool ConcurrentMark::do_yield_check(uint worker_id) {
3190   if (should_yield()) {
3191     if (worker_id == 0) {
3192       _g1h->g1_policy()->record_concurrent_pause();
3193     }
3194     cmThread()->yield();
3195     return true;
3196   } else {
3197     return false;
3198   }
3199 }
3200 
3201 bool ConcurrentMark::should_yield() {
3202   return cmThread()->should_yield();
3203 }
3204 
3205 bool ConcurrentMark::containing_card_is_marked(void* p) {
3206   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3207   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3208 }
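
     // For example, with the usual 512-byte cards (card_shift == 9), an
     // address that is 0x1234 == 4660 bytes past the start of the reserved
     // heap falls on card 4660 >> 9 == 9, so bit 9 of _card_bm is the one
     // tested above.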
3209 
3210 bool ConcurrentMark::containing_cards_are_marked(void* start,
3211                                                  void* last) {
3212   return containing_card_is_marked(start) &&
3213          containing_card_is_marked(last);
3214 }
3215 
3216 #ifndef PRODUCT
3217 // for debugging purposes
3218 void ConcurrentMark::print_finger() {
3219   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3220                          _heap_start, _heap_end, _finger);
3221   for (int i = 0; i < (int) _max_task_num; ++i) {
3222     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
3223   }
3224   gclog_or_tty->print_cr("");
3225 }
3226 #endif
3227 
3228 void CMTask::scan_object(oop obj) {
3229   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3230 
3231   if (_cm->verbose_high()) {
3232     gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
3233                            _task_id, (void*) obj);
3234   }
3235 
3236   size_t obj_size = obj->size();
3237   _words_scanned += obj_size;
3238 
3239   obj->oop_iterate(_cm_oop_closure);
3240   statsOnly( ++_objs_scanned );
3241   check_limits();
3242 }
3243 
3244 // Closure for iteration over bitmaps
3245 class CMBitMapClosure : public BitMapClosure {
3246 private:
3247   // the bitmap that is being iterated over
3248   CMBitMap*                   _nextMarkBitMap;
3249   ConcurrentMark*             _cm;
3250   CMTask*                     _task;
3251 
3252 public:
3253   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3254     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3255 
3256   bool do_bit(size_t offset) {
3257     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3258     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3259     assert( addr < _cm->finger(), "invariant");
3260 
3261     statsOnly( _task->increase_objs_found_on_bitmap() );
3262     assert(addr >= _task->finger(), "invariant");
3263 
3264     // We move that task's local finger along.
3265     _task->move_finger_to(addr);
3266 
3267     _task->scan_object(oop(addr));
3268     // we only partially drain the local queue and global stack
3269     _task->drain_local_queue(true);
3270     _task->drain_global_stack(true);
3271 
3272     // if the has_aborted flag has been raised, we need to bail out of
3273     // the iteration
3274     return !_task->has_aborted();
3275   }
3276 };
3277 
3278 // Closure for iterating over objects, currently only used for
3279 // processing SATB buffers.
3280 class CMObjectClosure : public ObjectClosure {
3281 private:
3282   CMTask* _task;
3283 
3284 public:
3285   void do_object(oop obj) {
3286     _task->deal_with_reference(obj);
3287   }
3288 
3289   CMObjectClosure(CMTask* task) : _task(task) { }
3290 };
3291 
3292 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3293                                ConcurrentMark* cm,
3294                                CMTask* task)
3295   : _g1h(g1h), _cm(cm), _task(task) {
3296   assert(_ref_processor == NULL, "should be initialized to NULL");
3297 
3298   if (G1UseConcMarkReferenceProcessing) {
3299     _ref_processor = g1h->ref_processor_cm();
3300     assert(_ref_processor != NULL, "should not be NULL");
3301   }
3302 }
3303 
3304 void CMTask::setup_for_region(HeapRegion* hr) {
3305   // Separated the asserts so that we know which one fires.
3306   assert(hr != NULL,
3307         "claim_region() should have filtered out NULL regions");
3308   assert(!hr->continuesHumongous(),
3309         "claim_region() should have filtered out continues humongous regions");
3310 
3311   if (_cm->verbose_low()) {
3312     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3313                            _task_id, hr);
3314   }
3315 
3316   _curr_region  = hr;
3317   _finger       = hr->bottom();
3318   update_region_limit();
3319 }
3320 
3321 void CMTask::update_region_limit() {
3322   HeapRegion* hr            = _curr_region;
3323   HeapWord* bottom          = hr->bottom();
3324   HeapWord* limit           = hr->next_top_at_mark_start();
3325 
3326   if (limit == bottom) {
3327     if (_cm->verbose_low()) {
3328       gclog_or_tty->print_cr("[%d] found an empty region "
3329                              "["PTR_FORMAT", "PTR_FORMAT")",
3330                              _task_id, bottom, limit);
3331     }
3332     // The region was collected underneath our feet.
3333     // We set the finger to bottom to ensure that the bitmap
3334     // iteration that will follow this will not do anything.
3335     // (this is not a condition that holds when we set the region up,
3336     // as the region is not supposed to be empty in the first place)
3337     _finger = bottom;
3338   } else if (limit >= _region_limit) {
3339     assert(limit >= _finger, "peace of mind");
3340   } else {
3341     assert(limit < _region_limit, "only way to get here");
3342     // This can happen under some pretty unusual circumstances.  An
3343     // evacuation pause empties the region underneath our feet (NTAMS
3344     // at bottom). We then do some allocation in the region (NTAMS
3345     // stays at bottom), followed by the region being used as a GC
3346     // alloc region (NTAMS will move to top() and the objects
3347     // originally below it will be grayed). All objects now marked in
3348     // the region are explicitly grayed, if below the global finger,
3349     // and in fact we do not need to scan anything else. So, we simply
3350     // set _finger to be limit to ensure that the bitmap iteration
3351     // doesn't do anything.
3352     _finger = limit;
3353   }
3354 
3355   _region_limit = limit;
3356 }
3357 
3358 void CMTask::giveup_current_region() {
3359   assert(_curr_region != NULL, "invariant");
3360   if (_cm->verbose_low()) {
3361     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3362                            _task_id, _curr_region);
3363   }
3364   clear_region_fields();
3365 }
3366 
3367 void CMTask::clear_region_fields() {
3368   // Values for these three fields that indicate that we're not
3369   // holding on to a region.
3370   _curr_region   = NULL;
3371   _finger        = NULL;
3372   _region_limit  = NULL;
3373 }
3374 
3375 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3376   if (cm_oop_closure == NULL) {
3377     assert(_cm_oop_closure != NULL, "invariant");
3378   } else {
3379     assert(_cm_oop_closure == NULL, "invariant");
3380   }
3381   _cm_oop_closure = cm_oop_closure;
3382 }
3383 
3384 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3385   guarantee(nextMarkBitMap != NULL, "invariant");
3386 
3387   if (_cm->verbose_low()) {
3388     gclog_or_tty->print_cr("[%d] resetting", _task_id);
3389   }
3390 
3391   _nextMarkBitMap                = nextMarkBitMap;
3392   clear_region_fields();
3393 
3394   _calls                         = 0;
3395   _elapsed_time_ms               = 0.0;
3396   _termination_time_ms           = 0.0;
3397   _termination_start_time_ms     = 0.0;
3398 
3399 #if _MARKING_STATS_
3400   _local_pushes                  = 0;
3401   _local_pops                    = 0;
3402   _local_max_size                = 0;
3403   _objs_scanned                  = 0;
3404   _global_pushes                 = 0;
3405   _global_pops                   = 0;
3406   _global_max_size               = 0;
3407   _global_transfers_to           = 0;
3408   _global_transfers_from         = 0;
3409   _regions_claimed               = 0;
3410   _objs_found_on_bitmap          = 0;
3411   _satb_buffers_processed        = 0;
3412   _steal_attempts                = 0;
3413   _steals                        = 0;
3414   _aborted                       = 0;
3415   _aborted_overflow              = 0;
3416   _aborted_cm_aborted            = 0;
3417   _aborted_yield                 = 0;
3418   _aborted_timed_out             = 0;
3419   _aborted_satb                  = 0;
3420   _aborted_termination           = 0;
3421 #endif // _MARKING_STATS_
3422 }
3423 
3424 bool CMTask::should_exit_termination() {
3425   regular_clock_call();
3426   // This is called when we are in the termination protocol. We should
3427   // quit if, for some reason, this task wants to abort or the global
3428   // stack is not empty (this means that we can get work from it).
3429   return !_cm->mark_stack_empty() || has_aborted();
3430 }
3431 
3432 void CMTask::reached_limit() {
3433   assert(_words_scanned >= _words_scanned_limit ||
3434          _refs_reached >= _refs_reached_limit ,
3435          "shouldn't have been called otherwise");
3436   regular_clock_call();
3437 }
3438 
3439 void CMTask::regular_clock_call() {
3440   if (has_aborted()) return;
3441 
3442   // First, we need to recalculate the words scanned and refs reached
3443   // limits for the next clock call.
3444   recalculate_limits();
3445 
3446   // During the regular clock call we do the following
3447 
3448   // (1) If an overflow has been flagged, then we abort.
3449   if (_cm->has_overflown()) {
3450     set_has_aborted();
3451     return;
3452   }
3453 
3454   // If we are not concurrent (i.e. we're doing remark) we don't need
3455   // to check anything else. The other steps are only needed during
3456   // the concurrent marking phase.
3457   if (!concurrent()) return;
3458 
3459   // (2) If marking has been aborted for Full GC, then we also abort.
3460   if (_cm->has_aborted()) {
3461     set_has_aborted();
3462     statsOnly( ++_aborted_cm_aborted );
3463     return;
3464   }
3465 
3466   double curr_time_ms = os::elapsedVTime() * 1000.0;
3467 
3468   // (3) If marking stats are enabled, then we update the clock interval history.
3469 #if _MARKING_STATS_
3470   if (_words_scanned >= _words_scanned_limit) {
3471     ++_clock_due_to_scanning;
3472   }
3473   if (_refs_reached >= _refs_reached_limit) {
3474     ++_clock_due_to_marking;
3475   }
3476 
3477   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3478   _interval_start_time_ms = curr_time_ms;
3479   _all_clock_intervals_ms.add(last_interval_ms);
3480 
3481   if (_cm->verbose_medium()) {
3482       gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3483                         "scanned = %d%s, refs reached = %d%s",
3484                         _task_id, last_interval_ms,
3485                         _words_scanned,
3486                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3487                         _refs_reached,
3488                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3489   }
3490 #endif // _MARKING_STATS_
3491 
3492   // (4) We check whether we should yield. If we have to, then we abort.
3493   if (_cm->should_yield()) {
3494     // We should yield. To do this we abort the task. The caller is
3495     // responsible for yielding.
3496     set_has_aborted();
3497     statsOnly( ++_aborted_yield );
3498     return;
3499   }
3500 
3501   // (5) We check whether we've reached our time quota. If we have,
3502   // then we abort.
3503   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3504   if (elapsed_time_ms > _time_target_ms) {
3505     set_has_aborted();
3506     _has_timed_out = true;
3507     statsOnly( ++_aborted_timed_out );
3508     return;
3509   }
3510 
3511   // (6) Finally, we check whether there are enough completed SATB
3512   // buffers available for processing. If there are, we abort.
3513   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3514   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3515     if (_cm->verbose_low()) {
3516       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3517                              _task_id);
3518     }
3519     // we do need to process SATB buffers, so we'll abort and restart
3520     // the marking task to do so
3521     set_has_aborted();
3522     statsOnly( ++_aborted_satb );
3523     return;
3524   }
3525 }
3526 
3527 void CMTask::recalculate_limits() {
3528   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3529   _words_scanned_limit      = _real_words_scanned_limit;
3530 
3531   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3532   _refs_reached_limit       = _real_refs_reached_limit;
3533 }
3534 
3535 void CMTask::decrease_limits() {
3536   // This is called when we believe that we're going to do an infrequent
3537   // operation which will increase the per byte scanned cost (i.e. move
3538   // entries to/from the global stack). It basically tries to decrease the
3539   // scanning limit so that the clock is called earlier.
3540 
3541   if (_cm->verbose_medium()) {
3542     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3543   }
3544 
3545   _words_scanned_limit = _real_words_scanned_limit -
3546     3 * words_scanned_period / 4;
3547   _refs_reached_limit  = _real_refs_reached_limit -
3548     3 * refs_reached_period / 4;
3549 }
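
     // Stripped of the G1 specifics, the work-based clock maintained by
     // recalculate_limits()/decrease_limits() looks roughly like the sketch
     // below: work is counted against a budget, and only when the budget is
     // exhausted do we pay for the (comparatively expensive) checks in
     // regular_clock_call(). The period value and all names are
     // illustrative; CMTask additionally keeps a second budget for the
     // number of references visited.
     //
     //   #include <cstddef>
     //
     //   struct WorkClock {
     //     static const size_t words_period = 12 * 1024;  // illustrative
     //     size_t words_scanned;
     //     size_t real_words_limit;   // where the period really ends
     //     size_t words_limit;        // possibly pulled back by decrease_limits()
     //
     //     void recalculate_limits() {
     //       real_words_limit = words_scanned + words_period;
     //       words_limit      = real_words_limit;
     //     }
     //     // Pull the limit back so the clock fires earlier after an
     //     // expensive operation (the decrease_limits() idea).
     //     void decrease_limits() {
     //       words_limit = real_words_limit - 3 * words_period / 4;
     //     }
     //     // Account for scanning one object of 'size' words; returns true
     //     // when it is time to call the clock.
     //     bool account(size_t size) {
     //       words_scanned += size;
     //       return words_scanned >= words_limit;
     //     }
     //   };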
3550 
3551 void CMTask::move_entries_to_global_stack() {
3552   // local array where we'll store the entries that will be popped
3553   // from the local queue
3554   oop buffer[global_stack_transfer_size];
3555 
3556   int n = 0;
3557   oop obj;
3558   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3559     buffer[n] = obj;
3560     ++n;
3561   }
3562 
3563   if (n > 0) {
3564     // we popped at least one entry from the local queue
3565 
3566     statsOnly( ++_global_transfers_to; _local_pops += n );
3567 
3568     if (!_cm->mark_stack_push(buffer, n)) {
3569       if (_cm->verbose_low()) {
3570         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
3571                                _task_id);
3572       }
3573       set_has_aborted();
3574     } else {
3575       // the transfer was successful
3576 
3577       if (_cm->verbose_medium()) {
3578         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3579                                _task_id, n);
3580       }
3581       statsOnly( int tmp_size = _cm->mark_stack_size();
3582                  if (tmp_size > _global_max_size) {
3583                    _global_max_size = tmp_size;
3584                  }
3585                  _global_pushes += n );
3586     }
3587   }
3588 
3589   // this operation was quite expensive, so decrease the limits
3590   decrease_limits();
3591 }
3592 
3593 void CMTask::get_entries_from_global_stack() {
3594   // local array where we'll store the entries that will be popped
3595   // from the global stack.
3596   oop buffer[global_stack_transfer_size];
3597   int n;
3598   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3599   assert(n <= global_stack_transfer_size,
3600          "we should not pop more than the given limit");
3601   if (n > 0) {
3602     // yes, we did actually pop at least one entry
3603 
3604     statsOnly( ++_global_transfers_from; _global_pops += n );
3605     if (_cm->verbose_medium()) {
3606       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3607                              _task_id, n);
3608     }
3609     for (int i = 0; i < n; ++i) {
3610       bool success = _task_queue->push(buffer[i]);
3611       // We only call this when the local queue is empty or under a
3612       // given target limit. So, we do not expect this push to fail.
3613       assert(success, "invariant");
3614     }
3615 
3616     statsOnly( int tmp_size = _task_queue->size();
3617                if (tmp_size > _local_max_size) {
3618                  _local_max_size = tmp_size;
3619                }
3620                _local_pushes += n );
3621   }
3622 
3623   // this operation was quite expensive, so decrease the limits
3624   decrease_limits();
3625 }
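
     // Both transfer routines above deliberately move entries in chunks of
     // global_stack_transfer_size, so that the shared mark stack (which
     // needs synchronization) is touched once per chunk rather than once per
     // entry. A minimal sketch of the same chunked hand-off, with std::deque
     // and std::mutex standing in for the task queue and the global mark
     // stack (all names illustrative only):
     //
     //   #include <deque>
     //   #include <vector>
     //   #include <mutex>
     //
     //   static const int kTransferSize = 16;       // cf. global_stack_transfer_size
     //   static std::vector<void*> g_global_stack;  // shared, guarded by g_lock
     //   static std::mutex         g_lock;
     //
     //   static void move_to_global(std::deque<void*>& local_queue) {
     //     void* buffer[kTransferSize];
     //     int n = 0;
     //     while (n < kTransferSize && !local_queue.empty()) {
     //       buffer[n++] = local_queue.back();
     //       local_queue.pop_back();
     //     }
     //     if (n > 0) {
     //       // One lock acquisition per chunk, not per entry.
     //       std::lock_guard<std::mutex> guard(g_lock);
     //       g_global_stack.insert(g_global_stack.end(), buffer, buffer + n);
     //     }
     //   }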
3626 
3627 void CMTask::drain_local_queue(bool partially) {
3628   if (has_aborted()) return;
3629 
3630   // Decide what the target size is, depending on whether we're going to
3631   // drain it partially (so that other tasks can steal if they run out
3632   // of things to do) or totally (at the very end).
3633   size_t target_size;
3634   if (partially) {
3635     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3636   } else {
3637     target_size = 0;
3638   }
3639 
3640   if (_task_queue->size() > target_size) {
3641     if (_cm->verbose_high()) {
3642       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3643                              _task_id, target_size);
3644     }
3645 
3646     oop obj;
3647     bool ret = _task_queue->pop_local(obj);
3648     while (ret) {
3649       statsOnly( ++_local_pops );
3650 
3651       if (_cm->verbose_high()) {
3652         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3653                                (void*) obj);
3654       }
3655 
3656       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3657       assert(!_g1h->is_on_master_free_list(
3658                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3659 
3660       scan_object(obj);
3661 
3662       if (_task_queue->size() <= target_size || has_aborted()) {
3663         ret = false;
3664       } else {
3665         ret = _task_queue->pop_local(obj);
3666       }
3667     }
3668 
3669     if (_cm->verbose_high()) {
3670       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3671                              _task_id, _task_queue->size());
3672     }
3673   }
3674 }
3675 
3676 void CMTask::drain_global_stack(bool partially) {
3677   if (has_aborted()) return;
3678 
3679   // We have a policy to drain the local queue before we attempt to
3680   // drain the global stack.
3681   assert(partially || _task_queue->size() == 0, "invariant");
3682 
3683   // Decide what the target size is, depending on whether we're going to
3684   // drain it partially (so that other tasks can steal if they run out
3685   // of things to do) or totally (at the very end).  Notice that,
3686   // because we move entries from the global stack in chunks or
3687   // because another task might be doing the same, we might in fact
3688   // drop below the target. But, this is not a problem.
3689   size_t target_size;
3690   if (partially) {
3691     target_size = _cm->partial_mark_stack_size_target();
3692   } else {
3693     target_size = 0;
3694   }
3695 
3696   if (_cm->mark_stack_size() > target_size) {
3697     if (_cm->verbose_low()) {
3698       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3699                              _task_id, target_size);
3700     }
3701 
3702     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3703       get_entries_from_global_stack();
3704       drain_local_queue(partially);
3705     }
3706 
3707     if (_cm->verbose_low()) {
3708       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3709                              _task_id, _cm->mark_stack_size());
3710     }
3711   }
3712 }
3713 
3714 // The SATB queue code makes several assumptions about whether to call the
3715 // par or non-par versions of the methods. This is why some of the code is
3716 // replicated. We should really get rid of the single-threaded version
3717 // of the code to simplify things.
3718 void CMTask::drain_satb_buffers() {
3719   if (has_aborted()) return;
3720 
3721   // We set this so that the regular clock knows that we're in the
3722   // middle of draining buffers and doesn't set the abort flag when it
3723   // notices that SATB buffers are available for draining. It'd be
3724   // very counterproductive if it did that. :-)
3725   _draining_satb_buffers = true;
3726 
3727   CMObjectClosure oc(this);
3728   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3729   if (G1CollectedHeap::use_parallel_gc_threads()) {
3730     satb_mq_set.set_par_closure(_task_id, &oc);
3731   } else {
3732     satb_mq_set.set_closure(&oc);
3733   }
3734 
3735   // This keeps claiming and applying the closure to completed buffers
3736   // until we run out of buffers or we need to abort.
3737   if (G1CollectedHeap::use_parallel_gc_threads()) {
3738     while (!has_aborted() &&
3739            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3740       if (_cm->verbose_medium()) {
3741         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3742       }
3743       statsOnly( ++_satb_buffers_processed );
3744       regular_clock_call();
3745     }
3746   } else {
3747     while (!has_aborted() &&
3748            satb_mq_set.apply_closure_to_completed_buffer()) {
3749       if (_cm->verbose_medium()) {
3750         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3751       }
3752       statsOnly( ++_satb_buffers_processed );
3753       regular_clock_call();
3754     }
3755   }
3756 
3757   if (!concurrent() && !has_aborted()) {
3758     // We should only do this during remark.
3759     if (G1CollectedHeap::use_parallel_gc_threads()) {
3760       satb_mq_set.par_iterate_closure_all_threads(_task_id);
3761     } else {
3762       satb_mq_set.iterate_closure_all_threads();
3763     }
3764   }
3765 
3766   _draining_satb_buffers = false;
3767 
3768   assert(has_aborted() ||
3769          concurrent() ||
3770          satb_mq_set.completed_buffers_num() == 0, "invariant");
3771 
3772   if (G1CollectedHeap::use_parallel_gc_threads()) {
3773     satb_mq_set.set_par_closure(_task_id, NULL);
3774   } else {
3775     satb_mq_set.set_closure(NULL);
3776   }
3777 
3778   // again, this was a potentially expensive operation, decrease the
3779   // limits to get the regular clock call early
3780   decrease_limits();
3781 }
3782 
3783 void CMTask::print_stats() {
3784   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3785                          _task_id, _calls);
3786   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3787                          _elapsed_time_ms, _termination_time_ms);
3788   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3789                          _step_times_ms.num(), _step_times_ms.avg(),
3790                          _step_times_ms.sd());
3791   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3792                          _step_times_ms.maximum(), _step_times_ms.sum());
3793 
3794 #if _MARKING_STATS_
3795   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3796                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3797                          _all_clock_intervals_ms.sd());
3798   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3799                          _all_clock_intervals_ms.maximum(),
3800                          _all_clock_intervals_ms.sum());
3801   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
3802                          _clock_due_to_scanning, _clock_due_to_marking);
3803   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
3804                          _objs_scanned, _objs_found_on_bitmap);
3805   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
3806                          _local_pushes, _local_pops, _local_max_size);
3807   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
3808                          _global_pushes, _global_pops, _global_max_size);
3809   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
3810                          _global_transfers_to,_global_transfers_from);
3811   gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);
3812   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
3813   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
3814                          _steal_attempts, _steals);
3815   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
3816   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
3817                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3818   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
3819                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3820 #endif // _MARKING_STATS_
3821 }
3822 
3823 /*****************************************************************************
3824 
3825     The do_marking_step(time_target_ms, ...) method is the building
3826     block of the parallel marking framework. It can be called in parallel
3827     with other invocations of do_marking_step() on different tasks
3828     (but only one per task, obviously) and concurrently with the
3829     mutator threads, or during remark, hence it eliminates the need
3830     for two versions of the code. When called during remark, it will
3831     pick up from where the task left off during the concurrent marking
3832     phase. Interestingly, tasks are also claimable during evacuation
3833     pauses, since do_marking_step() ensures that it aborts before
3834     it needs to yield.
3835 
3836     The data structures that it uses to do marking work are the
3837     following:
3838 
3839       (1) Marking Bitmap. If there are gray objects that appear only
3840       on the bitmap (this happens either when dealing with an overflow
3841       or when the initial marking phase has simply marked the roots
3842       and didn't push them on the stack), then tasks claim heap
3843       regions whose bitmap they then scan to find gray objects. A
3844       global finger indicates where the end of the last claimed region
3845       is. A local finger indicates how far into the region a task has
3846       scanned. The two fingers are used to determine how to gray an
3847       object (i.e. whether simply marking it is OK, as it will be
3848       visited by a task in the future, or whether it needs to be also
3849       pushed on a stack).
3850 
3851       (2) Local Queue. The local queue of the task which is accessed
3852       reasonably efficiently by the task. Other tasks can steal from
3853       it when they run out of work. Throughout the marking phase, a
3854       task attempts to keep its local queue short but not totally
3855       empty, so that entries are available for stealing by other
3856       tasks. Only when there is no more work does a task totally
3857       drain its local queue.
3858 
3859       (3) Global Mark Stack. This handles local queue overflow. During
3860       marking, only sets of entries are moved between it and the local
3861       queues, as access to it requires a mutex, and finer-grained
3862       interaction with it might cause contention. If it
3863       overflows, then the marking phase should restart and iterate
3864       over the bitmap to identify gray objects. Throughout the marking
3865       phase, tasks attempt to keep the global mark stack at a small
3866       length but not totally empty, so that entries are available for
3867       popping by other tasks. Only when there is no more work do tasks
3868       totally drain the global mark stack.
3869 
3870       (4) SATB Buffer Queue. This is where completed SATB buffers are
3871       made available. Buffers are regularly removed from this queue
3872       and scanned for roots, so that the queue doesn't get too
3873       long. During remark, all completed buffers are processed, as
3874       well as the filled in parts of any uncompleted buffers.
3875 
3876     The do_marking_step() method tries to abort when the time target
3877     has been reached. There are a few other cases when the
3878     do_marking_step() method also aborts:
3879 
3880       (1) When the marking phase has been aborted (after a Full GC).
3881 
3882       (2) When a global overflow (on the global stack) has been
3883       triggered. Before the task aborts, it will actually sync up with
3884       the other tasks to ensure that all the marking data structures
3885       (local queues, stacks, fingers etc.)  are re-initialized so that
3886       when do_marking_step() completes, the marking phase can
3887       immediately restart.
3888 
3889       (3) When enough completed SATB buffers are available. The
3890       do_marking_step() method only tries to drain SATB buffers right
3891       at the beginning. So, if enough buffers are available, the
3892       marking step aborts and the SATB buffers are processed at
3893       the beginning of the next invocation.
3894 
3895       (4) To yield. When we have to yield, we abort and do the yield
3896       right at the end of do_marking_step(). This saves us a lot
3897       of hassle because, by yielding, we might allow a Full GC. If this
3898       happens then objects will be compacted underneath our feet, the
3899       heap might shrink, etc. We save checking for this by just
3900       aborting and doing the yield right at the end.
3901 
3902     From the above it follows that the do_marking_step() method should
3903     be called in a loop (or, otherwise, regularly) until it completes.
3904 
3905     If a marking step completes without its has_aborted() flag being
3906     true, it means it has completed the current marking phase (and
3907     also all other marking tasks have done so and have all synced up).
3908 
3909     A method called regular_clock_call() is invoked "regularly" (in
3910     sub ms intervals) throughout marking. It is this clock method that
3911     checks all the abort conditions which were mentioned above and
3912     decides when the task should abort. A work-based scheme is used to
3913     trigger this clock method: when the number of object words the
3914     marking phase has scanned or the number of references the marking
3915     phase has visited reach a given limit. Additional invocations of
3916     the clock method have been planted in a few other strategic places
3917     too. The initial reason for the clock method was to avoid calling
3918     vtime too regularly, as it is quite expensive. So, once it was in
3919     place, it was natural to piggy-back all the other conditions on it
3920     too and not constantly check them throughout the code.
3921 
3922     If do_termination is true then do_marking_step will enter its
3923     termination protocol.
3924 
3925     The value of is_serial must be true when do_marking_step is being
3926     called serially (i.e. by the VMThread) and do_marking_step should
3927     skip any synchronization in the termination and overflow code.
3928     Examples include the serial remark code and the serial reference
3929     processing closures.
3930 
3931     The value of is_serial must be false when do_marking_step is
3932     being called by any of the worker threads in a work gang.
3933     Examples include the concurrent marking code (CMMarkingTask),
3934     the MT remark code, and the MT reference processing closures.
3935 
3936  *****************************************************************************/
3937 
3938 void CMTask::do_marking_step(double time_target_ms,
3939                              bool do_termination,
3940                              bool is_serial) {
3941   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3942   assert(concurrent() == _cm->concurrent(), "they should be the same");
3943 
3944   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3945   assert(_task_queues != NULL, "invariant");
3946   assert(_task_queue != NULL, "invariant");
3947   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
3948 
3949   assert(!_claimed,
3950          "only one thread should claim this task at any one time");
3951 
3952   // OK, this doesn't safeguard against all possible scenarios, as it is
3953   // possible for two threads to set the _claimed flag at the same
3954   // time. But it is only for debugging purposes anyway and it will
3955   // catch most problems.
3956   _claimed = true;
3957 
3958   _start_time_ms = os::elapsedVTime() * 1000.0;
3959   statsOnly( _interval_start_time_ms = _start_time_ms );
3960 
3961   // If do_stealing is true then do_marking_step will attempt to
3962   // steal work from the other CMTasks. It only makes sense to
3963   // enable stealing when the termination protocol is enabled
3964   // and do_marking_step() is not being called serially.
3965   bool do_stealing = do_termination && !is_serial;
3966 
3967   double diff_prediction_ms =
3968     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3969   _time_target_ms = time_target_ms - diff_prediction_ms;
3970 
3971   // set up the variables that are used in the work-based scheme to
3972   // call the regular clock method
3973   _words_scanned = 0;
3974   _refs_reached  = 0;
3975   recalculate_limits();
3976 
3977   // clear all flags
3978   clear_has_aborted();
3979   _has_timed_out = false;
3980   _draining_satb_buffers = false;
3981 
3982   ++_calls;
3983 
3984   if (_cm->verbose_low()) {
3985     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3986                            "target = %1.2lfms >>>>>>>>>>",
3987                            _task_id, _calls, _time_target_ms);
3988   }
3989 
3990   // Set up the bitmap and oop closures. Anything that uses them is
3991   // eventually called from this method, so it is OK to allocate these
3992   // locally, on the stack.
3993   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3994   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3995   set_cm_oop_closure(&cm_oop_closure);
3996 
3997   if (_cm->has_overflown()) {
3998     // This can happen if the mark stack overflows during a GC pause
3999     // and this task, after a yield point, restarts. We have to abort
4000     // as we need to get into the overflow protocol which happens
4001     // right at the end of this task.
4002     set_has_aborted();
4003   }
4004 
4005   // First drain any available SATB buffers. After this, we will not
4006   // look at SATB buffers before the next invocation of this method.
4007   // If enough completed SATB buffers are queued up, the regular clock
4008   // will abort this task so that it restarts.
4009   drain_satb_buffers();
4010   // ...then partially drain the local queue and the global stack
4011   drain_local_queue(true);
4012   drain_global_stack(true);
4013 
4014   do {
4015     if (!has_aborted() && _curr_region != NULL) {
4016       // This means that we're already holding on to a region.
4017       assert(_finger != NULL, "if region is not NULL, then the finger "
4018              "should not be NULL either");
4019 
4020       // We might have restarted this task after an evacuation pause
4021       // which might have evacuated the region we're holding on to
4022       // underneath our feet. Let's read its limit again to make sure
4023       // that we do not iterate over a region of the heap that
4024       // contains garbage (update_region_limit() will also move
4025       // _finger to the start of the region if it is found empty).
4026       update_region_limit();
4027       // We will start from _finger not from the start of the region,
4028       // as we might be restarting this task after aborting half-way
4029       // through scanning this region. In this case, _finger points to
4030       // the address where we last found a marked object. If this is a
4031       // fresh region, _finger points to start().
4032       MemRegion mr = MemRegion(_finger, _region_limit);
4033 
4034       if (_cm->verbose_low()) {
4035         gclog_or_tty->print_cr("[%d] we're scanning part "
4036                                "["PTR_FORMAT", "PTR_FORMAT") "
4037                                "of region "PTR_FORMAT,
4038                                _task_id, _finger, _region_limit, _curr_region);
4039       }
4040 
4041       // Let's iterate over the bitmap of the part of the
4042       // region that is left.
4043       if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4044         // We successfully completed iterating over the region. Now,
4045         // let's give up the region.
4046         giveup_current_region();
4047         regular_clock_call();
4048       } else {
4049         assert(has_aborted(), "currently the only way to do so");
4050         // The only way to abort the bitmap iteration is to return
4051         // false from the do_bit() method. However, inside the
4052         // do_bit() method we move the _finger to point to the
4053         // object currently being looked at. So, if we bail out, we
4054         // have definitely set _finger to something non-null.
4055         assert(_finger != NULL, "invariant");
4056 
4057         // Region iteration was actually aborted. So now _finger
4058         // points to the address of the object we last scanned. If we
4059         // leave it there, when we restart this task, we will rescan
4060         // the object. It is easy to avoid this. We move the finger by
4061         // enough to point to the next possible object header (the
4062         // bitmap knows by how much we need to move it as it knows its
4063         // granularity).
4064         assert(_finger < _region_limit, "invariant");
4065         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
4066         // Check if bitmap iteration was aborted while scanning the last object
4067         if (new_finger >= _region_limit) {
4068           giveup_current_region();
4069         } else {
4070           move_finger_to(new_finger);
4071         }
4072       }
4073     }
4074     // At this point we have either completed iterating over the
4075     // region we were holding on to, or we have aborted.
4076 
4077     // We then partially drain the local queue and the global stack.
4078     // (Do we really need this?)
4079     drain_local_queue(true);
4080     drain_global_stack(true);
4081 
4082     // Read the note on the claim_region() method on why it might
4083     // return NULL with potentially more regions available for
4084     // claiming and why we have to check out_of_regions() to determine
4085     // whether we're done or not.
4086     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4087       // We are going to try to claim a new region. We should have
4088       // given up on the previous one.
4089       // Separated the asserts so that we know which one fires.
4090       assert(_curr_region  == NULL, "invariant");
4091       assert(_finger       == NULL, "invariant");
4092       assert(_region_limit == NULL, "invariant");
4093       if (_cm->verbose_low()) {
4094         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
4095       }
4096       HeapRegion* claimed_region = _cm->claim_region(_task_id);
4097       if (claimed_region != NULL) {
4098         // Yes, we managed to claim one
4099         statsOnly( ++_regions_claimed );
4100 
4101         if (_cm->verbose_low()) {
4102           gclog_or_tty->print_cr("[%d] we successfully claimed "
4103                                  "region "PTR_FORMAT,
4104                                  _task_id, claimed_region);
4105         }
4106 
4107         setup_for_region(claimed_region);
4108         assert(_curr_region == claimed_region, "invariant");
4109       }
4110       // It is important to call the regular clock here. It might take
4111       // a while to claim a region if, for example, we hit a large
4112       // block of empty regions. So we need to call the regular clock
4113       // method once round the loop to make sure it's called
4114       // frequently enough.
4115       regular_clock_call();
4116     }
4117 
4118     if (!has_aborted() && _curr_region == NULL) {
4119       assert(_cm->out_of_regions(),
4120              "at this point we should be out of regions");
4121     }
4122   } while (_curr_region != NULL && !has_aborted());
4123 
4124   if (!has_aborted()) {
4125     // We cannot check whether the global stack is empty, since other
4126     // tasks might be pushing objects to it concurrently.
4127     assert(_cm->out_of_regions(),
4128            "at this point we should be out of regions");
4129 
4130     if (_cm->verbose_low()) {
4131       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4132     }
4133 
4134     // Try to reduce the number of available SATB buffers so that
4135     // remark has less work to do.
4136     drain_satb_buffers();
4137   }
4138 
4139   // Since we've done everything else, we can now totally drain the
4140   // local queue and global stack.
4141   drain_local_queue(false);
4142   drain_global_stack(false);
4143 
4144   // Attempt to steal work from other tasks' queues.
4145   if (do_stealing && !has_aborted()) {
4146     // We have not aborted. This means that we have finished all that
4147     // we could. Let's try to do some stealing...
4148 
4149     // We cannot check whether the global stack is empty, since other
4150     // tasks might be pushing objects to it concurrently.
4151     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4152            "only way to reach here");
4153 
4154     if (_cm->verbose_low()) {
4155       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4156     }
4157 
4158     while (!has_aborted()) {
4159       oop obj;
4160       statsOnly( ++_steal_attempts );
4161 
4162       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4163         if (_cm->verbose_medium()) {
4164           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4165                                  _task_id, (void*) obj);
4166         }
4167 
4168         statsOnly( ++_steals );
4169 
4170         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4171                "any stolen object should be marked");
4172         scan_object(obj);
4173 
4174         // And since we're towards the end, let's totally drain the
4175         // local queue and global stack.
4176         drain_local_queue(false);
4177         drain_global_stack(false);
4178       } else {
4179         break;
4180       }
4181     }
4182   }
4183 
4184   // If we are about to wrap up and go into termination, check if we
4185   // should raise the overflow flag.
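  // (should_force() only returns true when overflows are being artificially
  // induced, e.g. to exercise the restart-on-overflow path.)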
4186   if (do_termination && !has_aborted()) {
4187     if (_cm->force_overflow()->should_force()) {
4188       _cm->set_has_overflown();
4189       regular_clock_call();
4190     }
4191   }
4192 
4193   // We still haven't aborted. Now, let's try to get into the
4194   // termination protocol.
4195   if (do_termination && !has_aborted()) {
4196     // We cannot check whether the global stack is empty, since other
4197     // tasks might be concurrently pushing objects on it.
4198     // Separated the asserts so that we know which one fires.
4199     assert(_cm->out_of_regions(), "only way to reach here");
4200     assert(_task_queue->size() == 0, "only way to reach here");
4201 
4202     if (_cm->verbose_low()) {
4203       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4204     }
4205 
4206     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4207 
4208     // The CMTask class also extends the TerminatorTerminator class,
4209     // hence its should_exit_termination() method will also decide
4210     // whether to exit the termination protocol or not.
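    // In the serial case there is only one marking task, so termination
    // is immediate and offer_termination() is not needed.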
4211     bool finished = (is_serial ||
4212                      _cm->terminator()->offer_termination(this));
4213     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4214     _termination_time_ms +=
4215       termination_end_time_ms - _termination_start_time_ms;
4216 
4217     if (finished) {
4218       // We're all done.
4219 
4220       if (_task_id == 0) {
4221         // let's allow task 0 to do this
4222         if (concurrent()) {
4223           assert(_cm->concurrent_marking_in_progress(), "invariant");
4224           // we need to set this to false before the next
4225           // safepoint. This way we ensure that the marking phase
4226           // doesn't observe any more heap expansions.
4227           _cm->clear_concurrent_marking_in_progress();
4228         }
4229       }
4230 
4231       // We can now guarantee that the global stack is empty, since
4232       // all other tasks have finished. We separated the guarantees so
4233       // that, if a condition is false, we can immediately find out
4234       // which one.
4235       guarantee(_cm->out_of_regions(), "only way to reach here");
4236       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4237       guarantee(_task_queue->size() == 0, "only way to reach here");
4238       guarantee(!_cm->has_overflown(), "only way to reach here");
4239       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4240 
4241       if (_cm->verbose_low()) {
4242         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4243       }
4244     } else {
4245       // Apparently there's more work to do. Let's abort this task; the
4246       // caller will restart it and we can hopefully find more things to do.
4247 
4248       if (_cm->verbose_low()) {
4249         gclog_or_tty->print_cr("[%d] apparently there is more work to do",
4250                                _task_id);
4251       }
4252 
4253       set_has_aborted();
4254       statsOnly( ++_aborted_termination );
4255     }
4256   }
4257 
4258   // Mainly for debugging purposes, to make sure that a pointer to the
4259   // closure that was allocated on this frame doesn't escape it by
4260   // accident.
4261   set_cm_oop_closure(NULL);
4262   double end_time_ms = os::elapsedVTime() * 1000.0;
4263   double elapsed_time_ms = end_time_ms - _start_time_ms;
4264   // Update the step history.
4265   _step_times_ms.add(elapsed_time_ms);
4266 
4267   if (has_aborted()) {
4268     // The task was aborted for some reason.
4269 
4270     statsOnly( ++_aborted );
4271 
4272     if (_has_timed_out) {
4273       double diff_ms = elapsed_time_ms - _time_target_ms;
4274       // Keep statistics of how well we did with respect to hitting
4275       // our target only if we actually timed out (if we aborted for
4276       // other reasons, then the results might get skewed).
4277       _marking_step_diffs_ms.add(diff_ms);
4278     }
4279 
4280     if (_cm->has_overflown()) {
4281       // This is the interesting one. We aborted because a global
4282       // overflow was raised. This means we have to restart the
4283       // marking phase and start iterating over regions. However, in
4284       // order to do this we have to make sure that all tasks stop
4285       // what they are doing and re-initialise in a safe manner. We
4286       // will achieve this with the use of two barrier sync points.
4287 
4288       if (_cm->verbose_low()) {
4289         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4290       }
4291 
4292       if (!is_serial) {
4293         // We only need to enter the sync barrier if being called
4294         // from a parallel context
4295         _cm->enter_first_sync_barrier(_task_id);
4296 
4297         // When we exit this sync barrier we know that all tasks have
4298         // stopped doing marking work. So, it's now safe to
4299         // re-initialise our data structures. At the end of this method,
4300         // task 0 will clear the global data structures.
4301       }
4302 
4303       statsOnly( ++_aborted_overflow );
4304 
4305       // We clear the local state of this task...
4306       clear_region_fields();
4307 
4308       if (!is_serial) {
4309         // ...and enter the second barrier.
4310         _cm->enter_second_sync_barrier(_task_id);
4311       }
4312       // At this point, if we're in the concurrent phase of
4313       // marking, everything has been re-initialized and we're
4314       // ready to restart.
4315     }
4316 
4317     if (_cm->verbose_low()) {
4318       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4319                              "elapsed = %1.2lfms <<<<<<<<<<",
4320                              _task_id, _time_target_ms, elapsed_time_ms);
4321       if (_cm->has_aborted()) {
4322         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4323                                _task_id);
4324       }
4325     }
4326   } else {
4327     if (_cm->verbose_low()) {
4328       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4329                              "elapsed = %1.2lfms <<<<<<<<<<",
4330                              _task_id, _time_target_ms, elapsed_time_ms);
4331     }
4332   }
4333 
4334   _claimed = false;
4335 }
4336 
4337 CMTask::CMTask(int task_id,
4338                ConcurrentMark* cm,
4339                size_t* marked_bytes,
4340                BitMap* card_bm,
4341                CMTaskQueue* task_queue,
4342                CMTaskQueueSet* task_queues)
4343   : _g1h(G1CollectedHeap::heap()),
4344     _task_id(task_id), _cm(cm),
4345     _claimed(false),
4346     _nextMarkBitMap(NULL), _hash_seed(17),
4347     _task_queue(task_queue),
4348     _task_queues(task_queues),
4349     _cm_oop_closure(NULL),
4350     _marked_bytes_array(marked_bytes),
4351     _card_bm(card_bm) {
4352   guarantee(task_queue != NULL, "invariant");
4353   guarantee(task_queues != NULL, "invariant");
4354 
4355   statsOnly( _clock_due_to_scanning = 0;
4356              _clock_due_to_marking  = 0 );
4357 
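  // Seed the marking step diff statistics with an initial 0.5 ms sample.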
4358   _marking_step_diffs_ms.add(0.5);
4359 }
4360 
4361 // These formatting macros are used below to ensure consistent
4362 // output. The *_H_* versions are used to format the header for a
4363 // particular value and should be kept consistent with the
4364 // corresponding data macro. Also note that most of the macros add
4365 // the necessary white space (as a prefix), which makes them a bit
4366 // easier to compose.
4367 
4368 // All the output lines are prefixed with this string to be able to
4369 // identify them easily in a large log file.
4370 #define G1PPRL_LINE_PREFIX            "###"
4371 
4372 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
4373 #ifdef _LP64
4374 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4375 #else // _LP64
4376 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4377 #endif // _LP64
4378 
4379 // For per-region info
4380 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4381 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4382 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
4383 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4384 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4385 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4386 
4387 // For summary info
4388 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
4389 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
4390 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
4391 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
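
// Put together, a per-region output line produced with these macros looks
// roughly like this (illustrative values):
//   ###   OLD  0x00000000eb200000-0x00000000eb300000    1048576     524288     262144            0.0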
4392 
4393 G1PrintRegionLivenessInfoClosure::
4394 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4395   : _out(out),
4396     _total_used_bytes(0), _total_capacity_bytes(0),
4397     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4398     _hum_used_bytes(0), _hum_capacity_bytes(0),
4399     _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4400   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4401   MemRegion g1_committed = g1h->g1_committed();
4402   MemRegion g1_reserved = g1h->g1_reserved();
4403   double now = os::elapsedTime();
4404 
4405   // Print the header of the output.
4406   _out->cr();
4407   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4408   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4409                  G1PPRL_SUM_ADDR_FORMAT("committed")
4410                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4411                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4412                  g1_committed.start(), g1_committed.end(),
4413                  g1_reserved.start(), g1_reserved.end(),
4414                  HeapRegion::GrainBytes);
4415   _out->print_cr(G1PPRL_LINE_PREFIX);
4416   _out->print_cr(G1PPRL_LINE_PREFIX
4417                  G1PPRL_TYPE_H_FORMAT
4418                  G1PPRL_ADDR_BASE_H_FORMAT
4419                  G1PPRL_BYTE_H_FORMAT
4420                  G1PPRL_BYTE_H_FORMAT
4421                  G1PPRL_BYTE_H_FORMAT
4422                  G1PPRL_DOUBLE_H_FORMAT,
4423                  "type", "address-range",
4424                  "used", "prev-live", "next-live", "gc-eff");
4425   _out->print_cr(G1PPRL_LINE_PREFIX
4426                  G1PPRL_TYPE_H_FORMAT
4427                  G1PPRL_ADDR_BASE_H_FORMAT
4428                  G1PPRL_BYTE_H_FORMAT
4429                  G1PPRL_BYTE_H_FORMAT
4430                  G1PPRL_BYTE_H_FORMAT
4431                  G1PPRL_DOUBLE_H_FORMAT,
4432                  "", "",
4433                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4434 }
4435 
4436 // It takes as a parameter a reference to one of the _hum_* fields, deduces
4437 // the corresponding value for a region in a humongous region series (either
4438 // the region size, or what's left if the _hum_* field is < the region
4439 // size), and updates the _hum_* field accordingly.
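// For example, if HeapRegion::GrainBytes is 1M and the field holds 2560K
// (a 2.5M humongous series), successive calls return 1M, 1M and 512K,
// leaving the field at zero.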
4440 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4441   size_t bytes = 0;
4442   // The > 0 check is to deal with the prev and next live bytes which
4443   // could be 0.
4444   if (*hum_bytes > 0) {
4445     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4446     *hum_bytes -= bytes;
4447   }
4448   return bytes;
4449 }
4450 
4451 // It deduces the values for a region in a humongous region series
4452 // from the _hum_* fields and updates those accordingly. It assumes
4453 // that the _hum_* fields have already been set up from the "starts
4454 // humongous" region and that we visit the regions in address order.
4455 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4456                                                      size_t* capacity_bytes,
4457                                                      size_t* prev_live_bytes,
4458                                                      size_t* next_live_bytes) {
4459   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4460   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4461   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4462   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4463   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4464 }
4465 
4466 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4467   const char* type = "";
4468   HeapWord* bottom       = r->bottom();
4469   HeapWord* end          = r->end();
4470   size_t capacity_bytes  = r->capacity();
4471   size_t used_bytes      = r->used();
4472   size_t prev_live_bytes = r->live_bytes();
4473   size_t next_live_bytes = r->next_live_bytes();
4474   double gc_eff          = r->gc_efficiency();
4475   if (r->used() == 0) {
4476     type = "FREE";
4477   } else if (r->is_survivor()) {
4478     type = "SURV";
4479   } else if (r->is_young()) {
4480     type = "EDEN";
4481   } else if (r->startsHumongous()) {
4482     type = "HUMS";
4483 
4484     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4485            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4486            "they should have been zeroed after the last time we used them");
4487     // Set up the _hum_* fields.
4488     _hum_capacity_bytes  = capacity_bytes;
4489     _hum_used_bytes      = used_bytes;
4490     _hum_prev_live_bytes = prev_live_bytes;
4491     _hum_next_live_bytes = next_live_bytes;
4492     get_hum_bytes(&used_bytes, &capacity_bytes,
4493                   &prev_live_bytes, &next_live_bytes);
4494     end = bottom + HeapRegion::GrainWords;
4495   } else if (r->continuesHumongous()) {
4496     type = "HUMC";
4497     get_hum_bytes(&used_bytes, &capacity_bytes,
4498                   &prev_live_bytes, &next_live_bytes);
4499     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4500   } else {
4501     type = "OLD";
4502   }
4503 
4504   _total_used_bytes      += used_bytes;
4505   _total_capacity_bytes  += capacity_bytes;
4506   _total_prev_live_bytes += prev_live_bytes;
4507   _total_next_live_bytes += next_live_bytes;
4508 
4509   // Print a line for this particular region.
4510   _out->print_cr(G1PPRL_LINE_PREFIX
4511                  G1PPRL_TYPE_FORMAT
4512                  G1PPRL_ADDR_BASE_FORMAT
4513                  G1PPRL_BYTE_FORMAT
4514                  G1PPRL_BYTE_FORMAT
4515                  G1PPRL_BYTE_FORMAT
4516                  G1PPRL_DOUBLE_FORMAT,
4517                  type, bottom, end,
4518                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4519 
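  // Returning false tells the caller to keep iterating over the remaining
  // heap regions.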
4520   return false;
4521 }
4522 
4523 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4524   // Print the footer of the output.
4525   _out->print_cr(G1PPRL_LINE_PREFIX);
4526   _out->print_cr(G1PPRL_LINE_PREFIX
4527                  " SUMMARY"
4528                  G1PPRL_SUM_MB_FORMAT("capacity")
4529                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4530                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4531                  G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4532                  bytes_to_mb(_total_capacity_bytes),
4533                  bytes_to_mb(_total_used_bytes),
4534                  perc(_total_used_bytes, _total_capacity_bytes),
4535                  bytes_to_mb(_total_prev_live_bytes),
4536                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4537                  bytes_to_mb(_total_next_live_bytes),
4538                  perc(_total_next_live_bytes, _total_capacity_bytes));
4539   _out->cr();
4540 }