1 /*
   2  * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/symbolTable.hpp"
  27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
  28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
  32 #include "gc_implementation/g1/g1Log.hpp"
  33 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
  34 #include "gc_implementation/g1/g1RemSet.hpp"
  35 #include "gc_implementation/g1/heapRegion.inline.hpp"
  36 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  37 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
  38 #include "gc_implementation/shared/vmGCOperations.hpp"
  39 #include "gc_implementation/shared/gcTimer.hpp"
  40 #include "gc_implementation/shared/gcTrace.hpp"
  41 #include "gc_implementation/shared/gcTraceTime.hpp"
  42 #include "memory/genOopClosures.inline.hpp"
  43 #include "memory/referencePolicy.hpp"
  44 #include "memory/resourceArea.hpp"
  45 #include "oops/oop.inline.hpp"
  46 #include "runtime/handles.inline.hpp"
  47 #include "runtime/java.hpp"
  48 #include "services/memTracker.hpp"
  49 
  50 // Concurrent marking bit map wrapper
  51 
  52 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  53   _bm((uintptr_t*)NULL,0),
  54   _shifter(shifter) {
  55   _bmStartWord = (HeapWord*)(rs.base());
  56   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
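       // The bitmap needs one bit for every 2^_shifter heap words, so the
       // backing store needs (_bmWordSize >> (_shifter + LogBitsPerByte))
       // bytes (plus one byte of slack for truncation in the shift), rounded
       // up to the allocation alignment.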
  57   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
  58                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  59 
  60   MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);
  61 
  62   guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
  63   // For now we'll just commit all of the bit map up front.
  64   // Later on we'll try to be more parsimonious with swap.
  65   guarantee(_virtual_space.initialize(brs, brs.size()),
  66             "couldn't reserve backing store for concurrent marking bit map");
  67   assert(_virtual_space.committed_size() == brs.size(),
  68          "didn't reserve backing store for all of concurrent marking bit map?");
  69   _bm.set_map((uintptr_t*)_virtual_space.low());
  70   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
  71          _bmWordSize, "inconsistency in bit map sizing");
  72   _bm.set_size(_bmWordSize >> _shifter);
  73 }
  74 
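     // Returns the address of the first marked heap word at or above addr and
     // below limit (the end of the covered range if limit is NULL), or limit
     // itself if no marked word is found.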
  75 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  76                                                HeapWord* limit) const {
  77   // First we must round addr *up* to a possible object boundary.
  78   addr = (HeapWord*)align_size_up((intptr_t)addr,
  79                                   HeapWordSize << _shifter);
  80   size_t addrOffset = heapWordToOffset(addr);
  81   if (limit == NULL) {
  82     limit = _bmStartWord + _bmWordSize;
  83   }
  84   size_t limitOffset = heapWordToOffset(limit);
  85   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  86   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  87   assert(nextAddr >= addr, "get_next_one postcondition");
  88   assert(nextAddr == limit || isMarked(nextAddr),
  89          "get_next_one postcondition");
  90   return nextAddr;
  91 }
  92 
  93 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  94                                                  HeapWord* limit) const {
  95   size_t addrOffset = heapWordToOffset(addr);
  96   if (limit == NULL) {
  97     limit = _bmStartWord + _bmWordSize;
  98   }
  99   size_t limitOffset = heapWordToOffset(limit);
 100   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
 101   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
 102   assert(nextAddr >= addr, "get_next_one postcondition");
 103   assert(nextAddr == limit || !isMarked(nextAddr),
 104          "get_next_one postcondition");
 105   return nextAddr;
 106 }
 107 
 108 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 109   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 110   return (int) (diff >> _shifter);
 111 }
 112 
 113 #ifndef PRODUCT
 114 bool CMBitMapRO::covers(ReservedSpace rs) const {
 115   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 116   assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
 117          "size inconsistency");
 118   return _bmStartWord == (HeapWord*)(rs.base()) &&
 119          _bmWordSize  == rs.size()>>LogHeapWordSize;
 120 }
 121 #endif
 122 
 123 void CMBitMap::clearAll() {
 124   _bm.clear();
 125   return;
 126 }
 127 
 128 void CMBitMap::markRange(MemRegion mr) {
 129   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 130   assert(!mr.is_empty(), "unexpected empty region");
 131   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 132           ((HeapWord *) mr.end())),
 133          "markRange memory region end is not card aligned");
 134   // convert address range into offset range
 135   _bm.at_put_range(heapWordToOffset(mr.start()),
 136                    heapWordToOffset(mr.end()), true);
 137 }
 138 
 139 void CMBitMap::clearRange(MemRegion mr) {
 140   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 141   assert(!mr.is_empty(), "unexpected empty region");
 142   // convert address range into offset range
 143   _bm.at_put_range(heapWordToOffset(mr.start()),
 144                    heapWordToOffset(mr.end()), false);
 145 }
 146 
 147 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 148                                             HeapWord* end_addr) {
 149   HeapWord* start = getNextMarkedWordAddress(addr);
 150   start = MIN2(start, end_addr);
 151   HeapWord* end   = getNextUnmarkedWordAddress(start);
 152   end = MIN2(end, end_addr);
 153   assert(start <= end, "Consistency check");
 154   MemRegion mr(start, end);
 155   if (!mr.is_empty()) {
 156     clearRange(mr);
 157   }
 158   return mr;
 159 }
 160 
 161 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 162   _base(NULL), _cm(cm)
 163 #ifdef ASSERT
 164   , _drain_in_progress(false)
 165   , _drain_in_progress_yields(false)
 166 #endif
 167 {}
 168 
 169 void CMMarkStack::allocate(size_t size) {
 170   _base = NEW_C_HEAP_ARRAY(oop, size, mtGC);
 171   if (_base == NULL) {
 172     vm_exit_during_initialization("Failed to allocate CM region mark stack");
 173   }
 174   _index = 0;
 175   _capacity = (jint) size;
 176   _saved_index = -1;
 177   NOT_PRODUCT(_max_depth = 0);
 178 }
 179 
 180 CMMarkStack::~CMMarkStack() {
 181   if (_base != NULL) {
 182     FREE_C_HEAP_ARRAY(oop, _base, mtGC);
 183   }
 184 }
 185 
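     // Lock-free push of a single entry: claim the next slot by advancing
     // _index with a CAS, then store the entry into the claimed slot. If the
     // CAS fails (another thread claimed the slot first) we retry; if the
     // stack is full we just set _overflow and drop the entry.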
 186 void CMMarkStack::par_push(oop ptr) {
 187   while (true) {
 188     if (isFull()) {
 189       _overflow = true;
 190       return;
 191     }
 192     // Otherwise...
 193     jint index = _index;
 194     jint next_index = index+1;
 195     jint res = Atomic::cmpxchg(next_index, &_index, index);
 196     if (res == index) {
 197       _base[index] = ptr;
 198       // Note that we don't maintain this atomically.  We could, but it
 199       // doesn't seem necessary.
 200       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 201       return;
 202     }
 203     // Otherwise, we need to try again.
 204   }
 205 }
 206 
 207 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 208   while (true) {
 209     if (isFull()) {
 210       _overflow = true;
 211       return;
 212     }
 213     // Otherwise...
 214     jint index = _index;
 215     jint next_index = index + n;
 216     if (next_index > _capacity) {
 217       _overflow = true;
 218       return;
 219     }
 220     jint res = Atomic::cmpxchg(next_index, &_index, index);
 221     if (res == index) {
 222       for (int i = 0; i < n; i++) {
 223         int ind = index + i;
 224         assert(ind < _capacity, "By overflow test above.");
 225         _base[ind] = ptr_arr[i];
 226       }
 227       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 228       return;
 229     }
 230     // Otherwise, we need to try again.
 231   }
 232 }
 233 
 234 
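     // Unlike the CAS-based par_push() / par_adjoin_arr() above, this variant
     // pushes the n entries while holding ParGCRareEvent_lock. If they do not
     // all fit, nothing is pushed and _overflow is set.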
 235 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 236   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 237   jint start = _index;
 238   jint next_index = start + n;
 239   if (next_index > _capacity) {
 240     _overflow = true;
 241     return;
 242   }
 243   // Otherwise.
 244   _index = next_index;
 245   for (int i = 0; i < n; i++) {
 246     int ind = start + i;
 247     assert(ind < _capacity, "By overflow test above.");
 248     _base[ind] = ptr_arr[i];
 249   }
 250 }
 251 
 252 
 253 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 254   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 255   jint index = _index;
 256   if (index == 0) {
 257     *n = 0;
 258     return false;
 259   } else {
 260     int k = MIN2(max, index);
 261     jint new_ind = index - k;
 262     for (int j = 0; j < k; j++) {
 263       ptr_arr[j] = _base[new_ind + j];
 264     }
 265     _index = new_ind;
 266     *n = k;
 267     return true;
 268   }
 269 }
 270 
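     // Repeatedly pops an object off the stack and applies the closure to its
     // fields until the stack is empty. If yield_after is true, this offers to
     // yield after each object and returns false if the drain stopped early
     // because of a yield.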
 271 template<class OopClosureClass>
 272 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 273   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 274          || SafepointSynchronize::is_at_safepoint(),
 275          "Drain recursion must be yield-safe.");
 276   bool res = true;
 277   debug_only(_drain_in_progress = true);
 278   debug_only(_drain_in_progress_yields = yield_after);
 279   while (!isEmpty()) {
 280     oop newOop = pop();
 281     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 282     assert(newOop->is_oop(), "Expected an oop");
 283     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 284            "only grey objects on this stack");
 285     newOop->oop_iterate(cl);
 286     if (yield_after && _cm->do_yield_check()) {
 287       res = false;
 288       break;
 289     }
 290   }
 291   debug_only(_drain_in_progress = false);
 292   return res;
 293 }
 294 
 295 void CMMarkStack::note_start_of_gc() {
 296   assert(_saved_index == -1,
 297          "note_start_of_gc()/end_of_gc() bracketed incorrectly");
 298   _saved_index = _index;
 299 }
 300 
 301 void CMMarkStack::note_end_of_gc() {
 302   // This is intentionally a guarantee, instead of an assert. If we
 303   // accidentally add something to the mark stack during GC, it
 304   // will be a correctness issue so it's better if we crash. We'll
 305   // only check this once per GC anyway, so it won't be a performance
 306   // issue in any way.
 307   guarantee(_saved_index == _index,
 308             err_msg("saved index: %d index: %d", _saved_index, _index));
 309   _saved_index = -1;
 310 }
 311 
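     // Applies the closure to every entry currently on the stack. Only valid
     // while the stack contents are stable, i.e. between note_start_of_gc()
     // and note_end_of_gc(), which is what the assert below checks.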
 312 void CMMarkStack::oops_do(OopClosure* f) {
 313   assert(_saved_index == _index,
 314          err_msg("saved index: %d index: %d", _saved_index, _index));
 315   for (int i = 0; i < _index; i += 1) {
 316     f->do_oop(&_base[i]);
 317   }
 318 }
 319 
 320 bool ConcurrentMark::not_yet_marked(oop obj) const {
 321   return (_g1h->is_obj_ill(obj)
 322           || (_g1h->is_in_permanent(obj)
 323               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
 324 }
 325 
 326 CMRootRegions::CMRootRegions() :
 327   _young_list(NULL), _cm(NULL), _scan_in_progress(false),
 328   _should_abort(false),  _next_survivor(NULL) { }
 329 
 330 void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
 331   _young_list = g1h->young_list();
 332   _cm = cm;
 333 }
 334 
 335 void CMRootRegions::prepare_for_scan() {
 336   assert(!scan_in_progress(), "pre-condition");
 337 
 338   // Currently, only survivors can be root regions.
 339   assert(_next_survivor == NULL, "pre-condition");
 340   _next_survivor = _young_list->first_survivor_region();
 341   _scan_in_progress = (_next_survivor != NULL);
 342   _should_abort = false;
 343 }
 344 
 345 HeapRegion* CMRootRegions::claim_next() {
 346   if (_should_abort) {
 347     // If someone has set the should_abort flag, we return NULL to
 348     // force the caller to bail out of their loop.
 349     return NULL;
 350   }
 351 
 352   // Currently, only survivors can be root regions.
 353   HeapRegion* res = _next_survivor;
 354   if (res != NULL) {
 355     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 356     // Read it again in case it changed while we were waiting for the lock.
 357     res = _next_survivor;
 358     if (res != NULL) {
 359       if (res == _young_list->last_survivor_region()) {
 360         // We just claimed the last survivor so store NULL to indicate
 361         // that we're done.
 362         _next_survivor = NULL;
 363       } else {
 364         _next_survivor = res->get_next_young_region();
 365       }
 366     } else {
 367       // Someone else claimed the last survivor while we were trying
 368       // to take the lock so nothing else to do.
 369     }
 370   }
 371   assert(res == NULL || res->is_survivor(), "post-condition");
 372 
 373   return res;
 374 }
 375 
 376 void CMRootRegions::scan_finished() {
 377   assert(scan_in_progress(), "pre-condition");
 378 
 379   // Currently, only survivors can be root regions.
 380   if (!_should_abort) {
 381     assert(_next_survivor == NULL, "we should have claimed all survivors");
 382   }
 383   _next_survivor = NULL;
 384 
 385   {
 386     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 387     _scan_in_progress = false;
 388     RootRegionScan_lock->notify_all();
 389   }
 390 }
 391 
 392 bool CMRootRegions::wait_until_scan_finished() {
 393   if (!scan_in_progress()) return false;
 394 
 395   {
 396     MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
 397     while (scan_in_progress()) {
 398       RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
 399     }
 400   }
 401   return true;
 402 }
 403 
 404 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 405 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 406 #endif // _MSC_VER
 407 
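     // By default, use roughly one quarter of the parallel GC threads
     // (rounded up, and at least one) for concurrent marking.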
 408 uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
 409   return MAX2((n_par_threads + 2) / 4, 1U);
 410 }
 411 
 412 ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
 413   _markBitMap1(rs, MinObjAlignment - 1),
 414   _markBitMap2(rs, MinObjAlignment - 1),
 415 
 416   _parallel_marking_threads(0),
 417   _max_parallel_marking_threads(0),
 418   _sleep_factor(0.0),
 419   _marking_task_overhead(1.0),
 420   _cleanup_sleep_factor(0.0),
 421   _cleanup_task_overhead(1.0),
 422   _cleanup_list("Cleanup List"),
 423   _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
 424   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
 425            CardTableModRefBS::card_shift,
 426            false /* in_resource_area*/),
 427 
 428   _prevMarkBitMap(&_markBitMap1),
 429   _nextMarkBitMap(&_markBitMap2),
 430 
 431   _markStack(this),
 432   // _finger set in set_non_marking_state
 433 
 434   _max_task_num(MAX2((uint)ParallelGCThreads, 1U)),
 435   // _active_tasks set in set_non_marking_state
 436   // _tasks set inside the constructor
 437   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
 438   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
 439 
 440   _has_overflown(false),
 441   _concurrent(false),
 442   _has_aborted(false),
 443   _restart_for_overflow(false),
 444   _concurrent_marking_in_progress(false),
 445 
 446   // _verbose_level set below
 447 
 448   _init_times(),
 449   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 450   _cleanup_times(),
 451   _total_counting_time(0.0),
 452   _total_rs_scrub_time(0.0),
 453 
 454   _parallel_workers(NULL),
 455 
 456   _count_card_bitmaps(NULL),
 457   _count_marked_bytes(NULL) {
 458   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 459   if (verbose_level < no_verbose) {
 460     verbose_level = no_verbose;
 461   }
 462   if (verbose_level > high_verbose) {
 463     verbose_level = high_verbose;
 464   }
 465   _verbose_level = verbose_level;
 466 
 467   if (verbose_low()) {
 468     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 469                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
 470   }
 471 
 472   _markStack.allocate(MarkStackSize);
 473 
 474   // Create & start a ConcurrentMark thread.
 475   _cmThread = new ConcurrentMarkThread(this);
 476   assert(cmThread() != NULL, "CM Thread should have been created");
 477   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 478 
 479   _g1h = G1CollectedHeap::heap();
 480   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 481   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
 482   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
 483 
 484   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 485   satb_qs.set_buffer_size(G1SATBBufferSize);
 486 
 487   _root_regions.init(_g1h, this);
 488 
 489   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num, mtGC);
 490   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num, mtGC);
 491 
 492   _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_task_num, mtGC);
 493   _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num, mtGC);
 494 
 495   BitMap::idx_t card_bm_size = _card_bm.size();
 496 
 497   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 498   _active_tasks = _max_task_num;
 499   for (int i = 0; i < (int) _max_task_num; ++i) {
 500     CMTaskQueue* task_queue = new CMTaskQueue();
 501     task_queue->initialize();
 502     _task_queues->register_queue(i, task_queue);
 503 
 504     _count_card_bitmaps[i] = BitMap(card_bm_size, false);
 505     _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC);
 506 
 507     _tasks[i] = new CMTask(i, this,
 508                            _count_marked_bytes[i],
 509                            &_count_card_bitmaps[i],
 510                            task_queue, _task_queues);
 511 
 512     _accum_task_vtime[i] = 0.0;
 513   }
 514 
 515   // Calculate the card number for the bottom of the heap. Used
 516   // in biasing indexes into the accounting card bitmaps.
 517   _heap_bottom_card_num =
 518     intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
 519                                 CardTableModRefBS::card_shift);
 520 
 521   // Clear all the liveness counting data
 522   clear_all_count_data();
 523 
 524   if (ConcGCThreads > ParallelGCThreads) {
 525     vm_exit_during_initialization("Can't have more ConcGCThreads "
 526                                   "than ParallelGCThreads.");
 527   }
 528   if (ParallelGCThreads == 0) {
 529     // if we are not running with any parallel GC threads we will not
 530     // spawn any marking threads either
 531     _parallel_marking_threads =       0;
 532     _max_parallel_marking_threads =   0;
 533     _sleep_factor             =     0.0;
 534     _marking_task_overhead    =     1.0;
 535   } else {
 536     if (ConcGCThreads > 0) {
 537       // Note that ConcGCThreads overrides G1MarkingOverheadPercent
 538       // if both are set
 539 
 540       _parallel_marking_threads = (uint) ConcGCThreads;
 541       _max_parallel_marking_threads = _parallel_marking_threads;
 542       _sleep_factor             = 0.0;
 543       _marking_task_overhead    = 1.0;
 544     } else if (G1MarkingOverheadPercent > 0) {
 545       // we will calculate the number of parallel marking threads
 546       // based on a target overhead with respect to the soft real-time
 547       // goal
 548 
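           // The thread count is derived from the requested overhead:
           // overall_cm_overhead is the marking overhead scaled by the ratio
           // of the pause target to the pause interval, and dividing it by
           // one processor's share of the machine gives the number of
           // marking threads. sleep_factor turns the per-thread duty cycle
           // (marking_task_overhead) into the sleep-to-work ratio that the
           // marking threads apply between marking steps.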
 549       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 550       double overall_cm_overhead =
 551         (double) MaxGCPauseMillis * marking_overhead /
 552         (double) GCPauseIntervalMillis;
 553       double cpu_ratio = 1.0 / (double) os::processor_count();
 554       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 555       double marking_task_overhead =
 556         overall_cm_overhead / marking_thread_num *
 557                                                 (double) os::processor_count();
 558       double sleep_factor =
 559                          (1.0 - marking_task_overhead) / marking_task_overhead;
 560 
 561       _parallel_marking_threads = (uint) marking_thread_num;
 562       _max_parallel_marking_threads = _parallel_marking_threads;
 563       _sleep_factor             = sleep_factor;
 564       _marking_task_overhead    = marking_task_overhead;
 565     } else {
 566       _parallel_marking_threads = scale_parallel_threads((uint)ParallelGCThreads);
 567       _max_parallel_marking_threads = _parallel_marking_threads;
 568       _sleep_factor             = 0.0;
 569       _marking_task_overhead    = 1.0;
 570     }
 571 
 572     if (parallel_marking_threads() > 1) {
 573       _cleanup_task_overhead = 1.0;
 574     } else {
 575       _cleanup_task_overhead = marking_task_overhead();
 576     }
 577     _cleanup_sleep_factor =
 578                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 579 
 580 #if 0
 581     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 582     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 583     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 584     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 585     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 586 #endif
 587 
 588     guarantee(parallel_marking_threads() > 0, "peace of mind");
 589     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
 590          _max_parallel_marking_threads, false, true);
 591     if (_parallel_workers == NULL) {
 592       vm_exit_during_initialization("Failed necessary allocation.");
 593     } else {
 594       _parallel_workers->initialize_workers();
 595     }
 596   }
 597 
 598   // so that the call below can read a sensible value
 599   _heap_start = (HeapWord*) rs.base();
 600   set_non_marking_state();
 601 }
 602 
 603 void ConcurrentMark::update_g1_committed(bool force) {
 604   // If concurrent marking is not in progress, then we do not need to
 605   // update _heap_end.
 606   if (!concurrent_marking_in_progress() && !force) return;
 607 
 608   MemRegion committed = _g1h->g1_committed();
 609   assert(committed.start() == _heap_start, "start shouldn't change");
 610   HeapWord* new_end = committed.end();
 611   if (new_end > _heap_end) {
 612     // The heap has been expanded.
 613 
 614     _heap_end = new_end;
 615   }
 616   // Notice that the heap can also shrink. However, this only happens
 617   // during a Full GC (at least currently) and the entire marking
 618   // phase will bail out and the task will not be restarted. So, let's
 619   // do nothing.
 620 }
 621 
 622 void ConcurrentMark::reset() {
 623   // Starting values for these two. This should be called in a STW
 624   // phase. CM will be notified of any future g1_committed expansions
 625   // at the end of evacuation pauses, when tasks are
 626   // inactive.
 627   MemRegion committed = _g1h->g1_committed();
 628   _heap_start = committed.start();
 629   _heap_end   = committed.end();
 630 
 631   // Separated the asserts so that we know which one fires.
 632   assert(_heap_start != NULL, "heap bounds should look ok");
 633   assert(_heap_end != NULL, "heap bounds should look ok");
 634   assert(_heap_start < _heap_end, "heap bounds should look ok");
 635 
 636   // Reset all the marking data structures and any necessary flags
 637   reset_marking_state();
 638 
 639   if (verbose_low()) {
 640     gclog_or_tty->print_cr("[global] resetting");
 641   }
 642 
 643   // We do reset all of them, since different phases will use
 644   // different number of active threads. So, it's easiest to have all
 645   // of them ready.
 646   for (int i = 0; i < (int) _max_task_num; ++i) {
 647     _tasks[i]->reset(_nextMarkBitMap);
 648   }
 649 
 650   // we need this to make sure that the flag is on during the evac
 651   // pause with initial mark piggy-backed
 652   set_concurrent_marking_in_progress();
 653 }
 654 
 655 
 656 void ConcurrentMark::reset_marking_state(bool clear_overflow) {
 657   _markStack.setEmpty();
 658   _markStack.clear_overflow();
 659   if (clear_overflow) {
 660     clear_has_overflown();
 661   } else {
 662     assert(has_overflown(), "pre-condition");
 663   }
 664   _finger = _heap_start;
 665 
 666   for (uint i = 0; i < _max_task_num; ++i) {
 667     CMTaskQueue* queue = _task_queues->queue(i);
 668     queue->set_empty();
 669   }
 670 }
 671 
 672 void ConcurrentMark::set_phase(uint active_tasks, bool concurrent) {
 673   assert(active_tasks <= _max_task_num, "we should not have more");
 674 
 675   _active_tasks = active_tasks;
 676   // Need to update the three data structures below according to the
 677   // number of active threads for this phase.
 678   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 679   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 680   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 681 
 682   _concurrent = concurrent;
 683   // We propagate this to all tasks, not just the active ones.
 684   for (int i = 0; i < (int) _max_task_num; ++i)
 685     _tasks[i]->set_concurrent(concurrent);
 686 
 687   if (concurrent) {
 688     set_concurrent_marking_in_progress();
 689   } else {
 690     // We currently assume that the concurrent flag has been set to
 691     // false before we start remark. At this point we should also be
 692     // in a STW phase.
 693     assert(!concurrent_marking_in_progress(), "invariant");
 694     assert(_finger == _heap_end, "only way to get here");
 695     update_g1_committed(true);
 696   }
 697 }
 698 
 699 void ConcurrentMark::set_non_marking_state() {
 700   // We set the global marking state to some default values when we're
 701   // not doing marking.
 702   reset_marking_state();
 703   _active_tasks = 0;
 704   clear_concurrent_marking_in_progress();
 705 }
 706 
 707 ConcurrentMark::~ConcurrentMark() {
 708   // The ConcurrentMark instance is never freed.
 709   ShouldNotReachHere();
 710 }
 711 
 712 void ConcurrentMark::clearNextBitmap() {
 713   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 714   G1CollectorPolicy* g1p = g1h->g1_policy();
 715 
 716   // Make sure that the concurrent mark thread still appears to be in
 717   // the current cycle.
 718   guarantee(cmThread()->during_cycle(), "invariant");
 719 
 720   // We are finishing up the current cycle by clearing the next
 721   // marking bitmap and getting it ready for the next cycle. During
 722   // this time no other cycle can start. So, let's make sure that this
 723   // is the case.
 724   guarantee(!g1h->mark_in_progress(), "invariant");
 725 
 726   // clear the mark bitmap (no grey objects to start with).
 727   // We need to do this in chunks and offer to yield in between
 728   // each chunk.
 729   HeapWord* start  = _nextMarkBitMap->startWord();
 730   HeapWord* end    = _nextMarkBitMap->endWord();
 731   HeapWord* cur    = start;
 732   size_t chunkSize = M;
 733   while (cur < end) {
 734     HeapWord* next = cur + chunkSize;
 735     if (next > end) {
 736       next = end;
 737     }
 738     MemRegion mr(cur,next);
 739     _nextMarkBitMap->clearRange(mr);
 740     cur = next;
 741     do_yield_check();
 742 
 743     // Repeat the asserts from above. We'll do them as asserts here to
 744     // minimize their overhead on the product. However, we'll have
 745     // them as guarantees at the beginning / end of the bitmap
 746     // clearing to get some checking in the product.
 747     assert(cmThread()->during_cycle(), "invariant");
 748     assert(!g1h->mark_in_progress(), "invariant");
 749   }
 750 
 751   // Clear the liveness counting data
 752   clear_all_count_data();
 753 
 754   // Repeat the asserts from above.
 755   guarantee(cmThread()->during_cycle(), "invariant");
 756   guarantee(!g1h->mark_in_progress(), "invariant");
 757 }
 758 
 759 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 760 public:
 761   bool doHeapRegion(HeapRegion* r) {
 762     if (!r->continuesHumongous()) {
 763       r->note_start_of_marking();
 764     }
 765     return false;
 766   }
 767 };
 768 
 769 void ConcurrentMark::checkpointRootsInitialPre() {
 770   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 771   G1CollectorPolicy* g1p = g1h->g1_policy();
 772 
 773   _has_aborted = false;
 774 
 775 #ifndef PRODUCT
 776   if (G1PrintReachableAtInitialMark) {
 777     print_reachable("at-cycle-start",
 778                     VerifyOption_G1UsePrevMarking, true /* all */);
 779   }
 780 #endif
 781 
 782   // Initialise marking structures. This has to be done in a STW phase.
 783   reset();
 784 
 785   // For each region note start of marking.
 786   NoteStartOfMarkHRClosure startcl;
 787   g1h->heap_region_iterate(&startcl);
 788 }
 789 
 790 
 791 void ConcurrentMark::checkpointRootsInitialPost() {
 792   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 793 
 794   // If we force an overflow during remark, the remark operation will
 795   // actually abort and we'll restart concurrent marking. If we always
 796   // force an overflow during remark we'll never actually complete the
 797   // marking phase. So, we initialize this here, at the start of the
 798   // cycle, so that the remaining overflow number will decrease at
 799   // every remark and we'll eventually not need to cause one.
 800   force_overflow_stw()->init();
 801 
 802   // Start Concurrent Marking weak-reference discovery.
 803   ReferenceProcessor* rp = g1h->ref_processor_cm();
 804   // enable ("weak") refs discovery
 805   rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
 806   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 807 
 808   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 809   // This is the start of the marking cycle. We expect all
 810   // threads to have SATB queues with active set to false.
 811   satb_mq_set.set_active_all_threads(true, /* new active value */
 812                                      false /* expected_active */);
 813 
 814   _root_regions.prepare_for_scan();
 815 
 816   // update_g1_committed() will be called at the end of an evac pause
 817   // when marking is on. So, it's also called at the end of the
 818   // initial-mark pause to update the heap end, if the heap expands
 819   // during it. No need to call it here.
 820 }
 821 
 822 /*
 823  * Notice that in the next two methods, we actually leave the STS
 824  * during the barrier sync and join it immediately afterwards. If we
 825  * do not do this, the following deadlock can occur: one thread could
 826  * be in the barrier sync code, waiting for the other thread to also
 827  * sync up, whereas another one could be trying to yield, while also
 828  * waiting for the other threads to sync up too.
 829  *
 830  * Note, however, that this code is also used during remark and in
 831  * this case we should not attempt to leave / enter the STS, otherwise
 832  * we'll either hit an assert (debug / fastdebug) or deadlock
 833  * (product). So we should only leave / enter the STS if we are
 834  * operating concurrently.
 835  *
 836  * Because the thread that does the sync barrier has left the STS, it
 837  * is possible for it to be suspended while a Full GC or an evacuation
 838  * pause occurs. This is actually safe, since entering the sync
 839  * barrier is one of the last things do_marking_step() does, and it
 840  * doesn't manipulate any data structures afterwards.
 841  */
 842 
 843 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
 844   if (verbose_low()) {
 845     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
 846   }
 847 
 848   if (concurrent()) {
 849     ConcurrentGCThread::stsLeave();
 850   }
 851   _first_overflow_barrier_sync.enter();
 852   if (concurrent()) {
 853     ConcurrentGCThread::stsJoin();
 854   }
 855   // at this point everyone should have synced up and not be doing any
 856   // more work
 857 
 858   if (verbose_low()) {
 859     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
 860   }
 861 
 862   // let task 0 do this
 863   if (task_num == 0) {
 864     // task 0 is responsible for clearing the global data structures
 865     // We should be here because of an overflow. During STW we should
 866     // not clear the overflow flag since we rely on it being true when
 867     // we exit this method to abort the pause and restart concurrent
 868     // marking.
 869     reset_marking_state(concurrent() /* clear_overflow */);
 870     force_overflow()->update();
 871 
 872     if (G1Log::fine()) {
 873       gclog_or_tty->date_stamp(PrintGCDateStamps);
 874       gclog_or_tty->stamp(PrintGCTimeStamps);
 875       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 876     }
 877   }
 878 
 879   // after this, each task should reset its own data structures and
 880   // then go into the second barrier
 881 }
 882 
 883 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
 884   if (verbose_low()) {
 885     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
 886   }
 887 
 888   if (concurrent()) {
 889     ConcurrentGCThread::stsLeave();
 890   }
 891   _second_overflow_barrier_sync.enter();
 892   if (concurrent()) {
 893     ConcurrentGCThread::stsJoin();
 894   }
 895   // at this point everything should be re-initialised and ready to go
 896 
 897   if (verbose_low()) {
 898     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
 899   }
 900 }
 901 
 902 #ifndef PRODUCT
 903 void ForceOverflowSettings::init() {
 904   _num_remaining = G1ConcMarkForceOverflow;
 905   _force = false;
 906   update();
 907 }
 908 
 909 void ForceOverflowSettings::update() {
 910   if (_num_remaining > 0) {
 911     _num_remaining -= 1;
 912     _force = true;
 913   } else {
 914     _force = false;
 915   }
 916 }
 917 
 918 bool ForceOverflowSettings::should_force() {
 919   if (_force) {
 920     _force = false;
 921     return true;
 922   } else {
 923     return false;
 924   }
 925 }
 926 #endif // !PRODUCT
 927 
 928 class CMConcurrentMarkingTask: public AbstractGangTask {
 929 private:
 930   ConcurrentMark*       _cm;
 931   ConcurrentMarkThread* _cmt;
 932 
 933 public:
 934   void work(uint worker_id) {
 935     assert(Thread::current()->is_ConcurrentGC_thread(),
 936            "this should only be done by a conc GC thread");
 937     ResourceMark rm;
 938 
 939     double start_vtime = os::elapsedVTime();
 940 
 941     ConcurrentGCThread::stsJoin();
 942 
 943     assert(worker_id < _cm->active_tasks(), "invariant");
 944     CMTask* the_task = _cm->task(worker_id);
 945     the_task->record_start_time();
 946     if (!_cm->has_aborted()) {
 947       do {
 948         double start_vtime_sec = os::elapsedVTime();
 949         double start_time_sec = os::elapsedTime();
 950         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
 951 
 952         the_task->do_marking_step(mark_step_duration_ms,
 953                                   true /* do_stealing    */,
 954                                   true /* do_termination */);
 955 
 956         double end_time_sec = os::elapsedTime();
 957         double end_vtime_sec = os::elapsedVTime();
 958         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
 959         double elapsed_time_sec = end_time_sec - start_time_sec;
 960         _cm->clear_has_overflown();
 961 
 962         bool ret = _cm->do_yield_check(worker_id);
 963 
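             // If this task aborted (e.g. its time slice expired) but marking
             // as a whole has not, sleep for the virtual time just consumed
             // scaled by the sleep factor (non-zero only when a marking
             // overhead target was requested) before retrying the marking
             // step.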
 964         jlong sleep_time_ms;
 965         if (!_cm->has_aborted() && the_task->has_aborted()) {
 966           sleep_time_ms =
 967             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
 968           ConcurrentGCThread::stsLeave();
 969           os::sleep(Thread::current(), sleep_time_ms, false);
 970           ConcurrentGCThread::stsJoin();
 971         }
 972         double end_time2_sec = os::elapsedTime();
 973         double elapsed_time2_sec = end_time2_sec - start_time_sec;
 974 
 975 #if 0
 976           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
 977                                  "overhead %1.4lf",
 978                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
 979                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
 980           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
 981                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
 982 #endif
 983       } while (!_cm->has_aborted() && the_task->has_aborted());
 984     }
 985     the_task->record_end_time();
 986     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
 987 
 988     ConcurrentGCThread::stsLeave();
 989 
 990     double end_vtime = os::elapsedVTime();
 991     _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
 992   }
 993 
 994   CMConcurrentMarkingTask(ConcurrentMark* cm,
 995                           ConcurrentMarkThread* cmt) :
 996       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
 997 
 998   ~CMConcurrentMarkingTask() { }
 999 };
1000 
1001 // Calculates the number of active workers for a concurrent
1002 // phase.
1003 uint ConcurrentMark::calc_parallel_marking_threads() {
1004   if (G1CollectedHeap::use_parallel_gc_threads()) {
1005     uint n_conc_workers = 0;
1006     if (!UseDynamicNumberOfGCThreads ||
1007         (!FLAG_IS_DEFAULT(ConcGCThreads) &&
1008          !ForceDynamicNumberOfGCThreads)) {
1009       n_conc_workers = max_parallel_marking_threads();
1010     } else {
1011       n_conc_workers =
1012         AdaptiveSizePolicy::calc_default_active_workers(
1013                                      max_parallel_marking_threads(),
1014                                      1, /* Minimum workers */
1015                                      parallel_marking_threads(),
1016                                      Threads::number_of_non_daemon_threads());
1017       // Don't scale down "n_conc_workers" by scale_parallel_threads() because
1018       // that scaling has already gone into "_max_parallel_marking_threads".
1019     }
1020     assert(n_conc_workers > 0, "Always need at least 1");
1021     return n_conc_workers;
1022   }
1023   // If we are not running with any parallel GC threads we will not
1024   // have spawned any marking threads either. Hence the number of
1025   // concurrent workers should be 0.
1026   return 0;
1027 }
1028 
1029 void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
1030   // Currently, only survivors can be root regions.
1031   assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
1032   G1RootRegionScanClosure cl(_g1h, this, worker_id);
1033 
1034   const uintx interval = PrefetchScanIntervalInBytes;
1035   HeapWord* curr = hr->bottom();
1036   const HeapWord* end = hr->top();
1037   while (curr < end) {
1038     Prefetch::read(curr, interval);
1039     oop obj = oop(curr);
1040     int size = obj->oop_iterate(&cl);
1041     assert(size == obj->size(), "sanity");
1042     curr += size;
1043   }
1044 }
1045 
1046 class CMRootRegionScanTask : public AbstractGangTask {
1047 private:
1048   ConcurrentMark* _cm;
1049 
1050 public:
1051   CMRootRegionScanTask(ConcurrentMark* cm) :
1052     AbstractGangTask("Root Region Scan"), _cm(cm) { }
1053 
1054   void work(uint worker_id) {
1055     assert(Thread::current()->is_ConcurrentGC_thread(),
1056            "this should only be done by a conc GC thread");
1057 
1058     CMRootRegions* root_regions = _cm->root_regions();
1059     HeapRegion* hr = root_regions->claim_next();
1060     while (hr != NULL) {
1061       _cm->scanRootRegion(hr, worker_id);
1062       hr = root_regions->claim_next();
1063     }
1064   }
1065 };
1066 
1067 void ConcurrentMark::scanRootRegions() {
1068   // scan_in_progress() will have been set to true only if there was
1069   // at least one root region to scan. So, if it's false, we
1070   // should not attempt to do any further work.
1071   if (root_regions()->scan_in_progress()) {
1072     _parallel_marking_threads = calc_parallel_marking_threads();
1073     assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1074            "Maximum number of marking threads exceeded");
1075     uint active_workers = MAX2(1U, parallel_marking_threads());
1076 
1077     CMRootRegionScanTask task(this);
1078     if (use_parallel_marking_threads()) {
1079       _parallel_workers->set_active_workers((int) active_workers);
1080       _parallel_workers->run_task(&task);
1081     } else {
1082       task.work(0);
1083     }
1084 
1085     // It's possible that has_aborted() is true here without actually
1086     // aborting the survivor scan earlier. This is OK as it's
1087     // mainly used for sanity checking.
1088     root_regions()->scan_finished();
1089   }
1090 }
1091 
1092 void ConcurrentMark::markFromRoots() {
1093   // we might be tempted to assert that:
1094   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1095   //        "inconsistent argument?");
1096   // However that wouldn't be right, because it's possible that
1097   // a safepoint is indeed in progress as a younger generation
1098   // stop-the-world GC happens even as we mark in this generation.
1099 
1100   _restart_for_overflow = false;
1101   force_overflow_conc()->init();
1102 
1103   // _g1h has _n_par_threads
1104   _parallel_marking_threads = calc_parallel_marking_threads();
1105   assert(parallel_marking_threads() <= max_parallel_marking_threads(),
1106     "Maximum number of marking threads exceeded");
1107 
1108   uint active_workers = MAX2(1U, parallel_marking_threads());
1109 
1110   // Parallel task terminator is set in "set_phase()"
1111   set_phase(active_workers, true /* concurrent */);
1112 
1113   CMConcurrentMarkingTask markingTask(this, cmThread());
1114   if (use_parallel_marking_threads()) {
1115     _parallel_workers->set_active_workers((int)active_workers);
1116     // Don't set _n_par_threads because it affects MT in process_strong_roots()
1117     // and the decisions on that MT processing are made elsewhere.
1118     assert(_parallel_workers->active_workers() > 0, "Should have been set");
1119     _parallel_workers->run_task(&markingTask);
1120   } else {
1121     markingTask.work(0);
1122   }
1123   print_stats();
1124 }
1125 
1126 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1127   // world is stopped at this checkpoint
1128   assert(SafepointSynchronize::is_at_safepoint(),
1129          "world should be stopped");
1130 
1131   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1132 
1133   // If a full collection has happened, we shouldn't do this.
1134   if (has_aborted()) {
1135     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1136     return;
1137   }
1138 
1139   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1140 
1141   if (VerifyDuringGC) {
1142     HandleMark hm;  // handle scope
1143     gclog_or_tty->print(" VerifyDuringGC:(before)");
1144     Universe::heap()->prepare_for_verify();
1145     Universe::verify(/* silent */ false,
1146                      /* option */ VerifyOption_G1UsePrevMarking);
1147   }
1148 
1149   G1CollectorPolicy* g1p = g1h->g1_policy();
1150   g1p->record_concurrent_mark_remark_start();
1151 
1152   double start = os::elapsedTime();
1153 
1154   checkpointRootsFinalWork();
1155 
1156   double mark_work_end = os::elapsedTime();
1157 
1158   weakRefsWork(clear_all_soft_refs);
1159 
1160   if (has_overflown()) {
1161     // Oops.  We overflowed.  Restart concurrent marking.
1162     _restart_for_overflow = true;
1163     // Clear the marking state because we will be restarting
1164     // marking due to overflowing the global mark stack.
1165     reset_marking_state();
1166     if (G1TraceMarkStackOverflow) {
1167       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1168     }
1169   } else {
1170     // Aggregate the per-task counting data that we have accumulated
1171     // while marking.
1172     aggregate_count_data();
1173 
1174     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1175     // We're done with marking.
1176     // This is the end of the marking cycle. We expect all
1177     // threads to have SATB queues with active set to true.
1178     satb_mq_set.set_active_all_threads(false, /* new active value */
1179                                        true /* expected_active */);
1180 
1181     if (VerifyDuringGC) {
1182       HandleMark hm;  // handle scope
1183       gclog_or_tty->print(" VerifyDuringGC:(after)");
1184       Universe::heap()->prepare_for_verify();
1185       Universe::verify(/* silent */ false,
1186                        /* option */ VerifyOption_G1UseNextMarking);
1187     }
1188     assert(!restart_for_overflow(), "sanity");
1189     // Completely reset the marking state since marking completed
1190     set_non_marking_state();
1191   }
1192 
1193 #if VERIFY_OBJS_PROCESSED
1194   _scan_obj_cl.objs_processed = 0;
1195   ThreadLocalObjQueue::objs_enqueued = 0;
1196 #endif
1197 
1198   // Statistics
1199   double now = os::elapsedTime();
1200   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1201   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1202   _remark_times.add((now - start) * 1000.0);
1203 
1204   g1p->record_concurrent_mark_remark_end();
1205 }
1206 
1207 // Base class of the closures that finalize and verify the
1208 // liveness counting data.
1209 class CMCountDataClosureBase: public HeapRegionClosure {
1210 protected:
1211   G1CollectedHeap* _g1h;
1212   ConcurrentMark* _cm;
1213   CardTableModRefBS* _ct_bs;
1214 
1215   BitMap* _region_bm;
1216   BitMap* _card_bm;
1217 
1218   // Takes a region that's not empty (i.e., it has at least one
1219   // live object in it) and sets its corresponding bit on the region
1220   // bitmap to 1. If the region is "starts humongous" it will also set
1221   // to 1 the bits on the region bitmap that correspond to its
1222   // associated "continues humongous" regions.
1223   void set_bit_for_region(HeapRegion* hr) {
1224     assert(!hr->continuesHumongous(), "should have filtered those out");
1225 
1226     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1227     if (!hr->startsHumongous()) {
1228       // Normal (non-humongous) case: just set the bit.
1229       _region_bm->par_at_put(index, true);
1230     } else {
1231       // Starts humongous case: calculate how many regions are part of
1232       // this humongous region and then set the bit range.
1233       BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
1234       _region_bm->par_at_put_range(index, end_index, true);
1235     }
1236   }
1237 
1238 public:
1239   CMCountDataClosureBase(G1CollectedHeap* g1h,
1240                          BitMap* region_bm, BitMap* card_bm):
1241     _g1h(g1h), _cm(g1h->concurrent_mark()),
1242     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
1243     _region_bm(region_bm), _card_bm(card_bm) { }
1244 };
1245 
1246 // Closure that calculates the # live objects per region. Used
1247 // for verification purposes during the cleanup pause.
1248 class CalcLiveObjectsClosure: public CMCountDataClosureBase {
1249   CMBitMapRO* _bm;
1250   size_t _region_marked_bytes;
1251 
1252 public:
1253   CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
1254                          BitMap* region_bm, BitMap* card_bm) :
1255     CMCountDataClosureBase(g1h, region_bm, card_bm),
1256     _bm(bm), _region_marked_bytes(0) { }
1257 
1258   bool doHeapRegion(HeapRegion* hr) {
1259 
1260     if (hr->continuesHumongous()) {
1261       // We will ignore these here and process them when their
1262       // associated "starts humongous" region is processed (see
1263       // set_bit_for_heap_region()). Note that we cannot rely on their
1264       // associated "starts humongous" region to have their bit set to
1265       // 1 since, due to the region chunking in the parallel region
1266       // iteration, a "continues humongous" region might be visited
1267       // before its associated "starts humongous".
1268       return false;
1269     }
1270 
1271     HeapWord* ntams = hr->next_top_at_mark_start();
1272     HeapWord* start = hr->bottom();
1273 
1274     assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
1275            err_msg("Preconditions not met - "
1276                    "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
1277                    start, ntams, hr->end()));
1278 
1279     // Find the first marked object at or after "start".
1280     start = _bm->getNextMarkedWordAddress(start, ntams);
1281 
1282     size_t marked_bytes = 0;
1283 
1284     while (start < ntams) {
1285       oop obj = oop(start);
1286       int obj_sz = obj->size();
1287       HeapWord* obj_end = start + obj_sz;
1288 
1289       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
1290       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);
1291 
1292       // Note: if we're looking at the last region in heap - obj_end
1293       // could be actually just beyond the end of the heap; end_idx
1294       // will then correspond to a (non-existent) card that is also
1295       // just beyond the heap.
1296       if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
1297         // end of object is not card aligned - increment to cover
1298         // all the cards spanned by the object
1299         end_idx += 1;
1300       }
1301 
1302       // Set the bits in the card BM for the cards spanned by this object.
1303       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1304 
1305       // Add the size of this object to the number of marked bytes.
1306       marked_bytes += (size_t)obj_sz * HeapWordSize;
1307 
1308       // Find the next marked object after this one.
1309       start = _bm->getNextMarkedWordAddress(obj_end, ntams);
1310     }
1311 
1312     // Mark the allocated-since-marking portion...
1313     HeapWord* top = hr->top();
1314     if (ntams < top) {
1315       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1316       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1317 
1318       // Note: if we're looking at the last region in heap - top
1319       // could be actually just beyond the end of the heap; end_idx
1320       // will then correspond to a (non-existent) card that is also
1321       // just beyond the heap.
1322       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1323         // top is not card aligned - increment to cover
1324         // all the cards spanned up to top
1325         end_idx += 1;
1326       }
1327       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1328 
1329       // This definitely means the region has live objects.
1330       set_bit_for_region(hr);
1331     }
1332 
1333     // Update the live region bitmap.
1334     if (marked_bytes > 0) {
1335       set_bit_for_region(hr);
1336     }
1337 
1338     // Set the marked bytes for the current region so that
1339     // it can be queried by a calling verification routine
1340     _region_marked_bytes = marked_bytes;
1341 
1342     return false;
1343   }
1344 
1345   size_t region_marked_bytes() const { return _region_marked_bytes; }
1346 };
1347 
1348 // Heap region closure used for verifying the counting data
1349 // that was accumulated concurrently and aggregated during
1350 // the remark pause. This closure is applied to the heap
1351 // regions during the STW cleanup pause.
1352 
1353 class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
1354   G1CollectedHeap* _g1h;
1355   ConcurrentMark* _cm;
1356   CalcLiveObjectsClosure _calc_cl;
1357   BitMap* _region_bm;   // Region BM to be verified
1358   BitMap* _card_bm;     // Card BM to be verified
1359   bool _verbose;        // verbose output?
1360 
1361   BitMap* _exp_region_bm; // Expected Region BM values
1362   BitMap* _exp_card_bm;   // Expected card BM values
1363 
1364   int _failures;
1365 
1366 public:
1367   VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
1368                                 BitMap* region_bm,
1369                                 BitMap* card_bm,
1370                                 BitMap* exp_region_bm,
1371                                 BitMap* exp_card_bm,
1372                                 bool verbose) :
1373     _g1h(g1h), _cm(g1h->concurrent_mark()),
1374     _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
1375     _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
1376     _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
1377     _failures(0) { }
1378 
1379   int failures() const { return _failures; }
1380 
1381   bool doHeapRegion(HeapRegion* hr) {
1382     if (hr->continuesHumongous()) {
1383       // We will ignore these here and process them when their
1384       // associated "starts humongous" region is processed (see
1385       // set_bit_for_heap_region()). Note that we cannot rely on their
1386       // associated "starts humongous" region to have their bit set to
1387       // 1 since, due to the region chunking in the parallel region
1388       // iteration, a "continues humongous" region might be visited
1389       // before its associated "starts humongous".
1390       return false;
1391     }
1392 
1393     int failures = 0;
1394 
1395     // Call the CalcLiveObjectsClosure to walk the marking bitmap for
1396     // this region and set the corresponding bits in the expected region
1397     // and card bitmaps.
1398     bool res = _calc_cl.doHeapRegion(hr);
1399     assert(res == false, "should be continuing");
1400 
1401     MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
1402                     Mutex::_no_safepoint_check_flag);
1403 
1404     // Verify the marked bytes for this region.
1405     size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
1406     size_t act_marked_bytes = hr->next_marked_bytes();
1407 
1408     // We're not OK if expected marked bytes > actual marked bytes. It means
1409     // we have failed to account for some objects during the actual marking.
1410     if (exp_marked_bytes > act_marked_bytes) {
1411       if (_verbose) {
1412         gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
1413                                "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
1414                                hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
1415       }
1416       failures += 1;
1417     }
1418 
1419     // Verify the bit, for this region, in the actual and expected
1420     // (which was just calculated) region bit maps.
1421     // We're not OK if the bit in the calculated expected region
1422     // bitmap is set and the bit in the actual region bitmap is not.
1423     BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
1424 
1425     bool expected = _exp_region_bm->at(index);
1426     bool actual = _region_bm->at(index);
1427     if (expected && !actual) {
1428       if (_verbose) {
1429         gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
1430                                "expected: %s, actual: %s",
1431                                hr->hrs_index(),
1432                                BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1433       }
1434       failures += 1;
1435     }
1436 
1437     // Verify that the card bit maps for the cards spanned by the current
1438     // region match. We have an error if we have a set bit in the expected
1439     // bit map and the corresponding bit in the actual bitmap is not set.
1440 
1441     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
1442     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());
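         // card_bitmap_index_for() maps a heap address to the index of the
         // card containing it, so the loop below checks every card index from
         // the one containing bottom() up to (but not including) the one
         // containing top().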
1443 
1444     for (BitMap::idx_t i = start_idx; i < end_idx; i += 1) {
1445       expected = _exp_card_bm->at(i);
1446       actual = _card_bm->at(i);
1447 
1448       if (expected && !actual) {
1449         if (_verbose) {
1450           gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
1451                                  "expected: %s, actual: %s",
1452                                  hr->hrs_index(), i,
1453                                  BOOL_TO_STR(expected), BOOL_TO_STR(actual));
1454         }
1455         failures += 1;
1456       }
1457     }
1458 
1459     if (failures > 0 && _verbose)  {
1460       gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
1461                              "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
1462                              HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
1463                              _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
1464     }
1465 
1466     _failures += failures;
1467 
1468     // We could stop iteration over the heap when we
1469     // find the first violating region by returning true.
1470     return false;
1471   }
1472 };
1473 
1474 
1475 class G1ParVerifyFinalCountTask: public AbstractGangTask {
1476 protected:
1477   G1CollectedHeap* _g1h;
1478   ConcurrentMark* _cm;
1479   BitMap* _actual_region_bm;
1480   BitMap* _actual_card_bm;
1481 
1482   uint    _n_workers;
1483 
1484   BitMap* _expected_region_bm;
1485   BitMap* _expected_card_bm;
1486 
1487   int  _failures;
1488   bool _verbose;
1489 
1490 public:
1491   G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
1492                             BitMap* region_bm, BitMap* card_bm,
1493                             BitMap* expected_region_bm, BitMap* expected_card_bm)
1494     : AbstractGangTask("G1 verify final counting"),
1495       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1496       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1497       _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
1498       _failures(0), _verbose(false),
1499       _n_workers(0) {
1500     assert(VerifyDuringGC, "don't call this otherwise");
1501 
1502     // Use the value already set as the number of active threads
1503     // in the call to run_task().
1504     if (G1CollectedHeap::use_parallel_gc_threads()) {
1505       assert( _g1h->workers()->active_workers() > 0,
1506         "Should have been previously set");
1507       _n_workers = _g1h->workers()->active_workers();
1508     } else {
1509       _n_workers = 1;
1510     }
1511 
1512     assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
1513     assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");
1514 
1515     _verbose = _cm->verbose_medium();
1516   }
1517 
1518   void work(uint worker_id) {
1519     assert(worker_id < _n_workers, "invariant");
1520 
1521     VerifyLiveObjectDataHRClosure verify_cl(_g1h,
1522                                             _actual_region_bm, _actual_card_bm,
1523                                             _expected_region_bm,
1524                                             _expected_card_bm,
1525                                             _verbose);
1526 
1527     if (G1CollectedHeap::use_parallel_gc_threads()) {
1528       _g1h->heap_region_par_iterate_chunked(&verify_cl,
1529                                             worker_id,
1530                                             _n_workers,
1531                                             HeapRegion::VerifyCountClaimValue);
1532     } else {
1533       _g1h->heap_region_iterate(&verify_cl);
1534     }
1535 
1536     Atomic::add(verify_cl.failures(), &_failures);
1537   }
1538 
1539   int failures() const { return _failures; }
1540 };
1541 
1542 // Closure that finalizes the liveness counting data.
1543 // Used during the cleanup pause.
1544 // Sets the bits corresponding to the interval [NTAMS, top)
1545 // (which contains the implicitly live objects) in the
1546 // card liveness bitmap. Also sets the bit, in the region
1547 // liveness bitmap, for each region that contains live data.
1548 
1549 class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
1550  public:
1551   FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
1552                               BitMap* region_bm,
1553                               BitMap* card_bm) :
1554     CMCountDataClosureBase(g1h, region_bm, card_bm) { }
1555 
1556   bool doHeapRegion(HeapRegion* hr) {
1557 
1558     if (hr->continuesHumongous()) {
1559       // We will ignore these here and process them when their
1560       // associated "starts humongous" region is processed (see
1561       // set_bit_for_heap_region()). Note that we cannot rely on their
1562       // associated "starts humongous" region to have its bit set to
1563       // 1 since, due to the region chunking in the parallel region
1564       // iteration, a "continues humongous" region might be visited
1565       // before its associated "starts humongous".
1566       return false;
1567     }
1568 
1569     HeapWord* ntams = hr->next_top_at_mark_start();
1570     HeapWord* top   = hr->top();
1571 
1572     assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");
1573 
1574     // Mark the allocated-since-marking portion...
1575     if (ntams < top) {
1576       // This definitely means the region has live objects.
1577       set_bit_for_region(hr);
1578 
1579       // Now set the bits in the card bitmap for [ntams, top)
1580       BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
1581       BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);
1582 
1583       // Note: if we're looking at the last region in heap - top
1584       // could be actually just beyond the end of the heap; end_idx
1585       // will then correspond to a (non-existent) card that is also
1586       // just beyond the heap.
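           // For example, assuming the usual 512 byte card size: if top is in
           // the middle of a card then card_bitmap_index_for(top) is the index
           // of that partially covered card, so we bump end_idx by one to make
           // the half-open range [start_idx, end_idx) include it.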
1587       if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
1588         // end of object is not card aligned - increment to cover
1589         // all the cards spanned by the object
1590         end_idx += 1;
1591       }
1592 
1593       assert(end_idx <= _card_bm->size(),
1594              err_msg("oob: end_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1595                      end_idx, _card_bm->size()));
1596       assert(start_idx < _card_bm->size(),
1597              err_msg("oob: start_idx=  "SIZE_FORMAT", bitmap size= "SIZE_FORMAT,
1598                      start_idx, _card_bm->size()));
1599 
1600       _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);
1601     }
1602 
1603     // Set the bit for the region if it contains live data
1604     if (hr->next_marked_bytes() > 0) {
1605       set_bit_for_region(hr);
1606     }
1607 
1608     return false;
1609   }
1610 };
1611 
1612 class G1ParFinalCountTask: public AbstractGangTask {
1613 protected:
1614   G1CollectedHeap* _g1h;
1615   ConcurrentMark* _cm;
1616   BitMap* _actual_region_bm;
1617   BitMap* _actual_card_bm;
1618 
1619   uint    _n_workers;
1620 
1621 public:
1622   G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm)
1623     : AbstractGangTask("G1 final counting"),
1624       _g1h(g1h), _cm(_g1h->concurrent_mark()),
1625       _actual_region_bm(region_bm), _actual_card_bm(card_bm),
1626       _n_workers(0) {
1627     // Use the value already set as the number of active threads
1628     // in the call to run_task().
1629     if (G1CollectedHeap::use_parallel_gc_threads()) {
1630       assert( _g1h->workers()->active_workers() > 0,
1631         "Should have been previously set");
1632       _n_workers = _g1h->workers()->active_workers();
1633     } else {
1634       _n_workers = 1;
1635     }
1636   }
1637 
1638   void work(uint worker_id) {
1639     assert(worker_id < _n_workers, "invariant");
1640 
1641     FinalCountDataUpdateClosure final_update_cl(_g1h,
1642                                                 _actual_region_bm,
1643                                                 _actual_card_bm);
1644 
1645     if (G1CollectedHeap::use_parallel_gc_threads()) {
1646       _g1h->heap_region_par_iterate_chunked(&final_update_cl,
1647                                             worker_id,
1648                                             _n_workers,
1649                                             HeapRegion::FinalCountClaimValue);
1650     } else {
1651       _g1h->heap_region_iterate(&final_update_cl);
1652     }
1653   }
1654 };
1655 
1656 class G1ParNoteEndTask;
1657 
1658 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1659   G1CollectedHeap* _g1;
1660   int _worker_num;
1661   size_t _max_live_bytes;
1662   uint _regions_claimed;
1663   size_t _freed_bytes;
1664   FreeRegionList* _local_cleanup_list;
1665   OldRegionSet* _old_proxy_set;
1666   HumongousRegionSet* _humongous_proxy_set;
1667   HRRSCleanupTask* _hrrs_cleanup_task;
1668   double _claimed_region_time;
1669   double _max_region_time;
1670 
1671 public:
1672   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1673                              int worker_num,
1674                              FreeRegionList* local_cleanup_list,
1675                              OldRegionSet* old_proxy_set,
1676                              HumongousRegionSet* humongous_proxy_set,
1677                              HRRSCleanupTask* hrrs_cleanup_task) :
1678     _g1(g1), _worker_num(worker_num),
1679     _max_live_bytes(0), _regions_claimed(0),
1680     _freed_bytes(0),
1681     _claimed_region_time(0.0), _max_region_time(0.0),
1682     _local_cleanup_list(local_cleanup_list),
1683     _old_proxy_set(old_proxy_set),
1684     _humongous_proxy_set(humongous_proxy_set),
1685     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1686 
1687   size_t freed_bytes() { return _freed_bytes; }
1688 
1689   bool doHeapRegion(HeapRegion *hr) {
1690     if (hr->continuesHumongous()) {
1691       return false;
1692     }
1693     // We use a claim value of zero here because all regions
1694     // were claimed with value 1 in the FinalCount task.
1695     _g1->reset_gc_time_stamps(hr);
1696     double start = os::elapsedTime();
1697     _regions_claimed++;
1698     hr->note_end_of_marking();
1699     _max_live_bytes += hr->max_live_bytes();
1700     _g1->free_region_if_empty(hr,
1701                               &_freed_bytes,
1702                               _local_cleanup_list,
1703                               _old_proxy_set,
1704                               _humongous_proxy_set,
1705                               _hrrs_cleanup_task,
1706                               true /* par */);
1707     double region_time = (os::elapsedTime() - start);
1708     _claimed_region_time += region_time;
1709     if (region_time > _max_region_time) {
1710       _max_region_time = region_time;
1711     }
1712     return false;
1713   }
1714 
1715   size_t max_live_bytes() { return _max_live_bytes; }
1716   uint regions_claimed() { return _regions_claimed; }
1717   double claimed_region_time_sec() { return _claimed_region_time; }
1718   double max_region_time_sec() { return _max_region_time; }
1719 };
1720 
1721 class G1ParNoteEndTask: public AbstractGangTask {
1722   friend class G1NoteEndOfConcMarkClosure;
1723 
1724 protected:
1725   G1CollectedHeap* _g1h;
1726   size_t _max_live_bytes;
1727   size_t _freed_bytes;
1728   FreeRegionList* _cleanup_list;
1729 
1730 public:
1731   G1ParNoteEndTask(G1CollectedHeap* g1h,
1732                    FreeRegionList* cleanup_list) :
1733     AbstractGangTask("G1 note end"), _g1h(g1h),
1734     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1735 
1736   void work(uint worker_id) {
1737     double start = os::elapsedTime();
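         // Each worker accumulates freed regions and remembered set cleanup
         // work in the local structures below and merges them into the
         // global lists further down, keeping contention low while the
         // regions are iterated in parallel.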
1738     FreeRegionList local_cleanup_list("Local Cleanup List");
1739     OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set");
1740     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1741     HRRSCleanupTask hrrs_cleanup_task;
1742     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list,
1743                                            &old_proxy_set,
1744                                            &humongous_proxy_set,
1745                                            &hrrs_cleanup_task);
1746     if (G1CollectedHeap::use_parallel_gc_threads()) {
1747       _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id,
1748                                             _g1h->workers()->active_workers(),
1749                                             HeapRegion::NoteEndClaimValue);
1750     } else {
1751       _g1h->heap_region_iterate(&g1_note_end);
1752     }
1753     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1754 
1755     // Now update the lists
1756     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1757                                             NULL /* free_list */,
1758                                             &old_proxy_set,
1759                                             &humongous_proxy_set,
1760                                             true /* par */);
1761     {
1762       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1763       _max_live_bytes += g1_note_end.max_live_bytes();
1764       _freed_bytes += g1_note_end.freed_bytes();
1765 
1766       // If we iterate over the global cleanup list at the end of
1767       // cleanup to do this printing we will not guarantee to only
1768       // generate output for the newly-reclaimed regions (the list
1769       // might not be empty at the beginning of cleanup; we might
1770       // still be working on its previous contents). So we do the
1771       // printing here, before we append the new regions to the global
1772       // cleanup list.
1773 
1774       G1HRPrinter* hr_printer = _g1h->hr_printer();
1775       if (hr_printer->is_active()) {
1776         HeapRegionLinkedListIterator iter(&local_cleanup_list);
1777         while (iter.more_available()) {
1778           HeapRegion* hr = iter.get_next();
1779           hr_printer->cleanup(hr);
1780         }
1781       }
1782 
1783       _cleanup_list->add_as_tail(&local_cleanup_list);
1784       assert(local_cleanup_list.is_empty(), "post-condition");
1785 
1786       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1787     }
1788   }
1789   size_t max_live_bytes() { return _max_live_bytes; }
1790   size_t freed_bytes() { return _freed_bytes; }
1791 };
1792 
1793 class G1ParScrubRemSetTask: public AbstractGangTask {
1794 protected:
1795   G1RemSet* _g1rs;
1796   BitMap* _region_bm;
1797   BitMap* _card_bm;
1798 public:
1799   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1800                        BitMap* region_bm, BitMap* card_bm) :
1801     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1802     _region_bm(region_bm), _card_bm(card_bm) { }
1803 
1804   void work(uint worker_id) {
1805     if (G1CollectedHeap::use_parallel_gc_threads()) {
1806       _g1rs->scrub_par(_region_bm, _card_bm, worker_id,
1807                        HeapRegion::ScrubRemSetClaimValue);
1808     } else {
1809       _g1rs->scrub(_region_bm, _card_bm);
1810     }
1811   }
1812 
1813 };
1814 
1815 void ConcurrentMark::cleanup() {
1816   // world is stopped at this checkpoint
1817   assert(SafepointSynchronize::is_at_safepoint(),
1818          "world should be stopped");
1819   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1820 
1821   // If a full collection has happened, we shouldn't do this.
1822   if (has_aborted()) {
1823     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1824     return;
1825   }
1826 
1827   HRSPhaseSetter x(HRSPhaseCleanup);
1828   g1h->verify_region_sets_optional();
1829 
1830   if (VerifyDuringGC) {
1831     HandleMark hm;  // handle scope
1832     gclog_or_tty->print(" VerifyDuringGC:(before)");
1833     Universe::heap()->prepare_for_verify();
1834     Universe::verify(/* silent */ false,
1835                      /* option */ VerifyOption_G1UsePrevMarking);
1836   }
1837 
1838   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1839   g1p->record_concurrent_mark_cleanup_start();
1840 
1841   double start = os::elapsedTime();
1842 
1843   HeapRegionRemSet::reset_for_cleanup_tasks();
1844 
1845   uint n_workers;
1846 
1847   // Do counting once more with the world stopped for good measure.
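       // The data accumulated while marking ran concurrently does not cover
       // objects allocated after a region's NTAMS, so this stop-the-world
       // pass finalizes it (see FinalCountDataUpdateClosure above).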
1848   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
1849 
1850   if (G1CollectedHeap::use_parallel_gc_threads()) {
1851     assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
1852            "sanity check");
1853 
1854     g1h->set_par_threads();
1855     n_workers = g1h->n_par_threads();
1856     assert(g1h->n_par_threads() == n_workers,
1857            "Should not have been reset");
1858     g1h->workers()->run_task(&g1_par_count_task);
1859     // Done with the parallel phase so reset to 0.
1860     g1h->set_par_threads(0);
1861 
1862     assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue),
1863            "sanity check");
1864   } else {
1865     n_workers = 1;
1866     g1_par_count_task.work(0);
1867   }
1868 
1869   if (VerifyDuringGC) {
1870     // Verify that the counting data accumulated during marking matches
1871     // that calculated by walking the marking bitmap.
1872 
1873     // Bitmaps to hold expected values
1874     BitMap expected_region_bm(_region_bm.size(), false);
1875     BitMap expected_card_bm(_card_bm.size(), false);
1876 
1877     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1878                                                  &_region_bm,
1879                                                  &_card_bm,
1880                                                  &expected_region_bm,
1881                                                  &expected_card_bm);
1882 
1883     if (G1CollectedHeap::use_parallel_gc_threads()) {
1884       g1h->set_par_threads((int)n_workers);
1885       g1h->workers()->run_task(&g1_par_verify_task);
1886       // Done with the parallel phase so reset to 0.
1887       g1h->set_par_threads(0);
1888 
1889       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
1890              "sanity check");
1891     } else {
1892       g1_par_verify_task.work(0);
1893     }
1894 
1895     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1896   }
1897 
1898   size_t start_used_bytes = g1h->used();
1899   g1h->set_marking_complete();
1900 
1901   double count_end = os::elapsedTime();
1902   double this_final_counting_time = (count_end - start);
1903   _total_counting_time += this_final_counting_time;
1904 
1905   if (G1PrintRegionLivenessInfo) {
1906     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1907     _g1h->heap_region_iterate(&cl);
1908   }
1909 
1910   // Install newly created mark bitMap as "prev".
1911   swapMarkBitMaps();
1912 
1913   g1h->reset_gc_time_stamp();
1914 
1915   // Note end of marking in all heap regions.
1916   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1917   if (G1CollectedHeap::use_parallel_gc_threads()) {
1918     g1h->set_par_threads((int)n_workers);
1919     g1h->workers()->run_task(&g1_par_note_end_task);
1920     g1h->set_par_threads(0);
1921 
1922     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1923            "sanity check");
1924   } else {
1925     g1_par_note_end_task.work(0);
1926   }
1927   g1h->check_gc_time_stamps();
1928 
1929   if (!cleanup_list_is_empty()) {
1930     // The cleanup list is not empty, so we'll have to process it
1931     // concurrently. Notify anyone else that might be wanting free
1932     // regions that there will be more free regions coming soon.
1933     g1h->set_free_regions_coming();
1934   }
1935 
1936   // Do the remembered set scrubbing before the call below, since it
1937   // affects the metric by which we sort the heap regions.
1938   if (G1ScrubRemSets) {
1939     double rs_scrub_start = os::elapsedTime();
1940     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1941     if (G1CollectedHeap::use_parallel_gc_threads()) {
1942       g1h->set_par_threads((int)n_workers);
1943       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1944       g1h->set_par_threads(0);
1945 
1946       assert(g1h->check_heap_region_claim_values(
1947                                             HeapRegion::ScrubRemSetClaimValue),
1948              "sanity check");
1949     } else {
1950       g1_par_scrub_rs_task.work(0);
1951     }
1952 
1953     double rs_scrub_end = os::elapsedTime();
1954     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1955     _total_rs_scrub_time += this_rs_scrub_time;
1956   }
1957 
1958   // this will also free any regions totally full of garbage objects,
1959   // and sort the regions.
1960   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
1961 
1962   // Statistics.
1963   double end = os::elapsedTime();
1964   _cleanup_times.add((end - start) * 1000.0);
1965 
1966   if (G1Log::fine()) {
1967     g1h->print_size_transition(gclog_or_tty,
1968                                start_used_bytes,
1969                                g1h->used(),
1970                                g1h->capacity());
1971   }
1972 
1973   // Clean up will have freed any regions completely full of garbage.
1974   // Update the soft reference policy with the new heap occupancy.
1975   Universe::update_heap_info_at_gc();
1976 
1977   // We need to make this be a "collection" so any collection pause that
1978   // races with it goes around and waits for completeCleanup to finish.
1979   g1h->increment_total_collections();
1980 
1981   // We reclaimed old regions so we should calculate the sizes to make
1982   // sure we update the old gen/space data.
1983   g1h->g1mm()->update_sizes();
1984 
1985   if (VerifyDuringGC) {
1986     HandleMark hm;  // handle scope
1987     gclog_or_tty->print(" VerifyDuringGC:(after)");
1988     Universe::heap()->prepare_for_verify();
1989     Universe::verify(/* silent */ false,
1990                      /* option */ VerifyOption_G1UsePrevMarking);
1991   }
1992 
1993   g1h->verify_region_sets_optional();
1994   g1h->trace_heap_after_concurrent_cycle();
1995 }
1996 
1997 void ConcurrentMark::completeCleanup() {
1998   if (has_aborted()) return;
1999 
2000   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2001 
2002   _cleanup_list.verify_optional();
2003   FreeRegionList tmp_free_list("Tmp Free List");
2004 
2005   if (G1ConcRegionFreeingVerbose) {
2006     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2007                            "cleanup list has %u entries",
2008                            _cleanup_list.length());
2009   }
2010 
2011   // No one else should be accessing the _cleanup_list at this point,
2012   // so it's not necessary to take any locks.
2013   while (!_cleanup_list.is_empty()) {
2014     HeapRegion* hr = _cleanup_list.remove_head();
2015     assert(hr != NULL, "the list was not empty");
2016     hr->par_clear();
2017     tmp_free_list.add_as_tail(hr);
2018 
2019     // Instead of adding one region at a time to the secondary_free_list,
2020     // we accumulate them in the local list and move them a few at a
2021     // time. This also cuts down on the number of notify_all() calls
2022     // we do during this process. We'll also append the local list when
2023     // _cleanup_list is empty (which means we just removed the last
2024     // region from the _cleanup_list).
2025     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2026         _cleanup_list.is_empty()) {
2027       if (G1ConcRegionFreeingVerbose) {
2028         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2029                                "appending %u entries to the secondary_free_list, "
2030                                "cleanup list still has %u entries",
2031                                tmp_free_list.length(),
2032                                _cleanup_list.length());
2033       }
2034 
2035       {
2036         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2037         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
2038         SecondaryFreeList_lock->notify_all();
2039       }
2040 
2041       if (G1StressConcRegionFreeing) {
2042         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2043           os::sleep(Thread::current(), (jlong) 1, false);
2044         }
2045       }
2046     }
2047   }
2048   assert(tmp_free_list.is_empty(), "post-condition");
2049 }
2050 
2051 // Support closures for reference processing in G1
2052 
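     // An object is treated as alive here if it lies outside the G1 reserved
     // heap, or if it is not "ill", i.e. it has either been marked already or
     // was allocated since the current marking cycle started.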
2053 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2054   HeapWord* addr = (HeapWord*)obj;
2055   return addr != NULL &&
2056          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2057 }
2058 
2059 class G1CMKeepAliveClosure: public OopClosure {
2060   G1CollectedHeap* _g1;
2061   ConcurrentMark*  _cm;
2062  public:
2063   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm) :
2064     _g1(g1), _cm(cm) {
2065     assert(Thread::current()->is_VM_thread(), "otherwise fix worker id");
2066   }
2067 
2068   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2069   virtual void do_oop(      oop* p) { do_oop_work(p); }
2070 
2071   template <class T> void do_oop_work(T* p) {
2072     oop obj = oopDesc::load_decode_heap_oop(p);
2073     HeapWord* addr = (HeapWord*)obj;
2074 
2075     if (_cm->verbose_high()) {
2076       gclog_or_tty->print_cr("\t[0] we're looking at location "
2077                              "*"PTR_FORMAT" = "PTR_FORMAT,
2078                              p, (void*) obj);
2079     }
2080 
2081     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
2082       _cm->mark_and_count(obj);
2083       _cm->mark_stack_push(obj);
2084     }
2085   }
2086 };
2087 
2088 class G1CMDrainMarkingStackClosure: public VoidClosure {
2089   ConcurrentMark*               _cm;
2090   CMMarkStack*                  _markStack;
2091   G1CMKeepAliveClosure*         _oopClosure;
2092  public:
2093   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMMarkStack* markStack,
2094                                G1CMKeepAliveClosure* oopClosure) :
2095     _cm(cm),
2096     _markStack(markStack),
2097     _oopClosure(oopClosure) { }
2098 
2099   void do_void() {
2100     _markStack->drain((OopClosure*)_oopClosure, _cm->nextMarkBitMap(), false);
2101   }
2102 };
2103 
2104 // 'Keep Alive' closure used by parallel reference processing.
2105 // An instance of this closure is used in the parallel reference processing
2106 // code rather than an instance of G1CMKeepAliveClosure. We could have used
2107 // the G1CMKeepAliveClosure as it is MT-safe. Also, reference objects are
2108 // only placed onto the discovered ref lists once, so we can mark and push
2109 // with no need to check whether the object has already been marked. Using the
2110 // G1CMKeepAliveClosure would mean, however, having all the worker threads
2111 // operating on the global mark stack. This means that an individual
2112 // worker would be doing lock-free pushes while it processes its own
2113 // discovered ref list followed by drain call. If the discovered ref lists
2114 // are unbalanced then this could cause interference with the other
2115 // workers. Using a CMTask (and its embedded local data structures)
2116 // avoids that potential interference.
2117 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2118   ConcurrentMark*  _cm;
2119   CMTask*          _task;
2120   int              _ref_counter_limit;
2121   int              _ref_counter;
2122  public:
2123   G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task) :
2124     _cm(cm), _task(task),
2125     _ref_counter_limit(G1RefProcDrainInterval) {
2126     assert(_ref_counter_limit > 0, "sanity");
2127     _ref_counter = _ref_counter_limit;
2128   }
2129 
2130   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2131   virtual void do_oop(      oop* p) { do_oop_work(p); }
2132 
2133   template <class T> void do_oop_work(T* p) {
2134     if (!_cm->has_overflown()) {
2135       oop obj = oopDesc::load_decode_heap_oop(p);
2136       if (_cm->verbose_high()) {
2137         gclog_or_tty->print_cr("\t[%d] we're looking at location "
2138                                "*"PTR_FORMAT" = "PTR_FORMAT,
2139                                _task->task_id(), p, (void*) obj);
2140       }
2141 
2142       _task->deal_with_reference(obj);
2143       _ref_counter--;
2144 
2145       if (_ref_counter == 0) {
2146         // We have dealt with _ref_counter_limit references, pushing them and objects
2147         // reachable from them on to the local stack (and possibly the global stack).
2148         // Call do_marking_step() to process these entries. We call the routine in a
2149         // loop, which we'll exit if there's nothing more to do (i.e. we're done
2150         // with the entries that we've pushed as a result of the deal_with_reference
2151         // calls above) or we overflow.
2152         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2153         // while there may still be some work to do. (See the comment at the
2154         // beginning of CMTask::do_marking_step() for those conditions - one of which
2155         // is reaching the specified time target.) It is only when
2156         // CMTask::do_marking_step() returns without setting the has_aborted() flag
2157         // that the marking has completed.
2158         do {
2159           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2160           _task->do_marking_step(mark_step_duration_ms,
2161                                  false /* do_stealing    */,
2162                                  false /* do_termination */);
2163         } while (_task->has_aborted() && !_cm->has_overflown());
2164         _ref_counter = _ref_counter_limit;
2165       }
2166     } else {
2167       if (_cm->verbose_high()) {
2168          gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2169       }
2170     }
2171   }
2172 };
2173 
2174 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2175   ConcurrentMark* _cm;
2176   CMTask* _task;
2177  public:
2178   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2179     _cm(cm), _task(task) { }
2180 
2181   void do_void() {
2182     do {
2183       if (_cm->verbose_high()) {
2184         gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2185                                _task->task_id());
2186       }
2187 
2188       // We call CMTask::do_marking_step() to completely drain the local and
2189       // global marking stacks. The routine is called in a loop, which we'll
2190       // exit if there's nothing more to do (i.e. we've completely drained the
2191       // entries that were pushed as a result of applying the
2192       // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2193       // lists above) or we overflow the global marking stack.
2194       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2195       // while there may still be some work to do. (See the comment at the
2196       // beginning of CMTask::do_marking_step() for those conditions - one of which
2197       // is reaching the specified time target.) It is only when
2198       // CMTask::do_marking_step() returns without setting the has_aborted() flag
2199       // that the marking has completed.
2200 
2201       _task->do_marking_step(1000000000.0 /* something very large */,
2202                              true /* do_stealing    */,
2203                              true /* do_termination */);
2204     } while (_task->has_aborted() && !_cm->has_overflown());
2205   }
2206 };
2207 
2208 // Implementation of AbstractRefProcTaskExecutor for parallel
2209 // reference processing at the end of G1 concurrent marking
2210 
2211 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2212 private:
2213   G1CollectedHeap* _g1h;
2214   ConcurrentMark*  _cm;
2215   WorkGang*        _workers;
2216   int              _active_workers;
2217 
2218 public:
2219   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2220                         ConcurrentMark* cm,
2221                         WorkGang* workers,
2222                         int n_workers) :
2223     _g1h(g1h), _cm(cm),
2224     _workers(workers), _active_workers(n_workers) { }
2225 
2226   // Executes the given task using concurrent marking worker threads.
2227   virtual void execute(ProcessTask& task);
2228   virtual void execute(EnqueueTask& task);
2229 };
2230 
2231 class G1CMRefProcTaskProxy: public AbstractGangTask {
2232   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2233   ProcessTask&     _proc_task;
2234   G1CollectedHeap* _g1h;
2235   ConcurrentMark*  _cm;
2236 
2237 public:
2238   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2239                      G1CollectedHeap* g1h,
2240                      ConcurrentMark* cm) :
2241     AbstractGangTask("Process reference objects in parallel"),
2242     _proc_task(proc_task), _g1h(g1h), _cm(cm) { }
2243 
2244   virtual void work(uint worker_id) {
2245     CMTask* marking_task = _cm->task(worker_id);
2246     G1CMIsAliveClosure g1_is_alive(_g1h);
2247     G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task);
2248     G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2249 
2250     _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2251   }
2252 };
2253 
2254 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2255   assert(_workers != NULL, "Need parallel worker threads.");
2256 
2257   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm);
2258 
2259   // We need to reset the phase for each task execution so that
2260   // the termination protocol of CMTask::do_marking_step works.
2261   _cm->set_phase(_active_workers, false /* concurrent */);
2262   _g1h->set_par_threads(_active_workers);
2263   _workers->run_task(&proc_task_proxy);
2264   _g1h->set_par_threads(0);
2265 }
2266 
2267 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2268   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2269   EnqueueTask& _enq_task;
2270 
2271 public:
2272   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2273     AbstractGangTask("Enqueue reference objects in parallel"),
2274     _enq_task(enq_task) { }
2275 
2276   virtual void work(uint worker_id) {
2277     _enq_task.work(worker_id);
2278   }
2279 };
2280 
2281 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2282   assert(_workers != NULL, "Need parallel worker threads.");
2283 
2284   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2285 
2286   _g1h->set_par_threads(_active_workers);
2287   _workers->run_task(&enq_task_proxy);
2288   _g1h->set_par_threads(0);
2289 }
2290 
2291 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2292   ResourceMark rm;
2293   HandleMark   hm;
2294 
2295   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2296 
2297   // Is alive closure.
2298   G1CMIsAliveClosure g1_is_alive(g1h);
2299 
2300   // Inner scope to exclude the cleaning of the string and symbol
2301   // tables from the displayed time.
2302   {
2303     if (G1Log::finer()) {
2304       gclog_or_tty->put(' ');
2305     }
2306     GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm());
2307 
2308     ReferenceProcessor* rp = g1h->ref_processor_cm();
2309 
2310     // See the comment in G1CollectedHeap::ref_processing_init()
2311     // about how reference processing currently works in G1.
2312 
2313     // Process weak references.
2314     rp->setup_policy(clear_all_soft_refs);
2315     assert(_markStack.isEmpty(), "mark stack should be empty");
2316 
2317     G1CMKeepAliveClosure g1_keep_alive(g1h, this);
2318     G1CMDrainMarkingStackClosure
2319       g1_drain_mark_stack(this, &_markStack, &g1_keep_alive);
2320 
2321     // We use the work gang from the G1CollectedHeap and we utilize all
2322     // the worker threads.
2323     uint active_workers = g1h->workers() ? g1h->workers()->active_workers() : 1U;
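         // Clamp to the number of marking tasks that were created, and make
         // sure we use at least one worker.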
2324     active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U);
2325 
2326     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2327                                               g1h->workers(), active_workers);
2328 
2329     ReferenceProcessorStats stats;
2330     if (rp->processing_is_mt()) {
2331       // Set the degree of MT here.  If the discovery is done MT, there
2332       // may have been a different number of threads doing the discovery
2333       // and a different number of discovered lists may have Ref objects.
2334       // That is OK as long as the Reference lists are balanced (see
2335       // balance_all_queues() and balance_queues()).
2336       rp->set_active_mt_degree(active_workers);
2337 
2338       stats = rp->process_discovered_references(&g1_is_alive,
2339                                       &g1_keep_alive,
2340                                       &g1_drain_mark_stack,
2341                                       &par_task_executor,
2342                                       g1h->gc_timer_cm());
2343 
2344       // The work routines of the parallel keep_alive and drain_marking_stack
2345       // will set the has_overflown flag if we overflow the global marking
2346       // stack.
2347     } else {
2348       stats = rp->process_discovered_references(&g1_is_alive,
2349                                         &g1_keep_alive,
2350                                         &g1_drain_mark_stack,
2351                                         NULL,
2352                                         g1h->gc_timer_cm());
2353     }
2354 
2355     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2356 
2357     assert(_markStack.overflow() || _markStack.isEmpty(),
2358             "mark stack should be empty (unless it overflowed)");
2359     if (_markStack.overflow()) {
2360       // Should have been done already when we tried to push an
2361       // entry on to the global mark stack. But let's do it again.
2362       set_has_overflown();
2363     }
2364 
2365     if (rp->processing_is_mt()) {
2366       assert(rp->num_q() == active_workers, "why not");
2367       rp->enqueue_discovered_references(&par_task_executor);
2368     } else {
2369       rp->enqueue_discovered_references();
2370     }
2371 
2372     rp->verify_no_references_recorded();
2373     assert(!rp->discovery_enabled(), "Post condition");
2374   }
2375 
2376   // Now clean up stale oops in StringTable
2377   StringTable::unlink(&g1_is_alive);
2378   // Clean up unreferenced symbols in symbol table.
2379   SymbolTable::unlink();
2380 }
2381 
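     // The "next" bitmap holds the results of the marking that has just
     // completed, so it becomes the new "prev" bitmap; the old "prev" bitmap
     // will be cleared before being reused as "next" in the following cycle.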
2382 void ConcurrentMark::swapMarkBitMaps() {
2383   CMBitMapRO* temp = _prevMarkBitMap;
2384   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2385   _nextMarkBitMap  = (CMBitMap*)  temp;
2386 }
2387 
2388 class CMRemarkTask: public AbstractGangTask {
2389 private:
2390   ConcurrentMark *_cm;
2391 
2392 public:
2393   void work(uint worker_id) {
2394     // Since all available tasks are actually started, we should
2395     // only proceed if we're supposed to be active.
2396     if (worker_id < _cm->active_tasks()) {
2397       CMTask* task = _cm->task(worker_id);
2398       task->record_start_time();
2399       do {
2400         task->do_marking_step(1000000000.0 /* something very large */,
2401                               true /* do_stealing    */,
2402                               true /* do_termination */);
2403       } while (task->has_aborted() && !_cm->has_overflown());
2404       // If we overflow, then we do not want to restart. We instead
2405       // want to abort remark and do concurrent marking again.
2406       task->record_end_time();
2407     }
2408   }
2409 
2410   CMRemarkTask(ConcurrentMark* cm, int active_workers) :
2411     AbstractGangTask("Par Remark"), _cm(cm) {
2412     _cm->terminator()->reset_for_reuse(active_workers);
2413   }
2414 };
2415 
2416 void ConcurrentMark::checkpointRootsFinalWork() {
2417   ResourceMark rm;
2418   HandleMark   hm;
2419   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2420 
2421   g1h->ensure_parsability(false);
2422 
2423   if (G1CollectedHeap::use_parallel_gc_threads()) {
2424     G1CollectedHeap::StrongRootsScope srs(g1h);
2425     // this is remark, so we'll use up all active threads
2426     uint active_workers = g1h->workers()->active_workers();
2427     if (active_workers == 0) {
2428       assert(active_workers > 0, "Should have been set earlier");
2429       active_workers = (uint) ParallelGCThreads;
2430       g1h->workers()->set_active_workers(active_workers);
2431     }
2432     set_phase(active_workers, false /* concurrent */);
2433     // Leave _parallel_marking_threads at its
2434     // value originally calculated in the ConcurrentMark
2435     // constructor and pass values of the active workers
2436     // through the gang in the task.
2437 
2438     CMRemarkTask remarkTask(this, active_workers);
2439     g1h->set_par_threads(active_workers);
2440     g1h->workers()->run_task(&remarkTask);
2441     g1h->set_par_threads(0);
2442   } else {
2443     G1CollectedHeap::StrongRootsScope srs(g1h);
2444     // this is remark, so we'll use up all available threads
2445     uint active_workers = 1;
2446     set_phase(active_workers, false /* concurrent */);
2447 
2448     CMRemarkTask remarkTask(this, active_workers);
2449     // We will start all available threads, even if we decide that the
2450     // active_workers will be fewer. The extra ones will just bail out
2451     // immediately.
2452     remarkTask.work(0);
2453   }
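       // Every SATB buffer filled by the write barrier should have been
       // drained during remark; a leftover completed buffer would mean some
       // recorded references were never traced.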
2454   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2455   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2456 
2457   print_stats();
2458 
2459 #if VERIFY_OBJS_PROCESSED
2460   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2461     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2462                            _scan_obj_cl.objs_processed,
2463                            ThreadLocalObjQueue::objs_enqueued);
2464     guarantee(_scan_obj_cl.objs_processed ==
2465               ThreadLocalObjQueue::objs_enqueued,
2466               "Different number of objs processed and enqueued.");
2467   }
2468 #endif
2469 }
2470 
2471 #ifndef PRODUCT
2472 
2473 class PrintReachableOopClosure: public OopClosure {
2474 private:
2475   G1CollectedHeap* _g1h;
2476   outputStream*    _out;
2477   VerifyOption     _vo;
2478   bool             _all;
2479 
2480 public:
2481   PrintReachableOopClosure(outputStream* out,
2482                            VerifyOption  vo,
2483                            bool          all) :
2484     _g1h(G1CollectedHeap::heap()),
2485     _out(out), _vo(vo), _all(all) { }
2486 
2487   void do_oop(narrowOop* p) { do_oop_work(p); }
2488   void do_oop(      oop* p) { do_oop_work(p); }
2489 
2490   template <class T> void do_oop_work(T* p) {
2491     oop         obj = oopDesc::load_decode_heap_oop(p);
2492     const char* str = NULL;
2493     const char* str2 = "";
2494 
2495     if (obj == NULL) {
2496       str = "";
2497     } else if (!_g1h->is_in_g1_reserved(obj)) {
2498       str = " O";
2499     } else {
2500       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2501       guarantee(hr != NULL, "invariant");
2502       bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo);
2503       bool marked = _g1h->is_marked(obj, _vo);
2504 
2505       if (over_tams) {
2506         str = " >";
2507         if (marked) {
2508           str2 = " AND MARKED";
2509         }
2510       } else if (marked) {
2511         str = " M";
2512       } else {
2513         str = " NOT";
2514       }
2515     }
2516 
2517     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2518                    p, (void*) obj, str, str2);
2519   }
2520 };
2521 
2522 class PrintReachableObjectClosure : public ObjectClosure {
2523 private:
2524   G1CollectedHeap* _g1h;
2525   outputStream*    _out;
2526   VerifyOption     _vo;
2527   bool             _all;
2528   HeapRegion*      _hr;
2529 
2530 public:
2531   PrintReachableObjectClosure(outputStream* out,
2532                               VerifyOption  vo,
2533                               bool          all,
2534                               HeapRegion*   hr) :
2535     _g1h(G1CollectedHeap::heap()),
2536     _out(out), _vo(vo), _all(all), _hr(hr) { }
2537 
2538   void do_object(oop o) {
2539     bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo);
2540     bool marked = _g1h->is_marked(o, _vo);
2541     bool print_it = _all || over_tams || marked;
2542 
2543     if (print_it) {
2544       _out->print_cr(" "PTR_FORMAT"%s",
2545                      o, (over_tams) ? " >" : (marked) ? " M" : "");
2546       PrintReachableOopClosure oopCl(_out, _vo, _all);
2547       o->oop_iterate(&oopCl);
2548     }
2549   }
2550 };
2551 
2552 class PrintReachableRegionClosure : public HeapRegionClosure {
2553 private:
2554   G1CollectedHeap* _g1h;
2555   outputStream*    _out;
2556   VerifyOption     _vo;
2557   bool             _all;
2558 
2559 public:
2560   bool doHeapRegion(HeapRegion* hr) {
2561     HeapWord* b = hr->bottom();
2562     HeapWord* e = hr->end();
2563     HeapWord* t = hr->top();
2564     HeapWord* p = _g1h->top_at_mark_start(hr, _vo);
2565     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2566                    "TAMS: "PTR_FORMAT, b, e, t, p);
2567     _out->cr();
2568 
2569     HeapWord* from = b;
2570     HeapWord* to   = t;
2571 
2572     if (to > from) {
2573       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2574       _out->cr();
2575       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2576       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2577       _out->cr();
2578     }
2579 
2580     return false;
2581   }
2582 
2583   PrintReachableRegionClosure(outputStream* out,
2584                               VerifyOption  vo,
2585                               bool          all) :
2586     _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { }
2587 };
2588 
2589 void ConcurrentMark::print_reachable(const char* str,
2590                                      VerifyOption vo,
2591                                      bool all) {
2592   gclog_or_tty->cr();
2593   gclog_or_tty->print_cr("== Doing heap dump... ");
2594 
2595   if (G1PrintReachableBaseFile == NULL) {
2596     gclog_or_tty->print_cr("  #### error: no base file defined");
2597     return;
2598   }
2599 
2600   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2601       (JVM_MAXPATHLEN - 1)) {
2602     gclog_or_tty->print_cr("  #### error: file name too long");
2603     return;
2604   }
2605 
2606   char file_name[JVM_MAXPATHLEN];
2607   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2608   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2609 
2610   fileStream fout(file_name);
2611   if (!fout.is_open()) {
2612     gclog_or_tty->print_cr("  #### error: could not open file");
2613     return;
2614   }
2615 
2616   outputStream* out = &fout;
2617   out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo));
2618   out->cr();
2619 
2620   out->print_cr("--- ITERATING OVER REGIONS");
2621   out->cr();
2622   PrintReachableRegionClosure rcl(out, vo, all);
2623   _g1h->heap_region_iterate(&rcl);
2624   out->cr();
2625 
2626   gclog_or_tty->print_cr("  done");
2627   gclog_or_tty->flush();
2628 }
2629 
2630 #endif // PRODUCT
2631 
2632 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
2633   // Note we are overriding the read-only view of the prev map here, via
2634   // the cast.
2635   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2636 }
2637 
2638 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
2639   _nextMarkBitMap->clearRange(mr);
2640 }
2641 
2642 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
2643   clearRangePrevBitmap(mr);
2644   clearRangeNextBitmap(mr);
2645 }
2646 
2647 HeapRegion*
2648 ConcurrentMark::claim_region(int task_num) {
2649   // "checkpoint" the finger
2650   HeapWord* finger = _finger;
2651 
2652   // _heap_end will not change underneath our feet; it only changes at
2653   // yield points.
2654   while (finger < _heap_end) {
2655     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2656 
2657     // Note on how this code handles humongous regions. In the
2658     // normal case the finger will reach the start of a "starts
2659     // humongous" (SH) region. Its end will either be the end of the
2660     // last "continues humongous" (CH) region in the sequence, or the
2661     // standard end of the SH region (if the SH is the only region in
2662     // the sequence). That way claim_region() will skip over the CH
2663     // regions. However, there is a subtle race between a CM thread
2664     // executing this method and a mutator thread doing a humongous
2665     // object allocation. The two are not mutually exclusive as the CM
2666     // thread does not need to hold the Heap_lock when it gets
2667     // here. So there is a chance that claim_region() will come across
2668     // a free region that's in the process of becoming a SH or a CH
2669     // region. In the former case, it will either
2670     //   a) Miss the update to the region's end, in which case it will
2671     //      visit every subsequent CH region, will find their bitmaps
2672     //      empty, and do nothing, or
2673     //   b) Will observe the update of the region's end (in which case
2674     //      it will skip the subsequent CH regions).
2675     // If it comes across a region that suddenly becomes CH, the
2676     // scenario will be similar to b). So, the race between
2677     // claim_region() and a humongous object allocation might force us
2678     // to do a bit of unnecessary work (due to some unnecessary bitmap
2679     // iterations) but it should not introduce any correctness issues.
2680     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
2681     HeapWord*   bottom        = curr_region->bottom();
2682     HeapWord*   end           = curr_region->end();
2683     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2684 
2685     if (verbose_low()) {
2686       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2687                              "["PTR_FORMAT", "PTR_FORMAT"), "
2688                              "limit = "PTR_FORMAT,
2689                              task_num, curr_region, bottom, end, limit);
2690     }
2691 
2692     // Is the gap between reading the finger and doing the CAS too long?
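         // Atomic::cmpxchg_ptr(end, &_finger, finger) installs "end" as the
         // new global finger only if the finger still equals the value read
         // above, and returns the previous value. So res == finger means we
         // have claimed the range [finger, end).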
2693     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2694     if (res == finger) {
2695       // we succeeded
2696 
2697       // notice that _finger == end cannot be guaranteed here since
2698       // someone else might have moved the finger even further
2699       assert(_finger >= end, "the finger should have moved forward");
2700 
2701       if (verbose_low()) {
2702         gclog_or_tty->print_cr("[%d] we were successful with region = "
2703                                PTR_FORMAT, task_num, curr_region);
2704       }
2705 
2706       if (limit > bottom) {
2707         if (verbose_low()) {
2708           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2709                                  "returning it ", task_num, curr_region);
2710         }
2711         return curr_region;
2712       } else {
2713         assert(limit == bottom,
2714                "the region limit should be at bottom");
2715         if (verbose_low()) {
2716           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2717                                  "returning NULL", task_num, curr_region);
2718         }
2719         // we return NULL and the caller should try calling
2720         // claim_region() again.
2721         return NULL;
2722       }
2723     } else {
2724       assert(_finger > finger, "the finger should have moved forward");
2725       if (verbose_low()) {
2726         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2727                                "global finger = "PTR_FORMAT", "
2728                                "our finger = "PTR_FORMAT,
2729                                task_num, _finger, finger);
2730       }
2731 
2732       // read it again
2733       finger = _finger;
2734     }
2735   }
2736 
2737   return NULL;
2738 }
2739 
2740 #ifndef PRODUCT
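     // The distinct places that are scanned for stale collection set oops
     // during verification: the global mark stack, the per-task queues, and
     // the completed and per-thread SATB buffers.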
2741 enum VerifyNoCSetOopsPhase {
2742   VerifyNoCSetOopsStack,
2743   VerifyNoCSetOopsQueues,
2744   VerifyNoCSetOopsSATBCompleted,
2745   VerifyNoCSetOopsSATBThread
2746 };
2747 
2748 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
2749 private:
2750   G1CollectedHeap* _g1h;
2751   VerifyNoCSetOopsPhase _phase;
2752   int _info;
2753 
2754   const char* phase_str() {
2755     switch (_phase) {
2756     case VerifyNoCSetOopsStack:         return "Stack";
2757     case VerifyNoCSetOopsQueues:        return "Queue";
2758     case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
2759     case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
2760     default:                            ShouldNotReachHere();
2761     }
2762     return NULL;
2763   }
2764 
2765   void do_object_work(oop obj) {
2766     guarantee(!_g1h->obj_in_cs(obj),
2767               err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
2768                       (void*) obj, phase_str(), _info));
2769   }
2770 
2771 public:
2772   VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
2773 
2774   void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
2775     _phase = phase;
2776     _info = info;
2777   }
2778 
2779   virtual void do_oop(oop* p) {
2780     oop obj = oopDesc::load_decode_heap_oop(p);
2781     do_object_work(obj);
2782   }
2783 
2784   virtual void do_oop(narrowOop* p) {
2785     // We should not come across narrow oops while scanning marking
2786     // stacks and SATB buffers.
2787     ShouldNotReachHere();
2788   }
2789 
2790   virtual void do_object(oop obj) {
2791     do_object_work(obj);
2792   }
2793 };
2794 
2795 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
2796                                          bool verify_enqueued_buffers,
2797                                          bool verify_thread_buffers,
2798                                          bool verify_fingers) {
2799   assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
2800   if (!G1CollectedHeap::heap()->mark_in_progress()) {
2801     return;
2802   }
2803 
2804   VerifyNoCSetOopsClosure cl;
2805 
2806   if (verify_stacks) {
2807     // Verify entries on the global mark stack
2808     cl.set_phase(VerifyNoCSetOopsStack);
2809     _markStack.oops_do(&cl);
2810 
2811     // Verify entries on the task queues
2812     for (int i = 0; i < (int) _max_task_num; i += 1) {
2813       cl.set_phase(VerifyNoCSetOopsQueues, i);
2814       OopTaskQueue* queue = _task_queues->queue(i);
2815       queue->oops_do(&cl);
2816     }
2817   }
2818 
2819   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
2820 
2821   // Verify entries on the enqueued SATB buffers
2822   if (verify_enqueued_buffers) {
2823     cl.set_phase(VerifyNoCSetOopsSATBCompleted);
2824     satb_qs.iterate_completed_buffers_read_only(&cl);
2825   }
2826 
2827   // Verify entries on the per-thread SATB buffers
2828   if (verify_thread_buffers) {
2829     cl.set_phase(VerifyNoCSetOopsSATBThread);
2830     satb_qs.iterate_thread_buffers_read_only(&cl);
2831   }
2832 
2833   if (verify_fingers) {
2834     // Verify the global finger
2835     HeapWord* global_finger = finger();
2836     if (global_finger != NULL && global_finger < _heap_end) {
2837       // The global finger always points to a heap region boundary. We
2838       // use heap_region_containing_raw() to get the containing region
2839       // given that the global finger could be pointing to a free region
2840       // which subsequently becomes a continues humongous region. If that
2841       // happens, heap_region_containing() will return the bottom of the
2842       // corresponding starts humongous region and the check below will
2843       // not hold any more.
2844       HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
2845       guarantee(global_finger == global_hr->bottom(),
2846                 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
2847                         global_finger, HR_FORMAT_PARAMS(global_hr)));
2848     }
2849 
2850     // Verify the task fingers
2851     assert(parallel_marking_threads() <= _max_task_num, "sanity");
2852     for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
2853       CMTask* task = _tasks[i];
2854       HeapWord* task_finger = task->finger();
2855       if (task_finger != NULL && task_finger < _heap_end) {
2856         // See above note on the global finger verification.
2857         HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
2858         guarantee(task_finger == task_hr->bottom() ||
2859                   !task_hr->in_collection_set(),
2860                   err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
2861                           task_finger, HR_FORMAT_PARAMS(task_hr)));
2862       }
2863     }
2864   }
2865 }
2866 #endif // PRODUCT
2867 
2868 // Aggregate the counting data that was constructed concurrently
2869 // with marking.
2870 class AggregateCountDataHRClosure: public HeapRegionClosure {
2871   G1CollectedHeap* _g1h;
2872   ConcurrentMark* _cm;
2873   CardTableModRefBS* _ct_bs;
2874   BitMap* _cm_card_bm;
2875   size_t _max_task_num;
2876 
2877  public:
2878   AggregateCountDataHRClosure(G1CollectedHeap* g1h,
2879                               BitMap* cm_card_bm,
2880                               size_t max_task_num) :
2881     _g1h(g1h), _cm(g1h->concurrent_mark()),
2882     _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
2883     _cm_card_bm(cm_card_bm), _max_task_num(max_task_num) { }
2884 
2885   bool doHeapRegion(HeapRegion* hr) {
2886     if (hr->continuesHumongous()) {
2887       // We will ignore these here and process them when their
2888       // associated "starts humongous" region is processed.
2889       // Note that we cannot rely on their associated
2890       // "starts humongous" region to have its bit set to 1
2891       // since, due to the region chunking in the parallel region
2892       // iteration, a "continues humongous" region might be visited
2893       // before its associated "starts humongous".
2894       return false;
2895     }
2896 
2897     HeapWord* start = hr->bottom();
2898     HeapWord* limit = hr->next_top_at_mark_start();
2899     HeapWord* end = hr->end();
2900 
2901     assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(),
2902            err_msg("Preconditions not met - "
2903                    "start: "PTR_FORMAT", limit: "PTR_FORMAT", "
2904                    "top: "PTR_FORMAT", end: "PTR_FORMAT,
2905                    start, limit, hr->top(), hr->end()));
2906 
2907     assert(hr->next_marked_bytes() == 0, "Precondition");
2908 
2909     if (start == limit) {
2910       // NTAMS of this region has not been set so nothing to do.
2911       return false;
2912     }
2913 
2914     // 'start' should be in the heap.
2915     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2916     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2917     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2918 
2919     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2920     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2921     BitMap::idx_t end_idx = _cm->card_bitmap_index_for(end);
2922 
2923     // If ntams is not card aligned then we bump card bitmap index
2924     // for limit so that we get all the cards spanned by
2925     // the object ending at ntams.
2926     // Note: if this is the last region in the heap then ntams
2927     // could actually be just beyond the end of the heap;
2928     // limit_idx will then correspond to a (non-existent) card
2929     // that is also outside the heap.
2930     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2931       limit_idx += 1;
2932     }
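    // Illustrative example (not part of the algorithm, assuming the usual
    // 512-byte cards): if 'limit' (NTAMS) falls in the middle of the third
    // card above the heap bottom, card_bitmap_index_for(limit) yields 2 and
    // the half-open range [start_idx, limit_idx) would miss that third card,
    // so we bump limit_idx to 3 to cover the card that the object ending at
    // NTAMS partially spans.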
2933 
2934     assert(limit_idx <= end_idx, "or else use atomics");
2935 
2936     // Aggregate the "stripe" in the count data associated with hr.
2937     uint hrs_index = hr->hrs_index();
2938     size_t marked_bytes = 0;
2939 
2940     for (int i = 0; (size_t)i < _max_task_num; i += 1) {
2941       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2942       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2943 
2944       // Fetch the marked_bytes in this region for task i and
2945       // add it to the running total for this region.
2946       marked_bytes += marked_bytes_array[hrs_index];
2947 
2948       // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
2949       // into the global card bitmap.
2950       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2951 
2952       while (scan_idx < limit_idx) {
2953         assert(task_card_bm->at(scan_idx) == true, "should be");
2954         _cm_card_bm->set_bit(scan_idx);
2955         assert(_cm_card_bm->at(scan_idx) == true, "should be");
2956 
2957         // BitMap::get_next_one_offset() can handle the case when
2958         // its left_offset parameter is greater than its right_offset
2959         // parameter. It does, however, have an early exit if
2960         // left_offset == right_offset. So let's limit the value
2961         // passed in for left offset here.
2962         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
2963         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
2964       }
2965     }
2966 
2967     // Update the marked bytes for this region.
2968     hr->add_to_marked_bytes(marked_bytes);
2969 
2970     // Next heap region
2971     return false;
2972   }
2973 };
2974 
2975 class G1AggregateCountDataTask: public AbstractGangTask {
2976 protected:
2977   G1CollectedHeap* _g1h;
2978   ConcurrentMark* _cm;
2979   BitMap* _cm_card_bm;
2980   size_t _max_task_num;
2981   int _active_workers;
2982 
2983 public:
2984   G1AggregateCountDataTask(G1CollectedHeap* g1h,
2985                            ConcurrentMark* cm,
2986                            BitMap* cm_card_bm,
2987                            size_t max_task_num,
2988                            int n_workers) :
2989     AbstractGangTask("Count Aggregation"),
2990     _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm),
2991     _max_task_num(max_task_num),
2992     _active_workers(n_workers) { }
2993 
2994   void work(uint worker_id) {
2995     AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_task_num);
2996 
2997     if (G1CollectedHeap::use_parallel_gc_threads()) {
2998       _g1h->heap_region_par_iterate_chunked(&cl, worker_id,
2999                                             _active_workers,
3000                                             HeapRegion::AggregateCountClaimValue);
3001     } else {
3002       _g1h->heap_region_iterate(&cl);
3003     }
3004   }
3005 };
3006 
3007 
3008 void ConcurrentMark::aggregate_count_data() {
3009   int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
3010                         _g1h->workers()->active_workers() :
3011                         1);
3012 
3013   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
3014                                            _max_task_num, n_workers);
3015 
3016   if (G1CollectedHeap::use_parallel_gc_threads()) {
3017     assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue),
3018            "sanity check");
3019     _g1h->set_par_threads(n_workers);
3020     _g1h->workers()->run_task(&g1_par_agg_task);
3021     _g1h->set_par_threads(0);
3022 
3023     assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue),
3024            "sanity check");
3025     _g1h->reset_heap_region_claim_values();
3026   } else {
3027     g1_par_agg_task.work(0);
3028   }
3029 }
3030 
3031 // Clear the per-worker arrays used to store the per-region counting data
3032 void ConcurrentMark::clear_all_count_data() {
3033   // Clear the global card bitmap - it will be filled during
3034   // liveness count aggregation (during remark) and the
3035   // final counting task.
3036   _card_bm.clear();
3037 
3038   // Clear the global region bitmap - it will be filled as part
3039   // of the final counting task.
3040   _region_bm.clear();
3041 
3042   uint max_regions = _g1h->max_regions();
3043   assert(_max_task_num != 0, "uninitialized");
3044 
3045   for (int i = 0; (size_t) i < _max_task_num; i += 1) {
3046     BitMap* task_card_bm = count_card_bitmap_for(i);
3047     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3048 
3049     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3050     assert(marked_bytes_array != NULL, "uninitialized");
3051 
3052     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3053     task_card_bm->clear();
3054   }
3055 }
3056 
3057 void ConcurrentMark::print_stats() {
3058   if (verbose_stats()) {
3059     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3060     for (size_t i = 0; i < _active_tasks; ++i) {
3061       _tasks[i]->print_stats();
3062       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3063     }
3064   }
3065 }
3066 
3067 // abandon current marking iteration due to a Full GC
3068 void ConcurrentMark::abort() {
3069   // Clear all marks to force marking thread to do nothing
3070   _nextMarkBitMap->clearAll();
3071   // Clear the liveness counting data
3072   clear_all_count_data();
3073   // Empty mark stack
3074   reset_marking_state();
3075   for (int i = 0; i < (int)_max_task_num; ++i) {
3076     _tasks[i]->clear_region_fields();
3077   }
3078   _has_aborted = true;
3079 
3080   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3081   satb_mq_set.abandon_partial_marking();
3082   // This can be called either during or outside marking, we'll read
3083   // the expected_active value from the SATB queue set.
3084   satb_mq_set.set_active_all_threads(
3085                                  false, /* new active value */
3086                                  satb_mq_set.is_active() /* expected_active */);
3087 
3088   _g1h->trace_heap_after_concurrent_cycle();
3089   _g1h->register_concurrent_cycle_end();
3090 }
3091 
3092 static void print_ms_time_info(const char* prefix, const char* name,
3093                                NumberSeq& ns) {
3094   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3095                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3096   if (ns.num() > 0) {
3097     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3098                            prefix, ns.sd(), ns.maximum());
3099   }
3100 }
3101 
3102 void ConcurrentMark::print_summary_info() {
3103   gclog_or_tty->print_cr(" Concurrent marking:");
3104   print_ms_time_info("  ", "init marks", _init_times);
3105   print_ms_time_info("  ", "remarks", _remark_times);
3106   {
3107     print_ms_time_info("     ", "final marks", _remark_mark_times);
3108     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3109 
3110   }
3111   print_ms_time_info("  ", "cleanups", _cleanup_times);
3112   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3113                          _total_counting_time,
3114                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3115                           (double)_cleanup_times.num()
3116                          : 0.0));
3117   if (G1ScrubRemSets) {
3118     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3119                            _total_rs_scrub_time,
3120                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3121                             (double)_cleanup_times.num()
3122                            : 0.0));
3123   }
3124   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3125                          (_init_times.sum() + _remark_times.sum() +
3126                           _cleanup_times.sum())/1000.0);
3127   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3128                 "(%8.2f s marking).",
3129                 cmThread()->vtime_accum(),
3130                 cmThread()->vtime_mark_accum());
3131 }
3132 
3133 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3134   if (use_parallel_marking_threads()) {
3135     _parallel_workers->print_worker_threads_on(st);
3136   }
3137 }
3138 
3139 // We take a break if someone is trying to stop the world.
3140 bool ConcurrentMark::do_yield_check(uint worker_id) {
3141   if (should_yield()) {
3142     if (worker_id == 0) {
3143       _g1h->g1_policy()->record_concurrent_pause();
3144     }
3145     cmThread()->yield();
3146     return true;
3147   } else {
3148     return false;
3149   }
3150 }
3151 
3152 bool ConcurrentMark::should_yield() {
3153   return cmThread()->should_yield();
3154 }
3155 
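// Sketch of the arithmetic in containing_card_is_marked() below (purely
// illustrative, assuming the usual 512-byte cards): pointer_delta(p, start, 1)
// gives the byte offset of 'p' from the bottom of the reserved region, so a
// pointer 4096 bytes above the bottom tests bit 4096 >> card_shift == 8 of
// the global card bitmap.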
3156 bool ConcurrentMark::containing_card_is_marked(void* p) {
3157   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3158   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3159 }
3160 
3161 bool ConcurrentMark::containing_cards_are_marked(void* start,
3162                                                  void* last) {
3163   return containing_card_is_marked(start) &&
3164          containing_card_is_marked(last);
3165 }
3166 
3167 #ifndef PRODUCT
3168 // for debugging purposes
3169 void ConcurrentMark::print_finger() {
3170   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3171                          _heap_start, _heap_end, _finger);
3172   for (int i = 0; i < (int) _max_task_num; ++i) {
3173     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
3174   }
3175   gclog_or_tty->print_cr("");
3176 }
3177 #endif
3178 
3179 void CMTask::scan_object(oop obj) {
3180   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3181 
3182   if (_cm->verbose_high()) {
3183     gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
3184                            _task_id, (void*) obj);
3185   }
3186 
3187   size_t obj_size = obj->size();
3188   _words_scanned += obj_size;
3189 
3190   obj->oop_iterate(_cm_oop_closure);
3191   statsOnly( ++_objs_scanned );
3192   check_limits();
3193 }
3194 
3195 // Closure for iteration over bitmaps
3196 class CMBitMapClosure : public BitMapClosure {
3197 private:
3198   // the bitmap that is being iterated over
3199   CMBitMap*                   _nextMarkBitMap;
3200   ConcurrentMark*             _cm;
3201   CMTask*                     _task;
3202 
3203 public:
3204   CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) :
3205     _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
3206 
3207   bool do_bit(size_t offset) {
3208     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3209     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3210     assert( addr < _cm->finger(), "invariant");
3211 
3212     statsOnly( _task->increase_objs_found_on_bitmap() );
3213     assert(addr >= _task->finger(), "invariant");
3214 
3215     // We move that task's local finger along.
3216     _task->move_finger_to(addr);
3217 
3218     _task->scan_object(oop(addr));
3219     // we only partially drain the local queue and global stack
3220     _task->drain_local_queue(true);
3221     _task->drain_global_stack(true);
3222 
3223     // if the has_aborted flag has been raised, we need to bail out of
3224     // the iteration
3225     return !_task->has_aborted();
3226   }
3227 };
3228 
3229 // Closure for iterating over objects, currently only used for
3230 // processing SATB buffers.
3231 class CMObjectClosure : public ObjectClosure {
3232 private:
3233   CMTask* _task;
3234 
3235 public:
3236   void do_object(oop obj) {
3237     _task->deal_with_reference(obj);
3238   }
3239 
3240   CMObjectClosure(CMTask* task) : _task(task) { }
3241 };
3242 
3243 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3244                                ConcurrentMark* cm,
3245                                CMTask* task)
3246   : _g1h(g1h), _cm(cm), _task(task) {
3247   assert(_ref_processor == NULL, "should be initialized to NULL");
3248 
3249   if (G1UseConcMarkReferenceProcessing) {
3250     _ref_processor = g1h->ref_processor_cm();
3251     assert(_ref_processor != NULL, "should not be NULL");
3252   }
3253 }
3254 
3255 void CMTask::setup_for_region(HeapRegion* hr) {
3256   // Separated the asserts so that we know which one fires.
3257   assert(hr != NULL,
3258         "claim_region() should have filtered out continues humongous regions");
3259   assert(!hr->continuesHumongous(),
3260         "claim_region() should have filtered out continues humongous regions");
3261 
3262   if (_cm->verbose_low()) {
3263     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3264                            _task_id, hr);
3265   }
3266 
3267   _curr_region  = hr;
3268   _finger       = hr->bottom();
3269   update_region_limit();
3270 }
3271 
3272 void CMTask::update_region_limit() {
3273   HeapRegion* hr            = _curr_region;
3274   HeapWord* bottom          = hr->bottom();
3275   HeapWord* limit           = hr->next_top_at_mark_start();
3276 
3277   if (limit == bottom) {
3278     if (_cm->verbose_low()) {
3279       gclog_or_tty->print_cr("[%d] found an empty region "
3280                              "["PTR_FORMAT", "PTR_FORMAT")",
3281                              _task_id, bottom, limit);
3282     }
3283     // The region was collected underneath our feet.
3284     // We set the finger to bottom to ensure that the bitmap
3285     // iteration that will follow this will not do anything.
3286     // (this is not a condition that holds when we set the region up,
3287     // as the region is not supposed to be empty in the first place)
3288     _finger = bottom;
3289   } else if (limit >= _region_limit) {
3290     assert(limit >= _finger, "peace of mind");
3291   } else {
3292     assert(limit < _region_limit, "only way to get here");
3293     // This can happen under some pretty unusual circumstances.  An
3294     // evacuation pause empties the region underneath our feet (NTAMS
3295     // at bottom). We then do some allocation in the region (NTAMS
3296     // stays at bottom), followed by the region being used as a GC
3297     // alloc region (NTAMS will move to top() and the objects
3298     // originally below it will be grayed). All objects now marked in
3299     // the region are explicitly grayed, if below the global finger,
3300     // and in fact we do not need to scan anything else. So, we simply
3301     // set _finger to be limit to ensure that the bitmap iteration
3302     // doesn't do anything.
3303     _finger = limit;
3304   }
3305 
3306   _region_limit = limit;
3307 }
3308 
3309 void CMTask::giveup_current_region() {
3310   assert(_curr_region != NULL, "invariant");
3311   if (_cm->verbose_low()) {
3312     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3313                            _task_id, _curr_region);
3314   }
3315   clear_region_fields();
3316 }
3317 
3318 void CMTask::clear_region_fields() {
3319   // Values for these three fields that indicate that we're not
3320   // holding on to a region.
3321   _curr_region   = NULL;
3322   _finger        = NULL;
3323   _region_limit  = NULL;
3324 }
3325 
3326 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3327   if (cm_oop_closure == NULL) {
3328     assert(_cm_oop_closure != NULL, "invariant");
3329   } else {
3330     assert(_cm_oop_closure == NULL, "invariant");
3331   }
3332   _cm_oop_closure = cm_oop_closure;
3333 }
3334 
3335 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3336   guarantee(nextMarkBitMap != NULL, "invariant");
3337 
3338   if (_cm->verbose_low()) {
3339     gclog_or_tty->print_cr("[%d] resetting", _task_id);
3340   }
3341 
3342   _nextMarkBitMap                = nextMarkBitMap;
3343   clear_region_fields();
3344 
3345   _calls                         = 0;
3346   _elapsed_time_ms               = 0.0;
3347   _termination_time_ms           = 0.0;
3348   _termination_start_time_ms     = 0.0;
3349 
3350 #if _MARKING_STATS_
3351   _local_pushes                  = 0;
3352   _local_pops                    = 0;
3353   _local_max_size                = 0;
3354   _objs_scanned                  = 0;
3355   _global_pushes                 = 0;
3356   _global_pops                   = 0;
3357   _global_max_size               = 0;
3358   _global_transfers_to           = 0;
3359   _global_transfers_from         = 0;
3360   _regions_claimed               = 0;
3361   _objs_found_on_bitmap          = 0;
3362   _satb_buffers_processed        = 0;
3363   _steal_attempts                = 0;
3364   _steals                        = 0;
3365   _aborted                       = 0;
3366   _aborted_overflow              = 0;
3367   _aborted_cm_aborted            = 0;
3368   _aborted_yield                 = 0;
3369   _aborted_timed_out             = 0;
3370   _aborted_satb                  = 0;
3371   _aborted_termination           = 0;
3372 #endif // _MARKING_STATS_
3373 }
3374 
3375 bool CMTask::should_exit_termination() {
3376   regular_clock_call();
3377   // This is called when we are in the termination protocol. We should
3378   // quit if, for some reason, this task wants to abort or the global
3379   // stack is not empty (this means that we can get work from it).
3380   return !_cm->mark_stack_empty() || has_aborted();
3381 }
3382 
3383 void CMTask::reached_limit() {
3384   assert(_words_scanned >= _words_scanned_limit ||
3385          _refs_reached >= _refs_reached_limit ,
3386          "shouldn't have been called otherwise");
3387   regular_clock_call();
3388 }
3389 
3390 void CMTask::regular_clock_call() {
3391   if (has_aborted()) return;
3392 
3393   // First, we need to recalculate the words scanned and refs reached
3394   // limits for the next clock call.
3395   recalculate_limits();
3396 
3397   // During the regular clock call we do the following
3398 
3399   // (1) If an overflow has been flagged, then we abort.
3400   if (_cm->has_overflown()) {
3401     set_has_aborted();
3402     return;
3403   }
3404 
3405   // If we are not concurrent (i.e. we're doing remark) we don't need
3406   // to check anything else. The other steps are only needed during
3407   // the concurrent marking phase.
3408   if (!concurrent()) return;
3409 
3410   // (2) If marking has been aborted for Full GC, then we also abort.
3411   if (_cm->has_aborted()) {
3412     set_has_aborted();
3413     statsOnly( ++_aborted_cm_aborted );
3414     return;
3415   }
3416 
3417   double curr_time_ms = os::elapsedVTime() * 1000.0;
3418 
3419   // (3) If marking stats are enabled, then we update the step history.
3420 #if _MARKING_STATS_
3421   if (_words_scanned >= _words_scanned_limit) {
3422     ++_clock_due_to_scanning;
3423   }
3424   if (_refs_reached >= _refs_reached_limit) {
3425     ++_clock_due_to_marking;
3426   }
3427 
3428   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3429   _interval_start_time_ms = curr_time_ms;
3430   _all_clock_intervals_ms.add(last_interval_ms);
3431 
3432   if (_cm->verbose_medium()) {
3433       gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3434                         "scanned = %d%s, refs reached = %d%s",
3435                         _task_id, last_interval_ms,
3436                         _words_scanned,
3437                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3438                         _refs_reached,
3439                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3440   }
3441 #endif // _MARKING_STATS_
3442 
3443   // (4) We check whether we should yield. If we have to, then we abort.
3444   if (_cm->should_yield()) {
3445     // We should yield. To do this we abort the task. The caller is
3446     // responsible for yielding.
3447     set_has_aborted();
3448     statsOnly( ++_aborted_yield );
3449     return;
3450   }
3451 
3452   // (5) We check whether we've reached our time quota. If we have,
3453   // then we abort.
3454   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3455   if (elapsed_time_ms > _time_target_ms) {
3456     set_has_aborted();
3457     _has_timed_out = true;
3458     statsOnly( ++_aborted_timed_out );
3459     return;
3460   }
3461 
3462   // (6) Finally, we check whether there are enough completed SATB
3463   // buffers available for processing. If there are, we abort.
3464   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3465   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3466     if (_cm->verbose_low()) {
3467       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3468                              _task_id);
3469     }
3470     // we do need to process SATB buffers, we'll abort and restart
3471     // the marking task to do so
3472     set_has_aborted();
3473     statsOnly( ++_aborted_satb );
3474     return;
3475   }
3476 }
3477 
3478 void CMTask::recalculate_limits() {
3479   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3480   _words_scanned_limit      = _real_words_scanned_limit;
3481 
3482   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3483   _refs_reached_limit       = _real_refs_reached_limit;
3484 }
3485 
3486 void CMTask::decrease_limits() {
3487   // This is called when we believe that we're going to do an infrequent
3488   // operation which will increase the per byte scanned cost (i.e. move
3489   // entries to/from the global stack). It basically tries to decrease the
3490   // scanning limit so that the clock is called earlier.
3491 
3492   if (_cm->verbose_medium()) {
3493     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3494   }
3495 
3496   _words_scanned_limit = _real_words_scanned_limit -
3497     3 * words_scanned_period / 4;
3498   _refs_reached_limit  = _real_refs_reached_limit -
3499     3 * refs_reached_period / 4;
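  // Rough illustration (hypothetical value): if words_scanned_period were
  // 12k words, the clock would now fire after only ~3k further words are
  // scanned past the last recalculate_limits() point, instead of after a
  // full period.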
3500 }
3501 
3502 void CMTask::move_entries_to_global_stack() {
3503   // local array where we'll store the entries that will be popped
3504   // from the local queue
3505   oop buffer[global_stack_transfer_size];
3506 
3507   int n = 0;
3508   oop obj;
3509   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3510     buffer[n] = obj;
3511     ++n;
3512   }
3513 
3514   if (n > 0) {
3515     // we popped at least one entry from the local queue
3516 
3517     statsOnly( ++_global_transfers_to; _local_pops += n );
3518 
3519     if (!_cm->mark_stack_push(buffer, n)) {
3520       if (_cm->verbose_low()) {
3521         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
3522                                _task_id);
3523       }
3524       set_has_aborted();
3525     } else {
3526       // the transfer was successful
3527 
3528       if (_cm->verbose_medium()) {
3529         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3530                                _task_id, n);
3531       }
3532       statsOnly( int tmp_size = _cm->mark_stack_size();
3533                  if (tmp_size > _global_max_size) {
3534                    _global_max_size = tmp_size;
3535                  }
3536                  _global_pushes += n );
3537     }
3538   }
3539 
3540   // this operation was quite expensive, so decrease the limits
3541   decrease_limits();
3542 }
3543 
3544 void CMTask::get_entries_from_global_stack() {
3545   // local array where we'll store the entries that will be popped
3546   // from the global stack.
3547   oop buffer[global_stack_transfer_size];
3548   int n;
3549   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3550   assert(n <= global_stack_transfer_size,
3551          "we should not pop more than the given limit");
3552   if (n > 0) {
3553     // yes, we did actually pop at least one entry
3554 
3555     statsOnly( ++_global_transfers_from; _global_pops += n );
3556     if (_cm->verbose_medium()) {
3557       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3558                              _task_id, n);
3559     }
3560     for (int i = 0; i < n; ++i) {
3561       bool success = _task_queue->push(buffer[i]);
3562       // We only call this when the local queue is empty or under a
3563       // given target limit. So, we do not expect this push to fail.
3564       assert(success, "invariant");
3565     }
3566 
3567     statsOnly( int tmp_size = _task_queue->size();
3568                if (tmp_size > _local_max_size) {
3569                  _local_max_size = tmp_size;
3570                }
3571                _local_pushes += n );
3572   }
3573 
3574   // this operation was quite expensive, so decrease the limits
3575   decrease_limits();
3576 }
3577 
3578 void CMTask::drain_local_queue(bool partially) {
3579   if (has_aborted()) return;
3580 
3581   // Decide what the target size is, depending on whether we're going to
3582   // drain it partially (so that other tasks can steal if they run out
3583   // of things to do) or totally (at the very end).
3584   size_t target_size;
3585   if (partially) {
3586     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3587   } else {
3588     target_size = 0;
3589   }
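  // For example (hypothetical numbers): with a task queue capacity of 16k
  // entries and GCDrainStackTargetSize set to a small value such as 64, the
  // partial-drain target is 64, leaving some entries in the queue for other
  // tasks to steal.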
3590 
3591   if (_task_queue->size() > target_size) {
3592     if (_cm->verbose_high()) {
3593       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3594                              _task_id, target_size);
3595     }
3596 
3597     oop obj;
3598     bool ret = _task_queue->pop_local(obj);
3599     while (ret) {
3600       statsOnly( ++_local_pops );
3601 
3602       if (_cm->verbose_high()) {
3603         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3604                                (void*) obj);
3605       }
3606 
3607       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3608       assert(!_g1h->is_on_master_free_list(
3609                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3610 
3611       scan_object(obj);
3612 
3613       if (_task_queue->size() <= target_size || has_aborted()) {
3614         ret = false;
3615       } else {
3616         ret = _task_queue->pop_local(obj);
3617       }
3618     }
3619 
3620     if (_cm->verbose_high()) {
3621       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3622                              _task_id, _task_queue->size());
3623     }
3624   }
3625 }
3626 
3627 void CMTask::drain_global_stack(bool partially) {
3628   if (has_aborted()) return;
3629 
3630   // We have a policy to drain the local queue before we attempt to
3631   // drain the global stack.
3632   assert(partially || _task_queue->size() == 0, "invariant");
3633 
3634   // Decide what the target size is, depending on whether we're going to
3635   // drain it partially (so that other tasks can steal if they run out
3636   // of things to do) or totally (at the very end).  Notice that,
3637   // because we move entries from the global stack in chunks or
3638   // because another task might be doing the same, we might in fact
3639   // drop below the target. But, this is not a problem.
3640   size_t target_size;
3641   if (partially) {
3642     target_size = _cm->partial_mark_stack_size_target();
3643   } else {
3644     target_size = 0;
3645   }
3646 
3647   if (_cm->mark_stack_size() > target_size) {
3648     if (_cm->verbose_low()) {
3649       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3650                              _task_id, target_size);
3651     }
3652 
3653     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3654       get_entries_from_global_stack();
3655       drain_local_queue(partially);
3656     }
3657 
3658     if (_cm->verbose_low()) {
3659       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3660                              _task_id, _cm->mark_stack_size());
3661     }
3662   }
3663 }
3664 
3665 // The SATB queue set has several assumptions on whether to call the par or
3666 // non-par versions of its methods. This is why some of the code is
3667 // replicated. We should really get rid of the single-threaded version
3668 // of the code to simplify things.
3669 void CMTask::drain_satb_buffers() {
3670   if (has_aborted()) return;
3671 
3672   // We set this so that the regular clock knows that we're in the
3673   // middle of draining buffers and doesn't set the abort flag when it
3674   // notices that SATB buffers are available for draining. It'd be
3675   // very counterproductive if it did that. :-)
3676   _draining_satb_buffers = true;
3677 
3678   CMObjectClosure oc(this);
3679   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3680   if (G1CollectedHeap::use_parallel_gc_threads()) {
3681     satb_mq_set.set_par_closure(_task_id, &oc);
3682   } else {
3683     satb_mq_set.set_closure(&oc);
3684   }
3685 
3686   // This keeps claiming and applying the closure to completed buffers
3687   // until we run out of buffers or we need to abort.
3688   if (G1CollectedHeap::use_parallel_gc_threads()) {
3689     while (!has_aborted() &&
3690            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3691       if (_cm->verbose_medium()) {
3692         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3693       }
3694       statsOnly( ++_satb_buffers_processed );
3695       regular_clock_call();
3696     }
3697   } else {
3698     while (!has_aborted() &&
3699            satb_mq_set.apply_closure_to_completed_buffer()) {
3700       if (_cm->verbose_medium()) {
3701         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3702       }
3703       statsOnly( ++_satb_buffers_processed );
3704       regular_clock_call();
3705     }
3706   }
3707 
3708   if (!concurrent() && !has_aborted()) {
3709     // We should only do this during remark.
3710     if (G1CollectedHeap::use_parallel_gc_threads()) {
3711       satb_mq_set.par_iterate_closure_all_threads(_task_id);
3712     } else {
3713       satb_mq_set.iterate_closure_all_threads();
3714     }
3715   }
3716 
3717   _draining_satb_buffers = false;
3718 
3719   assert(has_aborted() ||
3720          concurrent() ||
3721          satb_mq_set.completed_buffers_num() == 0, "invariant");
3722 
3723   if (G1CollectedHeap::use_parallel_gc_threads()) {
3724     satb_mq_set.set_par_closure(_task_id, NULL);
3725   } else {
3726     satb_mq_set.set_closure(NULL);
3727   }
3728 
3729   // again, this was a potentially expensive operation, decrease the
3730   // limits to get the regular clock call early
3731   decrease_limits();
3732 }
3733 
3734 void CMTask::print_stats() {
3735   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3736                          _task_id, _calls);
3737   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3738                          _elapsed_time_ms, _termination_time_ms);
3739   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3740                          _step_times_ms.num(), _step_times_ms.avg(),
3741                          _step_times_ms.sd());
3742   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3743                          _step_times_ms.maximum(), _step_times_ms.sum());
3744 
3745 #if _MARKING_STATS_
3746   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3747                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3748                          _all_clock_intervals_ms.sd());
3749   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3750                          _all_clock_intervals_ms.maximum(),
3751                          _all_clock_intervals_ms.sum());
3752   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
3753                          _clock_due_to_scanning, _clock_due_to_marking);
3754   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
3755                          _objs_scanned, _objs_found_on_bitmap);
3756   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
3757                          _local_pushes, _local_pops, _local_max_size);
3758   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
3759                          _global_pushes, _global_pops, _global_max_size);
3760   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
3761                          _global_transfers_to,_global_transfers_from);
3762   gclog_or_tty->print_cr("  Regions: claimed = %d", _regions_claimed);
3763   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
3764   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
3765                          _steal_attempts, _steals);
3766   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
3767   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
3768                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3769   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
3770                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3771 #endif // _MARKING_STATS_
3772 }
3773 
3774 /*****************************************************************************
3775 
3776     The do_marking_step(time_target_ms) method is the building block
3777     of the parallel marking framework. It can be called in parallel
3778     with other invocations of do_marking_step() on different tasks
3779     (but only one per task, obviously) and concurrently with the
3780     mutator threads, or during remark, hence it eliminates the need
3781     for two versions of the code. When called during remark, it will
3782     pick up from where the task left off during the concurrent marking
3783     phase. Interestingly, tasks are also claimable during evacuation
3784     pauses, since do_marking_step() ensures that it aborts before
3785     it needs to yield.
3786 
3787     The data structures that it uses to do marking work are the
3788     following:
3789 
3790       (1) Marking Bitmap. If there are gray objects that appear only
3791       on the bitmap (this happens either when dealing with an overflow
3792       or when the initial marking phase has simply marked the roots
3793       and didn't push them on the stack), then tasks claim heap
3794       regions whose bitmap they then scan to find gray objects. A
3795       global finger indicates where the end of the last claimed region
3796       is. A local finger indicates how far into the region a task has
3797       scanned. The two fingers are used to determine how to gray an
3798       object (i.e. whether simply marking it is OK, as it will be
3799       visited by a task in the future, or whether it needs to be also
3800       pushed on a stack).
3801 
3802       (2) Local Queue. The local queue of the task which is accessed
3803       reasonably efficiently by the task. Other tasks can steal from
3804       it when they run out of work. Throughout the marking phase, a
3805       task attempts to keep its local queue short but not totally
3806       empty, so that entries are available for stealing by other
3807       tasks. Only when there is no more work, a task will totally
3808       drain its local queue.
3809 
3810       (3) Global Mark Stack. This handles local queue overflow. During
3811       marking only sets of entries are moved between it and the local
3812       queues, as access to it requires a mutex and more fine-grained
3813       interaction with it, which might cause contention. If it
3814       overflows, then the marking phase should restart and iterate
3815       over the bitmap to identify gray objects. Throughout the marking
3816       phase, tasks attempt to keep the global mark stack at a small
3817       length but not totally empty, so that entries are available for
3818       popping by other tasks. Only when there is no more work, tasks
3819       will totally drain the global mark stack.
3820 
3821       (4) SATB Buffer Queue. This is where completed SATB buffers are
3822       made available. Buffers are regularly removed from this queue
3823       and scanned for roots, so that the queue doesn't get too
3824       long. During remark, all completed buffers are processed, as
3825       well as the filled in parts of any uncompleted buffers.
3826 
3827     The do_marking_step() method tries to abort when the time target
3828     has been reached. There are a few other cases when the
3829     do_marking_step() method also aborts:
3830 
3831       (1) When the marking phase has been aborted (after a Full GC).
3832 
3833       (2) When a global overflow (on the global stack) has been
3834       triggered. Before the task aborts, it will actually sync up with
3835       the other tasks to ensure that all the marking data structures
3836       (local queues, stacks, fingers etc.)  are re-initialised so that
3837       when do_marking_step() completes, the marking phase can
3838       immediately restart.
3839 
3840       (3) When enough completed SATB buffers are available. The
3841       do_marking_step() method only tries to drain SATB buffers right
3842       at the beginning. So, if enough buffers are available, the
3843       marking step aborts and the SATB buffers are processed at
3844       the beginning of the next invocation.
3845 
3846       (4) To yield. When we have to yield, we abort and yield
3847       right at the end of do_marking_step(). This saves us from a lot
3848       of hassle as, by yielding we might allow a Full GC. If this
3849       happens then objects will be compacted underneath our feet, the
3850       heap might shrink, etc. We save checking for this by just
3851       aborting and doing the yield right at the end.
3852 
3853     From the above it follows that the do_marking_step() method should
3854     be called in a loop (or, otherwise, regularly) until it completes.
3855 
3856     If a marking step completes without its has_aborted() flag being
3857     true, it means it has completed the current marking phase (and
3858     also all other marking tasks have done so and have all synced up).
3859 
3860     A method called regular_clock_call() is invoked "regularly" (in
3861     sub ms intervals) throughout marking. It is this clock method that
3862     checks all the abort conditions which were mentioned above and
3863     decides when the task should abort. A work-based scheme is used to
3864     trigger this clock method: when the number of object words the
3865     marking phase has scanned or the number of references the marking
3866     phase has visited reach a given limit. Additional invocations of
3867     the clock method have been planted in a few other strategic places
3868     too. The initial reason for the clock method was to avoid calling
3869     vtime too regularly, as it is quite expensive. So, once it was in
3870     place, it was natural to piggy-back all the other conditions on it
3871     too and not constantly check them throughout the code.
3872 
3873  *****************************************************************************/
3874 
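// Rough control-flow sketch of do_marking_step() (illustrative pseudocode
// only, see the body below for the authoritative details):
//
//   set up bitmap/oop closures; drain SATB buffers;
//   partially drain the local queue and the global stack;
//   do {
//     if (holding a region) iterate its bitmap from the local finger;
//     partially drain the local queue and the global stack;
//     while (no current region && regions remain) try claim_region();
//   } while (holding a region && !has_aborted());
//   totally drain the local queue and the global stack;
//   if (do_stealing)    steal and scan until nothing is left or we abort;
//   if (do_termination) offer_termination();
//   if we aborted due to overflow, sync up with the other tasks and reset.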
3875 void CMTask::do_marking_step(double time_target_ms,
3876                              bool do_stealing,
3877                              bool do_termination) {
3878   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3879   assert(concurrent() == _cm->concurrent(), "they should be the same");
3880 
3881   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3882   assert(_task_queues != NULL, "invariant");
3883   assert(_task_queue != NULL, "invariant");
3884   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
3885 
3886   assert(!_claimed,
3887          "only one thread should claim this task at any one time");
3888 
3889   // OK, this doesn't safeguard against all possible scenarios, as it is
3890   // possible for two threads to set the _claimed flag at the same
3891   // time. But it is only for debugging purposes anyway and it will
3892   // catch most problems.
3893   _claimed = true;
3894 
3895   _start_time_ms = os::elapsedVTime() * 1000.0;
3896   statsOnly( _interval_start_time_ms = _start_time_ms );
3897 
3898   double diff_prediction_ms =
3899     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3900   _time_target_ms = time_target_ms - diff_prediction_ms;
3901 
3902   // set up the variables that are used in the work-based scheme to
3903   // call the regular clock method
3904   _words_scanned = 0;
3905   _refs_reached  = 0;
3906   recalculate_limits();
3907 
3908   // clear all flags
3909   clear_has_aborted();
3910   _has_timed_out = false;
3911   _draining_satb_buffers = false;
3912 
3913   ++_calls;
3914 
3915   if (_cm->verbose_low()) {
3916     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3917                            "target = %1.2lfms >>>>>>>>>>",
3918                            _task_id, _calls, _time_target_ms);
3919   }
3920 
3921   // Set up the bitmap and oop closures. Anything that uses them is
3922   // eventually called from this method, so it is OK to allocate these
3923   // locally (on the stack).
3924   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3925   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
3926   set_cm_oop_closure(&cm_oop_closure);
3927 
3928   if (_cm->has_overflown()) {
3929     // This can happen if the mark stack overflows during a GC pause
3930     // and this task, after a yield point, restarts. We have to abort
3931     // as we need to get into the overflow protocol which happens
3932     // right at the end of this task.
3933     set_has_aborted();
3934   }
3935 
3936   // First drain any available SATB buffers. After this, we will not
3937   // look at SATB buffers before the next invocation of this method.
3938   // If enough completed SATB buffers are queued up, the regular clock
3939   // will abort this task so that it restarts.
3940   drain_satb_buffers();
3941   // ...then partially drain the local queue and the global stack
3942   drain_local_queue(true);
3943   drain_global_stack(true);
3944 
3945   do {
3946     if (!has_aborted() && _curr_region != NULL) {
3947       // This means that we're already holding on to a region.
3948       assert(_finger != NULL, "if region is not NULL, then the finger "
3949              "should not be NULL either");
3950 
3951       // We might have restarted this task after an evacuation pause
3952       // which might have evacuated the region we're holding on to
3953       // underneath our feet. Let's read its limit again to make sure
3954       // that we do not iterate over a region of the heap that
3955       // contains garbage (update_region_limit() will also move
3956       // _finger to the start of the region if it is found empty).
3957       update_region_limit();
3958       // We will start from _finger not from the start of the region,
3959       // as we might be restarting this task after aborting half-way
3960       // through scanning this region. In this case, _finger points to
3961       // the address where we last found a marked object. If this is a
3962       // fresh region, _finger points to start().
3963       MemRegion mr = MemRegion(_finger, _region_limit);
3964 
3965       if (_cm->verbose_low()) {
3966         gclog_or_tty->print_cr("[%d] we're scanning part "
3967                                "["PTR_FORMAT", "PTR_FORMAT") "
3968                                "of region "PTR_FORMAT,
3969                                _task_id, _finger, _region_limit, _curr_region);
3970       }
3971 
3972       // Let's iterate over the bitmap of the part of the
3973       // region that is left.
3974       if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3975         // We successfully completed iterating over the region. Now,
3976         // let's give up the region.
3977         giveup_current_region();
3978         regular_clock_call();
3979       } else {
3980         assert(has_aborted(), "currently the only way to do so");
3981         // The only way to abort the bitmap iteration is to return
3982         // false from the do_bit() method. However, inside the
3983         // do_bit() method we move the _finger to point to the
3984         // object currently being looked at. So, if we bail out, we
3985         // have definitely set _finger to something non-null.
3986         assert(_finger != NULL, "invariant");
3987 
3988         // Region iteration was actually aborted. So now _finger
3989         // points to the address of the object we last scanned. If we
3990         // leave it there, when we restart this task, we will rescan
3991         // the object. It is easy to avoid this. We move the finger by
3992         // enough to point to the next possible object header (the
3993         // bitmap knows by how much we need to move it as it knows its
3994         // granularity).
3995         assert(_finger < _region_limit, "invariant");
3996         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
3997         // Check if bitmap iteration was aborted while scanning the last object
3998         if (new_finger >= _region_limit) {
3999           giveup_current_region();
4000         } else {
4001           move_finger_to(new_finger);
4002         }
4003       }
4004     }
4005     // At this point we have either completed iterating over the
4006     // region we were holding on to, or we have aborted.
4007 
4008     // We then partially drain the local queue and the global stack.
4009     // (Do we really need this?)
4010     drain_local_queue(true);
4011     drain_global_stack(true);
4012 
4013     // Read the note on the claim_region() method on why it might
4014     // return NULL with potentially more regions available for
4015     // claiming and why we have to check out_of_regions() to determine
4016     // whether we're done or not.
4017     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4018       // We are going to try to claim a new region. We should have
4019       // given up on the previous one.
4020       // Separated the asserts so that we know which one fires.
4021       assert(_curr_region  == NULL, "invariant");
4022       assert(_finger       == NULL, "invariant");
4023       assert(_region_limit == NULL, "invariant");
4024       if (_cm->verbose_low()) {
4025         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
4026       }
4027       HeapRegion* claimed_region = _cm->claim_region(_task_id);
4028       if (claimed_region != NULL) {
4029         // Yes, we managed to claim one
4030         statsOnly( ++_regions_claimed );
4031 
4032         if (_cm->verbose_low()) {
4033           gclog_or_tty->print_cr("[%d] we successfully claimed "
4034                                  "region "PTR_FORMAT,
4035                                  _task_id, claimed_region);
4036         }
4037 
4038         setup_for_region(claimed_region);
4039         assert(_curr_region == claimed_region, "invariant");
4040       }
4041       // It is important to call the regular clock here. It might take
4042       // a while to claim a region if, for example, we hit a large
4043       // block of empty regions. So we need to call the regular clock
4044       // method once round the loop to make sure it's called
4045       // frequently enough.
4046       regular_clock_call();
4047     }
4048 
4049     if (!has_aborted() && _curr_region == NULL) {
4050       assert(_cm->out_of_regions(),
4051              "at this point we should be out of regions");
4052     }
4053   } while ( _curr_region != NULL && !has_aborted());
4054 
4055   if (!has_aborted()) {
4056     // We cannot check whether the global stack is empty, since other
4057     // tasks might be pushing objects to it concurrently.
4058     assert(_cm->out_of_regions(),
4059            "at this point we should be out of regions");
4060 
4061     if (_cm->verbose_low()) {
4062       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4063     }
4064 
4065     // Try to reduce the number of available SATB buffers so that
4066     // remark has less work to do.
4067     drain_satb_buffers();
4068   }
4069 
4070   // Since we've done everything else, we can now totally drain the
4071   // local queue and global stack.
4072   drain_local_queue(false);
4073   drain_global_stack(false);
4074 
4075   // Attempt at work stealing from other task's queues.
4076   if (do_stealing && !has_aborted()) {
4077     // We have not aborted. This means that we have finished all that
4078     // we could. Let's try to do some stealing...
4079 
4080     // We cannot check whether the global stack is empty, since other
4081     // tasks might be pushing objects to it concurrently.
4082     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4083            "only way to reach here");
4084 
4085     if (_cm->verbose_low()) {
4086       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4087     }
4088 
4089     while (!has_aborted()) {
4090       oop obj;
4091       statsOnly( ++_steal_attempts );
4092 
4093       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4094         if (_cm->verbose_medium()) {
4095           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4096                                  _task_id, (void*) obj);
4097         }
4098 
4099         statsOnly( ++_steals );
4100 
4101         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4102                "any stolen object should be marked");
4103         scan_object(obj);
4104 
4105         // And since we're towards the end, let's totally drain the
4106         // local queue and global stack.
4107         drain_local_queue(false);
4108         drain_global_stack(false);
4109       } else {
4110         break;
4111       }
4112     }
4113   }
4114 
4115   // If we are about to wrap up and go into termination, check if we
4116   // should raise the overflow flag.
4117   if (do_termination && !has_aborted()) {
4118     if (_cm->force_overflow()->should_force()) {
4119       _cm->set_has_overflown();
4120       regular_clock_call();
4121     }
4122   }
4123 
4124   // We still haven't aborted. Now, let's try to get into the
4125   // termination protocol.
4126   if (do_termination && !has_aborted()) {
4127     // We cannot check whether the global stack is empty, since other
4128     // tasks might be concurrently pushing objects on it.
4129     // Separated the asserts so that we know which one fires.
4130     assert(_cm->out_of_regions(), "only way to reach here");
4131     assert(_task_queue->size() == 0, "only way to reach here");
4132 
4133     if (_cm->verbose_low()) {
4134       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4135     }
4136 
4137     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4138     // The CMTask class also extends the TerminatorTerminator class,
4139     // hence its should_exit_termination() method will also decide
4140     // whether to exit the termination protocol or not.
4141     bool finished = _cm->terminator()->offer_termination(this);
4142     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4143     _termination_time_ms +=
4144       termination_end_time_ms - _termination_start_time_ms;
4145 
4146     if (finished) {
4147       // We're all done.
4148 
4149       if (_task_id == 0) {
4150         // let's allow task 0 to do this
4151         if (concurrent()) {
4152           assert(_cm->concurrent_marking_in_progress(), "invariant");
4153           // we need to set this to false before the next
4154           // safepoint. This way we ensure that the marking phase
4155           // doesn't observe any more heap expansions.
4156           _cm->clear_concurrent_marking_in_progress();
4157         }
4158       }
4159 
4160       // We can now guarantee that the global stack is empty, since
4161       // all other tasks have finished. We separated the guarantees so
4162       // that, if a condition is false, we can immediately find out
4163       // which one.
4164       guarantee(_cm->out_of_regions(), "only way to reach here");
4165       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4166       guarantee(_task_queue->size() == 0, "only way to reach here");
4167       guarantee(!_cm->has_overflown(), "only way to reach here");
4168       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4169 
4170       if (_cm->verbose_low()) {
4171         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4172       }
4173     } else {
4174       // Apparently there's more work to do. Let's abort this task; it
4175       // will be restarted and we can hopefully find more things to do.
4176 
4177       if (_cm->verbose_low()) {
4178         gclog_or_tty->print_cr("[%d] apparently there is more work to do",
4179                                _task_id);
4180       }
4181 
4182       set_has_aborted();
4183       statsOnly( ++_aborted_termination );
4184     }
4185   }
4186 
4187   // Mainly for debugging purposes to make sure that a pointer to the
4188   // closure which was allocated on the stack in this frame doesn't
4189   // escape it by accident.
4190   set_cm_oop_closure(NULL);
4191   double end_time_ms = os::elapsedVTime() * 1000.0;
4192   double elapsed_time_ms = end_time_ms - _start_time_ms;
4193   // Update the step history.
4194   _step_times_ms.add(elapsed_time_ms);
4195 
4196   if (has_aborted()) {
4197     // The task was aborted for some reason.
4198 
4199     statsOnly( ++_aborted );
4200 
4201     if (_has_timed_out) {
4202       double diff_ms = elapsed_time_ms - _time_target_ms;
4203       // Keep statistics of how well we did with respect to hitting
4204       // our target only if we actually timed out (if we aborted for
4205       // other reasons, then the results might get skewed).
4206       _marking_step_diffs_ms.add(diff_ms);
4207     }
4208 
4209     if (_cm->has_overflown()) {
4210       // This is the interesting one. We aborted because a global
4211       // overflow was raised. This means we have to restart the
4212       // marking phase and start iterating over regions. However, in
4213       // order to do this we have to make sure that all tasks stop
4214       // what they are doing and re-initialise in a safe manner. We
4215       // will achieve this with the use of two barrier sync points.
4216 
4217       if (_cm->verbose_low()) {
4218         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4219       }
4220 
4221       _cm->enter_first_sync_barrier(_task_id);
4222       // When we exit this sync barrier we know that all tasks have
4223       // stopped doing marking work. So, it's now safe to
4224       // re-initialise our data structures. At the end of this method,
4225       // task 0 will clear the global data structures.
4226 
4227       statsOnly( ++_aborted_overflow );
4228 
4229       // We clear the local state of this task...
4230       clear_region_fields();
4231 
4232       // ...and enter the second barrier.
4233       _cm->enter_second_sync_barrier(_task_id);
4234       // At this point everything has been re-initialised and we're
4235       // ready to restart.
4236     }
4237 
4238     if (_cm->verbose_low()) {
4239       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4240                              "elapsed = %1.2lfms <<<<<<<<<<",
4241                              _task_id, _time_target_ms, elapsed_time_ms);
4242       if (_cm->has_aborted()) {
4243         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4244                                _task_id);
4245       }
4246     }
4247   } else {
4248     if (_cm->verbose_low()) {
4249       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4250                              "elapsed = %1.2lfms <<<<<<<<<<",
4251                              _task_id, _time_target_ms, elapsed_time_ms);
4252     }
4253   }
4254 
4255   _claimed = false;
4256 }
4257 
4258 CMTask::CMTask(int task_id,
4259                ConcurrentMark* cm,
4260                size_t* marked_bytes,
4261                BitMap* card_bm,
4262                CMTaskQueue* task_queue,
4263                CMTaskQueueSet* task_queues)
4264   : _g1h(G1CollectedHeap::heap()),
4265     _task_id(task_id), _cm(cm),
4266     _claimed(false),
4267     _nextMarkBitMap(NULL), _hash_seed(17),
4268     _task_queue(task_queue),
4269     _task_queues(task_queues),
4270     _cm_oop_closure(NULL),
4271     _marked_bytes_array(marked_bytes),
4272     _card_bm(card_bm) {
4273   guarantee(task_queue != NULL, "invariant");
4274   guarantee(task_queues != NULL, "invariant");
4275 
4276   statsOnly( _clock_due_to_scanning = 0;
4277              _clock_due_to_marking  = 0 );
4278 
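       // Seed the step time difference history with a small value so that
       // early predictions are not based on an empty sequence (this looks
       // like an initial seed rather than a measured sample).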
4279   _marking_step_diffs_ms.add(0.5);
4280 }
4281 
4282 // These are formatting macros that are used below to ensure
4283 // consistent formatting. The *_H_* versions are used to format the
4284 // header for a particular value and they should be kept consistent
4285 // with the corresponding macro. Also note that most of the macros add
4286 // the necessary white space (as a prefix) which makes them a bit
4287 // easier to compose.
4288 
4289 // All the output lines are prefixed with this string to be able to
4290 // identify them easily in a large log file.
4291 #define G1PPRL_LINE_PREFIX            "###"
4292 
4293 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
4294 #ifdef _LP64
4295 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4296 #else // _LP64
4297 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4298 #endif // _LP64
4299 
4300 // For per-region info
4301 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4302 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4303 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
4304 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4305 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4306 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4307 
4308 // For summary info
4309 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
4310 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
4311 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
4312 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
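
     // As an illustration, a per-region line built from the macros above has
     // the following shape (exact field widths are platform-dependent):
     //   ### <type> <bottom>-<end>  <used>  <prev-live>  <next-live>  <gc-eff>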
4313 
4314 G1PrintRegionLivenessInfoClosure::
4315 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4316   : _out(out),
4317     _total_used_bytes(0), _total_capacity_bytes(0),
4318     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4319     _hum_used_bytes(0), _hum_capacity_bytes(0),
4320     _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4321   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4322   MemRegion g1_committed = g1h->g1_committed();
4323   MemRegion g1_reserved = g1h->g1_reserved();
4324   double now = os::elapsedTime();
4325 
4326   // Print the header of the output.
4327   _out->cr();
4328   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4329   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4330                  G1PPRL_SUM_ADDR_FORMAT("committed")
4331                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4332                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4333                  g1_committed.start(), g1_committed.end(),
4334                  g1_reserved.start(), g1_reserved.end(),
4335                  HeapRegion::GrainBytes);
4336   _out->print_cr(G1PPRL_LINE_PREFIX);
4337   _out->print_cr(G1PPRL_LINE_PREFIX
4338                  G1PPRL_TYPE_H_FORMAT
4339                  G1PPRL_ADDR_BASE_H_FORMAT
4340                  G1PPRL_BYTE_H_FORMAT
4341                  G1PPRL_BYTE_H_FORMAT
4342                  G1PPRL_BYTE_H_FORMAT
4343                  G1PPRL_DOUBLE_H_FORMAT,
4344                  "type", "address-range",
4345                  "used", "prev-live", "next-live", "gc-eff");
4346   _out->print_cr(G1PPRL_LINE_PREFIX
4347                  G1PPRL_TYPE_H_FORMAT
4348                  G1PPRL_ADDR_BASE_H_FORMAT
4349                  G1PPRL_BYTE_H_FORMAT
4350                  G1PPRL_BYTE_H_FORMAT
4351                  G1PPRL_BYTE_H_FORMAT
4352                  G1PPRL_DOUBLE_H_FORMAT,
4353                  "", "",
4354                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4355 }
4356 
4357 // Takes a pointer to one of the _hum_* fields, deduces the
4358 // corresponding value for a region in a humongous region series
4359 // (either the region size, or what's left if the _hum_* field is
4360 // less than the region size), and updates the _hum_* field accordingly.
4361 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4362   size_t bytes = 0;
4363   // The > 0 check is to deal with the prev and next live bytes which
4364   // could be 0.
4365   if (*hum_bytes > 0) {
4366     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4367     *hum_bytes -= bytes;
4368   }
4369   return bytes;
4370 }
4371 
4372 // It deduces the values for a region in a humongous region series
4373 // from the _hum_* fields and updates those accordingly. It assumes
4374 // that the _hum_* fields have already been set up from the "starts
4375 // humongous" region and that we visit the regions in address order.
4376 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4377                                                      size_t* capacity_bytes,
4378                                                      size_t* prev_live_bytes,
4379                                                      size_t* next_live_bytes) {
4380   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4381   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4382   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4383   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4384   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4385 }
4386 
4387 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4388   const char* type = "";
4389   HeapWord* bottom       = r->bottom();
4390   HeapWord* end          = r->end();
4391   size_t capacity_bytes  = r->capacity();
4392   size_t used_bytes      = r->used();
4393   size_t prev_live_bytes = r->live_bytes();
4394   size_t next_live_bytes = r->next_live_bytes();
4395   double gc_eff          = r->gc_efficiency();
4396   if (r->used() == 0) {
4397     type = "FREE";
4398   } else if (r->is_survivor()) {
4399     type = "SURV";
4400   } else if (r->is_young()) {
4401     type = "EDEN";
4402   } else if (r->startsHumongous()) {
4403     type = "HUMS";
4404 
4405     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4406            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4407            "they should have been zeroed after the last time we used them");
4408     // Set up the _hum_* fields.
4409     _hum_capacity_bytes  = capacity_bytes;
4410     _hum_used_bytes      = used_bytes;
4411     _hum_prev_live_bytes = prev_live_bytes;
4412     _hum_next_live_bytes = next_live_bytes;
4413     get_hum_bytes(&used_bytes, &capacity_bytes,
4414                   &prev_live_bytes, &next_live_bytes);
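         // Report the "starts humongous" region as a single, normally-sized
         // region; the bytes that spill over it are attributed to the
         // "continues humongous" regions that follow.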
4415     end = bottom + HeapRegion::GrainWords;
4416   } else if (r->continuesHumongous()) {
4417     type = "HUMC";
4418     get_hum_bytes(&used_bytes, &capacity_bytes,
4419                   &prev_live_bytes, &next_live_bytes);
4420     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4421   } else {
4422     type = "OLD";
4423   }
4424 
4425   _total_used_bytes      += used_bytes;
4426   _total_capacity_bytes  += capacity_bytes;
4427   _total_prev_live_bytes += prev_live_bytes;
4428   _total_next_live_bytes += next_live_bytes;
4429 
4430   // Print a line for this particular region.
4431   _out->print_cr(G1PPRL_LINE_PREFIX
4432                  G1PPRL_TYPE_FORMAT
4433                  G1PPRL_ADDR_BASE_FORMAT
4434                  G1PPRL_BYTE_FORMAT
4435                  G1PPRL_BYTE_FORMAT
4436                  G1PPRL_BYTE_FORMAT
4437                  G1PPRL_DOUBLE_FORMAT,
4438                  type, bottom, end,
4439                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4440 
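       // Returning false keeps the heap region iteration going so that every
       // region in the heap gets its own line in the output.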
4441   return false;
4442 }
4443 
4444 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4445   // Print the footer of the output.
4446   _out->print_cr(G1PPRL_LINE_PREFIX);
4447   _out->print_cr(G1PPRL_LINE_PREFIX
4448                  " SUMMARY"
4449                  G1PPRL_SUM_MB_FORMAT("capacity")
4450                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4451                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4452                  G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4453                  bytes_to_mb(_total_capacity_bytes),
4454                  bytes_to_mb(_total_used_bytes),
4455                  perc(_total_used_bytes, _total_capacity_bytes),
4456                  bytes_to_mb(_total_prev_live_bytes),
4457                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4458                  bytes_to_mb(_total_next_live_bytes),
4459                  perc(_total_next_live_bytes, _total_capacity_bytes));
4460   _out->cr();
4461 }