/*
 * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 *
 */

#include "precompiled.hpp"
#include "classfile/symbolTable.hpp"
#include "gc_implementation/g1/concurrentMark.inline.hpp"
#include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
#include "gc_implementation/g1/g1CollectorPolicy.hpp"
#include "gc_implementation/g1/g1ErgoVerbose.hpp"
#include "gc_implementation/g1/g1Log.hpp"
#include "gc_implementation/g1/g1OopClosures.inline.hpp"
#include "gc_implementation/g1/g1RemSet.hpp"
#include "gc_implementation/g1/heapRegion.inline.hpp"
#include "gc_implementation/g1/heapRegionRemSet.hpp"
#include "gc_implementation/g1/heapRegionSeq.inline.hpp"
#include "gc_implementation/shared/vmGCOperations.hpp"
#include "gc_implementation/shared/gcTimer.hpp"
#include "gc_implementation/shared/gcTrace.hpp"
#include "gc_implementation/shared/gcTraceTime.hpp"
#include "memory/genOopClosures.inline.hpp"
#include "memory/referencePolicy.hpp"
#include "memory/resourceArea.hpp"
#include "oops/oop.inline.hpp"
#include "runtime/handles.inline.hpp"
#include "runtime/java.hpp"
#include "services/memTracker.hpp"

// Concurrent marking bit map wrapper

CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter) :
  _bm((uintptr_t*)NULL,0),
  _shifter(shifter) {
  _bmStartWord = (HeapWord*)(rs.base());
  _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  ReservedSpace brs(ReservedSpace::allocation_align_size_up(
                     (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));

  MemTracker::record_virtual_memory_type((address)brs.base(), mtGC);

  guarantee(brs.is_reserved(), "couldn't allocate concurrent marking bit map");
  // For now we'll just commit all of the bit map up front.
  // Later on we'll try to be more parsimonious with swap.
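  // Sizing note: each bit of the map covers (1 << _shifter) heap words, so
  // the backing store needs (_bmWordSize >> (_shifter + LogBitsPerByte))
  // bytes; the extra byte requested above covers rounding.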
  guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for concurrent marking bit map");
  assert(_virtual_space.committed_size() == brs.size(),
         "didn't reserve backing store for all of concurrent marking bit map?");
  _bm.set_map((uintptr_t*)_virtual_space.low());
  assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
         _bmWordSize, "inconsistency in bit map sizing");
  _bm.set_size(_bmWordSize >> _shifter);
}

HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
                                               HeapWord* limit) const {
  // First we must round addr *up* to a possible object boundary.
  addr = (HeapWord*)align_size_up((intptr_t)addr,
                                  HeapWordSize << _shifter);
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_one postcondition");
  assert(nextAddr == limit || isMarked(nextAddr),
         "get_next_one postcondition");
  return nextAddr;
}

HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
                                                 HeapWord* limit) const {
  size_t addrOffset = heapWordToOffset(addr);
  if (limit == NULL) {
    limit = _bmStartWord + _bmWordSize;
  }
  size_t limitOffset = heapWordToOffset(limit);
  size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  assert(nextAddr >= addr, "get_next_zero postcondition");
  assert(nextAddr == limit || !isMarked(nextAddr),
         "get_next_zero postcondition");
  return nextAddr;
}

int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  return (int) (diff >> _shifter);
}

#ifndef PRODUCT
bool CMBitMapRO::covers(ReservedSpace rs) const {
  // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
  assert(((size_t)_bm.size() * ((size_t)1 << _shifter)) == _bmWordSize,
         "size inconsistency");
  return _bmStartWord == (HeapWord*)(rs.base()) &&
         _bmWordSize  == rs.size()>>LogHeapWordSize;
}
#endif

void CMBitMap::clearAll() {
  _bm.clear();
  return;
}

void CMBitMap::markRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
          ((HeapWord *) mr.end())),
         "markRange memory region end is not card aligned");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), true);
}

void CMBitMap::clearRange(MemRegion mr) {
  mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
  assert(!mr.is_empty(), "unexpected empty region");
  // convert address range into offset range
  _bm.at_put_range(heapWordToOffset(mr.start()),
                   heapWordToOffset(mr.end()), false);
}

MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
                                            HeapWord* end_addr) {
  HeapWord* start = getNextMarkedWordAddress(addr);
  start = MIN2(start, end_addr);
  HeapWord* end   = getNextUnmarkedWordAddress(start);
  end = MIN2(end, end_addr);
  assert(start <= end, "Consistency check");
  MemRegion mr(start, end);
  if (!mr.is_empty()) {
    clearRange(mr);
  }
  return mr;
}

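// CMMarkStack is the global mark stack shared by all marking tasks.
// Concurrent pushes are coordinated either lock-free, via CAS on _index
// (par_push / par_adjoin_arr), or under ParGCRareEvent_lock
// (par_push_arr / par_pop_arr). When the stack is exhausted the _overflow
// flag is raised rather than growing the stack.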
CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
  _base(NULL), _cm(cm)
#ifdef ASSERT
  , _drain_in_progress(false)
  , _drain_in_progress_yields(false)
#endif
{}

void CMMarkStack::allocate(size_t size) {
  _base = NEW_C_HEAP_ARRAY(oop, size, mtGC);
  if (_base == NULL) {
    vm_exit_during_initialization("Failed to allocate CM region mark stack");
  }
  _index = 0;
  _capacity = (jint) size;
  _saved_index = -1;
  NOT_PRODUCT(_max_depth = 0);
}

CMMarkStack::~CMMarkStack() {
  if (_base != NULL) {
    FREE_C_HEAP_ARRAY(oop, _base, mtGC);
  }
}

void CMMarkStack::par_push(oop ptr) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index+1;
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      _base[index] = ptr;
      // Note that we don't maintain this atomically. We could, but it
      // doesn't seem necessary.
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
  while (true) {
    if (isFull()) {
      _overflow = true;
      return;
    }
    // Otherwise...
    jint index = _index;
    jint next_index = index + n;
    if (next_index > _capacity) {
      _overflow = true;
      return;
    }
    jint res = Atomic::cmpxchg(next_index, &_index, index);
    if (res == index) {
      for (int i = 0; i < n; i++) {
        int ind = index + i;
        assert(ind < _capacity, "By overflow test above.");
        _base[ind] = ptr_arr[i];
      }
      NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
      return;
    }
    // Otherwise, we need to try again.
  }
}

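// The two operations below serialize on ParGCRareEvent_lock instead of
// using CAS; as the lock name suggests, they are only expected on rare
// paths, where taking a lock is simpler than a retry loop.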
void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint start = _index;
  jint next_index = start + n;
  if (next_index > _capacity) {
    _overflow = true;
    return;
  }
  // Otherwise.
  _index = next_index;
  for (int i = 0; i < n; i++) {
    int ind = start + i;
    assert(ind < _capacity, "By overflow test above.");
    _base[ind] = ptr_arr[i];
  }
}

bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
  MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
  jint index = _index;
  if (index == 0) {
    *n = 0;
    return false;
  } else {
    int k = MIN2(max, index);
    jint new_ind = index - k;
    for (int j = 0; j < k; j++) {
      ptr_arr[j] = _base[new_ind + j];
    }
    _index = new_ind;
    *n = k;
    return true;
  }
}

template<class OopClosureClass>
bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
  assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
         || SafepointSynchronize::is_at_safepoint(),
         "Drain recursion must be yield-safe.");
  bool res = true;
  debug_only(_drain_in_progress = true);
  debug_only(_drain_in_progress_yields = yield_after);
  while (!isEmpty()) {
    oop newOop = pop();
    assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
    assert(newOop->is_oop(), "Expected an oop");
    assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
           "only grey objects on this stack");
    newOop->oop_iterate(cl);
    if (yield_after && _cm->do_yield_check()) {
      res = false;
      break;
    }
  }
  debug_only(_drain_in_progress = false);
  return res;
}

void CMMarkStack::note_start_of_gc() {
  assert(_saved_index == -1,
         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
  _saved_index = _index;
}

void CMMarkStack::note_end_of_gc() {
  // This is intentionally a guarantee, instead of an assert. If we
  // accidentally add something to the mark stack during GC, it
  // will be a correctness issue so it's better if we crash. We'll
  // only check this once per GC anyway, so it won't be a performance
  // issue in any way.
  guarantee(_saved_index == _index,
            err_msg("saved index: %d index: %d", _saved_index, _index));
  _saved_index = -1;
}

void CMMarkStack::oops_do(OopClosure* f) {
  assert(_saved_index == _index,
         err_msg("saved index: %d index: %d", _saved_index, _index));
  for (int i = 0; i < _index; i += 1) {
    f->do_oop(&_base[i]);
  }
}

bool ConcurrentMark::not_yet_marked(oop obj) const {
  return (_g1h->is_obj_ill(obj)
          || (_g1h->is_in_permanent(obj)
              && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
}

CMRootRegions::CMRootRegions() :
  _young_list(NULL), _cm(NULL), _scan_in_progress(false),
  _should_abort(false), _next_survivor(NULL) { }

void CMRootRegions::init(G1CollectedHeap* g1h, ConcurrentMark* cm) {
  _young_list = g1h->young_list();
  _cm = cm;
}

void CMRootRegions::prepare_for_scan() {
  assert(!scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  assert(_next_survivor == NULL, "pre-condition");
  _next_survivor = _young_list->first_survivor_region();
  _scan_in_progress = (_next_survivor != NULL);
  _should_abort = false;
}

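// claim_next() hands out one root region per call to competing workers.
// It reads _next_survivor optimistically and then re-reads it under
// RootRegionScan_lock (a double-checked pattern), so a losing thread
// simply observes NULL or the next unclaimed region.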
HeapRegion* CMRootRegions::claim_next() {
  if (_should_abort) {
    // If someone has set the should_abort flag, we return NULL to
    // force the caller to bail out of their loop.
    return NULL;
  }

  // Currently, only survivors can be root regions.
  HeapRegion* res = _next_survivor;
  if (res != NULL) {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    // Read it again in case it changed while we were waiting for the lock.
    res = _next_survivor;
    if (res != NULL) {
      if (res == _young_list->last_survivor_region()) {
        // We just claimed the last survivor so store NULL to indicate
        // that we're done.
        _next_survivor = NULL;
      } else {
        _next_survivor = res->get_next_young_region();
      }
    } else {
      // Someone else claimed the last survivor while we were trying
      // to take the lock so nothing else to do.
    }
  }
  assert(res == NULL || res->is_survivor(), "post-condition");

  return res;
}

void CMRootRegions::scan_finished() {
  assert(scan_in_progress(), "pre-condition");

  // Currently, only survivors can be root regions.
  if (!_should_abort) {
    assert(_next_survivor == NULL, "we should have claimed all survivors");
  }
  _next_survivor = NULL;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    _scan_in_progress = false;
    RootRegionScan_lock->notify_all();
  }
}

bool CMRootRegions::wait_until_scan_finished() {
  if (!scan_in_progress()) return false;

  {
    MutexLockerEx x(RootRegionScan_lock, Mutex::_no_safepoint_check_flag);
    while (scan_in_progress()) {
      RootRegionScan_lock->wait(Mutex::_no_safepoint_check_flag);
    }
  }
  return true;
}

#ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
#endif // _MSC_VER

uint ConcurrentMark::scale_parallel_threads(uint n_par_threads) {
  return MAX2((n_par_threads + 2) / 4, 1U);
}

ConcurrentMark::ConcurrentMark(ReservedSpace rs, uint max_regions) :
  _markBitMap1(rs, log2_intptr(MinObjAlignment)),
  _markBitMap2(rs, log2_intptr(MinObjAlignment)),

  _parallel_marking_threads(0),
  _max_parallel_marking_threads(0),
  _sleep_factor(0.0),
  _marking_task_overhead(1.0),
  _cleanup_sleep_factor(0.0),
  _cleanup_task_overhead(1.0),
  _cleanup_list("Cleanup List"),
  _region_bm((BitMap::idx_t) max_regions, false /* in_resource_area*/),
  _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
            CardTableModRefBS::card_shift,
            false /* in_resource_area*/),

  _prevMarkBitMap(&_markBitMap1),
  _nextMarkBitMap(&_markBitMap2),

  _markStack(this),
  // _finger set in set_non_marking_state

  _max_task_num(MAX2((uint)ParallelGCThreads, 1U)),
  // _active_tasks set in set_non_marking_state
  // _tasks set inside the constructor
  _task_queues(new CMTaskQueueSet((int) _max_task_num)),
  _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),

  _has_overflown(false),
  _concurrent(false),
  _has_aborted(false),
  _restart_for_overflow(false),
  _concurrent_marking_in_progress(false),

  // _verbose_level set below

  _init_times(),
  _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
  _cleanup_times(),
  _total_counting_time(0.0),
  _total_rs_scrub_time(0.0),

  _parallel_workers(NULL),

  _count_card_bitmaps(NULL),
  _count_marked_bytes(NULL) {
  CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
  if (verbose_level < no_verbose) {
    verbose_level = no_verbose;
  }
  if (verbose_level > high_verbose) {
    verbose_level = high_verbose;
  }
  _verbose_level = verbose_level;

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
                           "heap end = "PTR_FORMAT, _heap_start, _heap_end);
  }

  _markStack.allocate(MarkStackSize);

  // Create & start a ConcurrentMark thread.
  _cmThread = new ConcurrentMarkThread(this);
  assert(cmThread() != NULL, "CM Thread should have been created");
  assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");

  _g1h = G1CollectedHeap::heap();
  assert(CGC_lock != NULL, "Where's the CGC_lock?");
  assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
  assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");

  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
  satb_qs.set_buffer_size(G1SATBBufferSize);

  _root_regions.init(_g1h, this);

  _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num, mtGC);
  _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num, mtGC);

  _count_card_bitmaps = NEW_C_HEAP_ARRAY(BitMap,  _max_task_num, mtGC);
  _count_marked_bytes = NEW_C_HEAP_ARRAY(size_t*, _max_task_num, mtGC);

  BitMap::idx_t card_bm_size = _card_bm.size();

  // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
  _active_tasks = _max_task_num;
  for (int i = 0; i < (int) _max_task_num; ++i) {
    CMTaskQueue* task_queue = new CMTaskQueue();
    task_queue->initialize();
    _task_queues->register_queue(i, task_queue);

    _count_card_bitmaps[i] = BitMap(card_bm_size, false);
    _count_marked_bytes[i] = NEW_C_HEAP_ARRAY(size_t, (size_t) max_regions, mtGC);

    _tasks[i] = new CMTask(i, this,
                           _count_marked_bytes[i],
                           &_count_card_bitmaps[i],
                           task_queue, _task_queues);

    _accum_task_vtime[i] = 0.0;
  }

  // Calculate the card number for the bottom of the heap. Used
  // in biasing indexes into the accounting card bitmaps.
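  // (Presumably card_bitmap_index_for() subtracts this value, so that a
  // heap address maps to bit (addr >> card_shift) - _heap_bottom_card_num
  // in the per-task card bitmaps.)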
  _heap_bottom_card_num =
    intptr_t(uintptr_t(_g1h->reserved_region().start()) >>
                       CardTableModRefBS::card_shift);

  // Clear all the liveness counting data
  clear_all_count_data();

  if (ConcGCThreads > ParallelGCThreads) {
    vm_exit_during_initialization("Can't have more ConcGCThreads "
                                  "than ParallelGCThreads.");
  }
  if (ParallelGCThreads == 0) {
    // if we are not running with any parallel GC threads we will not
    // spawn any marking threads either
    _parallel_marking_threads     = 0;
    _max_parallel_marking_threads = 0;
    _sleep_factor                 = 0.0;
    _marking_task_overhead        = 1.0;
  } else {
    if (!FLAG_IS_DEFAULT(ConcGCThreads) && ConcGCThreads > 0) {
      // Note: ConcGCThreads has precedence over G1MarkingOverheadPercent
      // if both are set
      _sleep_factor          = 0.0;
      _marking_task_overhead = 1.0;
    } else if (G1MarkingOverheadPercent > 0) {
      // We will calculate the number of parallel marking threads based
      // on a target overhead with respect to the soft real-time goal
      double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
      double overall_cm_overhead =
        (double) MaxGCPauseMillis * marking_overhead /
        (double) GCPauseIntervalMillis;
      double cpu_ratio = 1.0 / (double) os::processor_count();
      double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
      double marking_task_overhead =
        overall_cm_overhead / marking_thread_num *
        (double) os::processor_count();
      double sleep_factor =
        (1.0 - marking_task_overhead) / marking_task_overhead;

      FLAG_SET_ERGO(uintx, ConcGCThreads, (uint) marking_thread_num);
      _sleep_factor          = sleep_factor;
      _marking_task_overhead = marking_task_overhead;
    } else {
      // Calculate the number of parallel marking threads by scaling
      // the number of parallel GC threads.
      uint marking_thread_num = scale_parallel_threads((uint) ParallelGCThreads);
      FLAG_SET_ERGO(uintx, ConcGCThreads, marking_thread_num);
      _sleep_factor          = 0.0;
      _marking_task_overhead = 1.0;
    }

    assert(ConcGCThreads > 0, "Should have been set");
    _parallel_marking_threads = (uint) ConcGCThreads;
    _max_parallel_marking_threads = _parallel_marking_threads;

    if (parallel_marking_threads() > 1) {
      _cleanup_task_overhead = 1.0;
    } else {
      _cleanup_task_overhead = marking_task_overhead();
    }
    _cleanup_sleep_factor =
      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();

#if 0
    gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
    gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
    gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
    gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
    gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
#endif

    guarantee(parallel_marking_threads() > 0, "peace of mind");
    _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
                                             _max_parallel_marking_threads, false, true);
    if (_parallel_workers == NULL) {
      vm_exit_during_initialization("Failed necessary allocation.");
    } else {
      _parallel_workers->initialize_workers();
    }
  }

  // so that the call below can read a sensible value
  _heap_start = (HeapWord*) rs.base();
  set_non_marking_state();
}

void ConcurrentMark::update_g1_committed(bool force) {
  // If concurrent marking is not in progress, then we do not need to
  // update _heap_end.
  if (!concurrent_marking_in_progress() && !force) return;

  MemRegion committed = _g1h->g1_committed();
  assert(committed.start() == _heap_start, "start shouldn't change");
  HeapWord* new_end = committed.end();
  if (new_end > _heap_end) {
    // The heap has been expanded.
    _heap_end = new_end;
  }
  // Notice that the heap can also shrink. However, this only happens
  // during a Full GC (at least currently) and the entire marking
  // phase will bail out and the task will not be restarted. So, let's
  // do nothing.
}

void ConcurrentMark::reset() {
  // Starting values for these two. This should be called in a STW
  // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
  MemRegion committed = _g1h->g1_committed();
  _heap_start = committed.start();
  _heap_end   = committed.end();

  // Separated the asserts so that we know which one fires.
  assert(_heap_start != NULL, "heap bounds should look ok");
  assert(_heap_end != NULL, "heap bounds should look ok");
  assert(_heap_start < _heap_end, "heap bounds should look ok");

  // Reset all the marking data structures and any necessary flags
  reset_marking_state();

  if (verbose_low()) {
    gclog_or_tty->print_cr("[global] resetting");
  }

  // We do reset all of them, since different phases will use
  // different number of active threads. So, it's easiest to have all
  // of them ready.
  for (int i = 0; i < (int) _max_task_num; ++i) {
    _tasks[i]->reset(_nextMarkBitMap);
  }

  // we need this to make sure that the flag is on during the evac
  // pause with initial mark piggy-backed
  set_concurrent_marking_in_progress();
}

void ConcurrentMark::reset_marking_state(bool clear_overflow) {
  _markStack.setEmpty();
  _markStack.clear_overflow();
  if (clear_overflow) {
    clear_has_overflown();
  } else {
    assert(has_overflown(), "pre-condition");
  }
  _finger = _heap_start;

  for (uint i = 0; i < _max_task_num; ++i) {
    CMTaskQueue* queue = _task_queues->queue(i);
    queue->set_empty();
  }
}

void ConcurrentMark::set_concurrency(uint active_tasks) {
  assert(active_tasks <= _max_task_num, "we should not have more");

  _active_tasks = active_tasks;
  // Need to update the three data structures below according to the
  // number of active threads for this phase.
  _terminator = ParallelTaskTerminator((int) active_tasks, _task_queues);
  _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
  _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
}

void ConcurrentMark::set_concurrency_and_phase(uint active_tasks, bool concurrent) {
  set_concurrency(active_tasks);

  _concurrent = concurrent;
  // We propagate this to all tasks, not just the active ones.
  for (int i = 0; i < (int) _max_task_num; ++i)
    _tasks[i]->set_concurrent(concurrent);

  if (concurrent) {
    set_concurrent_marking_in_progress();
  } else {
    // We currently assume that the concurrent flag has been set to
    // false before we start remark. At this point we should also be
    // in a STW phase.
    assert(!concurrent_marking_in_progress(), "invariant");
    assert(_finger == _heap_end,
           err_msg("only way to get here: _finger: "PTR_FORMAT", _heap_end: "PTR_FORMAT,
                   _finger, _heap_end));
    update_g1_committed(true);
  }
}

void ConcurrentMark::set_non_marking_state() {
  // We set the global marking state to some default values when we're
  // not doing marking.
  reset_marking_state();
  _active_tasks = 0;
  clear_concurrent_marking_in_progress();
}

ConcurrentMark::~ConcurrentMark() {
  // The ConcurrentMark instance is never freed.
  ShouldNotReachHere();
}

void ConcurrentMark::clearNextBitmap() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  // Make sure that the concurrent mark thread looks to still be in
  // the current cycle.
  guarantee(cmThread()->during_cycle(), "invariant");

  // We are finishing up the current cycle by clearing the next
  // marking bitmap and getting it ready for the next cycle. During
  // this time no other cycle can start. So, let's make sure that this
  // is the case.
  guarantee(!g1h->mark_in_progress(), "invariant");

  // clear the mark bitmap (no grey objects to start with).
  // We need to do this in chunks and offer to yield in between
  // each chunk.
  HeapWord* start  = _nextMarkBitMap->startWord();
  HeapWord* end    = _nextMarkBitMap->endWord();
  HeapWord* cur    = start;
  size_t chunkSize = M;
  while (cur < end) {
    HeapWord* next = cur + chunkSize;
    if (next > end) {
      next = end;
    }
    MemRegion mr(cur,next);
    _nextMarkBitMap->clearRange(mr);
    cur = next;
    do_yield_check();

    // Repeat the asserts from above. We'll do them as asserts here to
    // minimize their overhead on the product. However, we'll have
    // them as guarantees at the beginning / end of the bitmap
    // clearing to get some checking in the product.
    assert(cmThread()->during_cycle(), "invariant");
    assert(!g1h->mark_in_progress(), "invariant");
  }

  // Clear the liveness counting data
  clear_all_count_data();

  // Repeat the asserts from above.
  guarantee(cmThread()->during_cycle(), "invariant");
  guarantee(!g1h->mark_in_progress(), "invariant");
}

class NoteStartOfMarkHRClosure: public HeapRegionClosure {
public:
  bool doHeapRegion(HeapRegion* r) {
    if (!r->continuesHumongous()) {
      r->note_start_of_marking();
    }
    return false;
  }
};

void ConcurrentMark::checkpointRootsInitialPre() {
  G1CollectedHeap*   g1h = G1CollectedHeap::heap();
  G1CollectorPolicy* g1p = g1h->g1_policy();

  _has_aborted = false;

#ifndef PRODUCT
  if (G1PrintReachableAtInitialMark) {
    print_reachable("at-cycle-start",
                    VerifyOption_G1UsePrevMarking, true /* all */);
  }
#endif

  // Initialise marking structures. This has to be done in a STW phase.
  reset();

  // For each region note start of marking.
  NoteStartOfMarkHRClosure startcl;
  g1h->heap_region_iterate(&startcl);
}

void ConcurrentMark::checkpointRootsInitialPost() {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If we force an overflow during remark, the remark operation will
  // actually abort and we'll restart concurrent marking. If we always
  // force an overflow during remark we'll never actually complete the
  // marking phase. So, we initialize this here, at the start of the
  // cycle, so that the remaining overflow number will decrease at
  // every remark and we'll eventually not need to cause one.
  force_overflow_stw()->init();

  // Start Concurrent Marking weak-reference discovery.
  ReferenceProcessor* rp = g1h->ref_processor_cm();
  // enable ("weak") refs discovery
  rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
  rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle

  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
  satb_mq_set.set_active_all_threads(true, /* new active value */
                                     false /* expected_active */);

  _root_regions.prepare_for_scan();

  // update_g1_committed() will be called at the end of an evac pause
  // when marking is on. So, it's also called at the end of the
  // initial-mark pause to update the heap end, if the heap expands
  // during it. No need to call it here.
}

/*
 * Notice that in the next two methods, we actually leave the STS
 * during the barrier sync and join it immediately afterwards. If we
 * do not do this, the following deadlock can occur: one thread could
 * be in the barrier sync code, waiting for the other thread to also
 * sync up, whereas another one could be trying to yield, while also
 * waiting for the other threads to sync up too.
 *
 * Note, however, that this code is also used during remark and in
 * this case we should not attempt to leave / enter the STS, otherwise
 * we'll either hit an assert (debug / fastdebug) or deadlock
 * (product). So we should only leave / enter the STS if we are
 * operating concurrently.
 *
 * Because the thread that does the sync barrier has left the STS, it
 * is possible that a Full GC or an evacuation pause could occur while
 * it is suspended. This is actually safe, since entering the sync
 * barrier is one of the last things do_marking_step() does, and it
 * doesn't manipulate any data structures afterwards.
 */

void ConcurrentMark::enter_first_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _first_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everyone should have synced up and not be doing any
  // more work

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
  }

  // If we're executing the concurrent phase of marking, reset the marking
  // state; otherwise the marking state is reset after reference processing,
  // during the remark pause.
  // If we reset here as a result of an overflow during the remark we will
  // see assertion failures from any subsequent set_concurrency_and_phase()
  // calls.
  if (concurrent()) {
    // let task 0 do this
    if (task_num == 0) {
      // task 0 is responsible for clearing the global data structures
      // We should be here because of an overflow. During STW we should
      // not clear the overflow flag since we rely on it being true when
      // we exit this method to abort the pause and restart concurrent
      // marking.
      reset_marking_state(true /* clear_overflow */);
      force_overflow()->update();

      if (G1Log::fine()) {
        gclog_or_tty->date_stamp(PrintGCDateStamps);
        gclog_or_tty->stamp(PrintGCTimeStamps);
        gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
      }
    }
  }

  // after this, each task should reset its own data structures and
  // then go into the second barrier
}

void ConcurrentMark::enter_second_sync_barrier(int task_num) {
  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
  }

  if (concurrent()) {
    ConcurrentGCThread::stsLeave();
  }
  _second_overflow_barrier_sync.enter();
  if (concurrent()) {
    ConcurrentGCThread::stsJoin();
  }
  // at this point everything should be re-initialized and ready to go

  if (verbose_low()) {
    gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
  }
}

#ifndef PRODUCT
void ForceOverflowSettings::init() {
  _num_remaining = G1ConcMarkForceOverflow;
  _force = false;
  update();
}

void ForceOverflowSettings::update() {
  if (_num_remaining > 0) {
    _num_remaining -= 1;
    _force = true;
  } else {
    _force = false;
  }
}

bool ForceOverflowSettings::should_force() {
  if (_force) {
    _force = false;
    return true;
  } else {
    return false;
  }
}
#endif // !PRODUCT

class CMConcurrentMarkingTask: public AbstractGangTask {
private:
  ConcurrentMark*       _cm;
  ConcurrentMarkThread* _cmt;

public:
  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");
    ResourceMark rm;

    double start_vtime = os::elapsedVTime();

    ConcurrentGCThread::stsJoin();

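    // Each iteration below performs one time-bounded marking step; if the
    // step aborted because its time slice ran out, the worker sleeps
    // (outside the STS) in proportion to the configured sleep factor
    // before retrying.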
    assert(worker_id < _cm->active_tasks(), "invariant");
    CMTask* the_task = _cm->task(worker_id);
    the_task->record_start_time();
    if (!_cm->has_aborted()) {
      do {
        double start_vtime_sec = os::elapsedVTime();
        double start_time_sec = os::elapsedTime();
        double mark_step_duration_ms = G1ConcMarkStepDurationMillis;

        the_task->do_marking_step(mark_step_duration_ms,
                                  true  /* do_termination */,
                                  false /* is_serial*/);

        double end_time_sec = os::elapsedTime();
        double end_vtime_sec = os::elapsedVTime();
        double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
        double elapsed_time_sec = end_time_sec - start_time_sec;
        _cm->clear_has_overflown();

        bool ret = _cm->do_yield_check(worker_id);

        jlong sleep_time_ms;
        if (!_cm->has_aborted() && the_task->has_aborted()) {
          sleep_time_ms =
            (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
          ConcurrentGCThread::stsLeave();
          os::sleep(Thread::current(), sleep_time_ms, false);
          ConcurrentGCThread::stsJoin();
        }
        double end_time2_sec = os::elapsedTime();
        double elapsed_time2_sec = end_time2_sec - start_time_sec;

#if 0
        gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
                               "overhead %1.4lf",
                               elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
                               the_task->conc_overhead(os::elapsedTime()) * 8.0);
        gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
                               elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
#endif
      } while (!_cm->has_aborted() && the_task->has_aborted());
    }
    the_task->record_end_time();
    guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");

    ConcurrentGCThread::stsLeave();

    double end_vtime = os::elapsedVTime();
    _cm->update_accum_task_vtime(worker_id, end_vtime - start_vtime);
  }

  CMConcurrentMarkingTask(ConcurrentMark* cm,
                          ConcurrentMarkThread* cmt) :
      AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }

  ~CMConcurrentMarkingTask() { }
};

// Calculates the number of active workers for a concurrent
// phase.
uint ConcurrentMark::calc_parallel_marking_threads() {
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    uint n_conc_workers = 0;
    if (!UseDynamicNumberOfGCThreads ||
        (!FLAG_IS_DEFAULT(ConcGCThreads) &&
         !ForceDynamicNumberOfGCThreads)) {
      n_conc_workers = max_parallel_marking_threads();
    } else {
      n_conc_workers =
        AdaptiveSizePolicy::calc_default_active_workers(
                                     max_parallel_marking_threads(),
                                     1, /* Minimum workers */
                                     parallel_marking_threads(),
                                     Threads::number_of_non_daemon_threads());
      // Don't scale down "n_conc_workers" by scale_parallel_threads() because
      // that scaling has already gone into "_max_parallel_marking_threads".
    }
    assert(n_conc_workers > 0, "Always need at least 1");
    return n_conc_workers;
  }
  // If we are not running with any parallel GC threads we will not
  // have spawned any marking threads either. Hence the number of
  // concurrent workers should be 0.
  return 0;
}

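// Scans a single root region from bottom() to top(), applying the root
// region closure to every object and prefetching ahead of the scan cursor.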
void ConcurrentMark::scanRootRegion(HeapRegion* hr, uint worker_id) {
  // Currently, only survivors can be root regions.
  assert(hr->next_top_at_mark_start() == hr->bottom(), "invariant");
  G1RootRegionScanClosure cl(_g1h, this, worker_id);

  const uintx interval = PrefetchScanIntervalInBytes;
  HeapWord* curr = hr->bottom();
  const HeapWord* end = hr->top();
  while (curr < end) {
    Prefetch::read(curr, interval);
    oop obj = oop(curr);
    int size = obj->oop_iterate(&cl);
    assert(size == obj->size(), "sanity");
    curr += size;
  }
}

class CMRootRegionScanTask : public AbstractGangTask {
private:
  ConcurrentMark* _cm;

public:
  CMRootRegionScanTask(ConcurrentMark* cm) :
    AbstractGangTask("Root Region Scan"), _cm(cm) { }

  void work(uint worker_id) {
    assert(Thread::current()->is_ConcurrentGC_thread(),
           "this should only be done by a conc GC thread");

    CMRootRegions* root_regions = _cm->root_regions();
    HeapRegion* hr = root_regions->claim_next();
    while (hr != NULL) {
      _cm->scanRootRegion(hr, worker_id);
      hr = root_regions->claim_next();
    }
  }
};

void ConcurrentMark::scanRootRegions() {
  // scan_in_progress() will have been set to true only if there was
  // at least one root region to scan. So, if it's false, we
  // should not attempt to do any further work.
  if (root_regions()->scan_in_progress()) {
    _parallel_marking_threads = calc_parallel_marking_threads();
    assert(parallel_marking_threads() <= max_parallel_marking_threads(),
           "Maximum number of marking threads exceeded");
    uint active_workers = MAX2(1U, parallel_marking_threads());

    CMRootRegionScanTask task(this);
    if (use_parallel_marking_threads()) {
      _parallel_workers->set_active_workers((int) active_workers);
      _parallel_workers->run_task(&task);
    } else {
      task.work(0);
    }

    // It's possible that has_aborted() is true here without actually
    // aborting the survivor scan earlier. This is OK as it's
    // mainly used for sanity checking.
    root_regions()->scan_finished();
  }
}

void ConcurrentMark::markFromRoots() {
  // we might be tempted to assert that:
  // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
  //        "inconsistent argument?");
  // However that wouldn't be right, because it's possible that
  // a safepoint is indeed in progress as a younger generation
  // stop-the-world GC happens even as we mark in this generation.

  _restart_for_overflow = false;
  force_overflow_conc()->init();

  // _g1h has _n_par_threads
  _parallel_marking_threads = calc_parallel_marking_threads();
  assert(parallel_marking_threads() <= max_parallel_marking_threads(),
         "Maximum number of marking threads exceeded");

  uint active_workers = MAX2(1U, parallel_marking_threads());

  // Parallel task terminator is set in "set_concurrency_and_phase()"
  set_concurrency_and_phase(active_workers, true /* concurrent */);

  CMConcurrentMarkingTask markingTask(this, cmThread());
  if (use_parallel_marking_threads()) {
    _parallel_workers->set_active_workers((int)active_workers);
    // Don't set _n_par_threads because it affects MT in process_strong_roots()
    // and the decisions on that MT processing are made elsewhere.
    assert(_parallel_workers->active_workers() > 0, "Should have been set");
    _parallel_workers->run_task(&markingTask);
  } else {
    markingTask.work(0);
  }
  print_stats();
}

void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
  // world is stopped at this checkpoint
  assert(SafepointSynchronize::is_at_safepoint(),
         "world should be stopped");

  G1CollectedHeap* g1h = G1CollectedHeap::heap();

  // If a full collection has happened, we shouldn't do this.
  if (has_aborted()) {
    g1h->set_marking_complete(); // So bitmap clearing isn't confused
    return;
  }

  SvcGCMarker sgcm(SvcGCMarker::OTHER);

  if (VerifyDuringGC) {
    HandleMark hm;  // handle scope
    gclog_or_tty->print(" VerifyDuringGC:(before)");
    Universe::heap()->prepare_for_verify();
    Universe::verify(/* silent */ false,
                     /* option */ VerifyOption_G1UsePrevMarking);
  }

  G1CollectorPolicy* g1p = g1h->g1_policy();
  g1p->record_concurrent_mark_remark_start();

  double start = os::elapsedTime();

  checkpointRootsFinalWork();

  double mark_work_end = os::elapsedTime();

  weakRefsWork(clear_all_soft_refs);

  if (has_overflown()) {
    // Oops. We overflowed. Restart concurrent marking.
    _restart_for_overflow = true;
    if (G1TraceMarkStackOverflow) {
      gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
    }

    // Verify the heap w.r.t. the previous marking bitmap.
    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(overflow)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* silent */ false,
                       /* option */ VerifyOption_G1UsePrevMarking);
    }

    // Clear the marking state because we will be restarting
    // marking due to overflowing the global mark stack.
    reset_marking_state();
  } else {
    // Aggregate the per-task counting data that we have accumulated
    // while marking.
    aggregate_count_data();

    SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
    // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
    satb_mq_set.set_active_all_threads(false, /* new active value */
                                       true /* expected_active */);

    if (VerifyDuringGC) {
      HandleMark hm;  // handle scope
      gclog_or_tty->print(" VerifyDuringGC:(after)");
      Universe::heap()->prepare_for_verify();
      Universe::verify(/* silent */ false,
                       /* option */ VerifyOption_G1UseNextMarking);
    }
    assert(!restart_for_overflow(), "sanity");
    // Completely reset the marking state since marking completed
    set_non_marking_state();
  }

#if VERIFY_OBJS_PROCESSED
  _scan_obj_cl.objs_processed = 0;
  ThreadLocalObjQueue::objs_enqueued = 0;
#endif

  // Statistics
  double now = os::elapsedTime();
  _remark_mark_times.add((mark_work_end - start) * 1000.0);
  _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
  _remark_times.add((now - start) * 1000.0);

  g1p->record_concurrent_mark_remark_end();

  G1CMIsAliveClosure is_alive(g1h);
  g1h->gc_tracer_cm()->report_object_count_after_gc(&is_alive);
}

// Base class of the closures that finalize and verify the
// liveness counting data.
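// The counting data is kept in two bitmaps: a region bitmap with one bit
// per heap region and a card bitmap with one bit per card (see _region_bm
// and _card_bm in the ConcurrentMark constructor).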
class CMCountDataClosureBase: public HeapRegionClosure {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CardTableModRefBS* _ct_bs;

  BitMap* _region_bm;
  BitMap* _card_bm;

  // Takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
  // bitmap to 1. If the region is "starts humongous" it will also set
  // to 1 the bits on the region bitmap that correspond to its
  // associated "continues humongous" regions.
  void set_bit_for_region(HeapRegion* hr) {
    assert(!hr->continuesHumongous(), "should have filtered those out");

    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();
    if (!hr->startsHumongous()) {
      // Normal (non-humongous) case: just set the bit.
      _region_bm->par_at_put(index, true);
    } else {
      // Starts humongous case: calculate how many regions are part of
      // this humongous region and then set the bit range.
      BitMap::idx_t end_index = (BitMap::idx_t) hr->last_hc_index();
      _region_bm->par_at_put_range(index, end_index, true);
    }
  }

public:
  CMCountDataClosureBase(G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm):
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _ct_bs((CardTableModRefBS*) (g1h->barrier_set())),
    _region_bm(region_bm), _card_bm(card_bm) { }
};

// Closure that calculates the # live objects per region. Used
// for verification purposes during the cleanup pause.
class CalcLiveObjectsClosure: public CMCountDataClosureBase {
  CMBitMapRO* _bm;
  size_t _region_marked_bytes;

public:
  CalcLiveObjectsClosure(CMBitMapRO *bm, G1CollectedHeap* g1h,
                         BitMap* region_bm, BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm),
    _bm(bm), _region_marked_bytes(0) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* start = hr->bottom();

    assert(start <= hr->end() && start <= ntams && ntams <= hr->end(),
           err_msg("Preconditions not met - "
                   "start: "PTR_FORMAT", ntams: "PTR_FORMAT", end: "PTR_FORMAT,
                   start, ntams, hr->end()));

    // Find the first marked object at or after "start".
    start = _bm->getNextMarkedWordAddress(start, ntams);

    size_t marked_bytes = 0;

    while (start < ntams) {
      oop obj = oop(start);
      int obj_sz = obj->size();
      HeapWord* obj_end = start + obj_sz;

      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(obj_end);

      // Note: if we're looking at the last region in heap - obj_end
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(obj_end) && !_ct_bs->is_card_aligned(obj_end)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }

      // Set the bits in the card BM for the cards spanned by this object.
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // Add the size of this object to the number of marked bytes.
      marked_bytes += (size_t)obj_sz * HeapWordSize;

      // Find the next marked object after this one.
      start = _bm->getNextMarkedWordAddress(obj_end, ntams);
    }

    // Mark the allocated-since-marking portion...
    HeapWord* top = hr->top();
    if (ntams < top) {
      BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams);
      BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top);

      // Note: if we're looking at the last region in heap - top
      // could be actually just beyond the end of the heap; end_idx
      // will then correspond to a (non-existent) card that is also
      // just beyond the heap.
      if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) {
        // end of object is not card aligned - increment to cover
        // all the cards spanned by the object
        end_idx += 1;
      }
      _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */);

      // This definitely means the region has live objects.
      set_bit_for_region(hr);
    }

    // Update the live region bitmap.
    if (marked_bytes > 0) {
      set_bit_for_region(hr);
    }

    // Set the marked bytes for the current region so that
    // it can be queried by a calling verification routine
    _region_marked_bytes = marked_bytes;

    return false;
  }

  size_t region_marked_bytes() const { return _region_marked_bytes; }
};

// Heap region closure used for verifying the counting data
// that was accumulated concurrently and aggregated during
// the remark pause. This closure is applied to the heap
// regions during the STW cleanup pause.

class VerifyLiveObjectDataHRClosure: public HeapRegionClosure {
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  CalcLiveObjectsClosure _calc_cl;
  BitMap* _region_bm;   // Region BM to be verified
  BitMap* _card_bm;     // Card BM to be verified
  bool _verbose;        // verbose output?

  BitMap* _exp_region_bm; // Expected Region BM values
  BitMap* _exp_card_bm;   // Expected card BM values

  int _failures;

public:
  VerifyLiveObjectDataHRClosure(G1CollectedHeap* g1h,
                                BitMap* region_bm,
                                BitMap* card_bm,
                                BitMap* exp_region_bm,
                                BitMap* exp_card_bm,
                                bool verbose) :
    _g1h(g1h), _cm(g1h->concurrent_mark()),
    _calc_cl(_cm->nextMarkBitMap(), g1h, exp_region_bm, exp_card_bm),
    _region_bm(region_bm), _card_bm(card_bm), _verbose(verbose),
    _exp_region_bm(exp_region_bm), _exp_card_bm(exp_card_bm),
    _failures(0) { }

  int failures() const { return _failures; }

  bool doHeapRegion(HeapRegion* hr) {
    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    int failures = 0;

    // Call the CalcLiveObjectsClosure to walk the marking bitmap for
    // this region and set the corresponding bits in the expected region
    // and card bitmaps.
    bool res = _calc_cl.doHeapRegion(hr);
    assert(res == false, "should be continuing");

    MutexLockerEx x((_verbose ? ParGCRareEvent_lock : NULL),
                    Mutex::_no_safepoint_check_flag);

    // Verify the marked bytes for this region.
    size_t exp_marked_bytes = _calc_cl.region_marked_bytes();
    size_t act_marked_bytes = hr->next_marked_bytes();

    // We're not OK if expected marked bytes > actual marked bytes. It means
    // we have missed accounting some objects during the actual marking.
    if (exp_marked_bytes > act_marked_bytes) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: marked bytes mismatch: "
                               "expected: " SIZE_FORMAT ", actual: " SIZE_FORMAT,
                               hr->hrs_index(), exp_marked_bytes, act_marked_bytes);
      }
      failures += 1;
    }

    // Verify the bit, for this region, in the actual and expected
    // (which was just calculated) region bit maps.
    // We're not OK if the bit in the calculated expected region
    // bitmap is set and the bit in the actual region bitmap is not.
    BitMap::idx_t index = (BitMap::idx_t) hr->hrs_index();

    bool expected = _exp_region_bm->at(index);
    bool actual = _region_bm->at(index);
    if (expected && !actual) {
      if (_verbose) {
        gclog_or_tty->print_cr("Region %u: region bitmap mismatch: "
                               "expected: %s, actual: %s",
                               hr->hrs_index(),
                               BOOL_TO_STR(expected), BOOL_TO_STR(actual));
      }
      failures += 1;
    }

    // Verify that the card bit maps for the cards spanned by the current
    // region match. We have an error if we have a set bit in the expected
    // bit map and the corresponding bit in the actual bitmap is not set.
    BitMap::idx_t start_idx = _cm->card_bitmap_index_for(hr->bottom());
    BitMap::idx_t end_idx = _cm->card_bitmap_index_for(hr->top());

    for (BitMap::idx_t i = start_idx; i < end_idx; i+=1) {
      expected = _exp_card_bm->at(i);
      actual = _card_bm->at(i);

      if (expected && !actual) {
        if (_verbose) {
          gclog_or_tty->print_cr("Region %u: card bitmap mismatch at " SIZE_FORMAT ": "
                                 "expected: %s, actual: %s",
                                 hr->hrs_index(), i,
                                 BOOL_TO_STR(expected), BOOL_TO_STR(actual));
        }
        failures += 1;
      }
    }

    if (failures > 0 && _verbose) {
      gclog_or_tty->print_cr("Region " HR_FORMAT ", ntams: " PTR_FORMAT ", "
                             "marked_bytes: calc/actual " SIZE_FORMAT "/" SIZE_FORMAT,
                             HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start(),
                             _calc_cl.region_marked_bytes(), hr->next_marked_bytes());
    }

    _failures += failures;

    // We could stop iteration over the heap when we
    // find the first violating region by returning true.
    return false;
  }
};

class G1ParVerifyFinalCountTask: public AbstractGangTask {
protected:
  G1CollectedHeap* _g1h;
  ConcurrentMark* _cm;
  BitMap* _actual_region_bm;
  BitMap* _actual_card_bm;

  uint    _n_workers;

  BitMap* _expected_region_bm;
  BitMap* _expected_card_bm;

  int  _failures;
  bool _verbose;

public:
  G1ParVerifyFinalCountTask(G1CollectedHeap* g1h,
                            BitMap* region_bm, BitMap* card_bm,
                            BitMap* expected_region_bm, BitMap* expected_card_bm)
    : AbstractGangTask("G1 verify final counting"),
      _g1h(g1h), _cm(_g1h->concurrent_mark()),
      _actual_region_bm(region_bm), _actual_card_bm(card_bm),
      _expected_region_bm(expected_region_bm), _expected_card_bm(expected_card_bm),
      _failures(0), _verbose(false),
      _n_workers(0) {
    assert(VerifyDuringGC, "don't call this otherwise");

    // Use the value already set as the number of active threads
    // in the call to run_task().
    if (G1CollectedHeap::use_parallel_gc_threads()) {
      assert( _g1h->workers()->active_workers() > 0,
        "Should have been previously set");
      _n_workers = _g1h->workers()->active_workers();
    } else {
      _n_workers = 1;
    }

    assert(_expected_card_bm->size() == _actual_card_bm->size(), "sanity");
    assert(_expected_region_bm->size() == _actual_region_bm->size(), "sanity");

    _verbose = _cm->verbose_medium();
  }

  void work(uint worker_id) {
    assert(worker_id < _n_workers, "invariant");

    VerifyLiveObjectDataHRClosure verify_cl(_g1h,
                                            _actual_region_bm, _actual_card_bm,
                                            _expected_region_bm,
                                            _expected_card_bm,
                                            _verbose);

    if (G1CollectedHeap::use_parallel_gc_threads()) {
      _g1h->heap_region_par_iterate_chunked(&verify_cl,
                                            worker_id,
                                            _n_workers,
                                            HeapRegion::VerifyCountClaimValue);
    } else {
      _g1h->heap_region_iterate(&verify_cl);
    }

    Atomic::add(verify_cl.failures(), &_failures);
  }

  int failures() const { return _failures; }
};

// Closure that finalizes the liveness counting data.
// Used during the cleanup pause.
// Sets the bits corresponding to the interval [NTAMS, top]
// (which contains the implicitly live objects) in the
// card liveness bitmap. Also sets the bit for each region,
// containing live data, in the region liveness bitmap.

class FinalCountDataUpdateClosure: public CMCountDataClosureBase {
public:
  FinalCountDataUpdateClosure(G1CollectedHeap* g1h,
                              BitMap* region_bm,
                              BitMap* card_bm) :
    CMCountDataClosureBase(g1h, region_bm, card_bm) { }

  bool doHeapRegion(HeapRegion* hr) {

    if (hr->continuesHumongous()) {
      // We will ignore these here and process them when their
      // associated "starts humongous" region is processed (see
      // set_bit_for_heap_region()). Note that we cannot rely on their
      // associated "starts humongous" region to have their bit set to
      // 1 since, due to the region chunking in the parallel region
      // iteration, a "continues humongous" region might be visited
      // before its associated "starts humongous".
      return false;
    }

    HeapWord* ntams = hr->next_top_at_mark_start();
    HeapWord* top   = hr->top();

    assert(hr->bottom() <= ntams && ntams <= hr->end(), "Preconditions.");

    // Mark the allocated-since-marking portion...
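    // (Objects allocated at or above NTAMS are the implicitly live ones
    // mentioned in the class comment; the whole [ntams, top) interval is
    // counted without scanning individual objects.)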
1601 if (ntams < top) { 1602 // This definitely means the region has live objects. 1603 set_bit_for_region(hr); 1604 1605 // Now set the bits in the card bitmap for [ntams, top) 1606 BitMap::idx_t start_idx = _cm->card_bitmap_index_for(ntams); 1607 BitMap::idx_t end_idx = _cm->card_bitmap_index_for(top); 1608 1609 // Note: if we're looking at the last region in heap - top 1610 // could be actually just beyond the end of the heap; end_idx 1611 // will then correspond to a (non-existent) card that is also 1612 // just beyond the heap. 1613 if (_g1h->is_in_g1_reserved(top) && !_ct_bs->is_card_aligned(top)) { 1614 // end of object is not card aligned - increment to cover 1615 // all the cards spanned by the object 1616 end_idx += 1; 1617 } 1618 1619 assert(end_idx <= _card_bm->size(), 1620 err_msg("oob: end_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1621 end_idx, _card_bm->size())); 1622 assert(start_idx < _card_bm->size(), 1623 err_msg("oob: start_idx= "SIZE_FORMAT", bitmap size= "SIZE_FORMAT, 1624 start_idx, _card_bm->size())); 1625 1626 _cm->set_card_bitmap_range(_card_bm, start_idx, end_idx, true /* is_par */); 1627 } 1628 1629 // Set the bit for the region if it contains live data 1630 if (hr->next_marked_bytes() > 0) { 1631 set_bit_for_region(hr); 1632 } 1633 1634 return false; 1635 } 1636 }; 1637 1638 class G1ParFinalCountTask: public AbstractGangTask { 1639 protected: 1640 G1CollectedHeap* _g1h; 1641 ConcurrentMark* _cm; 1642 BitMap* _actual_region_bm; 1643 BitMap* _actual_card_bm; 1644 1645 uint _n_workers; 1646 1647 public: 1648 G1ParFinalCountTask(G1CollectedHeap* g1h, BitMap* region_bm, BitMap* card_bm) 1649 : AbstractGangTask("G1 final counting"), 1650 _g1h(g1h), _cm(_g1h->concurrent_mark()), 1651 _actual_region_bm(region_bm), _actual_card_bm(card_bm), 1652 _n_workers(0) { 1653 // Use the value already set as the number of active threads 1654 // in the call to run_task(). 
1655 if (G1CollectedHeap::use_parallel_gc_threads()) { 1656 assert( _g1h->workers()->active_workers() > 0, 1657 "Should have been previously set"); 1658 _n_workers = _g1h->workers()->active_workers(); 1659 } else { 1660 _n_workers = 1; 1661 } 1662 } 1663 1664 void work(uint worker_id) { 1665 assert(worker_id < _n_workers, "invariant"); 1666 1667 FinalCountDataUpdateClosure final_update_cl(_g1h, 1668 _actual_region_bm, 1669 _actual_card_bm); 1670 1671 if (G1CollectedHeap::use_parallel_gc_threads()) { 1672 _g1h->heap_region_par_iterate_chunked(&final_update_cl, 1673 worker_id, 1674 _n_workers, 1675 HeapRegion::FinalCountClaimValue); 1676 } else { 1677 _g1h->heap_region_iterate(&final_update_cl); 1678 } 1679 } 1680 }; 1681 1682 class G1ParNoteEndTask; 1683 1684 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure { 1685 G1CollectedHeap* _g1; 1686 int _worker_num; 1687 size_t _max_live_bytes; 1688 uint _regions_claimed; 1689 size_t _freed_bytes; 1690 FreeRegionList* _local_cleanup_list; 1691 OldRegionSet* _old_proxy_set; 1692 HumongousRegionSet* _humongous_proxy_set; 1693 HRRSCleanupTask* _hrrs_cleanup_task; 1694 double _claimed_region_time; 1695 double _max_region_time; 1696 1697 public: 1698 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1, 1699 int worker_num, 1700 FreeRegionList* local_cleanup_list, 1701 OldRegionSet* old_proxy_set, 1702 HumongousRegionSet* humongous_proxy_set, 1703 HRRSCleanupTask* hrrs_cleanup_task) : 1704 _g1(g1), _worker_num(worker_num), 1705 _max_live_bytes(0), _regions_claimed(0), 1706 _freed_bytes(0), 1707 _claimed_region_time(0.0), _max_region_time(0.0), 1708 _local_cleanup_list(local_cleanup_list), 1709 _old_proxy_set(old_proxy_set), 1710 _humongous_proxy_set(humongous_proxy_set), 1711 _hrrs_cleanup_task(hrrs_cleanup_task) { } 1712 1713 size_t freed_bytes() { return _freed_bytes; } 1714 1715 bool doHeapRegion(HeapRegion *hr) { 1716 if (hr->continuesHumongous()) { 1717 return false; 1718 } 1719 // We use a claim value of zero here because all regions 1720 // were claimed with value 1 in the FinalCount task. 
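    // [Editorial note.] free_region_if_empty() below does not touch the
    // global free list directly: a region found to be empty goes onto
    // this worker's _local_cleanup_list and its size is added to
    // _freed_bytes. The per-worker lists are only merged into the global
    // cleanup list later, under ParGCRareEvent_lock, in
    // G1ParNoteEndTask::work().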
1721 _g1->reset_gc_time_stamps(hr); 1722 double start = os::elapsedTime(); 1723 _regions_claimed++; 1724 hr->note_end_of_marking(); 1725 _max_live_bytes += hr->max_live_bytes(); 1726 _g1->free_region_if_empty(hr, 1727 &_freed_bytes, 1728 _local_cleanup_list, 1729 _old_proxy_set, 1730 _humongous_proxy_set, 1731 _hrrs_cleanup_task, 1732 true /* par */); 1733 double region_time = (os::elapsedTime() - start); 1734 _claimed_region_time += region_time; 1735 if (region_time > _max_region_time) { 1736 _max_region_time = region_time; 1737 } 1738 return false; 1739 } 1740 1741 size_t max_live_bytes() { return _max_live_bytes; } 1742 uint regions_claimed() { return _regions_claimed; } 1743 double claimed_region_time_sec() { return _claimed_region_time; } 1744 double max_region_time_sec() { return _max_region_time; } 1745 }; 1746 1747 class G1ParNoteEndTask: public AbstractGangTask { 1748 friend class G1NoteEndOfConcMarkClosure; 1749 1750 protected: 1751 G1CollectedHeap* _g1h; 1752 size_t _max_live_bytes; 1753 size_t _freed_bytes; 1754 FreeRegionList* _cleanup_list; 1755 1756 public: 1757 G1ParNoteEndTask(G1CollectedHeap* g1h, 1758 FreeRegionList* cleanup_list) : 1759 AbstractGangTask("G1 note end"), _g1h(g1h), 1760 _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { } 1761 1762 void work(uint worker_id) { 1763 double start = os::elapsedTime(); 1764 FreeRegionList local_cleanup_list("Local Cleanup List"); 1765 OldRegionSet old_proxy_set("Local Cleanup Old Proxy Set"); 1766 HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set"); 1767 HRRSCleanupTask hrrs_cleanup_task; 1768 G1NoteEndOfConcMarkClosure g1_note_end(_g1h, worker_id, &local_cleanup_list, 1769 &old_proxy_set, 1770 &humongous_proxy_set, 1771 &hrrs_cleanup_task); 1772 if (G1CollectedHeap::use_parallel_gc_threads()) { 1773 _g1h->heap_region_par_iterate_chunked(&g1_note_end, worker_id, 1774 _g1h->workers()->active_workers(), 1775 HeapRegion::NoteEndClaimValue); 1776 } else { 1777 _g1h->heap_region_iterate(&g1_note_end); 1778 } 1779 assert(g1_note_end.complete(), "Shouldn't have yielded!"); 1780 1781 // Now update the lists 1782 _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(), 1783 NULL /* free_list */, 1784 &old_proxy_set, 1785 &humongous_proxy_set, 1786 true /* par */); 1787 { 1788 MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag); 1789 _max_live_bytes += g1_note_end.max_live_bytes(); 1790 _freed_bytes += g1_note_end.freed_bytes(); 1791 1792 // If we iterate over the global cleanup list at the end of 1793 // cleanup to do this printing we will not guarantee to only 1794 // generate output for the newly-reclaimed regions (the list 1795 // might not be empty at the beginning of cleanup; we might 1796 // still be working on its previous contents). So we do the 1797 // printing here, before we append the new regions to the global 1798 // cleanup list. 
1799 1800 G1HRPrinter* hr_printer = _g1h->hr_printer(); 1801 if (hr_printer->is_active()) { 1802 HeapRegionLinkedListIterator iter(&local_cleanup_list); 1803 while (iter.more_available()) { 1804 HeapRegion* hr = iter.get_next(); 1805 hr_printer->cleanup(hr); 1806 } 1807 } 1808 1809 _cleanup_list->add_as_tail(&local_cleanup_list); 1810 assert(local_cleanup_list.is_empty(), "post-condition"); 1811 1812 HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task); 1813 } 1814 } 1815 size_t max_live_bytes() { return _max_live_bytes; } 1816 size_t freed_bytes() { return _freed_bytes; } 1817 }; 1818 1819 class G1ParScrubRemSetTask: public AbstractGangTask { 1820 protected: 1821 G1RemSet* _g1rs; 1822 BitMap* _region_bm; 1823 BitMap* _card_bm; 1824 public: 1825 G1ParScrubRemSetTask(G1CollectedHeap* g1h, 1826 BitMap* region_bm, BitMap* card_bm) : 1827 AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()), 1828 _region_bm(region_bm), _card_bm(card_bm) { } 1829 1830 void work(uint worker_id) { 1831 if (G1CollectedHeap::use_parallel_gc_threads()) { 1832 _g1rs->scrub_par(_region_bm, _card_bm, worker_id, 1833 HeapRegion::ScrubRemSetClaimValue); 1834 } else { 1835 _g1rs->scrub(_region_bm, _card_bm); 1836 } 1837 } 1838 1839 }; 1840 1841 void ConcurrentMark::cleanup() { 1842 // world is stopped at this checkpoint 1843 assert(SafepointSynchronize::is_at_safepoint(), 1844 "world should be stopped"); 1845 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 1846 1847 // If a full collection has happened, we shouldn't do this. 1848 if (has_aborted()) { 1849 g1h->set_marking_complete(); // So bitmap clearing isn't confused 1850 return; 1851 } 1852 1853 HRSPhaseSetter x(HRSPhaseCleanup); 1854 g1h->verify_region_sets_optional(); 1855 1856 if (VerifyDuringGC) { 1857 HandleMark hm; // handle scope 1858 gclog_or_tty->print(" VerifyDuringGC:(before)"); 1859 Universe::heap()->prepare_for_verify(); 1860 Universe::verify(/* silent */ false, 1861 /* option */ VerifyOption_G1UsePrevMarking); 1862 } 1863 1864 G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy(); 1865 g1p->record_concurrent_mark_cleanup_start(); 1866 1867 double start = os::elapsedTime(); 1868 1869 HeapRegionRemSet::reset_for_cleanup_tasks(); 1870 1871 uint n_workers; 1872 1873 // Do counting once more with the world stopped for good measure. 1874 G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm); 1875 1876 if (G1CollectedHeap::use_parallel_gc_threads()) { 1877 assert(g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 1878 "sanity check"); 1879 1880 g1h->set_par_threads(); 1881 n_workers = g1h->n_par_threads(); 1882 assert(g1h->n_par_threads() == n_workers, 1883 "Should not have been reset"); 1884 g1h->workers()->run_task(&g1_par_count_task); 1885 // Done with the parallel phase so reset to 0. 1886 g1h->set_par_threads(0); 1887 1888 assert(g1h->check_heap_region_claim_values(HeapRegion::FinalCountClaimValue), 1889 "sanity check"); 1890 } else { 1891 n_workers = 1; 1892 g1_par_count_task.work(0); 1893 } 1894 1895 if (VerifyDuringGC) { 1896 // Verify that the counting data accumulated during marking matches 1897 // that calculated by walking the marking bitmap. 
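  // [Editorial note.] The verification below is a recompute-and-compare
  // check: a pair of fresh, zero-filled bitmaps is populated by
  // re-walking the marking bitmap (via VerifyLiveObjectDataHRClosure)
  // and then compared, bit for bit and byte count for byte count,
  // against the data accumulated during marking. Each discrepancy bumps
  // a failure count, and the guarantee() after the task runs turns a
  // non-zero count into a VM error.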
1898
1899     // Bitmaps to hold expected values
1900     BitMap expected_region_bm(_region_bm.size(), false);
1901     BitMap expected_card_bm(_card_bm.size(), false);
1902
1903     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
1904                                                  &_region_bm,
1905                                                  &_card_bm,
1906                                                  &expected_region_bm,
1907                                                  &expected_card_bm);
1908
1909     if (G1CollectedHeap::use_parallel_gc_threads()) {
1910       g1h->set_par_threads((int)n_workers);
1911       g1h->workers()->run_task(&g1_par_verify_task);
1912       // Done with the parallel phase so reset to 0.
1913       g1h->set_par_threads(0);
1914
1915       assert(g1h->check_heap_region_claim_values(HeapRegion::VerifyCountClaimValue),
1916              "sanity check");
1917     } else {
1918       g1_par_verify_task.work(0);
1919     }
1920
1921     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
1922   }
1923
1924   size_t start_used_bytes = g1h->used();
1925   g1h->set_marking_complete();
1926
1927   double count_end = os::elapsedTime();
1928   double this_final_counting_time = (count_end - start);
1929   _total_counting_time += this_final_counting_time;
1930
1931   if (G1PrintRegionLivenessInfo) {
1932     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1933     _g1h->heap_region_iterate(&cl);
1934   }
1935
1936   // Install the newly created mark bit map as "prev".
1937   swapMarkBitMaps();
1938
1939   g1h->reset_gc_time_stamp();
1940
1941   // Note end of marking in all heap regions.
1942   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1943   if (G1CollectedHeap::use_parallel_gc_threads()) {
1944     g1h->set_par_threads((int)n_workers);
1945     g1h->workers()->run_task(&g1_par_note_end_task);
1946     g1h->set_par_threads(0);
1947
1948     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1949            "sanity check");
1950   } else {
1951     g1_par_note_end_task.work(0);
1952   }
1953   g1h->check_gc_time_stamps();
1954
1955   if (!cleanup_list_is_empty()) {
1956     // The cleanup list is not empty, so we'll have to process it
1957     // concurrently. Notify anyone else that might be wanting free
1958     // regions that there will be more free regions coming soon.
1959     g1h->set_free_regions_coming();
1960   }
1961
1962   // Do the remembered set scrubbing before the record_concurrent_mark_cleanup_end()
1963   // call below, since it affects the metric by which we sort the heap regions.
1964   if (G1ScrubRemSets) {
1965     double rs_scrub_start = os::elapsedTime();
1966     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1967     if (G1CollectedHeap::use_parallel_gc_threads()) {
1968       g1h->set_par_threads((int)n_workers);
1969       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1970       g1h->set_par_threads(0);
1971
1972       assert(g1h->check_heap_region_claim_values(
1973                                             HeapRegion::ScrubRemSetClaimValue),
1974              "sanity check");
1975     } else {
1976       g1_par_scrub_rs_task.work(0);
1977     }
1978
1979     double rs_scrub_end = os::elapsedTime();
1980     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1981     _total_rs_scrub_time += this_rs_scrub_time;
1982   }
1983
1984   // This will also free any regions totally full of garbage objects,
1985   // and sort the regions.
1986   g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
1987
1988   // Statistics.
1989   double end = os::elapsedTime();
1990   _cleanup_times.add((end - start) * 1000.0);
1991
1992   if (G1Log::fine()) {
1993     g1h->print_size_transition(gclog_or_tty,
1994                                start_used_bytes,
1995                                g1h->used(),
1996                                g1h->capacity());
1997   }
1998
1999   // Cleanup will have freed any regions completely full of garbage.
2000   // Update the soft reference policy with the new heap occupancy.
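  // [Editorial note.] update_heap_info_at_gc() records the current heap
  // occupancy and capacity for use by the soft reference clearing policy
  // (e.g. LRUMaxHeapPolicy scales the permitted SoftReference age by the
  // amount of free heap), so it is worth refreshing right after cleanup
  // has handed empty regions back to the free lists.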
2001   Universe::update_heap_info_at_gc();
2002
2003   // We need to make this be a "collection" so any collection pause that
2004   // races with it goes around and waits for completeCleanup to finish.
2005   g1h->increment_total_collections();
2006
2007   // We reclaimed old regions so we should calculate the sizes to make
2008   // sure we update the old gen/space data.
2009   g1h->g1mm()->update_sizes();
2010
2011   if (VerifyDuringGC) {
2012     HandleMark hm;  // handle scope
2013     gclog_or_tty->print(" VerifyDuringGC:(after)");
2014     Universe::heap()->prepare_for_verify();
2015     Universe::verify(/* silent */ false,
2016                      /* option */ VerifyOption_G1UsePrevMarking);
2017   }
2018
2019   g1h->verify_region_sets_optional();
2020   g1h->trace_heap_after_concurrent_cycle();
2021 }
2022
2023 void ConcurrentMark::completeCleanup() {
2024   if (has_aborted()) return;
2025
2026   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2027
2028   _cleanup_list.verify_optional();
2029   FreeRegionList tmp_free_list("Tmp Free List");
2030
2031   if (G1ConcRegionFreeingVerbose) {
2032     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2033                            "cleanup list has %u entries",
2034                            _cleanup_list.length());
2035   }
2036
2037   // No one else should be accessing the _cleanup_list at this point,
2038   // so it's not necessary to take any locks
2039   while (!_cleanup_list.is_empty()) {
2040     HeapRegion* hr = _cleanup_list.remove_head();
2041     assert(hr != NULL, "the list was not empty");
2042     hr->par_clear();
2043     tmp_free_list.add_as_tail(hr);
2044
2045     // Instead of adding one region at a time to the secondary_free_list,
2046     // we accumulate them in the local list and move them a few at a
2047     // time. This also cuts down on the number of notify_all() calls
2048     // we do during this process. We'll also append the local list when
2049     // _cleanup_list is empty (which means we just removed the last
2050     // region from the _cleanup_list).
2051     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
2052         _cleanup_list.is_empty()) {
2053       if (G1ConcRegionFreeingVerbose) {
2054         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
2055                                "appending %u entries to the secondary_free_list, "
2056                                "cleanup list still has %u entries",
2057                                tmp_free_list.length(),
2058                                _cleanup_list.length());
2059       }
2060
2061       {
2062         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
2063         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
2064         SecondaryFreeList_lock->notify_all();
2065       }
2066
2067       if (G1StressConcRegionFreeing) {
2068         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
2069           os::sleep(Thread::current(), (jlong) 1, false);
2070         }
2071       }
2072     }
2073   }
2074   assert(tmp_free_list.is_empty(), "post-condition");
2075 }
2076
2077 // Supporting Object and Oop closures for reference discovery
2078 // and processing during marking
2079
2080 bool G1CMIsAliveClosure::do_object_b(oop obj) {
2081   HeapWord* addr = (HeapWord*)obj;
2082   return addr != NULL &&
2083          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
2084 }
2085
2086 // 'Keep Alive' oop closure used by both serial and parallel reference processing.
2087 // Uses the CMTask associated with a worker thread (for serial reference
2088 // processing the CMTask for worker 0 is used) to preserve (mark) and
2089 // trace referent objects.
2090 //
2091 // Using the CMTask and embedded local queues avoids having the worker
2092 // threads operating on the global mark stack. This reduces the risk
2093 // of overflowing the stack - which we would rather avoid at this late
2094 // stage. Also, using the tasks' local queues removes the potential
2095 // for the workers to interfere with each other, which could occur if
2096 // they operated on the global stack.
2097
2098 class G1CMKeepAliveAndDrainClosure: public OopClosure {
2099   ConcurrentMark* _cm;
2100   CMTask* _task;
2101   int _ref_counter_limit;
2102   int _ref_counter;
2103   bool _is_serial;
2104  public:
2105   G1CMKeepAliveAndDrainClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2106     _cm(cm), _task(task), _is_serial(is_serial),
2107     _ref_counter_limit(G1RefProcDrainInterval) {
2108     assert(_ref_counter_limit > 0, "sanity");
2109     assert(!_is_serial || _task->task_id() == 0, "only task 0 for serial code");
2110     _ref_counter = _ref_counter_limit;
2111   }
2112
2113   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2114   virtual void do_oop(      oop* p) { do_oop_work(p); }
2115
2116   template <class T> void do_oop_work(T* p) {
2117     if (!_cm->has_overflown()) {
2118       oop obj = oopDesc::load_decode_heap_oop(p);
2119       if (_cm->verbose_high()) {
2120         gclog_or_tty->print_cr("\t[%d] we're looking at location "
2121                                "*"PTR_FORMAT" = "PTR_FORMAT,
2122                                _task->task_id(), p, (void*) obj);
2123       }
2124
2125       _task->deal_with_reference(obj);
2126       _ref_counter--;
2127
2128       if (_ref_counter == 0) {
2129         // We have dealt with _ref_counter_limit references, pushing them
2130         // and objects reachable from them on to the local stack (and
2131         // possibly the global stack). Call CMTask::do_marking_step() to
2132         // process these entries.
2133         //
2134         // We call CMTask::do_marking_step() in a loop, which we'll exit if
2135         // there's nothing more to do (i.e. we're done with the entries that
2136         // were pushed as a result of the CMTask::deal_with_reference() calls
2137         // above) or we overflow.
2138         //
2139         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2140         // flag while there may still be some work to do. (See the comment at
2141         // the beginning of CMTask::do_marking_step() for those conditions -
2142         // one of which is reaching the specified time target.) It is only
2143         // when CMTask::do_marking_step() returns without setting the
2144         // has_aborted() flag that the marking step has completed.
2145         do {
2146           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2147           _task->do_marking_step(mark_step_duration_ms,
2148                                  false      /* do_termination */,
2149                                  _is_serial);
2150         } while (_task->has_aborted() && !_cm->has_overflown());
2151         _ref_counter = _ref_counter_limit;
2152       }
2153     } else {
2154       if (_cm->verbose_high()) {
2155         gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2156       }
2157     }
2158   }
2159 };
2160
2161 // 'Drain' oop closure used by both serial and parallel reference processing.
2162 // Uses the CMTask associated with a given worker thread (for serial
2163 // reference processing the CMTask for worker 0 is used). Calls the
2164 // do_marking_step routine, with an unbelievably large timeout value,
2165 // to drain the marking data structures of the remaining entries
2166 // added by the 'keep alive' oop closure above.
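// [Editorial note.] Both the 'keep alive' closure above and the 'drain'
// closure below rely on the same retry idiom, which boils down to:
//
//   do {
//     _task->do_marking_step(timeout_ms, do_termination, _is_serial);
//   } while (_task->has_aborted() && !_cm->has_overflown());
//
// i.e. keep re-entering do_marking_step() while it aborts for a
// transient reason (such as hitting its time target), but give up once
// the global mark stack has overflown - marking will have to be
// restarted in that case anyway.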
2167
2168 class G1CMDrainMarkingStackClosure: public VoidClosure {
2169   ConcurrentMark* _cm;
2170   CMTask* _task;
2171   bool _is_serial;
2172  public:
2173   G1CMDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task, bool is_serial) :
2174     _cm(cm), _task(task), _is_serial(is_serial) {
2175     assert(!_is_serial || _task->task_id() == 0, "only task 0 for serial code");
2176   }
2177
2178   void do_void() {
2179     do {
2180       if (_cm->verbose_high()) {
2181         gclog_or_tty->print_cr("\t[%d] Drain: Calling do_marking_step - serial: %s",
2182                                _task->task_id(), BOOL_TO_STR(_is_serial));
2183       }
2184
2185       // We call CMTask::do_marking_step() to completely drain the local
2186       // and global marking stacks of entries pushed by the 'keep alive'
2187       // oop closure (an instance of G1CMKeepAliveAndDrainClosure above).
2188       //
2189       // CMTask::do_marking_step() is called in a loop, which we'll exit
2190       // if there's nothing more to do (i.e. we've completely drained the
2191       // entries that were pushed as a result of applying the 'keep alive'
2192       // closure to the entries on the discovered ref lists) or we overflow
2193       // the global marking stack.
2194       //
2195       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted()
2196       // flag while there may still be some work to do. (See the comment at
2197       // the beginning of CMTask::do_marking_step() for those conditions -
2198       // one of which is reaching the specified time target.) It is only
2199       // when CMTask::do_marking_step() returns without setting the
2200       // has_aborted() flag that the marking step has completed.
2201
2202       _task->do_marking_step(1000000000.0 /* something very large */,
2203                              true         /* do_termination */,
2204                              _is_serial);
2205     } while (_task->has_aborted() && !_cm->has_overflown());
2206   }
2207 };
2208
2209 // Implementation of AbstractRefProcTaskExecutor for parallel
2210 // reference processing at the end of G1 concurrent marking
2211
2212 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2213  private:
2214   G1CollectedHeap* _g1h;
2215   ConcurrentMark*  _cm;
2216   WorkGang*        _workers;
2217   int              _active_workers;
2218
2219  public:
2220   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2221                           ConcurrentMark* cm,
2222                           WorkGang* workers,
2223                           int n_workers) :
2224     _g1h(g1h), _cm(cm),
2225     _workers(workers), _active_workers(n_workers) { }
2226
2227   // Executes the given task using concurrent marking worker threads.
2228 virtual void execute(ProcessTask& task); 2229 virtual void execute(EnqueueTask& task); 2230 }; 2231 2232 class G1CMRefProcTaskProxy: public AbstractGangTask { 2233 typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask; 2234 ProcessTask& _proc_task; 2235 G1CollectedHeap* _g1h; 2236 ConcurrentMark* _cm; 2237 2238 public: 2239 G1CMRefProcTaskProxy(ProcessTask& proc_task, 2240 G1CollectedHeap* g1h, 2241 ConcurrentMark* cm) : 2242 AbstractGangTask("Process reference objects in parallel"), 2243 _proc_task(proc_task), _g1h(g1h), _cm(cm) { 2244 ReferenceProcessor* rp = _g1h->ref_processor_cm(); 2245 assert(rp->processing_is_mt(), "shouldn't be here otherwise"); 2246 } 2247 2248 virtual void work(uint worker_id) { 2249 CMTask* task = _cm->task(worker_id); 2250 G1CMIsAliveClosure g1_is_alive(_g1h); 2251 G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */); 2252 G1CMDrainMarkingStackClosure g1_par_drain(_cm, task, false /* is_serial */); 2253 2254 _proc_task.work(worker_id, g1_is_alive, g1_par_keep_alive, g1_par_drain); 2255 } 2256 }; 2257 2258 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) { 2259 assert(_workers != NULL, "Need parallel worker threads."); 2260 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2261 2262 G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm); 2263 2264 // We need to reset the concurrency level before each 2265 // proxy task execution, so that the termination protocol 2266 // and overflow handling in CMTask::do_marking_step() knows 2267 // how many workers to wait for. 2268 _cm->set_concurrency(_active_workers); 2269 _g1h->set_par_threads(_active_workers); 2270 _workers->run_task(&proc_task_proxy); 2271 _g1h->set_par_threads(0); 2272 } 2273 2274 class G1CMRefEnqueueTaskProxy: public AbstractGangTask { 2275 typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask; 2276 EnqueueTask& _enq_task; 2277 2278 public: 2279 G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) : 2280 AbstractGangTask("Enqueue reference objects in parallel"), 2281 _enq_task(enq_task) { } 2282 2283 virtual void work(uint worker_id) { 2284 _enq_task.work(worker_id); 2285 } 2286 }; 2287 2288 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) { 2289 assert(_workers != NULL, "Need parallel worker threads."); 2290 assert(_g1h->ref_processor_cm()->processing_is_mt(), "processing is not MT"); 2291 2292 G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task); 2293 2294 // Not strictly necessary but... 2295 // 2296 // We need to reset the concurrency level before each 2297 // proxy task execution, so that the termination protocol 2298 // and overflow handling in CMTask::do_marking_step() knows 2299 // how many workers to wait for. 2300 _cm->set_concurrency(_active_workers); 2301 _g1h->set_par_threads(_active_workers); 2302 _workers->run_task(&enq_task_proxy); 2303 _g1h->set_par_threads(0); 2304 } 2305 2306 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) { 2307 if (has_overflown()) { 2308 // Skip processing the discovered references if we have 2309 // overflown the global marking stack. Reference objects 2310 // only get discovered once so it is OK to not 2311 // de-populate the discovered reference lists. We could have, 2312 // but the only benefit would be that, when marking restarts, 2313 // less reference objects are discovered. 2314 return; 2315 } 2316 2317 ResourceMark rm; 2318 HandleMark hm; 2319 2320 G1CollectedHeap* g1h = G1CollectedHeap::heap(); 2321 2322 // Is alive closure. 
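  // [Editorial note.] Per G1CMIsAliveClosure::do_object_b() above, an
  // object counts as alive here if it lies outside the G1 reserved heap
  // or is not "ill" with respect to the current marking - roughly, it
  // has been marked or was allocated since marking started. Referents
  // that fail this test are the ones eligible for clearing/enqueueing.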
2323   G1CMIsAliveClosure g1_is_alive(g1h);
2324
2325   // Inner scope to exclude the cleaning of the string and symbol
2326   // tables from the displayed time.
2327   {
2328     if (G1Log::finer()) {
2329       gclog_or_tty->put(' ');
2330     }
2331     GCTraceTime t("GC ref-proc", G1Log::finer(), false, g1h->gc_timer_cm());
2332
2333     ReferenceProcessor* rp = g1h->ref_processor_cm();
2334
2335     // See the comment in G1CollectedHeap::ref_processing_init()
2336     // about how reference processing currently works in G1.
2337
2338     // Set the soft reference policy
2339     rp->setup_policy(clear_all_soft_refs);
2340     assert(_markStack.isEmpty(), "mark stack should be empty");
2341
2342     // Instances of the 'Keep Alive' and 'Complete GC' closures used
2343     // in serial reference processing. Note these closures are also
2344     // used for serially processing (by the current thread) the
2345     // JNI references during parallel reference processing.
2346     //
2347     // These closures do not need to synchronize with the worker
2348     // threads involved in parallel reference processing as these
2349     // instances are executed serially by the current thread (e.g.
2350     // reference processing is not multi-threaded and is thus
2351     // performed by the current thread instead of a gang worker).
2352     //
2353     // The gang tasks involved in parallel reference processing create
2354     // their own instances of these closures, which do their own
2355     // synchronization among themselves.
2356     G1CMKeepAliveAndDrainClosure g1_keep_alive(this, task(0), true /* is_serial */);
2357     G1CMDrainMarkingStackClosure g1_drain_mark_stack(this, task(0), true /* is_serial */);
2358
2359     // We need at least one active thread. If reference processing
2360     // is not multi-threaded we use the current (VMThread) thread,
2361     // otherwise we use the work gang from the G1CollectedHeap and
2362     // we utilize all the worker threads we can.
2363     bool processing_is_mt = rp->processing_is_mt() && g1h->workers() != NULL;
2364     uint active_workers = (processing_is_mt ? g1h->workers()->active_workers() : 1U);
2365     active_workers = MAX2(MIN2(active_workers, _max_task_num), 1U);
2366
2367     // Parallel processing task executor.
2368     G1CMRefProcTaskExecutor par_task_executor(g1h, this,
2369                                               g1h->workers(), active_workers);
2370     AbstractRefProcTaskExecutor* executor = (processing_is_mt ? &par_task_executor : NULL);
2371
2372     ReferenceProcessorStats stats;
2373
2374     // Set the concurrency level. The phase was already set prior to
2375     // executing the remark task.
2376     set_concurrency(active_workers);
2377
2378     // Set the degree of MT processing here. If the discovery was done MT,
2379     // the number of threads involved during discovery could differ from
2380     // the number of active workers. This is OK as long as the discovered
2381     // Reference lists are balanced (see balance_all_queues() and balance_queues()).
2382     rp->set_active_mt_degree(active_workers);
2383
2384     // Process the weak references.
2385     stats = rp->process_discovered_references(&g1_is_alive,
2386                                               &g1_keep_alive,
2387                                               &g1_drain_mark_stack,
2388                                               executor,
2389                                               g1h->gc_timer_cm());
2390
2391     // The do_oop work routines of the keep_alive and drain_marking_stack
2392     // oop closures will set the has_overflown flag if we overflow the
2393     // global marking stack.
2394
2395     g1h->gc_tracer_cm()->report_gc_reference_stats(stats);
2396
2397     assert(_markStack.overflow() || _markStack.isEmpty(),
2398            "mark stack should be empty (unless it overflowed)");
2399
2400     if (_markStack.overflow()) {
2401       // This should have been done already when we tried to push an
2402       // entry on to the global mark stack. But let's do it again.
2403       set_has_overflown();
2404     }
2405
2406     assert(rp->num_q() == active_workers, "why not");
2407
2408     rp->enqueue_discovered_references(executor);
2409
2410     rp->verify_no_references_recorded();
2411     assert(!rp->discovery_enabled(), "Post condition");
2412   }
2413
2414   // Now clean up stale oops in StringTable
2415   StringTable::unlink(&g1_is_alive);
2416   // Clean up unreferenced symbols in symbol table.
2417   SymbolTable::unlink();
2418 }
2419
2420 void ConcurrentMark::swapMarkBitMaps() {
2421   CMBitMapRO* temp = _prevMarkBitMap;
2422   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2423   _nextMarkBitMap  = (CMBitMap*)  temp;
2424 }
2425
2426 class CMRemarkTask: public AbstractGangTask {
2427  private:
2428   ConcurrentMark* _cm;
2429   bool            _is_serial;
2430  public:
2431   void work(uint worker_id) {
2432     // Since all available tasks are actually started, we should
2433     // only proceed if we're supposed to be active.
2434     if (worker_id < _cm->active_tasks()) {
2435       CMTask* task = _cm->task(worker_id);
2436       task->record_start_time();
2437       do {
2438         task->do_marking_step(1000000000.0 /* something very large */,
2439                               true         /* do_termination       */,
2440                               _is_serial);
2441       } while (task->has_aborted() && !_cm->has_overflown());
2442       // If we overflow, then we do not want to restart. We instead
2443       // want to abort remark and do concurrent marking again.
2444       task->record_end_time();
2445     }
2446   }
2447
2448   CMRemarkTask(ConcurrentMark* cm, int active_workers, bool is_serial) :
2449     AbstractGangTask("Par Remark"), _cm(cm), _is_serial(is_serial) {
2450     _cm->terminator()->reset_for_reuse(active_workers);
2451   }
2452 };
2453
2454 void ConcurrentMark::checkpointRootsFinalWork() {
2455   ResourceMark rm;
2456   HandleMark   hm;
2457   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2458
2459   g1h->ensure_parsability(false);
2460
2461   if (G1CollectedHeap::use_parallel_gc_threads()) {
2462     G1CollectedHeap::StrongRootsScope srs(g1h);
2463     // this is remark, so we'll use up all active threads
2464     uint active_workers = g1h->workers()->active_workers();
2465     if (active_workers == 0) {
2466       assert(active_workers > 0, "Should have been set earlier");
2467       active_workers = (uint) ParallelGCThreads;
2468       g1h->workers()->set_active_workers(active_workers);
2469     }
2470     set_concurrency_and_phase(active_workers, false /* concurrent */);
2471     // Leave _parallel_marking_threads at its
2472     // value originally calculated in the ConcurrentMark
2473     // constructor and pass values of the active workers
2474     // through the gang in the task.
2475
2476     CMRemarkTask remarkTask(this, active_workers, false /* is_serial */);
2477     // We will start all available threads, even if we decide that the
2478     // active_workers will be fewer. The extra ones will just bail out
2479     // immediately.
2480     g1h->set_par_threads(active_workers);
2481     g1h->workers()->run_task(&remarkTask);
2482     g1h->set_par_threads(0);
2483   } else {
2484     G1CollectedHeap::StrongRootsScope srs(g1h);
2485     uint active_workers = 1;
2486     set_concurrency_and_phase(active_workers, false /* concurrent */);
2487
2488     // Note - if there's no work gang then the VMThread will be
2489     // the thread to execute the remark - serially.
We have 2490 // to pass true for the is_serial parameter so that 2491 // CMTask::do_marking_step() doesn't enter the sync 2492 // barriers in the event of an overflow. Doing so will 2493 // cause an assert that the current thread is not a 2494 // concurrent GC thread. 2495 CMRemarkTask remarkTask(this, active_workers, true /* is_serial*/); 2496 remarkTask.work(0); 2497 } 2498 SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set(); 2499 guarantee(has_overflown() || 2500 satb_mq_set.completed_buffers_num() == 0, 2501 err_msg("Invariant: has_overflown = %s, num buffers = %d", 2502 BOOL_TO_STR(has_overflown()), 2503 satb_mq_set.completed_buffers_num())); 2504 2505 print_stats(); 2506 2507 #if VERIFY_OBJS_PROCESSED 2508 if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) { 2509 gclog_or_tty->print_cr("Processed = %d, enqueued = %d.", 2510 _scan_obj_cl.objs_processed, 2511 ThreadLocalObjQueue::objs_enqueued); 2512 guarantee(_scan_obj_cl.objs_processed == 2513 ThreadLocalObjQueue::objs_enqueued, 2514 "Different number of objs processed and enqueued."); 2515 } 2516 #endif 2517 } 2518 2519 #ifndef PRODUCT 2520 2521 class PrintReachableOopClosure: public OopClosure { 2522 private: 2523 G1CollectedHeap* _g1h; 2524 outputStream* _out; 2525 VerifyOption _vo; 2526 bool _all; 2527 2528 public: 2529 PrintReachableOopClosure(outputStream* out, 2530 VerifyOption vo, 2531 bool all) : 2532 _g1h(G1CollectedHeap::heap()), 2533 _out(out), _vo(vo), _all(all) { } 2534 2535 void do_oop(narrowOop* p) { do_oop_work(p); } 2536 void do_oop( oop* p) { do_oop_work(p); } 2537 2538 template <class T> void do_oop_work(T* p) { 2539 oop obj = oopDesc::load_decode_heap_oop(p); 2540 const char* str = NULL; 2541 const char* str2 = ""; 2542 2543 if (obj == NULL) { 2544 str = ""; 2545 } else if (!_g1h->is_in_g1_reserved(obj)) { 2546 str = " O"; 2547 } else { 2548 HeapRegion* hr = _g1h->heap_region_containing(obj); 2549 guarantee(hr != NULL, "invariant"); 2550 bool over_tams = _g1h->allocated_since_marking(obj, hr, _vo); 2551 bool marked = _g1h->is_marked(obj, _vo); 2552 2553 if (over_tams) { 2554 str = " >"; 2555 if (marked) { 2556 str2 = " AND MARKED"; 2557 } 2558 } else if (marked) { 2559 str = " M"; 2560 } else { 2561 str = " NOT"; 2562 } 2563 } 2564 2565 _out->print_cr(" "PTR_FORMAT": "PTR_FORMAT"%s%s", 2566 p, (void*) obj, str, str2); 2567 } 2568 }; 2569 2570 class PrintReachableObjectClosure : public ObjectClosure { 2571 private: 2572 G1CollectedHeap* _g1h; 2573 outputStream* _out; 2574 VerifyOption _vo; 2575 bool _all; 2576 HeapRegion* _hr; 2577 2578 public: 2579 PrintReachableObjectClosure(outputStream* out, 2580 VerifyOption vo, 2581 bool all, 2582 HeapRegion* hr) : 2583 _g1h(G1CollectedHeap::heap()), 2584 _out(out), _vo(vo), _all(all), _hr(hr) { } 2585 2586 void do_object(oop o) { 2587 bool over_tams = _g1h->allocated_since_marking(o, _hr, _vo); 2588 bool marked = _g1h->is_marked(o, _vo); 2589 bool print_it = _all || over_tams || marked; 2590 2591 if (print_it) { 2592 _out->print_cr(" "PTR_FORMAT"%s", 2593 o, (over_tams) ? " >" : (marked) ? 
" M" : ""); 2594 PrintReachableOopClosure oopCl(_out, _vo, _all); 2595 o->oop_iterate(&oopCl); 2596 } 2597 } 2598 }; 2599 2600 class PrintReachableRegionClosure : public HeapRegionClosure { 2601 private: 2602 G1CollectedHeap* _g1h; 2603 outputStream* _out; 2604 VerifyOption _vo; 2605 bool _all; 2606 2607 public: 2608 bool doHeapRegion(HeapRegion* hr) { 2609 HeapWord* b = hr->bottom(); 2610 HeapWord* e = hr->end(); 2611 HeapWord* t = hr->top(); 2612 HeapWord* p = _g1h->top_at_mark_start(hr, _vo); 2613 _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" " 2614 "TAMS: "PTR_FORMAT, b, e, t, p); 2615 _out->cr(); 2616 2617 HeapWord* from = b; 2618 HeapWord* to = t; 2619 2620 if (to > from) { 2621 _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to); 2622 _out->cr(); 2623 PrintReachableObjectClosure ocl(_out, _vo, _all, hr); 2624 hr->object_iterate_mem_careful(MemRegion(from, to), &ocl); 2625 _out->cr(); 2626 } 2627 2628 return false; 2629 } 2630 2631 PrintReachableRegionClosure(outputStream* out, 2632 VerifyOption vo, 2633 bool all) : 2634 _g1h(G1CollectedHeap::heap()), _out(out), _vo(vo), _all(all) { } 2635 }; 2636 2637 void ConcurrentMark::print_reachable(const char* str, 2638 VerifyOption vo, 2639 bool all) { 2640 gclog_or_tty->cr(); 2641 gclog_or_tty->print_cr("== Doing heap dump... "); 2642 2643 if (G1PrintReachableBaseFile == NULL) { 2644 gclog_or_tty->print_cr(" #### error: no base file defined"); 2645 return; 2646 } 2647 2648 if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) > 2649 (JVM_MAXPATHLEN - 1)) { 2650 gclog_or_tty->print_cr(" #### error: file name too long"); 2651 return; 2652 } 2653 2654 char file_name[JVM_MAXPATHLEN]; 2655 sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str); 2656 gclog_or_tty->print_cr(" dumping to file %s", file_name); 2657 2658 fileStream fout(file_name); 2659 if (!fout.is_open()) { 2660 gclog_or_tty->print_cr(" #### error: could not open file"); 2661 return; 2662 } 2663 2664 outputStream* out = &fout; 2665 out->print_cr("-- USING %s", _g1h->top_at_mark_start_str(vo)); 2666 out->cr(); 2667 2668 out->print_cr("--- ITERATING OVER REGIONS"); 2669 out->cr(); 2670 PrintReachableRegionClosure rcl(out, vo, all); 2671 _g1h->heap_region_iterate(&rcl); 2672 out->cr(); 2673 2674 gclog_or_tty->print_cr(" done"); 2675 gclog_or_tty->flush(); 2676 } 2677 2678 #endif // PRODUCT 2679 2680 void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) { 2681 // Note we are overriding the read-only view of the prev map here, via 2682 // the cast. 2683 ((CMBitMap*)_prevMarkBitMap)->clearRange(mr); 2684 } 2685 2686 void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) { 2687 _nextMarkBitMap->clearRange(mr); 2688 } 2689 2690 void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) { 2691 clearRangePrevBitmap(mr); 2692 clearRangeNextBitmap(mr); 2693 } 2694 2695 HeapRegion* 2696 ConcurrentMark::claim_region(int task_num) { 2697 // "checkpoint" the finger 2698 HeapWord* finger = _finger; 2699 2700 // _heap_end will not change underneath our feet; it only changes at 2701 // yield points. 2702 while (finger < _heap_end) { 2703 assert(_g1h->is_in_g1_reserved(finger), "invariant"); 2704 2705 // Note on how this code handles humongous regions. In the 2706 // normal case the finger will reach the start of a "starts 2707 // humongous" (SH) region. Its end will either be the end of the 2708 // last "continues humongous" (CH) region in the sequence, or the 2709 // standard end of the SH region (if the SH is the only region in 2710 // the sequence). 
That way claim_region() will skip over the CH
2711     // regions. However, there is a subtle race between a CM thread
2712     // executing this method and a mutator thread doing a humongous
2713     // object allocation. The two are not mutually exclusive as the CM
2714     // thread does not need to hold the Heap_lock when it gets
2715     // here. So there is a chance that claim_region() will come across
2716     // a free region that's in the process of becoming a SH or a CH
2717     // region. In the former case, it will either
2718     //   a) Miss the update to the region's end, in which case it will
2719     //      visit every subsequent CH region, will find their bitmaps
2720     //      empty, and do nothing, or
2721     //   b) Will observe the update of the region's end (in which case
2722     //      it will skip the subsequent CH regions).
2723     // If it comes across a region that suddenly becomes CH, the
2724     // scenario will be similar to b). So, the race between
2725     // claim_region() and a humongous object allocation might force us
2726     // to do a bit of unnecessary work (due to some unnecessary bitmap
2727     // iterations) but it should not introduce any correctness issues.
2728     HeapRegion* curr_region = _g1h->heap_region_containing_raw(finger);
2729     HeapWord*   bottom      = curr_region->bottom();
2730     HeapWord*   end         = curr_region->end();
2731     HeapWord*   limit       = curr_region->next_top_at_mark_start();
2732
2733     if (verbose_low()) {
2734       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2735                              "["PTR_FORMAT", "PTR_FORMAT"), "
2736                              "limit = "PTR_FORMAT,
2737                              task_num, curr_region, bottom, end, limit);
2738     }
2739
2740     // Is the gap between reading the finger and doing the CAS too long?
2741     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2742     if (res == finger) {
2743       // we succeeded
2744
2745       // notice that _finger == end cannot be guaranteed here since
2746       // someone else might have moved the finger even further
2747       assert(_finger >= end, "the finger should have moved forward");
2748
2749       if (verbose_low()) {
2750         gclog_or_tty->print_cr("[%d] we were successful with region = "
2751                                PTR_FORMAT, task_num, curr_region);
2752       }
2753
2754       if (limit > bottom) {
2755         if (verbose_low()) {
2756           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2757                                  "returning it ", task_num, curr_region);
2758         }
2759         return curr_region;
2760       } else {
2761         assert(limit == bottom,
2762                "the region limit should be at bottom");
2763         if (verbose_low()) {
2764           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2765                                  "returning NULL", task_num, curr_region);
2766         }
2767         // we return NULL and the caller should try calling
2768         // claim_region() again.
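        // [Editorial note - illustrative caller pattern, not a quote of
        // the actual CMTask code.] A marking task would typically retry
        // along these lines:
        //
        //   HeapRegion* hr = _cm->claim_region(task_id);
        //   while (hr == NULL && _cm->finger() < heap_end) {
        //     hr = _cm->claim_region(task_id); // empty region or lost CAS
        //   }
        //
        // so a NULL result is not an error; it just means "nothing was
        // claimed this time around".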
2769 return NULL; 2770 } 2771 } else { 2772 assert(_finger > finger, "the finger should have moved forward"); 2773 if (verbose_low()) { 2774 gclog_or_tty->print_cr("[%d] somebody else moved the finger, " 2775 "global finger = "PTR_FORMAT", " 2776 "our finger = "PTR_FORMAT, 2777 task_num, _finger, finger); 2778 } 2779 2780 // read it again 2781 finger = _finger; 2782 } 2783 } 2784 2785 return NULL; 2786 } 2787 2788 #ifndef PRODUCT 2789 enum VerifyNoCSetOopsPhase { 2790 VerifyNoCSetOopsStack, 2791 VerifyNoCSetOopsQueues, 2792 VerifyNoCSetOopsSATBCompleted, 2793 VerifyNoCSetOopsSATBThread 2794 }; 2795 2796 class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure { 2797 private: 2798 G1CollectedHeap* _g1h; 2799 VerifyNoCSetOopsPhase _phase; 2800 int _info; 2801 2802 const char* phase_str() { 2803 switch (_phase) { 2804 case VerifyNoCSetOopsStack: return "Stack"; 2805 case VerifyNoCSetOopsQueues: return "Queue"; 2806 case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers"; 2807 case VerifyNoCSetOopsSATBThread: return "Thread SATB Buffers"; 2808 default: ShouldNotReachHere(); 2809 } 2810 return NULL; 2811 } 2812 2813 void do_object_work(oop obj) { 2814 guarantee(!_g1h->obj_in_cs(obj), 2815 err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d", 2816 (void*) obj, phase_str(), _info)); 2817 } 2818 2819 public: 2820 VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { } 2821 2822 void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) { 2823 _phase = phase; 2824 _info = info; 2825 } 2826 2827 virtual void do_oop(oop* p) { 2828 oop obj = oopDesc::load_decode_heap_oop(p); 2829 do_object_work(obj); 2830 } 2831 2832 virtual void do_oop(narrowOop* p) { 2833 // We should not come across narrow oops while scanning marking 2834 // stacks and SATB buffers. 2835 ShouldNotReachHere(); 2836 } 2837 2838 virtual void do_object(oop obj) { 2839 do_object_work(obj); 2840 } 2841 }; 2842 2843 void ConcurrentMark::verify_no_cset_oops(bool verify_stacks, 2844 bool verify_enqueued_buffers, 2845 bool verify_thread_buffers, 2846 bool verify_fingers) { 2847 assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint"); 2848 if (!G1CollectedHeap::heap()->mark_in_progress()) { 2849 return; 2850 } 2851 2852 VerifyNoCSetOopsClosure cl; 2853 2854 if (verify_stacks) { 2855 // Verify entries on the global mark stack 2856 cl.set_phase(VerifyNoCSetOopsStack); 2857 _markStack.oops_do(&cl); 2858 2859 // Verify entries on the task queues 2860 for (int i = 0; i < (int) _max_task_num; i += 1) { 2861 cl.set_phase(VerifyNoCSetOopsQueues, i); 2862 OopTaskQueue* queue = _task_queues->queue(i); 2863 queue->oops_do(&cl); 2864 } 2865 } 2866 2867 SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set(); 2868 2869 // Verify entries on the enqueued SATB buffers 2870 if (verify_enqueued_buffers) { 2871 cl.set_phase(VerifyNoCSetOopsSATBCompleted); 2872 satb_qs.iterate_completed_buffers_read_only(&cl); 2873 } 2874 2875 // Verify entries on the per-thread SATB buffers 2876 if (verify_thread_buffers) { 2877 cl.set_phase(VerifyNoCSetOopsSATBThread); 2878 satb_qs.iterate_thread_buffers_read_only(&cl); 2879 } 2880 2881 if (verify_fingers) { 2882 // Verify the global finger 2883 HeapWord* global_finger = finger(); 2884 if (global_finger != NULL && global_finger < _heap_end) { 2885 // The global finger always points to a heap region boundary. 
We 2886 // use heap_region_containing_raw() to get the containing region 2887 // given that the global finger could be pointing to a free region 2888 // which subsequently becomes continues humongous. If that 2889 // happens, heap_region_containing() will return the bottom of the 2890 // corresponding starts humongous region and the check below will 2891 // not hold any more. 2892 HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger); 2893 guarantee(global_finger == global_hr->bottom(), 2894 err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT, 2895 global_finger, HR_FORMAT_PARAMS(global_hr))); 2896 } 2897 2898 // Verify the task fingers 2899 assert(parallel_marking_threads() <= _max_task_num, "sanity"); 2900 for (int i = 0; i < (int) parallel_marking_threads(); i += 1) { 2901 CMTask* task = _tasks[i]; 2902 HeapWord* task_finger = task->finger(); 2903 if (task_finger != NULL && task_finger < _heap_end) { 2904 // See above note on the global finger verification. 2905 HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger); 2906 guarantee(task_finger == task_hr->bottom() || 2907 !task_hr->in_collection_set(), 2908 err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT, 2909 task_finger, HR_FORMAT_PARAMS(task_hr))); 2910 } 2911 } 2912 } 2913 } 2914 #endif // PRODUCT 2915 2916 // Aggregate the counting data that was constructed concurrently 2917 // with marking. 2918 class AggregateCountDataHRClosure: public HeapRegionClosure { 2919 G1CollectedHeap* _g1h; 2920 ConcurrentMark* _cm; 2921 CardTableModRefBS* _ct_bs; 2922 BitMap* _cm_card_bm; 2923 size_t _max_task_num; 2924 2925 public: 2926 AggregateCountDataHRClosure(G1CollectedHeap* g1h, 2927 BitMap* cm_card_bm, 2928 size_t max_task_num) : 2929 _g1h(g1h), _cm(g1h->concurrent_mark()), 2930 _ct_bs((CardTableModRefBS*) (g1h->barrier_set())), 2931 _cm_card_bm(cm_card_bm), _max_task_num(max_task_num) { } 2932 2933 bool doHeapRegion(HeapRegion* hr) { 2934 if (hr->continuesHumongous()) { 2935 // We will ignore these here and process them when their 2936 // associated "starts humongous" region is processed. 2937 // Note that we cannot rely on their associated 2938 // "starts humongous" region to have their bit set to 1 2939 // since, due to the region chunking in the parallel region 2940 // iteration, a "continues humongous" region might be visited 2941 // before its associated "starts humongous". 2942 return false; 2943 } 2944 2945 HeapWord* start = hr->bottom(); 2946 HeapWord* limit = hr->next_top_at_mark_start(); 2947 HeapWord* end = hr->end(); 2948 2949 assert(start <= limit && limit <= hr->top() && hr->top() <= hr->end(), 2950 err_msg("Preconditions not met - " 2951 "start: "PTR_FORMAT", limit: "PTR_FORMAT", " 2952 "top: "PTR_FORMAT", end: "PTR_FORMAT, 2953 start, limit, hr->top(), hr->end())); 2954 2955 assert(hr->next_marked_bytes() == 0, "Precondition"); 2956 2957 if (start == limit) { 2958 // NTAMS of this region has not been set so nothing to do. 2959 return false; 2960 } 2961 2962 // 'start' should be in the heap. 
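    // [Editorial note.] The alignment asserts below rely on heap regions
    // being card aligned: the region size is a power of two and a
    // multiple of the 512-byte card size, so hr->bottom() - and
    // hr->end(), while it is still inside the reserved heap - always
    // fall on card boundaries. Only 'limit' (ntams) can land mid-card,
    // which is why limit_idx alone may need the +1 adjustment below.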
2963     assert(_g1h->is_in_g1_reserved(start) && _ct_bs->is_card_aligned(start), "sanity");
2964     // 'end' *may* be just beyond the end of the heap (if hr is the last region)
2965     assert(!_g1h->is_in_g1_reserved(end) || _ct_bs->is_card_aligned(end), "sanity");
2966
2967     BitMap::idx_t start_idx = _cm->card_bitmap_index_for(start);
2968     BitMap::idx_t limit_idx = _cm->card_bitmap_index_for(limit);
2969     BitMap::idx_t end_idx   = _cm->card_bitmap_index_for(end);
2970
2971     // If ntams is not card aligned then we bump the card bitmap index
2972     // for limit so that we get all the cards spanned by
2973     // the object ending at ntams.
2974     // Note: if this is the last region in the heap then ntams
2975     // could be actually just beyond the end of the heap;
2976     // limit_idx will then correspond to a (non-existent) card
2977     // that is also outside the heap.
2978     if (_g1h->is_in_g1_reserved(limit) && !_ct_bs->is_card_aligned(limit)) {
2979       limit_idx += 1;
2980     }
2981
2982     assert(limit_idx <= end_idx, "or else use atomics");
2983
2984     // Aggregate the "stripe" in the count data associated with hr.
2985     uint hrs_index = hr->hrs_index();
2986     size_t marked_bytes = 0;
2987
2988     for (int i = 0; (size_t)i < _max_task_num; i += 1) {
2989       size_t* marked_bytes_array = _cm->count_marked_bytes_array_for(i);
2990       BitMap* task_card_bm = _cm->count_card_bitmap_for(i);
2991
2992       // Fetch the marked_bytes in this region for task i and
2993       // add it to the running total for this region.
2994       marked_bytes += marked_bytes_array[hrs_index];
2995
2996       // Now union the bitmaps[0,max_task_num)[start_idx..limit_idx)
2997       // into the global card bitmap.
2998       BitMap::idx_t scan_idx = task_card_bm->get_next_one_offset(start_idx, limit_idx);
2999
3000       while (scan_idx < limit_idx) {
3001         assert(task_card_bm->at(scan_idx) == true, "should be");
3002         _cm_card_bm->set_bit(scan_idx);
3003         assert(_cm_card_bm->at(scan_idx) == true, "should be");
3004
3005         // BitMap::get_next_one_offset() can handle the case when
3006         // its left_offset parameter is greater than its right_offset
3007         // parameter. It does, however, have an early exit if
3008         // left_offset == right_offset. So let's limit the value
3009         // passed in for left offset here.
3010         BitMap::idx_t next_idx = MIN2(scan_idx + 1, limit_idx);
3011         scan_idx = task_card_bm->get_next_one_offset(next_idx, limit_idx);
3012       }
3013     }
3014
3015     // Update the marked bytes for this region.
3016 hr->add_to_marked_bytes(marked_bytes); 3017 3018 // Next heap region 3019 return false; 3020 } 3021 }; 3022 3023 class G1AggregateCountDataTask: public AbstractGangTask { 3024 protected: 3025 G1CollectedHeap* _g1h; 3026 ConcurrentMark* _cm; 3027 BitMap* _cm_card_bm; 3028 size_t _max_task_num; 3029 int _active_workers; 3030 3031 public: 3032 G1AggregateCountDataTask(G1CollectedHeap* g1h, 3033 ConcurrentMark* cm, 3034 BitMap* cm_card_bm, 3035 size_t max_task_num, 3036 int n_workers) : 3037 AbstractGangTask("Count Aggregation"), 3038 _g1h(g1h), _cm(cm), _cm_card_bm(cm_card_bm), 3039 _max_task_num(max_task_num), 3040 _active_workers(n_workers) { } 3041 3042 void work(uint worker_id) { 3043 AggregateCountDataHRClosure cl(_g1h, _cm_card_bm, _max_task_num); 3044 3045 if (G1CollectedHeap::use_parallel_gc_threads()) { 3046 _g1h->heap_region_par_iterate_chunked(&cl, worker_id, 3047 _active_workers, 3048 HeapRegion::AggregateCountClaimValue); 3049 } else { 3050 _g1h->heap_region_iterate(&cl); 3051 } 3052 } 3053 }; 3054 3055 3056 void ConcurrentMark::aggregate_count_data() { 3057 int n_workers = (G1CollectedHeap::use_parallel_gc_threads() ? 3058 _g1h->workers()->active_workers() : 3059 1); 3060 3061 G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm, 3062 _max_task_num, n_workers); 3063 3064 if (G1CollectedHeap::use_parallel_gc_threads()) { 3065 assert(_g1h->check_heap_region_claim_values(HeapRegion::InitialClaimValue), 3066 "sanity check"); 3067 _g1h->set_par_threads(n_workers); 3068 _g1h->workers()->run_task(&g1_par_agg_task); 3069 _g1h->set_par_threads(0); 3070 3071 assert(_g1h->check_heap_region_claim_values(HeapRegion::AggregateCountClaimValue), 3072 "sanity check"); 3073 _g1h->reset_heap_region_claim_values(); 3074 } else { 3075 g1_par_agg_task.work(0); 3076 } 3077 } 3078 3079 // Clear the per-worker arrays used to store the per-region counting data 3080 void ConcurrentMark::clear_all_count_data() { 3081 // Clear the global card bitmap - it will be filled during 3082 // liveness count aggregation (during remark) and the 3083 // final counting task. 3084 _card_bm.clear(); 3085 3086 // Clear the global region bitmap - it will be filled as part 3087 // of the final counting task. 
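  // [Editorial note.] The counting data being cleared here is, per
  // marking task, a pair of (card bitmap, marked-bytes-per-region
  // array); the loop below zeroes one such pair for each of the
  // _max_task_num tasks, while the two global bitmaps (_card_bm and
  // _region_bm) are only refilled during aggregation and the final
  // counting task.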
3088   _region_bm.clear();
3089
3090   uint max_regions = _g1h->max_regions();
3091   assert(_max_task_num != 0, "uninitialized");
3092
3093   for (int i = 0; (size_t) i < _max_task_num; i += 1) {
3094     BitMap* task_card_bm = count_card_bitmap_for(i);
3095     size_t* marked_bytes_array = count_marked_bytes_array_for(i);
3096
3097     assert(task_card_bm->size() == _card_bm.size(), "size mismatch");
3098     assert(marked_bytes_array != NULL, "uninitialized");
3099
3100     memset(marked_bytes_array, 0, (size_t) max_regions * sizeof(size_t));
3101     task_card_bm->clear();
3102   }
3103 }
3104
3105 void ConcurrentMark::print_stats() {
3106   if (verbose_stats()) {
3107     gclog_or_tty->print_cr("---------------------------------------------------------------------");
3108     for (size_t i = 0; i < _active_tasks; ++i) {
3109       _tasks[i]->print_stats();
3110       gclog_or_tty->print_cr("---------------------------------------------------------------------");
3111     }
3112   }
3113 }
3114
3115 // abandon current marking iteration due to a Full GC
3116 void ConcurrentMark::abort() {
3117   // Clear all marks to force marking thread to do nothing
3118   _nextMarkBitMap->clearAll();
3119   // Clear the liveness counting data
3120   clear_all_count_data();
3121   // Empty mark stack
3122   reset_marking_state();
3123   for (int i = 0; i < (int)_max_task_num; ++i) {
3124     _tasks[i]->clear_region_fields();
3125   }
3126   _has_aborted = true;
3127
3128   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3129   satb_mq_set.abandon_partial_marking();
3130   // This can be called either during or outside marking; we'll read
3131   // the expected_active value from the SATB queue set.
3132   satb_mq_set.set_active_all_threads(
3133                                  false, /* new active value */
3134                                  satb_mq_set.is_active() /* expected_active */);
3135
3136   _g1h->trace_heap_after_concurrent_cycle();
3137   _g1h->register_concurrent_cycle_end();
3138 }
3139
3140 static void print_ms_time_info(const char* prefix, const char* name,
3141                                NumberSeq& ns) {
3142   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3143                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3144   if (ns.num() > 0) {
3145     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3146                            prefix, ns.sd(), ns.maximum());
3147   }
3148 }
3149
3150 void ConcurrentMark::print_summary_info() {
3151   gclog_or_tty->print_cr(" Concurrent marking:");
3152   print_ms_time_info("  ", "init marks", _init_times);
3153   print_ms_time_info("  ", "remarks", _remark_times);
3154   {
3155     print_ms_time_info("     ", "final marks", _remark_mark_times);
3156     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3157
3158   }
3159   print_ms_time_info("  ", "cleanups", _cleanup_times);
3160   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3161                          _total_counting_time,
3162                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3163                           (double)_cleanup_times.num()
3164                          : 0.0));
3165   if (G1ScrubRemSets) {
3166     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3167                            _total_rs_scrub_time,
3168                            (_cleanup_times.num() > 0 ?
_total_rs_scrub_time * 1000.0 / 3169 (double)_cleanup_times.num() 3170 : 0.0)); 3171 } 3172 gclog_or_tty->print_cr(" Total stop_world time = %8.2f s.", 3173 (_init_times.sum() + _remark_times.sum() + 3174 _cleanup_times.sum())/1000.0); 3175 gclog_or_tty->print_cr(" Total concurrent time = %8.2f s " 3176 "(%8.2f s marking).", 3177 cmThread()->vtime_accum(), 3178 cmThread()->vtime_mark_accum()); 3179 } 3180 3181 void ConcurrentMark::print_worker_threads_on(outputStream* st) const { 3182 if (use_parallel_marking_threads()) { 3183 _parallel_workers->print_worker_threads_on(st); 3184 } 3185 } 3186 3187 // We take a break if someone is trying to stop the world. 3188 bool ConcurrentMark::do_yield_check(uint worker_id) { 3189 if (should_yield()) { 3190 if (worker_id == 0) { 3191 _g1h->g1_policy()->record_concurrent_pause(); 3192 } 3193 cmThread()->yield(); 3194 return true; 3195 } else { 3196 return false; 3197 } 3198 } 3199 3200 bool ConcurrentMark::should_yield() { 3201 return cmThread()->should_yield(); 3202 } 3203 3204 bool ConcurrentMark::containing_card_is_marked(void* p) { 3205 size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1); 3206 return _card_bm.at(offset >> CardTableModRefBS::card_shift); 3207 } 3208 3209 bool ConcurrentMark::containing_cards_are_marked(void* start, 3210 void* last) { 3211 return containing_card_is_marked(start) && 3212 containing_card_is_marked(last); 3213 } 3214 3215 #ifndef PRODUCT 3216 // for debugging purposes 3217 void ConcurrentMark::print_finger() { 3218 gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT, 3219 _heap_start, _heap_end, _finger); 3220 for (int i = 0; i < (int) _max_task_num; ++i) { 3221 gclog_or_tty->print(" %d: "PTR_FORMAT, i, _tasks[i]->finger()); 3222 } 3223 gclog_or_tty->print_cr(""); 3224 } 3225 #endif 3226 3227 void CMTask::scan_object(oop obj) { 3228 assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant"); 3229 3230 if (_cm->verbose_high()) { 3231 gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT, 3232 _task_id, (void*) obj); 3233 } 3234 3235 size_t obj_size = obj->size(); 3236 _words_scanned += obj_size; 3237 3238 obj->oop_iterate(_cm_oop_closure); 3239 statsOnly( ++_objs_scanned ); 3240 check_limits(); 3241 } 3242 3243 // Closure for iteration over bitmaps 3244 class CMBitMapClosure : public BitMapClosure { 3245 private: 3246 // the bitmap that is being iterated over 3247 CMBitMap* _nextMarkBitMap; 3248 ConcurrentMark* _cm; 3249 CMTask* _task; 3250 3251 public: 3252 CMBitMapClosure(CMTask *task, ConcurrentMark* cm, CMBitMap* nextMarkBitMap) : 3253 _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { } 3254 3255 bool do_bit(size_t offset) { 3256 HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset); 3257 assert(_nextMarkBitMap->isMarked(addr), "invariant"); 3258 assert( addr < _cm->finger(), "invariant"); 3259 3260 statsOnly( _task->increase_objs_found_on_bitmap() ); 3261 assert(addr >= _task->finger(), "invariant"); 3262 3263 // We move that task's local finger along. 3264 _task->move_finger_to(addr); 3265 3266 _task->scan_object(oop(addr)); 3267 // we only partially drain the local queue and global stack 3268 _task->drain_local_queue(true); 3269 _task->drain_global_stack(true); 3270 3271 // if the has_aborted flag has been raised, we need to bail out of 3272 // the iteration 3273 return !_task->has_aborted(); 3274 } 3275 }; 3276 3277 // Closure for iterating over objects, currently only used for 3278 // processing SATB buffers. 
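// [Editorial note.] SATB = snapshot-at-the-beginning. The SATB buffers
// hold the previous values of reference fields that mutators overwrote
// while concurrent marking was running, as recorded by the pre-write
// barrier. Handing each recorded object to CMTask::deal_with_reference()
// below keeps the marking snapshot consistent with the object graph as
// it existed when marking started.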
class CMObjectClosure : public ObjectClosure {
private:
  CMTask* _task;

public:
  void do_object(oop obj) {
    _task->deal_with_reference(obj);
  }

  CMObjectClosure(CMTask* task) : _task(task) { }
};

G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                               ConcurrentMark* cm,
                               CMTask* task)
  : _g1h(g1h), _cm(cm), _task(task) {
  assert(_ref_processor == NULL, "should be initialized to NULL");

  if (G1UseConcMarkReferenceProcessing) {
    _ref_processor = g1h->ref_processor_cm();
    assert(_ref_processor != NULL, "should not be NULL");
  }
}

void CMTask::setup_for_region(HeapRegion* hr) {
  // Separated the asserts so that we know which one fires.
  assert(hr != NULL,
         "claim_region() should have filtered out NULL regions");
  assert(!hr->continuesHumongous(),
         "claim_region() should have filtered out continues humongous regions");

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
                           _task_id, hr);
  }

  _curr_region = hr;
  _finger      = hr->bottom();
  update_region_limit();
}

void CMTask::update_region_limit() {
  HeapRegion* hr   = _curr_region;
  HeapWord* bottom = hr->bottom();
  HeapWord* limit  = hr->next_top_at_mark_start();

  if (limit == bottom) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] found an empty region "
                             "["PTR_FORMAT", "PTR_FORMAT")",
                             _task_id, bottom, limit);
    }
    // The region was collected underneath our feet.
    // We set the finger to bottom to ensure that the bitmap
    // iteration that will follow this will not do anything.
    // (this is not a condition that holds when we set the region up,
    // as the region is not supposed to be empty in the first place)
    _finger = bottom;
  } else if (limit >= _region_limit) {
    assert(limit >= _finger, "peace of mind");
  } else {
    assert(limit < _region_limit, "only way to get here");
    // This can happen under some pretty unusual circumstances. An
    // evacuation pause empties the region underneath our feet (NTAMS
    // at bottom). We then do some allocation in the region (NTAMS
    // stays at bottom), followed by the region being used as a GC
    // alloc region (NTAMS will move to top() and the objects
    // originally below it will be grayed). All objects now marked in
    // the region are explicitly grayed, if below the global finger,
    // so in fact we do not need to scan anything else. We simply
    // set _finger to be limit to ensure that the bitmap iteration
    // doesn't do anything.
    _finger = limit;
  }

  _region_limit = limit;
}

void CMTask::giveup_current_region() {
  assert(_curr_region != NULL, "invariant");
  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
                           _task_id, _curr_region);
  }
  clear_region_fields();
}

void CMTask::clear_region_fields() {
  // Values for these three fields that indicate that we're not
  // holding on to a region.
3369 _curr_region = NULL; 3370 _finger = NULL; 3371 _region_limit = NULL; 3372 } 3373 3374 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) { 3375 if (cm_oop_closure == NULL) { 3376 assert(_cm_oop_closure != NULL, "invariant"); 3377 } else { 3378 assert(_cm_oop_closure == NULL, "invariant"); 3379 } 3380 _cm_oop_closure = cm_oop_closure; 3381 } 3382 3383 void CMTask::reset(CMBitMap* nextMarkBitMap) { 3384 guarantee(nextMarkBitMap != NULL, "invariant"); 3385 3386 if (_cm->verbose_low()) { 3387 gclog_or_tty->print_cr("[%d] resetting", _task_id); 3388 } 3389 3390 _nextMarkBitMap = nextMarkBitMap; 3391 clear_region_fields(); 3392 3393 _calls = 0; 3394 _elapsed_time_ms = 0.0; 3395 _termination_time_ms = 0.0; 3396 _termination_start_time_ms = 0.0; 3397 3398 #if _MARKING_STATS_ 3399 _local_pushes = 0; 3400 _local_pops = 0; 3401 _local_max_size = 0; 3402 _objs_scanned = 0; 3403 _global_pushes = 0; 3404 _global_pops = 0; 3405 _global_max_size = 0; 3406 _global_transfers_to = 0; 3407 _global_transfers_from = 0; 3408 _regions_claimed = 0; 3409 _objs_found_on_bitmap = 0; 3410 _satb_buffers_processed = 0; 3411 _steal_attempts = 0; 3412 _steals = 0; 3413 _aborted = 0; 3414 _aborted_overflow = 0; 3415 _aborted_cm_aborted = 0; 3416 _aborted_yield = 0; 3417 _aborted_timed_out = 0; 3418 _aborted_satb = 0; 3419 _aborted_termination = 0; 3420 #endif // _MARKING_STATS_ 3421 } 3422 3423 bool CMTask::should_exit_termination() { 3424 regular_clock_call(); 3425 // This is called when we are in the termination protocol. We should 3426 // quit if, for some reason, this task wants to abort or the global 3427 // stack is not empty (this means that we can get work from it). 3428 return !_cm->mark_stack_empty() || has_aborted(); 3429 } 3430 3431 void CMTask::reached_limit() { 3432 assert(_words_scanned >= _words_scanned_limit || 3433 _refs_reached >= _refs_reached_limit , 3434 "shouldn't have been called otherwise"); 3435 regular_clock_call(); 3436 } 3437 3438 void CMTask::regular_clock_call() { 3439 if (has_aborted()) return; 3440 3441 // First, we need to recalculate the words scanned and refs reached 3442 // limits for the next clock call. 3443 recalculate_limits(); 3444 3445 // During the regular clock call we do the following 3446 3447 // (1) If an overflow has been flagged, then we abort. 3448 if (_cm->has_overflown()) { 3449 set_has_aborted(); 3450 return; 3451 } 3452 3453 // If we are not concurrent (i.e. we're doing remark) we don't need 3454 // to check anything else. The other steps are only needed during 3455 // the concurrent marking phase. 3456 if (!concurrent()) return; 3457 3458 // (2) If marking has been aborted for Full GC, then we also abort. 3459 if (_cm->has_aborted()) { 3460 set_has_aborted(); 3461 statsOnly( ++_aborted_cm_aborted ); 3462 return; 3463 } 3464 3465 double curr_time_ms = os::elapsedVTime() * 1000.0; 3466 3467 // (3) If marking stats are enabled, then we update the step history. 
#if _MARKING_STATS_
  if (_words_scanned >= _words_scanned_limit) {
    ++_clock_due_to_scanning;
  }
  if (_refs_reached >= _refs_reached_limit) {
    ++_clock_due_to_marking;
  }

  double last_interval_ms = curr_time_ms - _interval_start_time_ms;
  _interval_start_time_ms = curr_time_ms;
  _all_clock_intervals_ms.add(last_interval_ms);

  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
                           "scanned = "SIZE_FORMAT"%s, "
                           "refs reached = "SIZE_FORMAT"%s",
                           _task_id, last_interval_ms,
                           _words_scanned,
                           (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
                           _refs_reached,
                           (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
  }
#endif // _MARKING_STATS_

  // (4) We check whether we should yield. If we have to, then we abort.
  if (_cm->should_yield()) {
    // We should yield. To do this we abort the task. The caller is
    // responsible for yielding.
    set_has_aborted();
    statsOnly( ++_aborted_yield );
    return;
  }

  // (5) We check whether we've reached our time quota. If we have,
  // then we abort.
  double elapsed_time_ms = curr_time_ms - _start_time_ms;
  if (elapsed_time_ms > _time_target_ms) {
    set_has_aborted();
    _has_timed_out = true;
    statsOnly( ++_aborted_timed_out );
    return;
  }

  // (6) Finally, we check whether there are enough completed SATB
  // buffers available for processing. If there are, we abort.
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
                             _task_id);
    }
    // We do need to process SATB buffers, so we'll abort and restart
    // the marking task to do so.
    set_has_aborted();
    statsOnly( ++_aborted_satb );
    return;
  }
}

void CMTask::recalculate_limits() {
  _real_words_scanned_limit = _words_scanned + words_scanned_period;
  _words_scanned_limit      = _real_words_scanned_limit;

  _real_refs_reached_limit  = _refs_reached + refs_reached_period;
  _refs_reached_limit       = _real_refs_reached_limit;
}

void CMTask::decrease_limits() {
  // This is called when we believe that we're going to do an infrequent
  // operation which will increase the per-byte scanned cost (i.e. move
  // entries to/from the global stack). It basically tries to decrease the
  // scanning limit so that the clock is called earlier.
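  // Worked example (illustrative numbers only, not the tuned
  // constants): if words_scanned_period were 12*1024 words and the
  // last recalculate_limits() had set _real_words_scanned_limit to
  // 100*1024, the assignment below would lower _words_scanned_limit
  // to 100*1024 - 9*1024 = 91*1024, so the next regular_clock_call()
  // fires after at most a quarter of the usual scanning period.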
  if (_cm->verbose_medium()) {
    gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
  }

  _words_scanned_limit = _real_words_scanned_limit -
    3 * words_scanned_period / 4;
  _refs_reached_limit  = _real_refs_reached_limit -
    3 * refs_reached_period / 4;
}

void CMTask::move_entries_to_global_stack() {
  // Local array where we'll store the entries that will be popped
  // from the local queue.
  oop buffer[global_stack_transfer_size];

  int n = 0;
  oop obj;
  while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
    buffer[n] = obj;
    ++n;
  }

  if (n > 0) {
    // We popped at least one entry from the local queue.

    statsOnly( ++_global_transfers_to; _local_pops += n );

    if (!_cm->mark_stack_push(buffer, n)) {
      if (_cm->verbose_low()) {
        gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
                               _task_id);
      }
      set_has_aborted();
    } else {
      // The transfer was successful.

      if (_cm->verbose_medium()) {
        gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
                               _task_id, n);
      }
      statsOnly( int tmp_size = _cm->mark_stack_size();
                 if (tmp_size > _global_max_size) {
                   _global_max_size = tmp_size;
                 }
                 _global_pushes += n );
    }
  }

  // This operation was quite expensive, so decrease the limits.
  decrease_limits();
}

void CMTask::get_entries_from_global_stack() {
  // Local array where we'll store the entries that will be popped
  // from the global stack.
  oop buffer[global_stack_transfer_size];
  int n;
  _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
  assert(n <= global_stack_transfer_size,
         "we should not pop more than the given limit");
  if (n > 0) {
    // Yes, we actually popped at least one entry.

    statsOnly( ++_global_transfers_from; _global_pops += n );
    if (_cm->verbose_medium()) {
      gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
                             _task_id, n);
    }
    for (int i = 0; i < n; ++i) {
      bool success = _task_queue->push(buffer[i]);
      // We only call this when the local queue is empty or under a
      // given target limit. So, we do not expect this push to fail.
      assert(success, "invariant");
    }

    statsOnly( int tmp_size = _task_queue->size();
               if (tmp_size > _local_max_size) {
                 _local_max_size = tmp_size;
               }
               _local_pushes += n );
  }

  // This operation was quite expensive, so decrease the limits.
  decrease_limits();
}

void CMTask::drain_local_queue(bool partially) {
  if (has_aborted()) return;

  // Decide what the target size is, depending on whether we're going
  // to drain it partially (so that other tasks can steal if they run
  // out of things to do) or totally (at the very end).
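  // For instance (illustrative values, not necessarily the shipped
  // defaults): with _task_queue->max_elems() == 16384 and
  // GCDrainStackTargetSize == 64, a partial drain stops once the
  // queue is down to MIN2(16384 / 3, 64) = 64 entries, leaving some
  // entries behind for other tasks to steal; a total drain empties
  // the queue completely (target size 0).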
  size_t target_size;
  if (partially) {
    target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
  } else {
    target_size = 0;
  }

  if (_task_queue->size() > target_size) {
    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%d] draining local queue, target size = "SIZE_FORMAT,
                             _task_id, target_size);
    }

    oop obj;
    bool ret = _task_queue->pop_local(obj);
    while (ret) {
      statsOnly( ++_local_pops );

      if (_cm->verbose_high()) {
        gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
                               (void*) obj);
      }

      assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
      assert(!_g1h->is_on_master_free_list(
                  _g1h->heap_region_containing((HeapWord*) obj)), "invariant");

      scan_object(obj);

      if (_task_queue->size() <= target_size || has_aborted()) {
        ret = false;
      } else {
        ret = _task_queue->pop_local(obj);
      }
    }

    if (_cm->verbose_high()) {
      gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
                             _task_id, _task_queue->size());
    }
  }
}

void CMTask::drain_global_stack(bool partially) {
  if (has_aborted()) return;

  // We have a policy to drain the local queue before we attempt to
  // drain the global stack.
  assert(partially || _task_queue->size() == 0, "invariant");

  // Decide what the target size is, depending on whether we're going
  // to drain it partially (so that other tasks can steal if they run
  // out of things to do) or totally (at the very end). Notice that,
  // because we move entries from the global stack in chunks or
  // because another task might be doing the same, we might in fact
  // drop below the target. But, this is not a problem.
  size_t target_size;
  if (partially) {
    target_size = _cm->partial_mark_stack_size_target();
  } else {
    target_size = 0;
  }

  if (_cm->mark_stack_size() > target_size) {
    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] draining global stack, target size "SIZE_FORMAT,
                             _task_id, target_size);
    }

    while (!has_aborted() && _cm->mark_stack_size() > target_size) {
      get_entries_from_global_stack();
      drain_local_queue(partially);
    }

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] drained global stack, size = "SIZE_FORMAT,
                             _task_id, _cm->mark_stack_size());
    }
  }
}

// The SATB queue set has several assumptions on whether to call the par or
// non-par versions of the methods. This is why some of the code is
// replicated. We should really get rid of the single-threaded version
// of the code to simplify things.
void CMTask::drain_satb_buffers() {
  if (has_aborted()) return;

  // We set this so that the regular clock knows that we're in the
  // middle of draining buffers and doesn't set the abort flag when it
  // notices that SATB buffers are available for draining. It'd be
  // very counterproductive if it did that. :-)
  _draining_satb_buffers = true;

  CMObjectClosure oc(this);
  SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  if (G1CollectedHeap::use_parallel_gc_threads()) {
    satb_mq_set.set_par_closure(_task_id, &oc);
  } else {
    satb_mq_set.set_closure(&oc);
  }

  // This keeps claiming and applying the closure to completed buffers
  // until we run out of buffers or we need to abort.
3736 if (G1CollectedHeap::use_parallel_gc_threads()) { 3737 while (!has_aborted() && 3738 satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) { 3739 if (_cm->verbose_medium()) { 3740 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 3741 } 3742 statsOnly( ++_satb_buffers_processed ); 3743 regular_clock_call(); 3744 } 3745 } else { 3746 while (!has_aborted() && 3747 satb_mq_set.apply_closure_to_completed_buffer()) { 3748 if (_cm->verbose_medium()) { 3749 gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id); 3750 } 3751 statsOnly( ++_satb_buffers_processed ); 3752 regular_clock_call(); 3753 } 3754 } 3755 3756 if (!concurrent() && !has_aborted()) { 3757 // We should only do this during remark. 3758 if (G1CollectedHeap::use_parallel_gc_threads()) { 3759 satb_mq_set.par_iterate_closure_all_threads(_task_id); 3760 } else { 3761 satb_mq_set.iterate_closure_all_threads(); 3762 } 3763 } 3764 3765 _draining_satb_buffers = false; 3766 3767 assert(has_aborted() || 3768 concurrent() || 3769 satb_mq_set.completed_buffers_num() == 0, "invariant"); 3770 3771 if (G1CollectedHeap::use_parallel_gc_threads()) { 3772 satb_mq_set.set_par_closure(_task_id, NULL); 3773 } else { 3774 satb_mq_set.set_closure(NULL); 3775 } 3776 3777 // again, this was a potentially expensive operation, decrease the 3778 // limits to get the regular clock call early 3779 decrease_limits(); 3780 } 3781 3782 void CMTask::print_stats() { 3783 gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d", 3784 _task_id, _calls); 3785 gclog_or_tty->print_cr(" Elapsed time = %1.2lfms, Termination time = %1.2lfms", 3786 _elapsed_time_ms, _termination_time_ms); 3787 gclog_or_tty->print_cr(" Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3788 _step_times_ms.num(), _step_times_ms.avg(), 3789 _step_times_ms.sd()); 3790 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3791 _step_times_ms.maximum(), _step_times_ms.sum()); 3792 3793 #if _MARKING_STATS_ 3794 gclog_or_tty->print_cr(" Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms", 3795 _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(), 3796 _all_clock_intervals_ms.sd()); 3797 gclog_or_tty->print_cr(" max = %1.2lfms, total = %1.2lfms", 3798 _all_clock_intervals_ms.maximum(), 3799 _all_clock_intervals_ms.sum()); 3800 gclog_or_tty->print_cr(" Clock Causes (cum): scanning = %d, marking = %d", 3801 _clock_due_to_scanning, _clock_due_to_marking); 3802 gclog_or_tty->print_cr(" Objects: scanned = %d, found on the bitmap = %d", 3803 _objs_scanned, _objs_found_on_bitmap); 3804 gclog_or_tty->print_cr(" Local Queue: pushes = %d, pops = %d, max size = %d", 3805 _local_pushes, _local_pops, _local_max_size); 3806 gclog_or_tty->print_cr(" Global Stack: pushes = %d, pops = %d, max size = %d", 3807 _global_pushes, _global_pops, _global_max_size); 3808 gclog_or_tty->print_cr(" transfers to = %d, transfers from = %d", 3809 _global_transfers_to,_global_transfers_from); 3810 gclog_or_tty->print_cr(" Regions: claimed = %d", _regions_claimed); 3811 gclog_or_tty->print_cr(" SATB buffers: processed = %d", _satb_buffers_processed); 3812 gclog_or_tty->print_cr(" Steals: attempts = %d, successes = %d", 3813 _steal_attempts, _steals); 3814 gclog_or_tty->print_cr(" Aborted: %d, due to", _aborted); 3815 gclog_or_tty->print_cr(" overflow: %d, global abort: %d, yield: %d", 3816 _aborted_overflow, _aborted_cm_aborted, _aborted_yield); 3817 gclog_or_tty->print_cr(" time out: %d, SATB: %d, termination: %d", 3818 _aborted_timed_out, _aborted_satb, 
                         _aborted_termination);
#endif // _MARKING_STATS_
}

/*****************************************************************************

    The do_marking_step(time_target_ms, ...) method is the building
    block of the parallel marking framework. It can be called in
    parallel with other invocations of do_marking_step() on different
    tasks (but only one per task, obviously) and concurrently with the
    mutator threads, or during remark, hence it eliminates the need
    for two versions of the code. When called during remark, it will
    pick up from where the task left off during the concurrent marking
    phase. Interestingly, tasks are also claimable during evacuation
    pauses, since do_marking_step() ensures that it aborts before
    it needs to yield.

    The data structures that it uses to do marking work are the
    following:

      (1) Marking Bitmap. If there are gray objects that appear only
      on the bitmap (this happens either when dealing with an overflow
      or when the initial marking phase has simply marked the roots
      and didn't push them on the stack), then tasks claim heap
      regions whose bitmap they then scan to find gray objects. A
      global finger indicates where the end of the last claimed region
      is. A local finger indicates how far into the region a task has
      scanned. The two fingers are used to determine how to gray an
      object (i.e. whether simply marking it is OK, as it will be
      visited by a task in the future, or whether it needs to be also
      pushed on a stack).

      (2) Local Queue. Each task has a local queue, which it can
      access reasonably efficiently. Other tasks can steal from it
      when they run out of work. Throughout the marking phase, a
      task attempts to keep its local queue short but not totally
      empty, so that entries are available for stealing by other
      tasks. Only when there is no more work will a task totally
      drain its local queue.

      (3) Global Mark Stack. This handles local queue overflow. During
      marking only sets of entries are moved between it and the local
      queues, as access to it requires a mutex and finer-grained
      interaction with it might cause contention. If it
      overflows, then the marking phase should restart and iterate
      over the bitmap to identify gray objects. Throughout the marking
      phase, tasks attempt to keep the global mark stack at a small
      length but not totally empty, so that entries are available for
      popping by other tasks. Only when there is no more work will
      tasks totally drain the global mark stack.

      (4) SATB Buffer Queue. This is where completed SATB buffers are
      made available. Buffers are regularly removed from this queue
      and scanned for roots, so that the queue doesn't get too
      long. During remark, all completed buffers are processed, as
      well as the filled-in parts of any uncompleted buffers.

    The do_marking_step() method tries to abort when the time target
    has been reached. There are a few other cases when the
    do_marking_step() method also aborts:

      (1) When the marking phase has been aborted (after a Full GC).

      (2) When a global overflow (on the global stack) has been
      triggered. Before the task aborts, it will actually sync up with
      the other tasks to ensure that all the marking data structures
      (local queues, stacks, fingers etc.)
      are re-initialized so that when do_marking_step() completes,
      the marking phase can immediately restart.

      (3) When enough completed SATB buffers are available. The
      do_marking_step() method only tries to drain SATB buffers right
      at the beginning. So, if enough buffers are available, the
      marking step aborts and the SATB buffers are processed at
      the beginning of the next invocation.

      (4) To yield. When we have to yield then we abort and yield
      right at the end of do_marking_step(). This saves us from a lot
      of hassle as, by yielding, we might allow a Full GC. If this
      happens then objects will be compacted underneath our feet, the
      heap might shrink, etc. We save checking for this by just
      aborting and doing the yield right at the end.

    From the above it follows that the do_marking_step() method should
    be called in a loop (or, otherwise, regularly) until it completes.

    If a marking step completes without its has_aborted() flag being
    true, it means it has completed the current marking phase (and
    also all other marking tasks have done so and have all synced up).

    A method called regular_clock_call() is invoked "regularly" (in
    sub-millisecond intervals) throughout marking. It is this clock
    method that checks all the abort conditions which were mentioned
    above and decides when the task should abort. A work-based scheme
    is used to trigger this clock method: when the number of object
    words the marking phase has scanned or the number of references
    the marking phase has visited reaches a given limit. Additional
    invocations of the clock method have been planted in a few other
    strategic places too. The initial reason for the clock method was
    to avoid calling vtime too regularly, as it is quite expensive.
    So, once it was in place, it was natural to piggy-back all the
    other conditions on it too and not constantly check them
    throughout the code.

    If do_termination is true then do_marking_step will enter its
    termination protocol.

    The value of is_serial must be true when do_marking_step is being
    called serially (i.e. by the VMThread) and do_marking_step should
    skip any synchronization in the termination and overflow code.
    Examples include the serial remark code and the serial reference
    processing closures.

    The value of is_serial must be false when do_marking_step is
    being called by any of the worker threads in a work gang.
    Examples include the concurrent marking code (CMMarkingTask),
    the MT remark code, and the MT reference processing closures.

 *****************************************************************************/

void CMTask::do_marking_step(double time_target_ms,
                             bool do_termination,
                             bool is_serial) {
  assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
  assert(concurrent() == _cm->concurrent(), "they should be the same");

  G1CollectorPolicy* g1_policy = _g1h->g1_policy();
  assert(_task_queues != NULL, "invariant");
  assert(_task_queue != NULL, "invariant");
  assert(_task_queues->queue(_task_id) == _task_queue, "invariant");

  assert(!_claimed,
         "only one thread should claim this task at any one time");

  // OK, this doesn't safeguard against all possible scenarios, as it
  // is possible for two threads to set the _claimed flag at the same
  // time. But it is only for debugging purposes anyway and it will
  // catch most problems.
  _claimed = true;

  _start_time_ms = os::elapsedVTime() * 1000.0;
  statsOnly( _interval_start_time_ms = _start_time_ms );

  // If do_stealing is true then do_marking_step will attempt to
  // steal work from the other CMTasks. It only makes sense to
  // enable stealing when the termination protocol is enabled
  // and do_marking_step() is not being called serially.
  bool do_stealing = do_termination && !is_serial;

  double diff_prediction_ms =
    g1_policy->get_new_prediction(&_marking_step_diffs_ms);
  _time_target_ms = time_target_ms - diff_prediction_ms;

  // Set up the variables that are used in the work-based scheme to
  // call the regular clock method.
  _words_scanned = 0;
  _refs_reached  = 0;
  recalculate_limits();

  // Clear all flags.
  clear_has_aborted();
  _has_timed_out = false;
  _draining_satb_buffers = false;

  ++_calls;

  if (_cm->verbose_low()) {
    gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
                           "target = %1.2lfms >>>>>>>>>>",
                           _task_id, _calls, _time_target_ms);
  }

  // Set up the bitmap and oop closures. Anything that uses them is
  // eventually called from this method, so it is OK to allocate these
  // statically.
  CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
  G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
  set_cm_oop_closure(&cm_oop_closure);

  if (_cm->has_overflown()) {
    // This can happen if the mark stack overflows during a GC pause
    // and this task, after a yield point, restarts. We have to abort
    // as we need to get into the overflow protocol which happens
    // right at the end of this task.
    set_has_aborted();
  }

  // First drain any available SATB buffers. After this, we will not
  // look at SATB buffers before the next invocation of this method.
  // If enough completed SATB buffers are queued up, the regular clock
  // will abort this task so that it restarts.
  drain_satb_buffers();
  // ...then partially drain the local queue and the global stack.
  drain_local_queue(true);
  drain_global_stack(true);

  do {
    if (!has_aborted() && _curr_region != NULL) {
      // This means that we're already holding on to a region.
      assert(_finger != NULL, "if region is not NULL, then the finger "
             "should not be NULL either");

      // We might have restarted this task after an evacuation pause
      // which might have evacuated the region we're holding on to
      // underneath our feet. Let's read its limit again to make sure
      // that we do not iterate over a region of the heap that
      // contains garbage (update_region_limit() will also move
      // _finger to the start of the region if it is found empty).
      update_region_limit();
      // We will start from _finger, not from the start of the region,
      // as we might be restarting this task after aborting half-way
      // through scanning this region. In this case, _finger points to
      // the address where we last found a marked object. If this is a
      // fresh region, _finger points to start().
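      // Pictorially, the window we are about to iterate over looks
      // like this (illustrative layout, not real addresses):
      //
      //   bottom        _finger       _region_limit (== NTAMS)    end
      //     |..............|================|........................|
      //                     \__ the only part of the region that
      //                         this call still needs to scan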
4031 MemRegion mr = MemRegion(_finger, _region_limit); 4032 4033 if (_cm->verbose_low()) { 4034 gclog_or_tty->print_cr("[%d] we're scanning part " 4035 "["PTR_FORMAT", "PTR_FORMAT") " 4036 "of region "PTR_FORMAT, 4037 _task_id, _finger, _region_limit, _curr_region); 4038 } 4039 4040 // Let's iterate over the bitmap of the part of the 4041 // region that is left. 4042 if (mr.is_empty() || _nextMarkBitMap->iterate(&bitmap_closure, mr)) { 4043 // We successfully completed iterating over the region. Now, 4044 // let's give up the region. 4045 giveup_current_region(); 4046 regular_clock_call(); 4047 } else { 4048 assert(has_aborted(), "currently the only way to do so"); 4049 // The only way to abort the bitmap iteration is to return 4050 // false from the do_bit() method. However, inside the 4051 // do_bit() method we move the _finger to point to the 4052 // object currently being looked at. So, if we bail out, we 4053 // have definitely set _finger to something non-null. 4054 assert(_finger != NULL, "invariant"); 4055 4056 // Region iteration was actually aborted. So now _finger 4057 // points to the address of the object we last scanned. If we 4058 // leave it there, when we restart this task, we will rescan 4059 // the object. It is easy to avoid this. We move the finger by 4060 // enough to point to the next possible object header (the 4061 // bitmap knows by how much we need to move it as it knows its 4062 // granularity). 4063 assert(_finger < _region_limit, "invariant"); 4064 HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger); 4065 // Check if bitmap iteration was aborted while scanning the last object 4066 if (new_finger >= _region_limit) { 4067 giveup_current_region(); 4068 } else { 4069 move_finger_to(new_finger); 4070 } 4071 } 4072 } 4073 // At this point we have either completed iterating over the 4074 // region we were holding on to, or we have aborted. 4075 4076 // We then partially drain the local queue and the global stack. 4077 // (Do we really need this?) 4078 drain_local_queue(true); 4079 drain_global_stack(true); 4080 4081 // Read the note on the claim_region() method on why it might 4082 // return NULL with potentially more regions available for 4083 // claiming and why we have to check out_of_regions() to determine 4084 // whether we're done or not. 4085 while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) { 4086 // We are going to try to claim a new region. We should have 4087 // given up on the previous one. 4088 // Separated the asserts so that we know which one fires. 4089 assert(_curr_region == NULL, "invariant"); 4090 assert(_finger == NULL, "invariant"); 4091 assert(_region_limit == NULL, "invariant"); 4092 if (_cm->verbose_low()) { 4093 gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id); 4094 } 4095 HeapRegion* claimed_region = _cm->claim_region(_task_id); 4096 if (claimed_region != NULL) { 4097 // Yes, we managed to claim one 4098 statsOnly( ++_regions_claimed ); 4099 4100 if (_cm->verbose_low()) { 4101 gclog_or_tty->print_cr("[%d] we successfully claimed " 4102 "region "PTR_FORMAT, 4103 _task_id, claimed_region); 4104 } 4105 4106 setup_for_region(claimed_region); 4107 assert(_curr_region == claimed_region, "invariant"); 4108 } 4109 // It is important to call the regular clock here. It might take 4110 // a while to claim a region if, for example, we hit a large 4111 // block of empty regions. So we need to call the regular clock 4112 // method once round the loop to make sure it's called 4113 // frequently enough. 
      regular_clock_call();
    }

    if (!has_aborted() && _curr_region == NULL) {
      assert(_cm->out_of_regions(),
             "at this point we should be out of regions");
    }
  } while (_curr_region != NULL && !has_aborted());

  if (!has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions(),
           "at this point we should be out of regions");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
    }

    // Try to reduce the number of available SATB buffers so that
    // remark has less work to do.
    drain_satb_buffers();
  }

  // Since we've done everything else, we can now totally drain the
  // local queue and global stack.
  drain_local_queue(false);
  drain_global_stack(false);

  // Attempt to steal work from other tasks' queues.
  if (do_stealing && !has_aborted()) {
    // We have not aborted. This means that we have finished all that
    // we could. Let's try to do some stealing...

    // We cannot check whether the global stack is empty, since other
    // tasks might be pushing objects to it concurrently.
    assert(_cm->out_of_regions() && _task_queue->size() == 0,
           "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
    }

    while (!has_aborted()) {
      oop obj;
      statsOnly( ++_steal_attempts );

      if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
        if (_cm->verbose_medium()) {
          gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
                                 _task_id, (void*) obj);
        }

        statsOnly( ++_steals );

        assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
               "any stolen object should be marked");
        scan_object(obj);

        // And since we're towards the end, let's totally drain the
        // local queue and global stack.
        drain_local_queue(false);
        drain_global_stack(false);
      } else {
        break;
      }
    }
  }

  // If we are about to wrap up and go into termination, check if we
  // should raise the overflow flag.
  if (do_termination && !has_aborted()) {
    if (_cm->force_overflow()->should_force()) {
      _cm->set_has_overflown();
      regular_clock_call();
    }
  }

  // We still haven't aborted. Now, let's try to get into the
  // termination protocol.
  if (do_termination && !has_aborted()) {
    // We cannot check whether the global stack is empty, since other
    // tasks might be concurrently pushing objects on it.
    // Separated the asserts so that we know which one fires.
    assert(_cm->out_of_regions(), "only way to reach here");
    assert(_task_queue->size() == 0, "only way to reach here");

    if (_cm->verbose_low()) {
      gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
    }

    _termination_start_time_ms = os::elapsedVTime() * 1000.0;

    // The CMTask class also extends the TerminatorTerminator class,
    // hence its should_exit_termination() method will also decide
    // whether to exit the termination protocol or not.
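    // In outline, the handshake below works like this (a simplified
    // sketch, not the actual ParallelTaskTerminator implementation):
    //
    //   worker:     finished = terminator->offer_termination(this);
    //   terminator: spins, periodically calling
    //               this->should_exit_termination()
    //   finished == true  -> every task has offered termination, so
    //                        the marking phase is globally done
    //   finished == false -> should_exit_termination() fired (new
    //                        work appeared or we must abort), so the
    //                        task aborts and the caller retries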
4210 bool finished = (is_serial || 4211 _cm->terminator()->offer_termination(this)); 4212 double termination_end_time_ms = os::elapsedVTime() * 1000.0; 4213 _termination_time_ms += 4214 termination_end_time_ms - _termination_start_time_ms; 4215 4216 if (finished) { 4217 // We're all done. 4218 4219 if (_task_id == 0) { 4220 // let's allow task 0 to do this 4221 if (concurrent()) { 4222 assert(_cm->concurrent_marking_in_progress(), "invariant"); 4223 // we need to set this to false before the next 4224 // safepoint. This way we ensure that the marking phase 4225 // doesn't observe any more heap expansions. 4226 _cm->clear_concurrent_marking_in_progress(); 4227 } 4228 } 4229 4230 // We can now guarantee that the global stack is empty, since 4231 // all other tasks have finished. We separated the guarantees so 4232 // that, if a condition is false, we can immediately find out 4233 // which one. 4234 guarantee(_cm->out_of_regions(), "only way to reach here"); 4235 guarantee(_cm->mark_stack_empty(), "only way to reach here"); 4236 guarantee(_task_queue->size() == 0, "only way to reach here"); 4237 guarantee(!_cm->has_overflown(), "only way to reach here"); 4238 guarantee(!_cm->mark_stack_overflow(), "only way to reach here"); 4239 4240 if (_cm->verbose_low()) { 4241 gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id); 4242 } 4243 } else { 4244 // Apparently there's more work to do. Let's abort this task. It 4245 // will restart it and we can hopefully find more things to do. 4246 4247 if (_cm->verbose_low()) { 4248 gclog_or_tty->print_cr("[%d] apparently there is more work to do", 4249 _task_id); 4250 } 4251 4252 set_has_aborted(); 4253 statsOnly( ++_aborted_termination ); 4254 } 4255 } 4256 4257 // Mainly for debugging purposes to make sure that a pointer to the 4258 // closure which was statically allocated in this frame doesn't 4259 // escape it by accident. 4260 set_cm_oop_closure(NULL); 4261 double end_time_ms = os::elapsedVTime() * 1000.0; 4262 double elapsed_time_ms = end_time_ms - _start_time_ms; 4263 // Update the step history. 4264 _step_times_ms.add(elapsed_time_ms); 4265 4266 if (has_aborted()) { 4267 // The task was aborted for some reason. 4268 4269 statsOnly( ++_aborted ); 4270 4271 if (_has_timed_out) { 4272 double diff_ms = elapsed_time_ms - _time_target_ms; 4273 // Keep statistics of how well we did with respect to hitting 4274 // our target only if we actually timed out (if we aborted for 4275 // other reasons, then the results might get skewed). 4276 _marking_step_diffs_ms.add(diff_ms); 4277 } 4278 4279 if (_cm->has_overflown()) { 4280 // This is the interesting one. We aborted because a global 4281 // overflow was raised. This means we have to restart the 4282 // marking phase and start iterating over regions. However, in 4283 // order to do this we have to make sure that all tasks stop 4284 // what they are doing and re-initialise in a safe manner. We 4285 // will achieve this with the use of two barrier sync points. 4286 4287 if (_cm->verbose_low()) { 4288 gclog_or_tty->print_cr("[%d] detected overflow", _task_id); 4289 } 4290 4291 if (!is_serial) { 4292 // We only need to enter the sync barrier if being called 4293 // from a parallel context 4294 _cm->enter_first_sync_barrier(_task_id); 4295 4296 // When we exit this sync barrier we know that all tasks have 4297 // stopped doing marking work. So, it's now safe to 4298 // re-initialise our data structures. At the end of this method, 4299 // task 0 will clear the global data structures. 
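      // Shape of the overflow handshake (sketch): barrier #1 means
      // "every task has stopped marking"; between the barriers each
      // task resets its local state (and task 0 the global state);
      // barrier #2 means "every task has finished resetting", after
      // which marking can safely restart.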
4300 } 4301 4302 statsOnly( ++_aborted_overflow ); 4303 4304 // We clear the local state of this task... 4305 clear_region_fields(); 4306 4307 if (!is_serial) { 4308 // ...and enter the second barrier. 4309 _cm->enter_second_sync_barrier(_task_id); 4310 } 4311 // At this point, if we're during the concurrent phase of 4312 // marking, everything has been re-initialized and we're 4313 // ready to restart. 4314 } 4315 4316 if (_cm->verbose_low()) { 4317 gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, " 4318 "elapsed = %1.2lfms <<<<<<<<<<", 4319 _task_id, _time_target_ms, elapsed_time_ms); 4320 if (_cm->has_aborted()) { 4321 gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========", 4322 _task_id); 4323 } 4324 } 4325 } else { 4326 if (_cm->verbose_low()) { 4327 gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, " 4328 "elapsed = %1.2lfms <<<<<<<<<<", 4329 _task_id, _time_target_ms, elapsed_time_ms); 4330 } 4331 } 4332 4333 _claimed = false; 4334 } 4335 4336 CMTask::CMTask(int task_id, 4337 ConcurrentMark* cm, 4338 size_t* marked_bytes, 4339 BitMap* card_bm, 4340 CMTaskQueue* task_queue, 4341 CMTaskQueueSet* task_queues) 4342 : _g1h(G1CollectedHeap::heap()), 4343 _task_id(task_id), _cm(cm), 4344 _claimed(false), 4345 _nextMarkBitMap(NULL), _hash_seed(17), 4346 _task_queue(task_queue), 4347 _task_queues(task_queues), 4348 _cm_oop_closure(NULL), 4349 _marked_bytes_array(marked_bytes), 4350 _card_bm(card_bm) { 4351 guarantee(task_queue != NULL, "invariant"); 4352 guarantee(task_queues != NULL, "invariant"); 4353 4354 statsOnly( _clock_due_to_scanning = 0; 4355 _clock_due_to_marking = 0 ); 4356 4357 _marking_step_diffs_ms.add(0.5); 4358 } 4359 4360 // These are formatting macros that are used below to ensure 4361 // consistent formatting. The *_H_* versions are used to format the 4362 // header for a particular value and they should be kept consistent 4363 // with the corresponding macro. Also note that most of the macros add 4364 // the necessary white space (as a prefix) which makes them a bit 4365 // easier to compose. 4366 4367 // All the output lines are prefixed with this string to be able to 4368 // identify them easily in a large log file. 
#define G1PPRL_LINE_PREFIX            "###"

#define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
#ifdef _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
#else // _LP64
#define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
#endif // _LP64

// For per-region info
#define G1PPRL_TYPE_FORMAT            "   %-4s"
#define G1PPRL_TYPE_H_FORMAT          "   %4s"
#define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
#define G1PPRL_BYTE_H_FORMAT          "  %9s"
#define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
#define G1PPRL_DOUBLE_H_FORMAT        "  %14s"

// For summary info
#define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
#define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
#define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
#define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"

G1PrintRegionLivenessInfoClosure::
G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
  : _out(out),
    _total_used_bytes(0), _total_capacity_bytes(0),
    _total_prev_live_bytes(0), _total_next_live_bytes(0),
    _hum_used_bytes(0), _hum_capacity_bytes(0),
    _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
  G1CollectedHeap* g1h = G1CollectedHeap::heap();
  MemRegion g1_committed = g1h->g1_committed();
  MemRegion g1_reserved = g1h->g1_reserved();
  double now = os::elapsedTime();

  // Print the header of the output.
  _out->cr();
  _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
  _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
                 G1PPRL_SUM_ADDR_FORMAT("committed")
                 G1PPRL_SUM_ADDR_FORMAT("reserved")
                 G1PPRL_SUM_BYTE_FORMAT("region-size"),
                 g1_committed.start(), g1_committed.end(),
                 g1_reserved.start(), g1_reserved.end(),
                 HeapRegion::GrainBytes);
  _out->print_cr(G1PPRL_LINE_PREFIX);
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT,
                 "type", "address-range",
                 "used", "prev-live", "next-live", "gc-eff");
  _out->print_cr(G1PPRL_LINE_PREFIX
                 G1PPRL_TYPE_H_FORMAT
                 G1PPRL_ADDR_BASE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_BYTE_H_FORMAT
                 G1PPRL_DOUBLE_H_FORMAT,
                 "", "",
                 "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
}

// It takes as a parameter a pointer to one of the _hum_* fields, it
// deduces the corresponding value for a region in a humongous region
// series (either the region size, or what's left if the _hum_* field
// is < the region size), and updates the _hum_* field accordingly.
size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
  size_t bytes = 0;
  // The > 0 check is to deal with the prev and next live bytes which
  // could be 0.
  if (*hum_bytes > 0) {
    bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
    *hum_bytes -= bytes;
  }
  return bytes;
}

// It deduces the values for a region in a humongous region series
// from the _hum_* fields and updates those accordingly. It assumes
// that the _hum_* fields have already been set up from the "starts
// humongous" region and that we visit the regions in address order.
4454 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes, 4455 size_t* capacity_bytes, 4456 size_t* prev_live_bytes, 4457 size_t* next_live_bytes) { 4458 assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition"); 4459 *used_bytes = get_hum_bytes(&_hum_used_bytes); 4460 *capacity_bytes = get_hum_bytes(&_hum_capacity_bytes); 4461 *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes); 4462 *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes); 4463 } 4464 4465 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) { 4466 const char* type = ""; 4467 HeapWord* bottom = r->bottom(); 4468 HeapWord* end = r->end(); 4469 size_t capacity_bytes = r->capacity(); 4470 size_t used_bytes = r->used(); 4471 size_t prev_live_bytes = r->live_bytes(); 4472 size_t next_live_bytes = r->next_live_bytes(); 4473 double gc_eff = r->gc_efficiency(); 4474 if (r->used() == 0) { 4475 type = "FREE"; 4476 } else if (r->is_survivor()) { 4477 type = "SURV"; 4478 } else if (r->is_young()) { 4479 type = "EDEN"; 4480 } else if (r->startsHumongous()) { 4481 type = "HUMS"; 4482 4483 assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 && 4484 _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0, 4485 "they should have been zeroed after the last time we used them"); 4486 // Set up the _hum_* fields. 4487 _hum_capacity_bytes = capacity_bytes; 4488 _hum_used_bytes = used_bytes; 4489 _hum_prev_live_bytes = prev_live_bytes; 4490 _hum_next_live_bytes = next_live_bytes; 4491 get_hum_bytes(&used_bytes, &capacity_bytes, 4492 &prev_live_bytes, &next_live_bytes); 4493 end = bottom + HeapRegion::GrainWords; 4494 } else if (r->continuesHumongous()) { 4495 type = "HUMC"; 4496 get_hum_bytes(&used_bytes, &capacity_bytes, 4497 &prev_live_bytes, &next_live_bytes); 4498 assert(end == bottom + HeapRegion::GrainWords, "invariant"); 4499 } else { 4500 type = "OLD"; 4501 } 4502 4503 _total_used_bytes += used_bytes; 4504 _total_capacity_bytes += capacity_bytes; 4505 _total_prev_live_bytes += prev_live_bytes; 4506 _total_next_live_bytes += next_live_bytes; 4507 4508 // Print a line for this particular region. 4509 _out->print_cr(G1PPRL_LINE_PREFIX 4510 G1PPRL_TYPE_FORMAT 4511 G1PPRL_ADDR_BASE_FORMAT 4512 G1PPRL_BYTE_FORMAT 4513 G1PPRL_BYTE_FORMAT 4514 G1PPRL_BYTE_FORMAT 4515 G1PPRL_DOUBLE_FORMAT, 4516 type, bottom, end, 4517 used_bytes, prev_live_bytes, next_live_bytes, gc_eff); 4518 4519 return false; 4520 } 4521 4522 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() { 4523 // Print the footer of the output. 4524 _out->print_cr(G1PPRL_LINE_PREFIX); 4525 _out->print_cr(G1PPRL_LINE_PREFIX 4526 " SUMMARY" 4527 G1PPRL_SUM_MB_FORMAT("capacity") 4528 G1PPRL_SUM_MB_PERC_FORMAT("used") 4529 G1PPRL_SUM_MB_PERC_FORMAT("prev-live") 4530 G1PPRL_SUM_MB_PERC_FORMAT("next-live"), 4531 bytes_to_mb(_total_capacity_bytes), 4532 bytes_to_mb(_total_used_bytes), 4533 perc(_total_used_bytes, _total_capacity_bytes), 4534 bytes_to_mb(_total_prev_live_bytes), 4535 perc(_total_prev_live_bytes, _total_capacity_bytes), 4536 bytes_to_mb(_total_next_live_bytes), 4537 perc(_total_next_live_bytes, _total_capacity_bytes)); 4538 _out->cr(); 4539 }
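
// A sketch of how the closure above is intended to be driven (the
// actual call site lives elsewhere in G1 and is guarded by the
// G1PrintRegionLivenessInfo flag; the phase name here is just an
// example):
//
//   G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
//   G1CollectedHeap::heap()->heap_region_iterate(&cl);
//
// The constructor prints the table header, doHeapRegion() prints one
// line per region (returning false so that the iteration continues),
// and the destructor prints the summary footer.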