1 /*
   2  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/symbolTable.hpp"
  27 #include "gc_implementation/g1/concurrentMark.hpp"
  28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  31 #include "gc_implementation/g1/g1RemSet.hpp"
  32 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  33 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
  34 #include "memory/genOopClosures.inline.hpp"
  35 #include "memory/referencePolicy.hpp"
  36 #include "memory/resourceArea.hpp"
  37 #include "oops/oop.inline.hpp"
  38 #include "runtime/handles.inline.hpp"
  39 #include "runtime/java.hpp"
  40 
  41 //
  42 // CMS Bit Map Wrapper
  43 
  44 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
  _bm((uintptr_t*)NULL, 0),
  46   _shifter(shifter) {
  47   _bmStartWord = (HeapWord*)(rs.base());
  48   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
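  // Sizing note (illustrative): the bit map needs one bit for every
  // (1 << _shifter) heap words, i.e. (_bmWordSize >> _shifter) bits,
  // which is (_bmWordSize >> (_shifter + LogBitsPerByte)) bytes, plus
  // one byte to cover rounding. For example, with _shifter == 0 on a
  // 64-bit VM, a 1 GB heap is 128M heap words and needs a 16 MB
  // backing store.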
  49   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
  50                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  51 
  52   guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  // For now we'll just commit all of the bit map up front.
  54   // Later on we'll try to be more parsimonious with swap.
  55   guarantee(_virtual_space.initialize(brs, brs.size()),
            "couldn't reserve backing store for CMS bit map");
  57   assert(_virtual_space.committed_size() == brs.size(),
  58          "didn't reserve backing store for all of CMS bit map?");
  59   _bm.set_map((uintptr_t*)_virtual_space.low());
  60   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
  61          _bmWordSize, "inconsistency in bit map sizing");
  62   _bm.set_size(_bmWordSize >> _shifter);
  63 }
  64 
  65 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  66                                                HeapWord* limit) const {
  67   // First we must round addr *up* to a possible object boundary.
  68   addr = (HeapWord*)align_size_up((intptr_t)addr,
  69                                   HeapWordSize << _shifter);
  70   size_t addrOffset = heapWordToOffset(addr);
  71   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  72   size_t limitOffset = heapWordToOffset(limit);
  73   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  74   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  75   assert(nextAddr >= addr, "get_next_one postcondition");
  76   assert(nextAddr == limit || isMarked(nextAddr),
  77          "get_next_one postcondition");
  78   return nextAddr;
  79 }
  80 
  81 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  82                                                  HeapWord* limit) const {
  83   size_t addrOffset = heapWordToOffset(addr);
  84   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  85   size_t limitOffset = heapWordToOffset(limit);
  86   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  87   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  88   assert(nextAddr >= addr, "get_next_one postcondition");
  89   assert(nextAddr == limit || !isMarked(nextAddr),
  90          "get_next_one postcondition");
  91   return nextAddr;
  92 }
  93 
  94 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  95   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  96   return (int) (diff >> _shifter);
  97 }
  98 
  99 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
 100   HeapWord* left  = MAX2(_bmStartWord, mr.start());
 101   HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
 102   if (right > left) {
 103     // Right-open interval [leftOffset, rightOffset).
 104     return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
 105   } else {
 106     return true;
 107   }
 108 }
 109 
 110 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
 111                                              size_t    from_start_index,
 112                                              HeapWord* to_start_word,
 113                                              size_t    word_num) {
 114   _bm.mostly_disjoint_range_union(from_bitmap,
 115                                   from_start_index,
 116                                   heapWordToOffset(to_start_word),
 117                                   word_num);
 118 }
 119 
 120 #ifndef PRODUCT
 121 bool CMBitMapRO::covers(ReservedSpace rs) const {
 122   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 123   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
 124          "size inconsistency");
 125   return _bmStartWord == (HeapWord*)(rs.base()) &&
 126          _bmWordSize  == rs.size()>>LogHeapWordSize;
 127 }
 128 #endif
 129 
 130 void CMBitMap::clearAll() {
 131   _bm.clear();
 132   return;
 133 }
 134 
 135 void CMBitMap::markRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 137   assert(!mr.is_empty(), "unexpected empty region");
 138   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 139           ((HeapWord *) mr.end())),
 140          "markRange memory region end is not card aligned");
 141   // convert address range into offset range
 142   _bm.at_put_range(heapWordToOffset(mr.start()),
 143                    heapWordToOffset(mr.end()), true);
 144 }
 145 
 146 void CMBitMap::clearRange(MemRegion mr) {
  mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 148   assert(!mr.is_empty(), "unexpected empty region");
 149   // convert address range into offset range
 150   _bm.at_put_range(heapWordToOffset(mr.start()),
 151                    heapWordToOffset(mr.end()), false);
 152 }
 153 
 154 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 155                                             HeapWord* end_addr) {
 156   HeapWord* start = getNextMarkedWordAddress(addr);
 157   start = MIN2(start, end_addr);
 158   HeapWord* end   = getNextUnmarkedWordAddress(start);
 159   end = MIN2(end, end_addr);
 160   assert(start <= end, "Consistency check");
 161   MemRegion mr(start, end);
 162   if (!mr.is_empty()) {
 163     clearRange(mr);
 164   }
 165   return mr;
 166 }
 167 
 168 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 169   _base(NULL), _cm(cm)
 170 #ifdef ASSERT
 171   , _drain_in_progress(false)
 172   , _drain_in_progress_yields(false)
 173 #endif
 174 {}
 175 
 176 void CMMarkStack::allocate(size_t size) {
 177   _base = NEW_C_HEAP_ARRAY(oop, size);
 178   if (_base == NULL)
 179     vm_exit_during_initialization("Failed to allocate "
                                  "CM mark stack");
 181   _index = 0;
 182   // QQQQ cast ...
 183   _capacity = (jint) size;
 184   _oops_do_bound = -1;
 185   NOT_PRODUCT(_max_depth = 0);
 186 }
 187 
 188 CMMarkStack::~CMMarkStack() {
 189   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
 190 }
 191 
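// A sketch of the lock-free protocol used by par_push() below: a thread
// claims the current slot by advancing _index with a CAS and, if the CAS
// succeeds, writes its entry into the claimed slot; on CAS failure it simply
// retries. If the stack is full the entry is dropped and _overflow is set, so
// that marking can later be restarted rather than the stack grown.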
 192 void CMMarkStack::par_push(oop ptr) {
 193   while (true) {
 194     if (isFull()) {
 195       _overflow = true;
 196       return;
 197     }
 198     // Otherwise...
 199     jint index = _index;
 200     jint next_index = index+1;
 201     jint res = Atomic::cmpxchg(next_index, &_index, index);
 202     if (res == index) {
 203       _base[index] = ptr;
 204       // Note that we don't maintain this atomically.  We could, but it
 205       // doesn't seem necessary.
 206       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 207       return;
 208     }
 209     // Otherwise, we need to try again.
 210   }
 211 }
 212 
 213 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 214   while (true) {
 215     if (isFull()) {
 216       _overflow = true;
 217       return;
 218     }
 219     // Otherwise...
 220     jint index = _index;
 221     jint next_index = index + n;
 222     if (next_index > _capacity) {
 223       _overflow = true;
 224       return;
 225     }
 226     jint res = Atomic::cmpxchg(next_index, &_index, index);
 227     if (res == index) {
 228       for (int i = 0; i < n; i++) {
 229         int ind = index + i;
 230         assert(ind < _capacity, "By overflow test above.");
 231         _base[ind] = ptr_arr[i];
 232       }
 233       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 234       return;
 235     }
 236     // Otherwise, we need to try again.
 237   }
 238 }
 239 
 240 
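// Unlike par_adjoin_arr() above, the bulk push below serializes on
// ParGCRareEvent_lock instead of using a CAS loop; judging by the lock name,
// this path is expected to be rare, so plain locking keeps it simple.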
 241 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 242   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 243   jint start = _index;
 244   jint next_index = start + n;
 245   if (next_index > _capacity) {
 246     _overflow = true;
 247     return;
 248   }
 249   // Otherwise.
 250   _index = next_index;
 251   for (int i = 0; i < n; i++) {
 252     int ind = start + i;
 253     assert(ind < _capacity, "By overflow test above.");
 254     _base[ind] = ptr_arr[i];
 255   }
 256 }
 257 
 258 
 259 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 260   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 261   jint index = _index;
 262   if (index == 0) {
 263     *n = 0;
 264     return false;
 265   } else {
 266     int k = MIN2(max, index);
 267     jint new_ind = index - k;
 268     for (int j = 0; j < k; j++) {
 269       ptr_arr[j] = _base[new_ind + j];
 270     }
 271     _index = new_ind;
 272     *n = k;
 273     return true;
 274   }
 275 }
 276 
 277 
 278 CMRegionStack::CMRegionStack() : _base(NULL) {}
 279 
 280 void CMRegionStack::allocate(size_t size) {
 281   _base = NEW_C_HEAP_ARRAY(MemRegion, size);
 282   if (_base == NULL)
 283     vm_exit_during_initialization("Failed to allocate "
 284                                   "CM region mark stack");
 285   _index = 0;
 286   // QQQQ cast ...
 287   _capacity = (jint) size;
 288 }
 289 
 290 CMRegionStack::~CMRegionStack() {
  if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
 292 }
 293 
 294 void CMRegionStack::push(MemRegion mr) {
 295   assert(mr.word_size() > 0, "Precondition");
 296   while (true) {
 297     if (isFull()) {
 298       _overflow = true;
 299       return;
 300     }
 301     // Otherwise...
 302     jint index = _index;
 303     jint next_index = index+1;
 304     jint res = Atomic::cmpxchg(next_index, &_index, index);
 305     if (res == index) {
 306       _base[index] = mr;
 307       return;
 308     }
 309     // Otherwise, we need to try again.
 310   }
 311 }
 312 
 313 // Currently we do not call this at all. Normally we would call it
 314 // during the concurrent marking / remark phases but we now call
 315 // the lock-based version instead. But we might want to resurrect this
 316 // code in the future. So, we'll leave it here commented out.
 317 #if 0
 318 MemRegion CMRegionStack::pop() {
 319   while (true) {
 320     // Otherwise...
 321     jint index = _index;
 322 
 323     if (index == 0) {
 324       return MemRegion();
 325     }
 326     jint next_index = index-1;
 327     jint res = Atomic::cmpxchg(next_index, &_index, index);
 328     if (res == index) {
 329       MemRegion mr = _base[next_index];
 330       if (mr.start() != NULL) {
 331         assert(mr.end() != NULL, "invariant");
 332         assert(mr.word_size() > 0, "invariant");
 333         return mr;
 334       } else {
 335         // that entry was invalidated... let's skip it
 336         assert(mr.end() == NULL, "invariant");
 337       }
 338     }
 339     // Otherwise, we need to try again.
 340   }
 341 }
 342 #endif // 0
 343 
 344 void CMRegionStack::push_with_lock(MemRegion mr) {
 345   assert(mr.word_size() > 0, "Precondition");
 346   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 347 
 348   if (isFull()) {
 349     _overflow = true;
 350     return;
 351   }
 352 
 353   _base[_index] = mr;
 354   _index += 1;
 355 }
 356 
 357 MemRegion CMRegionStack::pop_with_lock() {
 358   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 359 
 360   while (true) {
 361     if (_index == 0) {
 362       return MemRegion();
 363     }
 364     _index -= 1;
 365 
 366     MemRegion mr = _base[_index];
 367     if (mr.start() != NULL) {
 368       assert(mr.end() != NULL, "invariant");
 369       assert(mr.word_size() > 0, "invariant");
 370       return mr;
 371     } else {
 372       // that entry was invalidated... let's skip it
 373       assert(mr.end() == NULL, "invariant");
 374     }
 375   }
 376 }
 377 
 378 bool CMRegionStack::invalidate_entries_into_cset() {
 379   bool result = false;
 380   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 381   for (int i = 0; i < _oops_do_bound; ++i) {
 382     MemRegion mr = _base[i];
 383     if (mr.start() != NULL) {
 384       assert(mr.end() != NULL, "invariant");
 385       assert(mr.word_size() > 0, "invariant");
 386       HeapRegion* hr = g1h->heap_region_containing(mr.start());
 387       assert(hr != NULL, "invariant");
 388       if (hr->in_collection_set()) {
 389         // The region points into the collection set
 390         _base[i] = MemRegion();
 391         result = true;
 392       }
 393     } else {
 394       // that entry was invalidated... let's skip it
 395       assert(mr.end() == NULL, "invariant");
 396     }
 397   }
 398   return result;
 399 }
 400 
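// drain() repeatedly pops a grey object off the stack and applies the given
// closure to its fields; if yield_after is set it offers to yield after each
// object, and the return value reports whether the stack was fully drained
// (true) or the drain stopped early because of a yield (false).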
 401 template<class OopClosureClass>
 402 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 403   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 404          || SafepointSynchronize::is_at_safepoint(),
 405          "Drain recursion must be yield-safe.");
 406   bool res = true;
 407   debug_only(_drain_in_progress = true);
 408   debug_only(_drain_in_progress_yields = yield_after);
 409   while (!isEmpty()) {
 410     oop newOop = pop();
 411     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 412     assert(newOop->is_oop(), "Expected an oop");
 413     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 414            "only grey objects on this stack");
 415     // iterate over the oops in this oop, marking and pushing
    // the ones in the G1 heap.
 417     newOop->oop_iterate(cl);
 418     if (yield_after && _cm->do_yield_check()) {
 419       res = false; break;
 420     }
 421   }
 422   debug_only(_drain_in_progress = false);
 423   return res;
 424 }
 425 
 426 void CMMarkStack::oops_do(OopClosure* f) {
 427   if (_index == 0) return;
 428   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
 429          "Bound must be set.");
 430   for (int i = 0; i < _oops_do_bound; i++) {
 431     f->do_oop(&_base[i]);
 432   }
 433   _oops_do_bound = -1;
 434 }
 435 
 436 bool ConcurrentMark::not_yet_marked(oop obj) const {
 437   return (_g1h->is_obj_ill(obj)
 438           || (_g1h->is_in_permanent(obj)
 439               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
 440 }
 441 
 442 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 443 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 444 #endif // _MSC_VER
 445 
 446 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
 447                                int max_regions) :
 448   _markBitMap1(rs, MinObjAlignment - 1),
 449   _markBitMap2(rs, MinObjAlignment - 1),
 450 
 451   _parallel_marking_threads(0),
 452   _sleep_factor(0.0),
 453   _marking_task_overhead(1.0),
 454   _cleanup_sleep_factor(0.0),
 455   _cleanup_task_overhead(1.0),
 456   _region_bm(max_regions, false /* in_resource_area*/),
 457   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
 458            CardTableModRefBS::card_shift,
 459            false /* in_resource_area*/),
 460   _prevMarkBitMap(&_markBitMap1),
 461   _nextMarkBitMap(&_markBitMap2),
 462   _at_least_one_mark_complete(false),
 463 
 464   _markStack(this),
 465   _regionStack(),
 466   // _finger set in set_non_marking_state
 467 
 468   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
 469   // _active_tasks set in set_non_marking_state
 470   // _tasks set inside the constructor
 471   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
 472   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
 473 
 474   _has_overflown(false),
 475   _concurrent(false),
 476   _has_aborted(false),
 477   _restart_for_overflow(false),
 478   _concurrent_marking_in_progress(false),
 479   _should_gray_objects(false),
 480 
 481   // _verbose_level set below
 482 
 483   _init_times(),
 484   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 485   _cleanup_times(),
 486   _total_counting_time(0.0),
 487   _total_rs_scrub_time(0.0),
 488 
 489   _parallel_workers(NULL)
 490 {
 491   CMVerboseLevel verbose_level =
 492     (CMVerboseLevel) G1MarkingVerboseLevel;
 493   if (verbose_level < no_verbose)
 494     verbose_level = no_verbose;
 495   if (verbose_level > high_verbose)
 496     verbose_level = high_verbose;
 497   _verbose_level = verbose_level;
 498 
 499   if (verbose_low())
 500     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 501                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
 502 
 503   _markStack.allocate(MarkStackSize);
 504   _regionStack.allocate(G1MarkRegionStackSize);
 505 
 506   // Create & start a ConcurrentMark thread.
 507   _cmThread = new ConcurrentMarkThread(this);
 508   assert(cmThread() != NULL, "CM Thread should have been created");
 509   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 510 
 511   _g1h = G1CollectedHeap::heap();
 512   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 513   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
 514   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
 515 
 516   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 517   satb_qs.set_buffer_size(G1SATBBufferSize);
 518 
 519   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
 520   _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
 521   for (int i = 0 ; i < size; i++) {
 522     _par_cleanup_thread_state[i] = new ParCleanupThreadState;
 523   }
 524 
 525   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
 526   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
 527 
 528   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 529   _active_tasks = _max_task_num;
 530   for (int i = 0; i < (int) _max_task_num; ++i) {
 531     CMTaskQueue* task_queue = new CMTaskQueue();
 532     task_queue->initialize();
 533     _task_queues->register_queue(i, task_queue);
 534 
 535     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
 536     _accum_task_vtime[i] = 0.0;
 537   }
 538 
 539   if (ConcGCThreads > ParallelGCThreads) {
 540     vm_exit_during_initialization("Can't have more ConcGCThreads "
 541                                   "than ParallelGCThreads.");
 542   }
 543   if (ParallelGCThreads == 0) {
 544     // if we are not running with any parallel GC threads we will not
 545     // spawn any marking threads either
 546     _parallel_marking_threads =   0;
 547     _sleep_factor             = 0.0;
 548     _marking_task_overhead    = 1.0;
 549   } else {
 550     if (ConcGCThreads > 0) {
 551       // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
 552       // if both are set
 553 
 554       _parallel_marking_threads = ConcGCThreads;
 555       _sleep_factor             = 0.0;
 556       _marking_task_overhead    = 1.0;
 557     } else if (G1MarkingOverheadPercent > 0) {
 558       // we will calculate the number of parallel marking threads
 559       // based on a target overhead with respect to the soft real-time
 560       // goal
 561 
 562       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 563       double overall_cm_overhead =
 564         (double) MaxGCPauseMillis * marking_overhead /
 565         (double) GCPauseIntervalMillis;
 566       double cpu_ratio = 1.0 / (double) os::processor_count();
 567       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 568       double marking_task_overhead =
 569         overall_cm_overhead / marking_thread_num *
 570                                                 (double) os::processor_count();
 571       double sleep_factor =
 572                          (1.0 - marking_task_overhead) / marking_task_overhead;
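      // Purely illustrative arithmetic (assumed flag values, not defaults
      // taken from this code): with G1MarkingOverheadPercent = 10,
      // MaxGCPauseMillis = 200, GCPauseIntervalMillis = 1000 and 8
      // processors, overall_cm_overhead = 200 * 0.10 / 1000 = 0.02,
      // cpu_ratio = 0.125, marking_thread_num = ceil(0.02 / 0.125) = 1,
      // marking_task_overhead = 0.02 * 8 = 0.16 and
      // sleep_factor = (1 - 0.16) / 0.16 = 5.25, i.e. one marking thread
      // running at roughly a 16% duty cycle.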
 573 
 574       _parallel_marking_threads = (size_t) marking_thread_num;
 575       _sleep_factor             = sleep_factor;
 576       _marking_task_overhead    = marking_task_overhead;
 577     } else {
 578       _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
 579       _sleep_factor             = 0.0;
 580       _marking_task_overhead    = 1.0;
 581     }
 582 
 583     if (parallel_marking_threads() > 1)
 584       _cleanup_task_overhead = 1.0;
 585     else
 586       _cleanup_task_overhead = marking_task_overhead();
 587     _cleanup_sleep_factor =
 588                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 589 
 590 #if 0
 591     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 592     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 593     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 594     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 595     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 596 #endif
 597 
 598     guarantee(parallel_marking_threads() > 0, "peace of mind");
 599     _parallel_workers = new WorkGang("G1 Parallel Marking Threads",
 600                                      (int) parallel_marking_threads(), false, true);
 601     if (_parallel_workers == NULL)
 602       vm_exit_during_initialization("Failed necessary allocation.");
 603   }
 604 
 605   // so that the call below can read a sensible value
 606   _heap_start = (HeapWord*) rs.base();
 607   set_non_marking_state();
 608 }
 609 
 610 void ConcurrentMark::update_g1_committed(bool force) {
 611   // If concurrent marking is not in progress, then we do not need to
 612   // update _heap_end. This has a subtle and important
 613   // side-effect. Imagine that two evacuation pauses happen between
 614   // marking completion and remark. The first one can grow the
 615   // heap (hence now the finger is below the heap end). Then, the
 616   // second one could unnecessarily push regions on the region
 617   // stack. This causes the invariant that the region stack is empty
 618   // at the beginning of remark to be false. By ensuring that we do
  // not observe heap expansions after marking is complete, we avoid
  // this problem.
 621   if (!concurrent_marking_in_progress() && !force)
 622     return;
 623 
 624   MemRegion committed = _g1h->g1_committed();
 625   assert(committed.start() == _heap_start, "start shouldn't change");
 626   HeapWord* new_end = committed.end();
 627   if (new_end > _heap_end) {
 628     // The heap has been expanded.
 629 
 630     _heap_end = new_end;
 631   }
 632   // Notice that the heap can also shrink. However, this only happens
 633   // during a Full GC (at least currently) and the entire marking
 634   // phase will bail out and the task will not be restarted. So, let's
 635   // do nothing.
 636 }
 637 
 638 void ConcurrentMark::reset() {
 639   // Starting values for these two. This should be called in a STW
 640   // phase. CM will be notified of any future g1_committed expansions
  // at the end of evacuation pauses, when tasks are inactive.
 643   MemRegion committed = _g1h->g1_committed();
 644   _heap_start = committed.start();
 645   _heap_end   = committed.end();
 646 
 647   // Separated the asserts so that we know which one fires.
 648   assert(_heap_start != NULL, "heap bounds should look ok");
 649   assert(_heap_end != NULL, "heap bounds should look ok");
 650   assert(_heap_start < _heap_end, "heap bounds should look ok");
 651 
 652   // reset all the marking data structures and any necessary flags
 653   clear_marking_state();
 654 
 655   if (verbose_low())
 656     gclog_or_tty->print_cr("[global] resetting");
 657 
  // We do reset all of them, since different phases will use a
  // different number of active threads. So, it's easiest to have all
 660   // of them ready.
 661   for (int i = 0; i < (int) _max_task_num; ++i)
 662     _tasks[i]->reset(_nextMarkBitMap);
 663 
 664   // we need this to make sure that the flag is on during the evac
 665   // pause with initial mark piggy-backed
 666   set_concurrent_marking_in_progress();
 667 }
 668 
 669 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
 670   assert(active_tasks <= _max_task_num, "we should not have more");
 671 
 672   _active_tasks = active_tasks;
 673   // Need to update the three data structures below according to the
 674   // number of active threads for this phase.
 675   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 676   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 677   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 678 
 679   _concurrent = concurrent;
 680   // We propagate this to all tasks, not just the active ones.
 681   for (int i = 0; i < (int) _max_task_num; ++i)
 682     _tasks[i]->set_concurrent(concurrent);
 683 
 684   if (concurrent) {
 685     set_concurrent_marking_in_progress();
 686   } else {
 687     // We currently assume that the concurrent flag has been set to
 688     // false before we start remark. At this point we should also be
 689     // in a STW phase.
 690     assert(!concurrent_marking_in_progress(), "invariant");
 691     assert(_finger == _heap_end, "only way to get here");
 692     update_g1_committed(true);
 693   }
 694 }
 695 
 696 void ConcurrentMark::set_non_marking_state() {
 697   // We set the global marking state to some default values when we're
 698   // not doing marking.
 699   clear_marking_state();
 700   _active_tasks = 0;
 701   clear_concurrent_marking_in_progress();
 702 }
 703 
 704 ConcurrentMark::~ConcurrentMark() {
 705   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
 706   for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
 707   FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
 708                     _par_cleanup_thread_state);
 709 
 710   for (int i = 0; i < (int) _max_task_num; ++i) {
 711     delete _task_queues->queue(i);
 712     delete _tasks[i];
 713   }
 714   delete _task_queues;
  FREE_C_HEAP_ARRAY(CMTask*, _tasks);
 716 }
 717 
 718 // This closure is used to mark refs into the g1 generation
 719 // from external roots in the CMS bit map.
 720 // Called at the first checkpoint.
 721 //
 722 
 723 void ConcurrentMark::clearNextBitmap() {
 724   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 725   G1CollectorPolicy* g1p = g1h->g1_policy();
 726 
 727   // Make sure that the concurrent mark thread looks to still be in
 728   // the current cycle.
 729   guarantee(cmThread()->during_cycle(), "invariant");
 730 
 731   // We are finishing up the current cycle by clearing the next
 732   // marking bitmap and getting it ready for the next cycle. During
 733   // this time no other cycle can start. So, let's make sure that this
 734   // is the case.
 735   guarantee(!g1h->mark_in_progress(), "invariant");
 736 
 737   // clear the mark bitmap (no grey objects to start with).
 738   // We need to do this in chunks and offer to yield in between
 739   // each chunk.
 740   HeapWord* start  = _nextMarkBitMap->startWord();
 741   HeapWord* end    = _nextMarkBitMap->endWord();
 742   HeapWord* cur    = start;
 743   size_t chunkSize = M;
 744   while (cur < end) {
 745     HeapWord* next = cur + chunkSize;
 746     if (next > end)
 747       next = end;
 748     MemRegion mr(cur,next);
 749     _nextMarkBitMap->clearRange(mr);
 750     cur = next;
 751     do_yield_check();
 752 
 753     // Repeat the asserts from above. We'll do them as asserts here to
 754     // minimize their overhead on the product. However, we'll have
 755     // them as guarantees at the beginning / end of the bitmap
 756     // clearing to get some checking in the product.
 757     assert(cmThread()->during_cycle(), "invariant");
 758     assert(!g1h->mark_in_progress(), "invariant");
 759   }
 760 
 761   // Repeat the asserts from above.
 762   guarantee(cmThread()->during_cycle(), "invariant");
 763   guarantee(!g1h->mark_in_progress(), "invariant");
 764 }
 765 
 766 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 767 public:
 768   bool doHeapRegion(HeapRegion* r) {
 769     if (!r->continuesHumongous()) {
 770       r->note_start_of_marking(true);
 771     }
 772     return false;
 773   }
 774 };
 775 
 776 void ConcurrentMark::checkpointRootsInitialPre() {
 777   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 778   G1CollectorPolicy* g1p = g1h->g1_policy();
 779 
 780   _has_aborted = false;
 781 
 782 #ifndef PRODUCT
 783   if (G1PrintReachableAtInitialMark) {
 784     print_reachable("at-cycle-start",
 785                     true /* use_prev_marking */, true /* all */);
 786   }
 787 #endif
 788 
 789   // Initialise marking structures. This has to be done in a STW phase.
 790   reset();
 791 }
 792 
 793 class CMMarkRootsClosure: public OopsInGenClosure {
 794 private:
 795   ConcurrentMark*  _cm;
 796   G1CollectedHeap* _g1h;
 797   bool             _do_barrier;
 798 
 799 public:
 800   CMMarkRootsClosure(ConcurrentMark* cm,
 801                      G1CollectedHeap* g1h,
 802                      bool do_barrier) : _cm(cm), _g1h(g1h),
 803                                         _do_barrier(do_barrier) { }
 804 
 805   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
 806   virtual void do_oop(      oop* p) { do_oop_work(p); }
 807 
 808   template <class T> void do_oop_work(T* p) {
 809     T heap_oop = oopDesc::load_heap_oop(p);
 810     if (!oopDesc::is_null(heap_oop)) {
 811       oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
 812       assert(obj->is_oop() || obj->mark() == NULL,
 813              "expected an oop, possibly with mark word displaced");
 814       HeapWord* addr = (HeapWord*)obj;
 815       if (_g1h->is_in_g1_reserved(addr)) {
 816         _cm->grayRoot(obj);
 817       }
 818     }
 819     if (_do_barrier) {
 820       assert(!_g1h->is_in_g1_reserved(p),
 821              "Should be called on external roots");
 822       do_barrier(p);
 823     }
 824   }
 825 };
 826 
 827 void ConcurrentMark::checkpointRootsInitialPost() {
 828   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 829 
 830   // For each region note start of marking.
 831   NoteStartOfMarkHRClosure startcl;
 832   g1h->heap_region_iterate(&startcl);
 833 
 834   // Start weak-reference discovery.
 835   ReferenceProcessor* rp = g1h->ref_processor();
 836   rp->verify_no_references_recorded();
 837   rp->enable_discovery(); // enable ("weak") refs discovery
 838   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 839 
 840   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
  // This is the start of the marking cycle; we expect all
  // threads to have SATB queues with active set to false.
 843   satb_mq_set.set_active_all_threads(true, /* new active value */
 844                                      false /* expected_active */);
 845 
 846   // update_g1_committed() will be called at the end of an evac pause
 847   // when marking is on. So, it's also called at the end of the
 848   // initial-mark pause to update the heap end, if the heap expands
 849   // during it. No need to call it here.
 850 }
 851 
 852 // Checkpoint the roots into this generation from outside
 853 // this generation. [Note this initial checkpoint need only
 854 // be approximate -- we'll do a catch up phase subsequently.]
 855 void ConcurrentMark::checkpointRootsInitial() {
 856   assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
 857   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 858 
 859   double start = os::elapsedTime();
 860 
 861   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
 862   g1p->record_concurrent_mark_init_start();
 863   checkpointRootsInitialPre();
 864 
 865   // YSR: when concurrent precleaning is in place, we'll
 866   // need to clear the cached card table here
 867 
 868   ResourceMark rm;
 869   HandleMark  hm;
 870 
 871   g1h->ensure_parsability(false);
 872   g1h->perm_gen()->save_marks();
 873 
 874   CMMarkRootsClosure notOlder(this, g1h, false);
 875   CMMarkRootsClosure older(this, g1h, true);
 876 
 877   g1h->set_marking_started();
 878   g1h->rem_set()->prepare_for_younger_refs_iterate(false);
 879 
 880   g1h->process_strong_roots(true,    // activate StrongRootsScope
 881                             false,   // fake perm gen collection
 882                             SharedHeap::SO_AllClasses,
 883                             &notOlder, // Regular roots
 884                             NULL,     // do not visit active blobs
 885                             &older    // Perm Gen Roots
 886                             );
 887   checkpointRootsInitialPost();
 888 
 889   // Statistics.
 890   double end = os::elapsedTime();
 891   _init_times.add((end - start) * 1000.0);
 892 
 893   g1p->record_concurrent_mark_init_end();
 894 }
 895 
 896 /*
 897    Notice that in the next two methods, we actually leave the STS
 898    during the barrier sync and join it immediately afterwards. If we
   do not do this, then the following deadlock can occur: one
 900    thread could be in the barrier sync code, waiting for the other
 901    thread to also sync up, whereas another one could be trying to
 902    yield, while also waiting for the other threads to sync up too.
 903 
 904    Because the thread that does the sync barrier has left the STS, it
   is possible for it to be suspended while a Full GC or an evacuation
   pause occurs. This is actually safe, since entering the sync
 907    barrier is one of the last things do_marking_step() does, and it
 908    doesn't manipulate any data structures afterwards.
 909 */
 910 
 911 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
 912   if (verbose_low())
 913     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
 914 
 915   ConcurrentGCThread::stsLeave();
 916   _first_overflow_barrier_sync.enter();
 917   ConcurrentGCThread::stsJoin();
 918   // at this point everyone should have synced up and not be doing any
 919   // more work
 920 
 921   if (verbose_low())
 922     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
 923 
 924   // let task 0 do this
 925   if (task_num == 0) {
 926     // task 0 is responsible for clearing the global data structures
 927     clear_marking_state();
 928 
 929     if (PrintGC) {
 930       gclog_or_tty->date_stamp(PrintGCDateStamps);
 931       gclog_or_tty->stamp(PrintGCTimeStamps);
 932       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 933     }
 934   }
 935 
 936   // after this, each task should reset its own data structures then
 937   // then go into the second barrier
 938 }
 939 
 940 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
 941   if (verbose_low())
 942     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
 943 
 944   ConcurrentGCThread::stsLeave();
 945   _second_overflow_barrier_sync.enter();
 946   ConcurrentGCThread::stsJoin();
 947   // at this point everything should be re-initialised and ready to go
 948 
 949   if (verbose_low())
 950     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
 951 }
 952 
 953 void ConcurrentMark::grayRoot(oop p) {
 954   HeapWord* addr = (HeapWord*) p;
 955   // We can't really check against _heap_start and _heap_end, since it
 956   // is possible during an evacuation pause with piggy-backed
 957   // initial-mark that the committed space is expanded during the
  // pause without CM observing this change. So the assertion below
  // is a bit conservative, but better than nothing.
 960   assert(_g1h->g1_committed().contains(addr),
 961          "address should be within the heap bounds");
 962 
 963   if (!_nextMarkBitMap->isMarked(addr))
 964     _nextMarkBitMap->parMark(addr);
 965 }
 966 
 967 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
 968   // The objects on the region have already been marked "in bulk" by
 969   // the caller. We only need to decide whether to push the region on
 970   // the region stack or not.
 971 
 972   if (!concurrent_marking_in_progress() || !_should_gray_objects)
 973     // We're done with marking and waiting for remark. We do not need to
 974     // push anything else on the region stack.
 975     return;
 976 
 977   HeapWord* finger = _finger;
 978 
 979   if (verbose_low())
 980     gclog_or_tty->print_cr("[global] attempting to push "
 981                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
 982                            PTR_FORMAT, mr.start(), mr.end(), finger);
 983 
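  // Only a region below the global finger needs to be pushed: the marking
  // tasks have already claimed and scanned past that address, so objects
  // that were just marked in bulk there would otherwise never have their
  // fields scanned. Regions at or above the finger will still be reached
  // by the normal advance of the finger.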
 984   if (mr.start() < finger) {
 985     // The finger is always heap region aligned and it is not possible
 986     // for mr to span heap regions.
 987     assert(mr.end() <= finger, "invariant");
 988 
 989     // Separated the asserts so that we know which one fires.
 990     assert(mr.start() <= mr.end(),
 991            "region boundaries should fall within the committed space");
 992     assert(_heap_start <= mr.start(),
 993            "region boundaries should fall within the committed space");
 994     assert(mr.end() <= _heap_end,
 995            "region boundaries should fall within the committed space");
 996     if (verbose_low())
 997       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
 998                              "below the finger, pushing it",
 999                              mr.start(), mr.end());
1000 
1001     if (!region_stack_push(mr)) {
1002       if (verbose_low())
1003         gclog_or_tty->print_cr("[global] region stack has overflown.");
1004     }
1005   }
1006 }
1007 
1008 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1009   // The object is not marked by the caller. We need to at least mark
  // it and maybe push it on the stack.
1011 
1012   HeapWord* addr = (HeapWord*)p;
1013   if (!_nextMarkBitMap->isMarked(addr)) {
    // We definitely need to mark it, irrespective of whether we bail out
1015     // because we're done with marking.
1016     if (_nextMarkBitMap->parMark(addr)) {
1017       if (!concurrent_marking_in_progress() || !_should_gray_objects)
1018         // If we're done with concurrent marking and we're waiting for
1019         // remark, then we're not pushing anything on the stack.
1020         return;
1021 
1022       // No OrderAccess:store_load() is needed. It is implicit in the
1023       // CAS done in parMark(addr) above
1024       HeapWord* finger = _finger;
1025 
1026       if (addr < finger) {
1027         if (!mark_stack_push(oop(addr))) {
1028           if (verbose_low())
1029             gclog_or_tty->print_cr("[global] global stack overflow "
1030                                    "during parMark");
1031         }
1032       }
1033     }
1034   }
1035 }
1036 
1037 class CMConcurrentMarkingTask: public AbstractGangTask {
1038 private:
1039   ConcurrentMark*       _cm;
1040   ConcurrentMarkThread* _cmt;
1041 
1042 public:
1043   void work(int worker_i) {
1044     assert(Thread::current()->is_ConcurrentGC_thread(),
1045            "this should only be done by a conc GC thread");
1046 
1047     double start_vtime = os::elapsedVTime();
1048 
1049     ConcurrentGCThread::stsJoin();
1050 
1051     assert((size_t) worker_i < _cm->active_tasks(), "invariant");
1052     CMTask* the_task = _cm->task(worker_i);
1053     the_task->record_start_time();
1054     if (!_cm->has_aborted()) {
1055       do {
1056         double start_vtime_sec = os::elapsedVTime();
1057         double start_time_sec = os::elapsedTime();
1058         the_task->do_marking_step(10.0);
1059         double end_time_sec = os::elapsedTime();
1060         double end_vtime_sec = os::elapsedVTime();
1061         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1062         double elapsed_time_sec = end_time_sec - start_time_sec;
1063         _cm->clear_has_overflown();
1064 
1065         bool ret = _cm->do_yield_check(worker_i);
1066 
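        // Throttling sketch: after marking for elapsed_vtime_sec the task
        // sleeps for sleep_factor() times as long, so its CPU duty cycle is
        // roughly 1 / (1 + sleep_factor()), which works out to the
        // configured marking task overhead.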
1067         jlong sleep_time_ms;
1068         if (!_cm->has_aborted() && the_task->has_aborted()) {
1069           sleep_time_ms =
1070             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1071           ConcurrentGCThread::stsLeave();
1072           os::sleep(Thread::current(), sleep_time_ms, false);
1073           ConcurrentGCThread::stsJoin();
1074         }
1075         double end_time2_sec = os::elapsedTime();
1076         double elapsed_time2_sec = end_time2_sec - start_time_sec;
1077 
1078 #if 0
1079           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1080                                  "overhead %1.4lf",
1081                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1082                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
1083           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1084                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1085 #endif
1086       } while (!_cm->has_aborted() && the_task->has_aborted());
1087     }
1088     the_task->record_end_time();
1089     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1090 
1091     ConcurrentGCThread::stsLeave();
1092 
1093     double end_vtime = os::elapsedVTime();
1094     _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1095   }
1096 
1097   CMConcurrentMarkingTask(ConcurrentMark* cm,
1098                           ConcurrentMarkThread* cmt) :
1099       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1100 
1101   ~CMConcurrentMarkingTask() { }
1102 };
1103 
1104 void ConcurrentMark::markFromRoots() {
1105   // we might be tempted to assert that:
1106   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1107   //        "inconsistent argument?");
1108   // However that wouldn't be right, because it's possible that
1109   // a safepoint is indeed in progress as a younger generation
1110   // stop-the-world GC happens even as we mark in this generation.
1111 
1112   _restart_for_overflow = false;
1113 
1114   set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
1115 
1116   CMConcurrentMarkingTask markingTask(this, cmThread());
1117   if (parallel_marking_threads() > 0)
1118     _parallel_workers->run_task(&markingTask);
1119   else
1120     markingTask.work(0);
1121   print_stats();
1122 }
1123 
1124 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1125   // world is stopped at this checkpoint
1126   assert(SafepointSynchronize::is_at_safepoint(),
1127          "world should be stopped");
1128   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1129 
1130   // If a full collection has happened, we shouldn't do this.
1131   if (has_aborted()) {
1132     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1133     return;
1134   }
1135 
1136   if (VerifyDuringGC) {
1137     HandleMark hm;  // handle scope
1138     gclog_or_tty->print(" VerifyDuringGC:(before)");
1139     Universe::heap()->prepare_for_verify();
1140     Universe::verify(true, false, true);
1141   }
1142 
1143   G1CollectorPolicy* g1p = g1h->g1_policy();
1144   g1p->record_concurrent_mark_remark_start();
1145 
1146   double start = os::elapsedTime();
1147 
1148   checkpointRootsFinalWork();
1149 
1150   double mark_work_end = os::elapsedTime();
1151 
1152   weakRefsWork(clear_all_soft_refs);
1153 
1154   if (has_overflown()) {
1155     // Oops.  We overflowed.  Restart concurrent marking.
1156     _restart_for_overflow = true;
1157     // Clear the flag. We do not need it any more.
1158     clear_has_overflown();
1159     if (G1TraceMarkStackOverflow)
1160       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1161   } else {
1162     // We're done with marking.
    // This is the end of the marking cycle; we expect all
    // threads to have SATB queues with active set to true.
1165     JavaThread::satb_mark_queue_set().set_active_all_threads(
1166                                                   false, /* new active value */
1167                                                   true /* expected_active */);
1168 
1169     if (VerifyDuringGC) {
1170       HandleMark hm;  // handle scope
1171       gclog_or_tty->print(" VerifyDuringGC:(after)");
1172       Universe::heap()->prepare_for_verify();
1173       Universe::heap()->verify(/* allow_dirty */      true,
1174                                /* silent */           false,
1175                                /* use_prev_marking */ false);
1176     }
1177   }
1178 
1179 #if VERIFY_OBJS_PROCESSED
1180   _scan_obj_cl.objs_processed = 0;
1181   ThreadLocalObjQueue::objs_enqueued = 0;
1182 #endif
1183 
1184   // Statistics
1185   double now = os::elapsedTime();
1186   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1187   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1188   _remark_times.add((now - start) * 1000.0);
1189 
1190   g1p->record_concurrent_mark_remark_end();
1191 }
1192 
1193 
1194 #define CARD_BM_TEST_MODE 0
1195 
1196 class CalcLiveObjectsClosure: public HeapRegionClosure {
1197 
1198   CMBitMapRO* _bm;
1199   ConcurrentMark* _cm;
1200   bool _changed;
1201   bool _yield;
1202   size_t _words_done;
1203   size_t _tot_live;
1204   size_t _tot_used;
1205   size_t _regions_done;
1206   double _start_vtime_sec;
1207 
1208   BitMap* _region_bm;
1209   BitMap* _card_bm;
1210   intptr_t _bottom_card_num;
1211   bool _final;
1212 
1213   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1214     for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1215 #if CARD_BM_TEST_MODE
1216       guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
1217 #else
1218       _card_bm->par_at_put(i - _bottom_card_num, 1);
1219 #endif
1220     }
1221   }
1222 
1223 public:
1224   CalcLiveObjectsClosure(bool final,
1225                          CMBitMapRO *bm, ConcurrentMark *cm,
1226                          BitMap* region_bm, BitMap* card_bm) :
1227     _bm(bm), _cm(cm), _changed(false), _yield(true),
1228     _words_done(0), _tot_live(0), _tot_used(0),
1229     _region_bm(region_bm), _card_bm(card_bm),_final(final),
1230     _regions_done(0), _start_vtime_sec(0.0)
1231   {
1232     _bottom_card_num =
1233       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1234                CardTableModRefBS::card_shift);
1235   }
1236 
  // It takes a region that's not empty (i.e., it has at least one
  // live object in it) and sets its corresponding bit on the region
1239   // bitmap to 1. If the region is "starts humongous" it will also set
1240   // to 1 the bits on the region bitmap that correspond to its
1241   // associated "continues humongous" regions.
1242   void set_bit_for_region(HeapRegion* hr) {
1243     assert(!hr->continuesHumongous(), "should have filtered those out");
1244 
1245     size_t index = hr->hrs_index();
1246     if (!hr->startsHumongous()) {
1247       // Normal (non-humongous) case: just set the bit.
1248       _region_bm->par_at_put((BitMap::idx_t) index, true);
1249     } else {
1250       // Starts humongous case: calculate how many regions are part of
1251       // this humongous region and then set the bit range. It might
1252       // have been a bit more efficient to look at the object that
1253       // spans these humongous regions to calculate their number from
1254       // the object's size. However, it's a good idea to calculate
1255       // this based on the metadata itself, and not the region
1256       // contents, so that this code is not aware of what goes into
1257       // the humongous regions (in case this changes in the future).
1258       G1CollectedHeap* g1h = G1CollectedHeap::heap();
1259       size_t end_index = index + 1;
1260       while (end_index < g1h->n_regions()) {
1261         HeapRegion* chr = g1h->region_at(end_index);
1262         if (!chr->continuesHumongous()) {
1263           break;
1264         }
1265         end_index += 1;
1266       }
1267       _region_bm->par_at_put_range((BitMap::idx_t) index,
1268                                    (BitMap::idx_t) end_index, true);
1269     }
1270   }
1271 
1272   bool doHeapRegion(HeapRegion* hr) {
1273     if (!_final && _regions_done == 0)
1274       _start_vtime_sec = os::elapsedVTime();
1275 
1276     if (hr->continuesHumongous()) {
1277       // We will ignore these here and process them when their
1278       // associated "starts humongous" region is processed (see
1279       // set_bit_for_heap_region()). Note that we cannot rely on their
1280       // associated "starts humongous" region to have their bit set to
1281       // 1 since, due to the region chunking in the parallel region
1282       // iteration, a "continues humongous" region might be visited
1283       // before its associated "starts humongous".
1284       return false;
1285     }
1286 
1287     HeapWord* nextTop = hr->next_top_at_mark_start();
1288     HeapWord* start   = hr->top_at_conc_mark_count();
1289     assert(hr->bottom() <= start && start <= hr->end() &&
1290            hr->bottom() <= nextTop && nextTop <= hr->end() &&
1291            start <= nextTop,
1292            "Preconditions.");
    // Otherwise, record the number of words we'll examine.
1294     size_t words_done = (nextTop - start);
1295     // Find the first marked object at or after "start".
1296     start = _bm->getNextMarkedWordAddress(start, nextTop);
1297     size_t marked_bytes = 0;
1298 
1299     // Below, the term "card num" means the result of shifting an address
1300     // by the card shift -- address 0 corresponds to card number 0.  One
1301     // must subtract the card num of the bottom of the heap to obtain a
1302     // card table index.
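    // Illustrative example (addresses and card size assumed, not taken
    // from this code): with 512-byte cards (card shift of 9) and a heap
    // bottom at 0x40000000, the bottom card num is 0x200000; an object
    // starting at 0x40001200 has card num 0x200009 and hence card table
    // index 9.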
1303     // The first card num of the sequence of live cards currently being
1304     // constructed.  -1 ==> no sequence.
1305     intptr_t start_card_num = -1;
1306     // The last card num of the sequence of live cards currently being
1307     // constructed.  -1 ==> no sequence.
1308     intptr_t last_card_num = -1;
1309 
1310     while (start < nextTop) {
1311       if (_yield && _cm->do_yield_check()) {
1312         // We yielded.  It might be for a full collection, in which case
1313         // all bets are off; terminate the traversal.
1314         if (_cm->has_aborted()) {
1315           _changed = false;
1316           return true;
1317         } else {
1318           // Otherwise, it might be a collection pause, and the region
1319           // we're looking at might be in the collection set.  We'll
1320           // abandon this region.
1321           return false;
1322         }
1323       }
1324       oop obj = oop(start);
1325       int obj_sz = obj->size();
1326       // The card num of the start of the current object.
1327       intptr_t obj_card_num =
1328         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1329 
1330       HeapWord* obj_last = start + obj_sz - 1;
1331       intptr_t obj_last_card_num =
1332         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1333 
1334       if (obj_card_num != last_card_num) {
1335         if (start_card_num == -1) {
1336           assert(last_card_num == -1, "Both or neither.");
1337           start_card_num = obj_card_num;
1338         } else {
1339           assert(last_card_num != -1, "Both or neither.");
1340           assert(obj_card_num >= last_card_num, "Inv");
1341           if ((obj_card_num - last_card_num) > 1) {
1342             // Mark the last run, and start a new one.
1343             mark_card_num_range(start_card_num, last_card_num);
1344             start_card_num = obj_card_num;
1345           }
1346         }
1347 #if CARD_BM_TEST_MODE
1348         /*
1349         gclog_or_tty->print_cr("Setting bits from %d/%d.",
1350                                obj_card_num - _bottom_card_num,
1351                                obj_last_card_num - _bottom_card_num);
1352         */
1353         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1354           _card_bm->par_at_put(j - _bottom_card_num, 1);
1355         }
1356 #endif
1357       }
1358       // In any case, we set the last card num.
1359       last_card_num = obj_last_card_num;
1360 
1361       marked_bytes += (size_t)obj_sz * HeapWordSize;
1362       // Find the next marked object after this one.
1363       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1364       _changed = true;
1365     }
1366     // Handle the last range, if any.
1367     if (start_card_num != -1)
1368       mark_card_num_range(start_card_num, last_card_num);
1369     if (_final) {
1370       // Mark the allocated-since-marking portion...
1371       HeapWord* tp = hr->top();
1372       if (nextTop < tp) {
1373         start_card_num =
1374           intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1375         last_card_num =
1376           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1377         mark_card_num_range(start_card_num, last_card_num);
1378         // This definitely means the region has live objects.
1379         set_bit_for_region(hr);
1380       }
1381     }
1382 
1383     hr->add_to_marked_bytes(marked_bytes);
1384     // Update the live region bitmap.
1385     if (marked_bytes > 0) {
1386       set_bit_for_region(hr);
1387     }
1388     hr->set_top_at_conc_mark_count(nextTop);
1389     _tot_live += hr->next_live_bytes();
1390     _tot_used += hr->used();
1391     _words_done = words_done;
1392 
1393     if (!_final) {
1394       ++_regions_done;
1395       if (_regions_done % 10 == 0) {
1396         double end_vtime_sec = os::elapsedVTime();
1397         double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1398         if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1399           jlong sleep_time_ms =
1400             (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1401           os::sleep(Thread::current(), sleep_time_ms, false);
1402           _start_vtime_sec = end_vtime_sec;
1403         }
1404       }
1405     }
1406 
1407     return false;
1408   }
1409 
1410   bool changed() { return _changed;  }
1411   void reset()   { _changed = false; _words_done = 0; }
1412   void no_yield() { _yield = false; }
1413   size_t words_done() { return _words_done; }
1414   size_t tot_live() { return _tot_live; }
1415   size_t tot_used() { return _tot_used; }
1416 };
1417 
1418 
1419 void ConcurrentMark::calcDesiredRegions() {
1420   _region_bm.clear();
1421   _card_bm.clear();
1422   CalcLiveObjectsClosure calccl(false /*final*/,
1423                                 nextMarkBitMap(), this,
1424                                 &_region_bm, &_card_bm);
1425   G1CollectedHeap *g1h = G1CollectedHeap::heap();
1426   g1h->heap_region_iterate(&calccl);
1427 
1428   do {
1429     calccl.reset();
1430     g1h->heap_region_iterate(&calccl);
1431   } while (calccl.changed());
1432 }
1433 
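     // Parallel task used during cleanup (at a safepoint) to do the final
     // live-data count. Each worker runs a final, non-yielding
     // CalcLiveObjectsClosure over the regions it claims and records its
     // live/used byte totals, which live_bytes()/used_bytes() sum up.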
1434 class G1ParFinalCountTask: public AbstractGangTask {
1435 protected:
1436   G1CollectedHeap* _g1h;
1437   CMBitMap* _bm;
1438   size_t _n_workers;
1439   size_t *_live_bytes;
1440   size_t *_used_bytes;
1441   BitMap* _region_bm;
1442   BitMap* _card_bm;
1443 public:
1444   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1445                       BitMap* region_bm, BitMap* card_bm) :
1446     AbstractGangTask("G1 final counting"), _g1h(g1h),
1447     _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
1448   {
1449     if (ParallelGCThreads > 0)
1450       _n_workers = _g1h->workers()->total_workers();
1451     else
1452       _n_workers = 1;
1453     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1454     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1455   }
1456 
1457   ~G1ParFinalCountTask() {
1458     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1459     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1460   }
1461 
1462   void work(int i) {
1463     CalcLiveObjectsClosure calccl(true /*final*/,
1464                                   _bm, _g1h->concurrent_mark(),
1465                                   _region_bm, _card_bm);
1466     calccl.no_yield();
1467     if (ParallelGCThreads > 0) {
1468       _g1h->heap_region_par_iterate_chunked(&calccl, i,
1469                                             HeapRegion::FinalCountClaimValue);
1470     } else {
1471       _g1h->heap_region_iterate(&calccl);
1472     }
1473     assert(calccl.complete(), "Shouldn't have yielded!");
1474 
1475     assert((size_t) i < _n_workers, "invariant");
1476     _live_bytes[i] = calccl.tot_live();
1477     _used_bytes[i] = calccl.tot_used();
1478   }
1479   size_t live_bytes()  {
1480     size_t live_bytes = 0;
1481     for (size_t i = 0; i < _n_workers; ++i)
1482       live_bytes += _live_bytes[i];
1483     return live_bytes;
1484   }
1485   size_t used_bytes()  {
1486     size_t used_bytes = 0;
1487     for (size_t i = 0; i < _n_workers; ++i)
1488       used_bytes += _used_bytes[i];
1489     return used_bytes;
1490   }
1491 };
1492 
1493 class G1ParNoteEndTask;
1494 
1495 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1496   G1CollectedHeap* _g1;
1497   int _worker_num;
1498   size_t _max_live_bytes;
1499   size_t _regions_claimed;
1500   size_t _freed_bytes;
1501   size_t _cleared_h_regions;
1502   size_t _freed_regions;
1503   UncleanRegionList* _unclean_region_list;
1504   double _claimed_region_time;
1505   double _max_region_time;
1506 
1507 public:
1508   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1509                              UncleanRegionList* list,
1510                              int worker_num);
1511   size_t freed_bytes() { return _freed_bytes; }
1512   size_t cleared_h_regions() { return _cleared_h_regions; }
1513   size_t freed_regions() { return  _freed_regions; }
1514   UncleanRegionList* unclean_region_list() {
1515     return _unclean_region_list;
1516   }
1517 
1518   bool doHeapRegion(HeapRegion *r);
1519 
1520   size_t max_live_bytes() { return _max_live_bytes; }
1521   size_t regions_claimed() { return _regions_claimed; }
1522   double claimed_region_time_sec() { return _claimed_region_time; }
1523   double max_region_time_sec() { return _max_region_time; }
1524 };
1525 
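     // Parallel task that notes the end of marking in each heap region and
     // eagerly reclaims regions found to be completely empty. Per-worker
     // freed-bytes and max-live-bytes totals are accumulated under the
     // ParGCRareEvent_lock.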
1526 class G1ParNoteEndTask: public AbstractGangTask {
1527   friend class G1NoteEndOfConcMarkClosure;
1528 protected:
1529   G1CollectedHeap* _g1h;
1530   size_t _max_live_bytes;
1531   size_t _freed_bytes;
1532   ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
1533 public:
1534   G1ParNoteEndTask(G1CollectedHeap* g1h,
1535                    ConcurrentMark::ParCleanupThreadState**
1536                    par_cleanup_thread_state) :
1537     AbstractGangTask("G1 note end"), _g1h(g1h),
1538     _max_live_bytes(0), _freed_bytes(0),
1539     _par_cleanup_thread_state(par_cleanup_thread_state)
1540   {}
1541 
1542   void work(int i) {
1543     double start = os::elapsedTime();
1544     G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
1545                                            &_par_cleanup_thread_state[i]->list,
1546                                            i);
1547     if (ParallelGCThreads > 0) {
1548       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1549                                             HeapRegion::NoteEndClaimValue);
1550     } else {
1551       _g1h->heap_region_iterate(&g1_note_end);
1552     }
1553     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1554 
1555     // Now finish up freeing the current thread's regions.
1556     _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
1557                                   g1_note_end.cleared_h_regions(),
1558                                   0, NULL);
1559     {
1560       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1561       _max_live_bytes += g1_note_end.max_live_bytes();
1562       _freed_bytes += g1_note_end.freed_bytes();
1563     }
1564     double end = os::elapsedTime();
1565     if (G1PrintParCleanupStats) {
1566       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1567                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1568                           i, start, end, (end-start)*1000.0,
1569                           g1_note_end.regions_claimed(),
1570                           g1_note_end.claimed_region_time_sec()*1000.0,
1571                           g1_note_end.max_region_time_sec()*1000.0);
1572     }
1573   }
1574   size_t max_live_bytes() { return _max_live_bytes; }
1575   size_t freed_bytes() { return _freed_bytes; }
1576 };
1577 
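     // Parallel task that scrubs the remembered sets against the region and
     // card liveness bitmaps produced by the final count.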
1578 class G1ParScrubRemSetTask: public AbstractGangTask {
1579 protected:
1580   G1RemSet* _g1rs;
1581   BitMap* _region_bm;
1582   BitMap* _card_bm;
1583 public:
1584   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1585                        BitMap* region_bm, BitMap* card_bm) :
1586     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1587     _region_bm(region_bm), _card_bm(card_bm)
1588   {}
1589 
1590   void work(int i) {
1591     if (ParallelGCThreads > 0) {
1592       _g1rs->scrub_par(_region_bm, _card_bm, i,
1593                        HeapRegion::ScrubRemSetClaimValue);
1594     } else {
1595       _g1rs->scrub(_region_bm, _card_bm);
1596     }
1597   }
1598 
1599 };
1600 
1601 G1NoteEndOfConcMarkClosure::
1602 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1603                            UncleanRegionList* list,
1604                            int worker_num)
1605   : _g1(g1), _worker_num(worker_num),
1606     _max_live_bytes(0), _regions_claimed(0),
1607     _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
1608     _claimed_region_time(0.0), _max_region_time(0.0),
1609     _unclean_region_list(list)
1610 {}
1611 
1612 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
1613   // We use a claim value of zero here because all regions
1614   // were claimed with value 1 in the FinalCount task.
1615   r->reset_gc_time_stamp();
1616   if (!r->continuesHumongous()) {
1617     double start = os::elapsedTime();
1618     _regions_claimed++;
1619     r->note_end_of_marking();
1620     _max_live_bytes += r->max_live_bytes();
1621     _g1->free_region_if_totally_empty_work(r,
1622                                            _freed_bytes,
1623                                            _cleared_h_regions,
1624                                            _freed_regions,
1625                                            _unclean_region_list,
1626                                            true /*par*/);
1627     double region_time = (os::elapsedTime() - start);
1628     _claimed_region_time += region_time;
1629     if (region_time > _max_region_time) _max_region_time = region_time;
1630   }
1631   return false;
1632 }
1633 
1634 void ConcurrentMark::cleanup() {
1635   // world is stopped at this checkpoint
1636   assert(SafepointSynchronize::is_at_safepoint(),
1637          "world should be stopped");
1638   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1639 
1640   // If a full collection has happened, we shouldn't do this.
1641   if (has_aborted()) {
1642     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1643     return;
1644   }
1645 
1646   if (VerifyDuringGC) {
1647     HandleMark hm;  // handle scope
1648     gclog_or_tty->print(" VerifyDuringGC:(before)");
1649     Universe::heap()->prepare_for_verify();
1650     Universe::verify(/* allow dirty  */ true,
1651                      /* silent       */ false,
1652                      /* prev marking */ true);
1653   }
1654 
1655   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1656   g1p->record_concurrent_mark_cleanup_start();
1657 
1658   double start = os::elapsedTime();
1659 
1660   // Do counting once more with the world stopped for good measure.
1661   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1662                                         &_region_bm, &_card_bm);
1663   if (ParallelGCThreads > 0) {
1664     assert(g1h->check_heap_region_claim_values(
1665                                                HeapRegion::InitialClaimValue),
1666            "sanity check");
1667 
1668     int n_workers = g1h->workers()->total_workers();
1669     g1h->set_par_threads(n_workers);
1670     g1h->workers()->run_task(&g1_par_count_task);
1671     g1h->set_par_threads(0);
1672 
1673     assert(g1h->check_heap_region_claim_values(
1674                                              HeapRegion::FinalCountClaimValue),
1675            "sanity check");
1676   } else {
1677     g1_par_count_task.work(0);
1678   }
1679 
1680   size_t known_garbage_bytes =
1681     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1682 #if 0
1683   gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
1684                          (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
1685                          (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
1686                          (double) known_garbage_bytes / (double) (1024 * 1024));
1687 #endif // 0
1688   g1p->set_known_garbage_bytes(known_garbage_bytes);
1689 
1690   size_t start_used_bytes = g1h->used();
1691   _at_least_one_mark_complete = true;
1692   g1h->set_marking_complete();
1693 
1694   double count_end = os::elapsedTime();
1695   double this_final_counting_time = (count_end - start);
1696   if (G1PrintParCleanupStats) {
1697     gclog_or_tty->print_cr("Cleanup:");
1698     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
1699                            this_final_counting_time*1000.0);
1700   }
1701   _total_counting_time += this_final_counting_time;
1702 
1703   // Install newly created mark bitMap as "prev".
1704   swapMarkBitMaps();
1705 
1706   g1h->reset_gc_time_stamp();
1707 
1708   // Note end of marking in all heap regions.
1709   double note_end_start = os::elapsedTime();
1710   G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
1711   if (ParallelGCThreads > 0) {
1712     int n_workers = g1h->workers()->total_workers();
1713     g1h->set_par_threads(n_workers);
1714     g1h->workers()->run_task(&g1_par_note_end_task);
1715     g1h->set_par_threads(0);
1716 
1717     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1718            "sanity check");
1719   } else {
1720     g1_par_note_end_task.work(0);
1721   }
1722   // Tell the mutators that there might be unclean regions coming...
1723   g1h->set_unclean_regions_coming(true);
1724   double note_end_end = os::elapsedTime();
1725   if (G1PrintParCleanupStats) {
1726     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
1727                            (note_end_end - note_end_start)*1000.0);
1728   }
1729 
1730 
1731   // Now we scrub the remembered sets. This must happen before the call
1732   // below, since it affects the metric by which we sort the heap regions.
1733   if (G1ScrubRemSets) {
1734     double rs_scrub_start = os::elapsedTime();
1735     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1736     if (ParallelGCThreads > 0) {
1737       int n_workers = g1h->workers()->total_workers();
1738       g1h->set_par_threads(n_workers);
1739       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1740       g1h->set_par_threads(0);
1741 
1742       assert(g1h->check_heap_region_claim_values(
1743                                             HeapRegion::ScrubRemSetClaimValue),
1744              "sanity check");
1745     } else {
1746       g1_par_scrub_rs_task.work(0);
1747     }
1748 
1749     double rs_scrub_end = os::elapsedTime();
1750     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1751     _total_rs_scrub_time += this_rs_scrub_time;
1752   }
1753 
1754   // this will also free any regions totally full of garbage objects,
1755   // and sort the regions.
1756   g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1757                         g1_par_note_end_task.freed_bytes(),
1758                         g1_par_note_end_task.max_live_bytes());
1759 
1760   // Statistics.
1761   double end = os::elapsedTime();
1762   _cleanup_times.add((end - start) * 1000.0);
1763 
1764   // G1CollectedHeap::heap()->print();
1765   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1766   // G1CollectedHeap::heap()->get_gc_time_stamp());
1767 
1768   if (PrintGC || PrintGCDetails) {
1769     g1h->print_size_transition(gclog_or_tty,
1770                                start_used_bytes,
1771                                g1h->used(),
1772                                g1h->capacity());
1773   }
1774 
1775   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1776   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1777 
1778   // We need to make this be a "collection" so any collection pause that
1779   // races with it goes around and waits for completeCleanup to finish.
1780   g1h->increment_total_collections();
1781 
1782   if (VerifyDuringGC) {
1783     HandleMark hm;  // handle scope
1784     gclog_or_tty->print(" VerifyDuringGC:(after)");
1785     Universe::heap()->prepare_for_verify();
1786     Universe::verify(/* allow dirty  */ true,
1787                      /* silent       */ false,
1788                      /* prev marking */ true);
1789   }
1790 }
1791 
1792 void ConcurrentMark::completeCleanup() {
1793   // A full collection intervened.
1794   if (has_aborted()) return;
1795 
1796   int first = 0;
1797   int last = (int)MAX2(ParallelGCThreads, (size_t)1);
1798   for (int t = 0; t < last; t++) {
1799     UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
1800     assert(list->well_formed(), "Inv");
1801     HeapRegion* hd = list->hd();
1802     while (hd != NULL) {
1803       // Now finish up the other stuff.
1804       hd->rem_set()->clear();
1805       HeapRegion* next_hd = hd->next_from_unclean_list();
1806       (void)list->pop();
1807       assert(list->hd() == next_hd, "how not?");
1808       _g1h->put_region_on_unclean_list(hd);
1809       if (!hd->isHumongous()) {
1810         // This adds one to the _free_regions count.
1811         _g1h->finish_free_region_work(0, 0, 1, NULL);
1812       }
1813       hd = list->hd();
1814       assert(hd == next_hd, "how not?");
1815     }
1816   }
1817 }
1818 
1819 
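     // Liveness predicate handed to the reference processor: an oop is
     // considered live if it is non-NULL and either lies outside the G1
     // reserved space or the heap does not consider it "ill" (i.e. dead
     // with respect to the marking currently in progress).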
1820 class G1CMIsAliveClosure: public BoolObjectClosure {
1821   G1CollectedHeap* _g1;
1822  public:
1823   G1CMIsAliveClosure(G1CollectedHeap* g1) :
1824     _g1(g1)
1825   {}
1826 
1827   void do_object(oop obj) {
1828     assert(false, "not to be invoked");
1829   }
1830   bool do_object_b(oop obj) {
1831     HeapWord* addr = (HeapWord*)obj;
1832     return addr != NULL &&
1833            (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1834   }
1835 };
1836 
1837 class G1CMKeepAliveClosure: public OopClosure {
1838   G1CollectedHeap* _g1;
1839   ConcurrentMark*  _cm;
1840   CMBitMap*        _bitMap;
1841  public:
1842   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1843                        CMBitMap* bitMap) :
1844     _g1(g1), _cm(cm),
1845     _bitMap(bitMap) {}
1846 
1847   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1848   virtual void do_oop(      oop* p) { do_oop_work(p); }
1849 
1850   template <class T> void do_oop_work(T* p) {
1851     oop thisOop = oopDesc::load_decode_heap_oop(p);
1852     HeapWord* addr = (HeapWord*)thisOop;
1853     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
1854       _bitMap->mark(addr);
1855       _cm->mark_stack_push(thisOop);
1856     }
1857   }
1858 };
1859 
1860 class G1CMDrainMarkingStackClosure: public VoidClosure {
1861   CMMarkStack*                  _markStack;
1862   CMBitMap*                     _bitMap;
1863   G1CMKeepAliveClosure*         _oopClosure;
1864  public:
1865   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1866                                G1CMKeepAliveClosure* oopClosure) :
1867     _bitMap(bitMap),
1868     _markStack(markStack),
1869     _oopClosure(oopClosure)
1870   {}
1871 
1872   void do_void() {
1873     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1874   }
1875 };
1876 
1877 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1878   ResourceMark rm;
1879   HandleMark   hm;
1880   G1CollectedHeap* g1h   = G1CollectedHeap::heap();
1881   ReferenceProcessor* rp = g1h->ref_processor();
1882 
1883   // Process weak references.
1884   rp->setup_policy(clear_all_soft_refs);
1885   assert(_markStack.isEmpty(), "mark stack should be empty");
1886 
1887   G1CMIsAliveClosure   g1IsAliveClosure  (g1h);
1888   G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap());
1889   G1CMDrainMarkingStackClosure
1890     g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
1891                                &g1KeepAliveClosure);
1892 
1893   // XXXYYY  Also: copy the parallel ref processing code from CMS.
1894   rp->process_discovered_references(&g1IsAliveClosure,
1895                                     &g1KeepAliveClosure,
1896                                     &g1DrainMarkingStackClosure,
1897                                     NULL);
1898   assert(_markStack.overflow() || _markStack.isEmpty(),
1899          "mark stack should be empty (unless it overflowed)");
1900   if (_markStack.overflow()) {
1901     set_has_overflown();
1902   }
1903 
1904   rp->enqueue_discovered_references();
1905   rp->verify_no_references_recorded();
1906   assert(!rp->discovery_enabled(), "should have been disabled");
1907 
1908   // Now clean up stale oops in SymbolTable and StringTable
1909   SymbolTable::unlink(&g1IsAliveClosure);
1910   StringTable::unlink(&g1IsAliveClosure);
1911 }
1912 
1913 void ConcurrentMark::swapMarkBitMaps() {
1914   CMBitMapRO* temp = _prevMarkBitMap;
1915   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
1916   _nextMarkBitMap  = (CMBitMap*)  temp;
1917 }
1918 
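     // Parallel remark task: each worker that maps to an active marking task
     // repeatedly calls do_marking_step() with an effectively unbounded time
     // target, until the task either completes or aborts because the global
     // mark stack has overflown.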
1919 class CMRemarkTask: public AbstractGangTask {
1920 private:
1921   ConcurrentMark *_cm;
1922 
1923 public:
1924   void work(int worker_i) {
1925     // Since all available tasks are actually started, we should
1926     // only proceed if we're supposed to be active.
1927     if ((size_t)worker_i < _cm->active_tasks()) {
1928       CMTask* task = _cm->task(worker_i);
1929       task->record_start_time();
1930       do {
1931         task->do_marking_step(1000000000.0 /* something very large */);
1932       } while (task->has_aborted() && !_cm->has_overflown());
1933       // If we overflow, then we do not want to restart. We instead
1934       // want to abort remark and do concurrent marking again.
1935       task->record_end_time();
1936     }
1937   }
1938 
1939   CMRemarkTask(ConcurrentMark* cm) :
1940     AbstractGangTask("Par Remark"), _cm(cm) { }
1941 };
1942 
1943 void ConcurrentMark::checkpointRootsFinalWork() {
1944   ResourceMark rm;
1945   HandleMark   hm;
1946   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1947 
1948   g1h->ensure_parsability(false);
1949 
1950   if (ParallelGCThreads > 0) {
1951     G1CollectedHeap::StrongRootsScope srs(g1h);
1952     // this is remark, so we'll use up all available threads
1953     int active_workers = ParallelGCThreads;
1954     set_phase(active_workers, false);
1955 
1956     CMRemarkTask remarkTask(this);
1957     // We will start all available threads, even if we decide that the
1958     // active_workers will be fewer. The extra ones will just bail out
1959     // immediately.
1960     int n_workers = g1h->workers()->total_workers();
1961     g1h->set_par_threads(n_workers);
1962     g1h->workers()->run_task(&remarkTask);
1963     g1h->set_par_threads(0);
1964   } else {
1965     G1CollectedHeap::StrongRootsScope srs(g1h);
1966     // this is remark, so we'll use up all available threads
1967     int active_workers = 1;
1968     set_phase(active_workers, false);
1969 
1970     CMRemarkTask remarkTask(this);
1971     // We will start all available threads, even if we decide that the
1972     // active_workers will be fewer. The extra ones will just bail out
1973     // immediately.
1974     remarkTask.work(0);
1975   }
1976   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1977   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
1978 
1979   print_stats();
1980 
1981   if (!restart_for_overflow())
1982     set_non_marking_state();
1983 
1984 #if VERIFY_OBJS_PROCESSED
1985   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
1986     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
1987                            _scan_obj_cl.objs_processed,
1988                            ThreadLocalObjQueue::objs_enqueued);
1989     guarantee(_scan_obj_cl.objs_processed ==
1990               ThreadLocalObjQueue::objs_enqueued,
1991               "Different number of objs processed and enqueued.");
1992   }
1993 #endif
1994 }
1995 
1996 #ifndef PRODUCT
1997 
1998 class PrintReachableOopClosure: public OopClosure {
1999 private:
2000   G1CollectedHeap* _g1h;
2001   CMBitMapRO*      _bitmap;
2002   outputStream*    _out;
2003   bool             _use_prev_marking;
2004   bool             _all;
2005 
2006 public:
2007   PrintReachableOopClosure(CMBitMapRO*   bitmap,
2008                            outputStream* out,
2009                            bool          use_prev_marking,
2010                            bool          all) :
2011     _g1h(G1CollectedHeap::heap()),
2012     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
2013 
2014   void do_oop(narrowOop* p) { do_oop_work(p); }
2015   void do_oop(      oop* p) { do_oop_work(p); }
2016 
2017   template <class T> void do_oop_work(T* p) {
2018     oop         obj = oopDesc::load_decode_heap_oop(p);
2019     const char* str = NULL;
2020     const char* str2 = "";
2021 
2022     if (obj == NULL) {
2023       str = "";
2024     } else if (!_g1h->is_in_g1_reserved(obj)) {
2025       str = " O";
2026     } else {
2027       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2028       guarantee(hr != NULL, "invariant");
2029       bool over_tams = false;
2030       if (_use_prev_marking) {
2031         over_tams = hr->obj_allocated_since_prev_marking(obj);
2032       } else {
2033         over_tams = hr->obj_allocated_since_next_marking(obj);
2034       }
2035       bool marked = _bitmap->isMarked((HeapWord*) obj);
2036 
2037       if (over_tams) {
2038         str = " >";
2039         if (marked) {
2040           str2 = " AND MARKED";
2041         }
2042       } else if (marked) {
2043         str = " M";
2044       } else {
2045         str = " NOT";
2046       }
2047     }
2048 
2049     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2050                    p, (void*) obj, str, str2);
2051   }
2052 };
2053 
2054 class PrintReachableObjectClosure : public ObjectClosure {
2055 private:
2056   CMBitMapRO*   _bitmap;
2057   outputStream* _out;
2058   bool          _use_prev_marking;
2059   bool          _all;
2060   HeapRegion*   _hr;
2061 
2062 public:
2063   PrintReachableObjectClosure(CMBitMapRO*   bitmap,
2064                               outputStream* out,
2065                               bool          use_prev_marking,
2066                               bool          all,
2067                               HeapRegion*   hr) :
2068     _bitmap(bitmap), _out(out),
2069     _use_prev_marking(use_prev_marking), _all(all), _hr(hr) { }
2070 
2071   void do_object(oop o) {
2072     bool over_tams;
2073     if (_use_prev_marking) {
2074       over_tams = _hr->obj_allocated_since_prev_marking(o);
2075     } else {
2076       over_tams = _hr->obj_allocated_since_next_marking(o);
2077     }
2078     bool marked = _bitmap->isMarked((HeapWord*) o);
2079     bool print_it = _all || over_tams || marked;
2080 
2081     if (print_it) {
2082       _out->print_cr(" "PTR_FORMAT"%s",
2083                      o, (over_tams) ? " >" : (marked) ? " M" : "");
2084       PrintReachableOopClosure oopCl(_bitmap, _out, _use_prev_marking, _all);
2085       o->oop_iterate(&oopCl);
2086     }
2087   }
2088 };
2089 
2090 class PrintReachableRegionClosure : public HeapRegionClosure {
2091 private:
2092   CMBitMapRO*   _bitmap;
2093   outputStream* _out;
2094   bool          _use_prev_marking;
2095   bool          _all;
2096 
2097 public:
2098   bool doHeapRegion(HeapRegion* hr) {
2099     HeapWord* b = hr->bottom();
2100     HeapWord* e = hr->end();
2101     HeapWord* t = hr->top();
2102     HeapWord* p = NULL;
2103     if (_use_prev_marking) {
2104       p = hr->prev_top_at_mark_start();
2105     } else {
2106       p = hr->next_top_at_mark_start();
2107     }
2108     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2109                    "TAMS: "PTR_FORMAT, b, e, t, p);
2110     _out->cr();
2111 
2112     HeapWord* from = b;
2113     HeapWord* to   = t;
2114 
2115     if (to > from) {
2116       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2117       _out->cr();
2118       PrintReachableObjectClosure ocl(_bitmap, _out,
2119                                       _use_prev_marking, _all, hr);
2120       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2121       _out->cr();
2122     }
2123 
2124     return false;
2125   }
2126 
2127   PrintReachableRegionClosure(CMBitMapRO*   bitmap,
2128                               outputStream* out,
2129                               bool          use_prev_marking,
2130                               bool          all) :
2131     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
2132 };
2133 
2134 void ConcurrentMark::print_reachable(const char* str,
2135                                      bool use_prev_marking,
2136                                      bool all) {
2137   gclog_or_tty->cr();
2138   gclog_or_tty->print_cr("== Doing heap dump... ");
2139 
2140   if (G1PrintReachableBaseFile == NULL) {
2141     gclog_or_tty->print_cr("  #### error: no base file defined");
2142     return;
2143   }
2144 
2145   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2146       (JVM_MAXPATHLEN - 1)) {
2147     gclog_or_tty->print_cr("  #### error: file name too long");
2148     return;
2149   }
2150 
2151   char file_name[JVM_MAXPATHLEN];
2152   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2153   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2154 
2155   fileStream fout(file_name);
2156   if (!fout.is_open()) {
2157     gclog_or_tty->print_cr("  #### error: could not open file");
2158     return;
2159   }
2160 
2161   outputStream* out = &fout;
2162 
2163   CMBitMapRO* bitmap = NULL;
2164   if (use_prev_marking) {
2165     bitmap = _prevMarkBitMap;
2166   } else {
2167     bitmap = _nextMarkBitMap;
2168   }
2169 
2170   out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS");
2171   out->cr();
2172 
2173   out->print_cr("--- ITERATING OVER REGIONS");
2174   out->cr();
2175   PrintReachableRegionClosure rcl(bitmap, out, use_prev_marking, all);
2176   _g1h->heap_region_iterate(&rcl);
2177   out->cr();
2178 
2179   gclog_or_tty->print_cr("  done");
2180   gclog_or_tty->flush();
2181 }
2182 
2183 #endif // PRODUCT
2184 
2185 // This note is for drainAllSATBBuffers and the code in between.
2186 // In the future we could reuse a task to do this work during an
2187 // evacuation pause (since now tasks are not active and can be claimed
2188 // during an evacuation pause). This was a late change to the code and
2189 // is currently not being taken advantage of.
2190 
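     // Applies ConcurrentMark::deal_with_reference() to every object found
     // in a completed SATB buffer.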
2191 class CMGlobalObjectClosure : public ObjectClosure {
2192 private:
2193   ConcurrentMark* _cm;
2194 
2195 public:
2196   void do_object(oop obj) {
2197     _cm->deal_with_reference(obj);
2198   }
2199 
2200   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2201 };
2202 
2203 void ConcurrentMark::deal_with_reference(oop obj) {
2204   if (verbose_high())
2205     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2206                            (void*) obj);
2207 
2208 
2209   HeapWord* objAddr = (HeapWord*) obj;
2210   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2211   if (_g1h->is_in_g1_reserved(objAddr)) {
2212     assert(obj != NULL, "is_in_g1_reserved should ensure this");
2213     HeapRegion* hr = _g1h->heap_region_containing(obj);
2214     if (_g1h->is_obj_ill(obj, hr)) {
2215       if (verbose_high())
2216         gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2217                                "marked", (void*) obj);
2218 
2219       // we need to mark it first
2220       if (_nextMarkBitMap->parMark(objAddr)) {
2221         // No OrderAccess::store_load() is needed. It is implicit in the
2222         // CAS done in parMark(objAddr) above.
2223         HeapWord* finger = _finger;
2224         if (objAddr < finger) {
2225           if (verbose_high())
2226             gclog_or_tty->print_cr("[global] below the global finger "
2227                                    "("PTR_FORMAT"), pushing it", finger);
2228           if (!mark_stack_push(obj)) {
2229             if (verbose_low())
2230               gclog_or_tty->print_cr("[global] global stack overflow during "
2231                                      "deal_with_reference");
2232           }
2233         }
2234       }
2235     }
2236   }
2237 }
2238 
2239 void ConcurrentMark::drainAllSATBBuffers() {
2240   CMGlobalObjectClosure oc(this);
2241   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2242   satb_mq_set.set_closure(&oc);
2243 
2244   while (satb_mq_set.apply_closure_to_completed_buffer()) {
2245     if (verbose_medium())
2246       gclog_or_tty->print_cr("[global] processed an SATB buffer");
2247   }
2248 
2249   // no need to check whether we should do this, as this is only
2250   // called during an evacuation pause
2251   satb_mq_set.iterate_closure_all_threads();
2252 
2253   satb_mq_set.set_closure(NULL);
2254   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2255 }
2256 
2257 void ConcurrentMark::markPrev(oop p) {
2258   // Note we are overriding the read-only view of the prev map here, via
2259   // the cast.
2260   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2261 }
2262 
2263 void ConcurrentMark::clear(oop p) {
2264   assert(p != NULL && p->is_oop(), "expected an oop");
2265   HeapWord* addr = (HeapWord*)p;
2266   assert(addr >= _nextMarkBitMap->startWord() &&
2267          addr < _nextMarkBitMap->endWord(), "in a region");
2268 
2269   _nextMarkBitMap->clear(addr);
2270 }
2271 
2272 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2273   // Note we are overriding the read-only view of the prev map here, via
2274   // the cast.
2275   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2276   _nextMarkBitMap->clearRange(mr);
2277 }
2278 
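     // Claims the next region to be scanned by atomically advancing the
     // global finger from its current value to the end of the region
     // containing it. On success, the region is returned if it has anything
     // to scan (NTAMS above bottom) and NULL otherwise; on CAS failure the
     // finger is re-read and we retry.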
2279 HeapRegion*
2280 ConcurrentMark::claim_region(int task_num) {
2281   // "checkpoint" the finger
2282   HeapWord* finger = _finger;
2283 
2284   // _heap_end will not change underneath our feet; it only changes at
2285   // yield points.
2286   while (finger < _heap_end) {
2287     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2288 
2289     // is the gap between reading the finger and doing the CAS too long?
2290 
2291     HeapRegion* curr_region   = _g1h->heap_region_containing(finger);
2292     HeapWord*   bottom        = curr_region->bottom();
2293     HeapWord*   end           = curr_region->end();
2294     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2295 
2296     if (verbose_low())
2297       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2298                              "["PTR_FORMAT", "PTR_FORMAT"), "
2299                              "limit = "PTR_FORMAT,
2300                              task_num, curr_region, bottom, end, limit);
2301 
2302     HeapWord* res =
2303       (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2304     if (res == finger) {
2305       // we succeeded
2306 
2307       // notice that _finger == end cannot be guaranteed here since
2308       // someone else might have moved the finger even further
2309       assert(_finger >= end, "the finger should have moved forward");
2310 
2311       if (verbose_low())
2312         gclog_or_tty->print_cr("[%d] we were successful with region = "
2313                                PTR_FORMAT, task_num, curr_region);
2314 
2315       if (limit > bottom) {
2316         if (verbose_low())
2317           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2318                                  "returning it ", task_num, curr_region);
2319         return curr_region;
2320       } else {
2321         assert(limit == bottom,
2322                "the region limit should be at bottom");
2323         if (verbose_low())
2324           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2325                                  "returning NULL", task_num, curr_region);
2326         // we return NULL and the caller should try calling
2327         // claim_region() again.
2328         return NULL;
2329       }
2330     } else {
2331       assert(_finger > finger, "the finger should have moved forward");
2332       if (verbose_low())
2333         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2334                                "global finger = "PTR_FORMAT", "
2335                                "our finger = "PTR_FORMAT,
2336                                task_num, _finger, finger);
2337 
2338       // read it again
2339       finger = _finger;
2340     }
2341   }
2342 
2343   return NULL;
2344 }
2345 
2346 void ConcurrentMark::oops_do(OopClosure* cl) {
2347   if (_markStack.size() > 0 && verbose_low())
2348     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2349                            "size = %d", _markStack.size());
2350   // we first iterate over the contents of the mark stack...
2351   _markStack.oops_do(cl);
2352 
2353   for (int i = 0; i < (int)_max_task_num; ++i) {
2354     OopTaskQueue* queue = _task_queues->queue((int)i);
2355 
2356     if (queue->size() > 0 && verbose_low())
2357       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2358                              "size = %d", i, queue->size());
2359 
2360     // ...then over the contents of all the task queues.
2361     queue->oops_do(cl);
2362   }
2363 
2364   // finally, invalidate any entries in the region stack that
2365   // point into the collection set
2366   if (_regionStack.invalidate_entries_into_cset()) {
2367     // otherwise, any gray objects copied during the evacuation pause
2368     // might not be visited.
2369     assert(_should_gray_objects, "invariant");
2370   }
2371 }
2372 
2373 void ConcurrentMark::clear_marking_state() {
2374   _markStack.setEmpty();
2375   _markStack.clear_overflow();
2376   _regionStack.setEmpty();
2377   _regionStack.clear_overflow();
2378   clear_has_overflown();
2379   _finger = _heap_start;
2380 
2381   for (int i = 0; i < (int)_max_task_num; ++i) {
2382     OopTaskQueue* queue = _task_queues->queue(i);
2383     queue->set_empty();
2384   }
2385 }
2386 
2387 void ConcurrentMark::print_stats() {
2388   if (verbose_stats()) {
2389     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2390     for (size_t i = 0; i < _active_tasks; ++i) {
2391       _tasks[i]->print_stats();
2392       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2393     }
2394   }
2395 }
2396 
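     // Used when completing marking over the collection set: objects inside
     // the collection set are marked on the next bitmap and pushed onto a
     // fixed-size local mark stack (object arrays are re-pushed in
     // _array_increment-sized chunks to bound stack growth), while
     // references outside the collection set are handed to
     // ConcurrentMark::deal_with_reference().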
2397 class CSMarkOopClosure: public OopClosure {
2398   friend class CSMarkBitMapClosure;
2399 
2400   G1CollectedHeap* _g1h;
2401   CMBitMap*        _bm;
2402   ConcurrentMark*  _cm;
2403   oop*             _ms;
2404   jint*            _array_ind_stack;
2405   int              _ms_size;
2406   int              _ms_ind;
2407   int              _array_increment;
2408 
2409   bool push(oop obj, int arr_ind = 0) {
2410     if (_ms_ind == _ms_size) {
2411       gclog_or_tty->print_cr("Mark stack is full.");
2412       return false;
2413     }
2414     _ms[_ms_ind] = obj;
2415     if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
2416     _ms_ind++;
2417     return true;
2418   }
2419 
2420   oop pop() {
2421     if (_ms_ind == 0) return NULL;
2422     else {
2423       _ms_ind--;
2424       return _ms[_ms_ind];
2425     }
2426   }
2427 
2428   template <class T> bool drain() {
2429     while (_ms_ind > 0) {
2430       oop obj = pop();
2431       assert(obj != NULL, "Since index was non-zero.");
2432       if (obj->is_objArray()) {
2433         jint arr_ind = _array_ind_stack[_ms_ind];
2434         objArrayOop aobj = objArrayOop(obj);
2435         jint len = aobj->length();
2436         jint next_arr_ind = arr_ind + _array_increment;
2437         if (next_arr_ind < len) {
2438           push(obj, next_arr_ind);
2439         }
2440         // Now process this portion of the array.
2441         int lim = MIN2(next_arr_ind, len);
2442         for (int j = arr_ind; j < lim; j++) {
2443           do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2444         }
2445 
2446       } else {
2447         obj->oop_iterate(this);
2448       }
2449       if (abort()) return false;
2450     }
2451     return true;
2452   }
2453 
2454 public:
2455   CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2456     _g1h(G1CollectedHeap::heap()),
2457     _cm(cm),
2458     _bm(cm->nextMarkBitMap()),
2459     _ms_size(ms_size), _ms_ind(0),
2460     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2461     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2462     _array_increment(MAX2(ms_size/8, 16))
2463   {}
2464 
2465   ~CSMarkOopClosure() {
2466     FREE_C_HEAP_ARRAY(oop, _ms);
2467     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2468   }
2469 
2470   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2471   virtual void do_oop(      oop* p) { do_oop_work(p); }
2472 
2473   template <class T> void do_oop_work(T* p) {
2474     T heap_oop = oopDesc::load_heap_oop(p);
2475     if (oopDesc::is_null(heap_oop)) return;
2476     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2477     if (obj->is_forwarded()) {
2478       // If the object has already been forwarded, we have to make sure
2479       // that it's marked.  So follow the forwarding pointer.  Note that
2480       // this does the right thing for self-forwarding pointers in the
2481       // evacuation failure case.
2482       obj = obj->forwardee();
2483     }
2484     HeapRegion* hr = _g1h->heap_region_containing(obj);
2485     if (hr != NULL) {
2486       if (hr->in_collection_set()) {
2487         if (_g1h->is_obj_ill(obj)) {
2488           _bm->mark((HeapWord*)obj);
2489           if (!push(obj)) {
2490             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2491             set_abort();
2492           }
2493         }
2494       } else {
2495         // Outside the collection set; we need to gray it
2496         _cm->deal_with_reference(obj);
2497       }
2498     }
2499   }
2500 };
2501 
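     // For every marked, not-yet-forwarded object found in the bitmap,
     // pushes the object onto the CSMarkOopClosure stack and drains it;
     // returns false (terminating the bitmap iteration) if the local mark
     // stack overflows.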
2502 class CSMarkBitMapClosure: public BitMapClosure {
2503   G1CollectedHeap* _g1h;
2504   CMBitMap*        _bitMap;
2505   ConcurrentMark*  _cm;
2506   CSMarkOopClosure _oop_cl;
2507 public:
2508   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2509     _g1h(G1CollectedHeap::heap()),
2510     _bitMap(cm->nextMarkBitMap()),
2511     _oop_cl(cm, ms_size)
2512   {}
2513 
2514   ~CSMarkBitMapClosure() {}
2515 
2516   bool do_bit(size_t offset) {
2517     // convert offset into a HeapWord*
2518     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2519     assert(_bitMap->startWord() <= addr && addr < _bitMap->endWord(),
2520            "address out of range");
2521     assert(_bitMap->isMarked(addr), "tautology");
2522     oop obj = oop(addr);
2523     if (!obj->is_forwarded()) {
2524       if (!_oop_cl.push(obj)) return false;
2525       if (UseCompressedOops) {
2526         if (!_oop_cl.drain<narrowOop>()) return false;
2527       } else {
2528         if (!_oop_cl.drain<oop>()) return false;
2529       }
2530     }
2531     // Otherwise...
2532     return true;
2533   }
2534 };
2535 
2536 
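     // Iterates the next mark bitmap over [bottom, NTAMS) of each
     // collection-set region that did not fail evacuation. If the bitmap
     // iteration is cut short (local mark stack overflow), the closure
     // records that the pass was incomplete so the caller can retry.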
2537 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2538   CMBitMap* _bm;
2539   CSMarkBitMapClosure _bit_cl;
2540   enum SomePrivateConstants {
2541     MSSize = 1000
2542   };
2543   bool _completed;
2544 public:
2545   CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2546     _bm(cm->nextMarkBitMap()),
2547     _bit_cl(cm, MSSize),
2548     _completed(true)
2549   {}
2550 
2551   ~CompleteMarkingInCSHRClosure() {}
2552 
2553   bool doHeapRegion(HeapRegion* r) {
2554     if (!r->evacuation_failed()) {
2555       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2556       if (!mr.is_empty()) {
2557         if (!_bm->iterate(&_bit_cl, mr)) {
2558           _completed = false;
2559           return true;
2560         }
2561       }
2562     }
2563     return false;
2564   }
2565 
2566   bool completed() { return _completed; }
2567 };
2568 
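     // Once marking over the collection set is complete, clears the next
     // mark bitmap over the used portion of every region that did not fail
     // evacuation.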
2569 class ClearMarksInHRClosure: public HeapRegionClosure {
2570   CMBitMap* _bm;
2571 public:
2572   ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2573 
2574   bool doHeapRegion(HeapRegion* r) {
2575     if (!r->used_region().is_empty() && !r->evacuation_failed()) {
2576       MemRegion usedMR = r->used_region();
2577       _bm->clearRange(usedMR);
2578     }
2579     return false;
2580   }
2581 };
2582 
2583 void ConcurrentMark::complete_marking_in_collection_set() {
2584   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
2585 
2586   if (!g1h->mark_in_progress()) {
2587     g1h->g1_policy()->record_mark_closure_time(0.0);
2588     return;
2589   }
2590 
2591   int i = 1;
2592   double start = os::elapsedTime();
2593   while (true) {
2594     i++;
2595     CompleteMarkingInCSHRClosure cmplt(this);
2596     g1h->collection_set_iterate(&cmplt);
2597     if (cmplt.completed()) break;
2598   }
2599   double end_time = os::elapsedTime();
2600   double elapsed_time_ms = (end_time - start) * 1000.0;
2601   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2602 
2603   ClearMarksInHRClosure clr(nextMarkBitMap());
2604   g1h->collection_set_iterate(&clr);
2605 }
2606 
2607 // The next two methods deal with the following optimisation. Some
2608 // objects are gray by being marked and located above the finger. If
2609 // they are copied, during an evacuation pause, below the finger then
2610 // they need to be pushed on the stack. The observation is that, if
2611 // there are no regions in the collection set located above the
2612 // finger, then the above cannot happen, hence we do not need to
2613 // explicitly gray any objects when copying them to below the
2614 // finger. The global stack will be scanned to ensure that, if it
2615 // points to objects being copied, it will update their
2616 // location. There is a tricky situation with the gray objects in
2617 // the region stack that are being copied, however. See the comment in
2618 // newCSet().
2619 
2620 void ConcurrentMark::newCSet() {
2621   if (!concurrent_marking_in_progress())
2622     // nothing to do if marking is not in progress
2623     return;
2624 
2625   // find what the lowest finger is among the global and local fingers
2626   _min_finger = _finger;
2627   for (int i = 0; i < (int)_max_task_num; ++i) {
2628     CMTask* task = _tasks[i];
2629     HeapWord* task_finger = task->finger();
2630     if (task_finger != NULL && task_finger < _min_finger)
2631       _min_finger = task_finger;
2632   }
2633 
2634   _should_gray_objects = false;
2635 
2636   // This fixes a very subtle and frustrating bug. It might be the case
2637   // that, during an evacuation pause, heap regions that contain
2638   // objects that are gray (by being in regions contained in the
2639   // region stack) are included in the collection set. Since such gray
2640   // objects will be moved, and because it's not easy to redirect
2641   // region stack entries to point to a new location (because objects
2642   // in one region might be scattered to multiple regions after they
2643   // are copied), one option is to ensure that all marked objects
2644   // copied during a pause are pushed on the stack. Notice, however,
2645   // that this problem can only happen when the region stack is not
2646   // empty during an evacuation pause. So, we make the fix a bit less
2647   // conservative and ensure that regions are pushed on the stack,
2648   // irrespective of whether all collection set regions are below the
2649   // finger, if the region stack is not empty. This is expected to be
2650   // a rare case, so I don't think it's necessary to be smarter about it.
2651   if (!region_stack_empty())
2652     _should_gray_objects = true;
2653 }
2654 
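     // Called as a region is added to the collection set: if the region
     // extends above the lowest finger computed in newCSet(), marked objects
     // copied out of it during an evacuation pause may end up below the
     // finger and must then be explicitly grayed (pushed on the stack).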
2655 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
2656   if (!concurrent_marking_in_progress())
2657     return;
2658 
2659   HeapWord* region_end = hr->end();
2660   if (region_end > _min_finger)
2661     _should_gray_objects = true;
2662 }
2663 
2664 // abandon current marking iteration due to a Full GC
2665 void ConcurrentMark::abort() {
2666   // Clear all marks to force marking thread to do nothing
2667   _nextMarkBitMap->clearAll();
2668   // Empty mark stack
2669   clear_marking_state();
2670   for (int i = 0; i < (int)_max_task_num; ++i)
2671     _tasks[i]->clear_region_fields();
2672   _has_aborted = true;
2673 
2674   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2675   satb_mq_set.abandon_partial_marking();
2676   // This can be called either during or outside marking, we'll read
2677   // the expected_active value from the SATB queue set.
2678   satb_mq_set.set_active_all_threads(
2679                                  false, /* new active value */
2680                                  satb_mq_set.is_active() /* expected_active */);
2681 }
2682 
2683 static void print_ms_time_info(const char* prefix, const char* name,
2684                                NumberSeq& ns) {
2685   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2686                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2687   if (ns.num() > 0) {
2688     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2689                            prefix, ns.sd(), ns.maximum());
2690   }
2691 }
2692 
2693 void ConcurrentMark::print_summary_info() {
2694   gclog_or_tty->print_cr(" Concurrent marking:");
2695   print_ms_time_info("  ", "init marks", _init_times);
2696   print_ms_time_info("  ", "remarks", _remark_times);
2697   {
2698     print_ms_time_info("     ", "final marks", _remark_mark_times);
2699     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2700 
2701   }
2702   print_ms_time_info("  ", "cleanups", _cleanup_times);
2703   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
2704                          _total_counting_time,
2705                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2706                           (double)_cleanup_times.num()
2707                          : 0.0));
2708   if (G1ScrubRemSets) {
2709     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2710                            _total_rs_scrub_time,
2711                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2712                             (double)_cleanup_times.num()
2713                            : 0.0));
2714   }
2715   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
2716                          (_init_times.sum() + _remark_times.sum() +
2717                           _cleanup_times.sum())/1000.0);
2718   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
2719                 "(%8.2f s marking, %8.2f s counting).",
2720                 cmThread()->vtime_accum(),
2721                 cmThread()->vtime_mark_accum(),
2722                 cmThread()->vtime_count_accum());
2723 }
2724 
2725 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2726   _parallel_workers->print_worker_threads_on(st);
2727 }
2728 
2729 // Closures
2730 // XXX: there seems to be a lot of code  duplication here;
2731 // should refactor and consolidate the shared code.
2732 
2736 // We take a break if someone is trying to stop the world.
2737 bool ConcurrentMark::do_yield_check(int worker_i) {
2738   if (should_yield()) {
2739     if (worker_i == 0)
2740       _g1h->g1_policy()->record_concurrent_pause();
2741     cmThread()->yield();
2742     if (worker_i == 0)
2743       _g1h->g1_policy()->record_concurrent_pause_end();
2744     return true;
2745   } else {
2746     return false;
2747   }
2748 }
2749 
2750 bool ConcurrentMark::should_yield() {
2751   return cmThread()->should_yield();
2752 }
2753 
2754 bool ConcurrentMark::containing_card_is_marked(void* p) {
2755   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
2756   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
2757 }
2758 
2759 bool ConcurrentMark::containing_cards_are_marked(void* start,
2760                                                  void* last) {
2761   return
2762     containing_card_is_marked(start) &&
2763     containing_card_is_marked(last);
2764 }
2765 
2766 #ifndef PRODUCT
2767 // for debugging purposes
2768 void ConcurrentMark::print_finger() {
2769   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
2770                          _heap_start, _heap_end, _finger);
2771   for (int i = 0; i < (int) _max_task_num; ++i) {
2772     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
2773   }
2774   gclog_or_tty->print_cr("");
2775 }
2776 #endif
2777 
2778 // Closure for iteration over bitmaps
2779 class CMBitMapClosure : public BitMapClosure {
2780 private:
2781   // the bitmap that is being iterated over
2782   CMBitMap*                   _nextMarkBitMap;
2783   ConcurrentMark*             _cm;
2784   CMTask*                     _task;
2785   // true if we're scanning a heap region claimed by the task (so that
2786   // we move the finger along), false if we're not, i.e. currently when
2787   // scanning a heap region popped from the region stack (so that we
2788   // do not move the task finger along; it'd be a mistake if we did so).
2789   bool                        _scanning_heap_region;
2790 
2791 public:
2792   CMBitMapClosure(CMTask *task,
2793                   ConcurrentMark* cm,
2794                   CMBitMap* nextMarkBitMap)
2795     :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2796 
2797   void set_scanning_heap_region(bool scanning_heap_region) {
2798     _scanning_heap_region = scanning_heap_region;
2799   }
2800 
2801   bool do_bit(size_t offset) {
2802     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2803     assert(_nextMarkBitMap->isMarked(addr), "invariant");
2804     assert( addr < _cm->finger(), "invariant");
2805 
2806     if (_scanning_heap_region) {
2807       statsOnly( _task->increase_objs_found_on_bitmap() );
2808       assert(addr >= _task->finger(), "invariant");
2809       // We move that task's local finger along.
2810       _task->move_finger_to(addr);
2811     } else {
2812       // We move the task's region finger along.
2813       _task->move_region_finger_to(addr);
2814     }
2815 
2816     _task->scan_object(oop(addr));
2817     // we only partially drain the local queue and global stack
2818     _task->drain_local_queue(true);
2819     _task->drain_global_stack(true);
2820 
2821     // if the has_aborted flag has been raised, we need to bail out of
2822     // the iteration
2823     return !_task->has_aborted();
2824   }
2825 };
2826 
2827 // Closure for iterating over objects, currently only used for
2828 // processing SATB buffers.
2829 class CMObjectClosure : public ObjectClosure {
2830 private:
2831   CMTask* _task;
2832 
2833 public:
2834   void do_object(oop obj) {
2835     _task->deal_with_reference(obj);
2836   }
2837 
2838   CMObjectClosure(CMTask* task) : _task(task) { }
2839 };
2840 
2841 // Closure for iterating over object fields
2842 class CMOopClosure : public OopClosure {
2843 private:
2844   G1CollectedHeap*   _g1h;
2845   ConcurrentMark*    _cm;
2846   CMTask*            _task;
2847 
2848 public:
2849   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2850   virtual void do_oop(      oop* p) { do_oop_work(p); }
2851 
2852   template <class T> void do_oop_work(T* p) {
2853     assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
2854     assert(!_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(),
2855            "invariant");
2856 
2857     oop obj = oopDesc::load_decode_heap_oop(p);
2858     if (_cm->verbose_high())
2859       gclog_or_tty->print_cr("[%d] we're looking at location "
2860                              "*"PTR_FORMAT" = "PTR_FORMAT,
2861                              _task->task_id(), p, (void*) obj);
2862     _task->deal_with_reference(obj);
2863   }
2864 
2865   CMOopClosure(G1CollectedHeap* g1h,
2866                ConcurrentMark* cm,
2867                CMTask* task)
2868     : _g1h(g1h), _cm(cm), _task(task) { }
2869 };
2870 
2871 void CMTask::setup_for_region(HeapRegion* hr) {
2872   // Separated the asserts so that we know which one fires.
2873   assert(hr != NULL,
2874         "claim_region() should have filtered out continues humongous regions");
2875   assert(!hr->continuesHumongous(),
2876         "claim_region() should have filtered out continues humongous regions");
2877 
2878   if (_cm->verbose_low())
2879     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
2880                            _task_id, hr);
2881 
2882   _curr_region  = hr;
2883   _finger       = hr->bottom();
2884   update_region_limit();
2885 }
2886 
2887 void CMTask::update_region_limit() {
2888   HeapRegion* hr            = _curr_region;
2889   HeapWord* bottom          = hr->bottom();
2890   HeapWord* limit           = hr->next_top_at_mark_start();
2891 
2892   if (limit == bottom) {
2893     if (_cm->verbose_low())
2894       gclog_or_tty->print_cr("[%d] found an empty region "
2895                              "["PTR_FORMAT", "PTR_FORMAT")",
2896                              _task_id, bottom, limit);
2897     // The region was collected underneath our feet.
2898     // We set the finger to bottom to ensure that the bitmap
2899     // iteration that will follow this will not do anything.
2900     // (this is not a condition that holds when we set the region up,
2901     // as the region is not supposed to be empty in the first place)
2902     _finger = bottom;
2903   } else if (limit >= _region_limit) {
2904     assert(limit >= _finger, "peace of mind");
2905   } else {
2906     assert(limit < _region_limit, "only way to get here");
2907     // This can happen under some pretty unusual circumstances.  An
2908     // evacuation pause empties the region underneath our feet (NTAMS
2909     // at bottom). We then do some allocation in the region (NTAMS
2910     // stays at bottom), followed by the region being used as a GC
2911     // alloc region (NTAMS will move to top() and the objects
2912     // originally below it will be grayed). All objects now marked in
2913     // the region are explicitly grayed, if below the global finger,
2914     // and we do not need in fact to scan anything else. So, we simply
2915     // set _finger to be limit to ensure that the bitmap iteration
2916     // doesn't do anything.
2917     _finger = limit;
2918   }
2919 
2920   _region_limit = limit;
2921 }
2922 
2923 void CMTask::giveup_current_region() {
2924   assert(_curr_region != NULL, "invariant");
2925   if (_cm->verbose_low())
2926     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
2927                            _task_id, _curr_region);
2928   clear_region_fields();
2929 }
2930 
2931 void CMTask::clear_region_fields() {
2932   // Values for these three fields that indicate that we're not
2933   // holding on to a region.
2934   _curr_region   = NULL;
2935   _finger        = NULL;
2936   _region_limit  = NULL;
2937 
2938   _region_finger = NULL;
2939 }
2940 
2941 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2942   guarantee(nextMarkBitMap != NULL, "invariant");
2943 
2944   if (_cm->verbose_low())
2945     gclog_or_tty->print_cr("[%d] resetting", _task_id);
2946 
2947   _nextMarkBitMap                = nextMarkBitMap;
2948   clear_region_fields();
2949 
2950   _calls                         = 0;
2951   _elapsed_time_ms               = 0.0;
2952   _termination_time_ms           = 0.0;
2953   _termination_start_time_ms     = 0.0;
2954 
2955 #if _MARKING_STATS_
2956   _local_pushes                  = 0;
2957   _local_pops                    = 0;
2958   _local_max_size                = 0;
2959   _objs_scanned                  = 0;
2960   _global_pushes                 = 0;
2961   _global_pops                   = 0;
2962   _global_max_size               = 0;
2963   _global_transfers_to           = 0;
2964   _global_transfers_from         = 0;
2965   _region_stack_pops             = 0;
2966   _regions_claimed               = 0;
2967   _objs_found_on_bitmap          = 0;
2968   _satb_buffers_processed        = 0;
2969   _steal_attempts                = 0;
2970   _steals                        = 0;
2971   _aborted                       = 0;
2972   _aborted_overflow              = 0;
2973   _aborted_cm_aborted            = 0;
2974   _aborted_yield                 = 0;
2975   _aborted_timed_out             = 0;
2976   _aborted_satb                  = 0;
2977   _aborted_termination           = 0;
2978 #endif // _MARKING_STATS_
2979 }
2980 
2981 bool CMTask::should_exit_termination() {
2982   regular_clock_call();
2983   // This is called when we are in the termination protocol. We should
2984   // quit if, for some reason, this task wants to abort or the global
2985   // stack is not empty (this means that we can get work from it).
2986   return !_cm->mark_stack_empty() || has_aborted();
2987 }
2988 
2989 // This determines whether the method below will check both the local
2990 // and global fingers when determining whether to push a gray object
2991 // on the stack (value 1) or whether it will only check the global one
2992 // (value 0). The tradeoffs are that the former will be a bit more
2993 // accurate and possibly push fewer entries on the stack, but it
2994 // might also be a little bit slower.
2995 
2996 #define _CHECK_BOTH_FINGERS_      1
2997 
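// Called for every reference we encounter, whether from scanning a
// heap region, an entry on the local queue / global stack, or an
// SATB buffer. If the referenced object lies in the G1 reserved
// space and is not yet marked, it is marked on the next bitmap and,
// depending on its position relative to the local and global
// fingers, possibly pushed on the local queue as well.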
2998 void CMTask::deal_with_reference(oop obj) {
2999   if (_cm->verbose_high())
3000     gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
3001                            _task_id, (void*) obj);
3002 
3003   ++_refs_reached;
3004 
3005   HeapWord* objAddr = (HeapWord*) obj;
3006   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
3007   if (_g1h->is_in_g1_reserved(objAddr)) {
3008     assert(obj != NULL, "is_in_g1_reserved should ensure this");
3009     HeapRegion* hr = _g1h->heap_region_containing(obj);
3010     if (_g1h->is_obj_ill(obj, hr)) {
3011       if (_cm->verbose_high())
3012         gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
3013                                _task_id, (void*) obj);
3014 
3015       // we need to mark it first
3016       if (_nextMarkBitMap->parMark(objAddr)) {
3017         // No OrderAccess::store_load() is needed. It is implicit in the
3018         // CAS done in parMark(objAddr) above.
3019         HeapWord* global_finger = _cm->finger();
3020 
3021 #if _CHECK_BOTH_FINGERS_
3022         // we will check both the local and global fingers
3023 
3024         if (_finger != NULL && objAddr < _finger) {
3025           if (_cm->verbose_high())
3026             gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
3027                                    "pushing it", _task_id, _finger);
3028           push(obj);
3029         } else if (_curr_region != NULL && objAddr < _region_limit) {
3030           // do nothing
3031         } else if (objAddr < global_finger) {
3032           // Notice that the global finger might be moving forward
3033           // concurrently. This is not a problem. In the worst case, we
3034           // mark the object while it is above the global finger and, by
3035           // the time we read the global finger, it has moved forward
3036           // past this object. In this case, the object will probably
3037           // be visited when a task is scanning the region and will also
3038           // be pushed on the stack. So, some duplicate work, but no
3039           // correctness problems.
3040 
3041           if (_cm->verbose_high())
3042             gclog_or_tty->print_cr("[%d] below the global finger "
3043                                    "("PTR_FORMAT"), pushing it",
3044                                    _task_id, global_finger);
3045           push(obj);
3046         } else {
3047           // do nothing
3048         }
3049 #else // _CHECK_BOTH_FINGERS_
3050         // we will only check the global finger
3051 
3052         if (objAddr < global_finger) {
3053           // see long comment above
3054 
3055           if (_cm->verbose_high())
3056             gclog_or_tty->print_cr("[%d] below the global finger "
3057                                    "("PTR_FORMAT"), pushing it",
3058                                    _task_id, global_finger);
3059           push(obj);
3060         }
3061 #endif // _CHECK_BOTH_FINGERS_
3062       }
3063     }
3064   }
3065 }
3066 
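// Pushes an already-marked gray object onto the local task queue,
// first spilling a batch of entries to the global mark stack if the
// local queue is full.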
3067 void CMTask::push(oop obj) {
3068   HeapWord* objAddr = (HeapWord*) obj;
3069   assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
3070   assert(!_g1h->heap_region_containing(objAddr)->is_on_free_list(),
3071          "invariant");
3072   assert(!_g1h->is_obj_ill(obj), "invariant");
3073   assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
3074 
3075   if (_cm->verbose_high())
3076     gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
3077 
3078   if (!_task_queue->push(obj)) {
3079     // The local task queue looks full. We need to push some entries
3080     // to the global stack.
3081 
3082     if (_cm->verbose_medium())
3083       gclog_or_tty->print_cr("[%d] task queue overflow, "
3084                              "moving entries to the global stack",
3085                              _task_id);
3086     move_entries_to_global_stack();
3087 
3088     // this should succeed since, even if we overflow the global
3089     // stack, we should have definitely removed some entries from the
3090     // local queue. So, there must be space on it.
3091     bool success = _task_queue->push(obj);
3092     assert(success, "invariant");
3093   }
3094 
3095   statsOnly( int tmp_size = _task_queue->size();
3096              if (tmp_size > _local_max_size)
3097                _local_max_size = tmp_size;
3098              ++_local_pushes );
3099 }
3100 
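// Called when one of the work-based counters (words scanned or refs
// reached) hits its current limit; it simply invokes the regular
// clock so that the abort conditions can be re-checked.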
3101 void CMTask::reached_limit() {
3102   assert(_words_scanned >= _words_scanned_limit ||
3103          _refs_reached >= _refs_reached_limit,
3104          "shouldn't have been called otherwise");
3105   regular_clock_call();
3106 }
3107 
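// The "clock" of the marking task: checks, in one place, all the
// conditions that can make the task abort (overflow, marking aborted
// for Full GC, yield requests, the time quota, pending SATB buffers)
// and recalculates the work-based limits that trigger the next call.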
3108 void CMTask::regular_clock_call() {
3109   if (has_aborted())
3110     return;
3111 
3112   // First, we need to recalculate the words scanned and refs reached
3113   // limits for the next clock call.
3114   recalculate_limits();
3115 
3116   // During the regular clock call we do the following
3117 
3118   // (1) If an overflow has been flagged, then we abort.
3119   if (_cm->has_overflown()) {
3120     set_has_aborted();
3121     return;
3122   }
3123 
3124   // If we are not concurrent (i.e. we're doing remark) we don't need
3125   // to check anything else. The other steps are only needed during
3126   // the concurrent marking phase.
3127   if (!concurrent())
3128     return;
3129 
3130   // (2) If marking has been aborted for Full GC, then we also abort.
3131   if (_cm->has_aborted()) {
3132     set_has_aborted();
3133     statsOnly( ++_aborted_cm_aborted );
3134     return;
3135   }
3136 
3137   double curr_time_ms = os::elapsedVTime() * 1000.0;
3138 
3139   // (3) If marking stats are enabled, then we update the clock statistics.
3140 #if _MARKING_STATS_
3141   if (_words_scanned >= _words_scanned_limit)
3142     ++_clock_due_to_scanning;
3143   if (_refs_reached >= _refs_reached_limit)
3144     ++_clock_due_to_marking;
3145 
3146   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3147   _interval_start_time_ms = curr_time_ms;
3148   _all_clock_intervals_ms.add(last_interval_ms);
3149 
3150   if (_cm->verbose_medium()) {
3151     gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3152                            "scanned = %d%s, refs reached = %d%s",
3153                            _task_id, last_interval_ms,
3154                            _words_scanned,
3155                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3156                            _refs_reached,
3157                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3158   }
3159 #endif // _MARKING_STATS_
3160 
3161   // (4) We check whether we should yield. If we have to, then we abort.
3162   if (_cm->should_yield()) {
3163     // We should yield. To do this we abort the task. The caller is
3164     // responsible for yielding.
3165     set_has_aborted();
3166     statsOnly( ++_aborted_yield );
3167     return;
3168   }
3169 
3170   // (5) We check whether we've reached our time quota. If we have,
3171   // then we abort.
3172   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3173   if (elapsed_time_ms > _time_target_ms) {
3174     set_has_aborted();
3175     _has_aborted_timed_out = true;
3176     statsOnly( ++_aborted_timed_out );
3177     return;
3178   }
3179 
3180   // (6) Finally, we check whether there are enough completed SATB
3181   // buffers available for processing. If there are, we abort.
3182   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3183   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3184     if (_cm->verbose_low())
3185       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3186                              _task_id);
3187     // we do need to process SATB buffers; we'll abort and restart
3188     // the marking task to do so
3189     set_has_aborted();
3190     statsOnly( ++_aborted_satb );
3191     return;
3192   }
3193 }
3194 
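// Resets the words-scanned and refs-reached limits to a full period
// beyond the current counters.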
3195 void CMTask::recalculate_limits() {
3196   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3197   _words_scanned_limit      = _real_words_scanned_limit;
3198 
3199   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3200   _refs_reached_limit       = _real_refs_reached_limit;
3201 }
3202 
3203 void CMTask::decrease_limits() {
3204   // This is called when we believe that we're going to do an infrequent
3205   // operation which will increase the per byte scanned cost (i.e. move
3206   // entries to/from the global stack). It basically tries to decrease the
3207   // scanning limits so that the clock is called earlier.
3208 
3209   if (_cm->verbose_medium())
3210     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3211 
3212   _words_scanned_limit = _real_words_scanned_limit -
3213     3 * words_scanned_period / 4;
3214   _refs_reached_limit  = _real_refs_reached_limit -
3215     3 * refs_reached_period / 4;
3216 }
3217 
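// Moves up to global_stack_transfer_size entries from the local task
// queue to the global mark stack in one batch. If the global stack
// overflows, the task is flagged to abort so that the overflow
// protocol can run.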
3218 void CMTask::move_entries_to_global_stack() {
3219   // local array where we'll store the entries that will be popped
3220   // from the local queue
3221   oop buffer[global_stack_transfer_size];
3222 
3223   int n = 0;
3224   oop obj;
3225   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3226     buffer[n] = obj;
3227     ++n;
3228   }
3229 
3230   if (n > 0) {
3231     // we popped at least one entry from the local queue
3232 
3233     statsOnly( ++_global_transfers_to; _local_pops += n );
3234 
3235     if (!_cm->mark_stack_push(buffer, n)) {
3236       if (_cm->verbose_low())
3237         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
3238       set_has_aborted();
3239     } else {
3240       // the transfer was successful
3241 
3242       if (_cm->verbose_medium())
3243         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3244                                _task_id, n);
3245       statsOnly( int tmp_size = _cm->mark_stack_size();
3246                  if (tmp_size > _global_max_size)
3247                    _global_max_size = tmp_size;
3248                  _global_pushes += n );
3249     }
3250   }
3251 
3252   // this operation was quite expensive, so decrease the limits
3253   decrease_limits();
3254 }
3255 
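// Pops up to global_stack_transfer_size entries from the global mark
// stack and pushes them onto the local task queue. This is only
// called when the local queue is empty or under a given target
// limit, so the pushes are expected to succeed.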
3256 void CMTask::get_entries_from_global_stack() {
3257   // local array where we'll store the entries that will be popped
3258   // from the global stack.
3259   oop buffer[global_stack_transfer_size];
3260   int n;
3261   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3262   assert(n <= global_stack_transfer_size,
3263          "we should not pop more than the given limit");
3264   if (n > 0) {
3265     // yes, we did actually pop at least one entry
3266 
3267     statsOnly( ++_global_transfers_from; _global_pops += n );
3268     if (_cm->verbose_medium())
3269       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3270                              _task_id, n);
3271     for (int i = 0; i < n; ++i) {
3272       bool success = _task_queue->push(buffer[i]);
3273       // We only call this when the local queue is empty or under a
3274       // given target limit. So, we do not expect this push to fail.
3275       assert(success, "invariant");
3276     }
3277 
3278     statsOnly( int tmp_size = _task_queue->size();
3279                if (tmp_size > _local_max_size)
3280                  _local_max_size = tmp_size;
3281                _local_pushes += n );
3282   }
3283 
3284   // this operation was quite expensive, so decrease the limits
3285   decrease_limits();
3286 }
3287 
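// Pops entries off the local task queue and scans them until the
// queue drops to the target size: a small non-zero size when
// draining partially (so that other tasks can still steal), zero
// when draining totally.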
3288 void CMTask::drain_local_queue(bool partially) {
3289   if (has_aborted())
3290     return;
3291 
3292   // Decide what the target size is, depending whether we're going to
3293   // drain it partially (so that other tasks can steal if they run out
3294   // of things to do) or totally (at the very end).
3295   size_t target_size;
3296   if (partially)
3297     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3298   else
3299     target_size = 0;
3300 
3301   if (_task_queue->size() > target_size) {
3302     if (_cm->verbose_high())
3303       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3304                              _task_id, target_size);
3305 
3306     oop obj;
3307     bool ret = _task_queue->pop_local(obj);
3308     while (ret) {
3309       statsOnly( ++_local_pops );
3310 
3311       if (_cm->verbose_high())
3312         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3313                                (void*) obj);
3314 
3315       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3316       assert(!_g1h->heap_region_containing(obj)->is_on_free_list(),
3317              "invariant");
3318 
3319       scan_object(obj);
3320 
3321       if (_task_queue->size() <= target_size || has_aborted())
3322         ret = false;
3323       else
3324         ret = _task_queue->pop_local(obj);
3325     }
3326 
3327     if (_cm->verbose_high())
3328       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3329                              _task_id, _task_queue->size());
3330   }
3331 }
3332 
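// Repeatedly transfers batches of entries from the global mark stack
// to the local queue and drains them until the global stack drops to
// the target size (zero when draining totally).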
3333 void CMTask::drain_global_stack(bool partially) {
3334   if (has_aborted())
3335     return;
3336 
3337   // We have a policy to drain the local queue before we attempt to
3338   // drain the global stack.
3339   assert(partially || _task_queue->size() == 0, "invariant");
3340 
3341   // Decide what the target size is, depending whether we're going to
3342   // drain it partially (so that other tasks can steal if they run out
3343   // of things to do) or totally (at the very end).  Notice that,
3344   // because we move entries from the global stack in chunks or
3345   // because another task might be doing the same, we might in fact
3346   // drop below the target. But, this is not a problem.
3347   size_t target_size;
3348   if (partially)
3349     target_size = _cm->partial_mark_stack_size_target();
3350   else
3351     target_size = 0;
3352 
3353   if (_cm->mark_stack_size() > target_size) {
3354     if (_cm->verbose_low())
3355       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3356                              _task_id, target_size);
3357 
3358     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3359       get_entries_from_global_stack();
3360       drain_local_queue(partially);
3361     }
3362 
3363     if (_cm->verbose_low())
3364       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3365                              _task_id, _cm->mark_stack_size());
3366   }
3367 }
3368 
3369 // The SATB queue set makes several assumptions about whether to call
3370 // the par or non-par versions of the methods. This is why some of the
3371 // code is replicated. We should really get rid of the single-threaded
3372 // version of the code to simplify things.
3373 void CMTask::drain_satb_buffers() {
3374   if (has_aborted())
3375     return;
3376 
3377   // We set this so that the regular clock knows that we're in the
3378   // middle of draining buffers and doesn't set the abort flag when it
3379   // notices that SATB buffers are available for draining. It'd be
3380   // very counterproductive if it did that. :-)
3381   _draining_satb_buffers = true;
3382 
3383   CMObjectClosure oc(this);
3384   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3385   if (ParallelGCThreads > 0)
3386     satb_mq_set.set_par_closure(_task_id, &oc);
3387   else
3388     satb_mq_set.set_closure(&oc);
3389 
3390   // This keeps claiming and applying the closure to completed buffers
3391   // until we run out of buffers or we need to abort.
3392   if (ParallelGCThreads > 0) {
3393     while (!has_aborted() &&
3394            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3395       if (_cm->verbose_medium())
3396         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3397       statsOnly( ++_satb_buffers_processed );
3398       regular_clock_call();
3399     }
3400   } else {
3401     while (!has_aborted() &&
3402            satb_mq_set.apply_closure_to_completed_buffer()) {
3403       if (_cm->verbose_medium())
3404         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3405       statsOnly( ++_satb_buffers_processed );
3406       regular_clock_call();
3407     }
3408   }
3409 
3410   if (!concurrent() && !has_aborted()) {
3411     // We should only do this during remark.
3412     if (ParallelGCThreads > 0)
3413       satb_mq_set.par_iterate_closure_all_threads(_task_id);
3414     else
3415       satb_mq_set.iterate_closure_all_threads();
3416   }
3417 
3418   _draining_satb_buffers = false;
3419 
3420   assert(has_aborted() ||
3421          concurrent() ||
3422          satb_mq_set.completed_buffers_num() == 0, "invariant");
3423 
3424   if (ParallelGCThreads > 0)
3425     satb_mq_set.set_par_closure(_task_id, NULL);
3426   else
3427     satb_mq_set.set_closure(NULL);
3428 
3429   // again, this was a potentially expensive operation, decrease the
3430   // limits to get the regular clock call early
3431   decrease_limits();
3432 }
3433 
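// Pops regions off the global region stack and iterates over their
// portion of the marking bitmap with the given closure. If the
// iteration is aborted part-way through a region, the unscanned
// remainder is pushed back on the region stack so that a task can
// finish it later.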
3434 void CMTask::drain_region_stack(BitMapClosure* bc) {
3435   if (has_aborted())
3436     return;
3437 
3438   assert(_region_finger == NULL,
3439          "it should be NULL when we're not scanning a region");
3440 
3441   if (!_cm->region_stack_empty()) {
3442     if (_cm->verbose_low())
3443       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3444                              _task_id, _cm->region_stack_size());
3445 
3446     MemRegion mr = _cm->region_stack_pop_with_lock();
3447     // it returns MemRegion() if the pop fails
3448     statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3449 
3450     while (mr.start() != NULL) {
3451       if (_cm->verbose_medium())
3452         gclog_or_tty->print_cr("[%d] we are scanning region "
3453                                "["PTR_FORMAT", "PTR_FORMAT")",
3454                                _task_id, mr.start(), mr.end());
3455       assert(mr.end() <= _cm->finger(),
3456              "otherwise the region shouldn't be on the stack");
3457       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3458       if (_nextMarkBitMap->iterate(bc, mr)) {
3459         assert(!has_aborted(),
3460                "cannot abort the task without aborting the bitmap iteration");
3461 
3462         // We finished iterating over the region without aborting.
3463         regular_clock_call();
3464         if (has_aborted())
3465           mr = MemRegion();
3466         else {
3467           mr = _cm->region_stack_pop_with_lock();
3468           // it returns MemRegion() if the pop fails
3469           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3470         }
3471       } else {
3472         assert(has_aborted(), "currently the only way to do so");
3473 
3474         // The only way to abort the bitmap iteration is to return
3475         // false from the do_bit() method. However, inside the
3476         // do_bit() method we move the _region_finger to point to the
3477         // object currently being looked at. So, if we bail out, we
3478         // have definitely set _region_finger to something non-null.
3479         assert(_region_finger != NULL, "invariant");
3480 
3481         // The iteration was actually aborted. So now _region_finger
3482         // points to the address of the object we last scanned. If we
3483         // leave it there, when we restart this task, we will rescan
3484         // the object. It is easy to avoid this. We move the finger by
3485         // enough to point to the next possible object header (the
3486         // bitmap knows by how much we need to move it as it knows its
3487         // granularity).
3488         MemRegion newRegion =
3489           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3490 
3491         if (!newRegion.is_empty()) {
3492           if (_cm->verbose_low()) {
3493             gclog_or_tty->print_cr("[%d] pushing unscanned region"
3494                                    "[" PTR_FORMAT "," PTR_FORMAT ") on region stack",
3495                                    _task_id,
3496                                    newRegion.start(), newRegion.end());
3497           }
3498           // Now push the part of the region we didn't scan on the
3499           // region stack to make sure a task scans it later.
3500           _cm->region_stack_push_with_lock(newRegion);
3501         }
3502         // break from while
3503         mr = MemRegion();
3504       }
3505       _region_finger = NULL;
3506     }
3507 
3508     if (_cm->verbose_low())
3509       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3510                              _task_id, _cm->region_stack_size());
3511   }
3512 }
3513 
3514 void CMTask::print_stats() {
3515   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3516                          _task_id, _calls);
3517   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3518                          _elapsed_time_ms, _termination_time_ms);
3519   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3520                          _step_times_ms.num(), _step_times_ms.avg(),
3521                          _step_times_ms.sd());
3522   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3523                          _step_times_ms.maximum(), _step_times_ms.sum());
3524 
3525 #if _MARKING_STATS_
3526   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3527                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3528                          _all_clock_intervals_ms.sd());
3529   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3530                          _all_clock_intervals_ms.maximum(),
3531                          _all_clock_intervals_ms.sum());
3532   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
3533                          _clock_due_to_scanning, _clock_due_to_marking);
3534   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
3535                          _objs_scanned, _objs_found_on_bitmap);
3536   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
3537                          _local_pushes, _local_pops, _local_max_size);
3538   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
3539                          _global_pushes, _global_pops, _global_max_size);
3540   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
3541                          _global_transfers_to, _global_transfers_from);
3542   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
3543                          _regions_claimed, _region_stack_pops);
3544   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
3545   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
3546                          _steal_attempts, _steals);
3547   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
3548   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
3549                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3550   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
3551                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3552 #endif // _MARKING_STATS_
3553 }
3554 
3555 /*****************************************************************************
3556 
3557     The do_marking_step(time_target_ms) method is the building block
3558     of the parallel marking framework. It can be called in parallel
3559     with other invocations of do_marking_step() on different tasks
3560     (but only one per task, obviously) and concurrently with the
3561     mutator threads, or during remark, hence it eliminates the need
3562     for two versions of the code. When called during remark, it will
3563     pick up from where the task left off during the concurrent marking
3564     phase. Interestingly, tasks are also claimable during evacuation
3565     pauses, since do_marking_step() ensures that it aborts before
3566     it needs to yield.
3567 
3568     The data structures that it uses to do marking work are the
3569     following:
3570 
3571       (1) Marking Bitmap. If there are gray objects that appear only
3572       on the bitmap (this happens either when dealing with an overflow
3573       or when the initial marking phase has simply marked the roots
3574       and didn't push them on the stack), then tasks claim heap
3575       regions whose bitmap they then scan to find gray objects. A
3576       global finger indicates where the end of the last claimed region
3577       is. A local finger indicates how far into the region a task has
3578       scanned. The two fingers are used to determine how to gray an
3579       object (i.e. whether simply marking it is OK, as it will be
3580       visited by a task in the future, or whether it needs to be also
3581       pushed on a stack).
3582 
3583       (2) Local Queue. The local queue of the task which is accessed
3584       reasonably efficiently by the task. Other tasks can steal from
3585       it when they run out of work. Throughout the marking phase, a
3586       task attempts to keep its local queue short but not totally
3587       empty, so that entries are available for stealing by other
3588       tasks. Only when there is no more work will a task totally
3589       drain its local queue.
3590 
3591       (3) Global Mark Stack. This handles local queue overflow. During
3592       marking, only batches of entries are moved between it and the
3593       local queues, as access to it requires a mutex and more
3594       fine-grained interaction with it might cause contention. If it
3595       overflows, then the marking phase should restart and iterate
3596       over the bitmap to identify gray objects. Throughout the marking
3597       phase, tasks attempt to keep the global mark stack at a small
3598       length but not totally empty, so that entries are available for
3599       popping by other tasks. Only when there is no more work will
3600       tasks totally drain the global mark stack.
3601 
3602       (4) Global Region Stack. Entries on it correspond to areas of
3603       the bitmap that need to be scanned since they contain gray
3604       objects. Pushes on the region stack only happen during
3605       evacuation pauses and typically correspond to areas covered by
3606       GC LABs. If it overflows, then the marking phase should restart
3607       and iterate over the bitmap to identify gray objects. Tasks will
3608       try to totally drain the region stack as soon as possible.
3609 
3610       (5) SATB Buffer Queue. This is where completed SATB buffers are
3611       made available. Buffers are regularly removed from this queue
3612       and scanned for roots, so that the queue doesn't get too
3613       long. During remark, all completed buffers are processed, as
3614       well as the filled in parts of any uncompleted buffers.
3615 
3616     The do_marking_step() method tries to abort when the time target
3617     has been reached. There are a few other cases when the
3618     do_marking_step() method also aborts:
3619 
3620       (1) When the marking phase has been aborted (after a Full GC).
3621 
3622       (2) When a global overflow (either on the global stack or the
3623       region stack) has been triggered. Before the task aborts, it
3624       will actually sync up with the other tasks to ensure that all
3625       the marking data structures (local queues, stacks, fingers etc.)
3626       are re-initialised so that when do_marking_step() completes,
3627       the marking phase can immediately restart.
3628 
3629       (3) When enough completed SATB buffers are available. The
3630       do_marking_step() method only tries to drain SATB buffers right
3631       at the beginning. So, if enough buffers are available, the
3632       marking step aborts and the SATB buffers are processed at
3633       the beginning of the next invocation.
3634 
3635       (4) To yield. When we have to yield, we abort and yield
3636       right at the end of do_marking_step(). This saves us from a lot
3637       of hassle as, by yielding, we might allow a Full GC. If this
3638       happens then objects will be compacted underneath our feet, the
3639       heap might shrink, etc. We save checking for this by just
3640       aborting and doing the yield right at the end.
3641 
3642     From the above it follows that the do_marking_step() method should
3643     be called in a loop (or, otherwise, regularly) until it completes.
3644 
3645     If a marking step completes without its has_aborted() flag being
3646     true, it means it has completed the current marking phase (and
3647     also all other marking tasks have done so and have all synced up).
3648 
3649     A method called regular_clock_call() is invoked "regularly" (in
3650     sub-ms intervals) throughout marking. It is this clock method that
3651     checks all the abort conditions which were mentioned above and
3652     decides when the task should abort. A work-based scheme is used to
3653     trigger this clock method: when the number of object words the
3654     marking phase has scanned or the number of references the marking
3655     phase has visited reach a given limit. Additional invocations of
3656     the clock method have been planted in a few other strategic places
3657     too. The initial reason for the clock method was to avoid calling
3658     vtime too regularly, as it is quite expensive. So, once it was in
3659     place, it was natural to piggy-back all the other conditions on it
3660     too and not constantly check them throughout the code.
3661 
3662  *****************************************************************************/
3663 
3664 void CMTask::do_marking_step(double time_target_ms) {
3665   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3666   assert(concurrent() == _cm->concurrent(), "they should be the same");
3667 
3668   assert(concurrent() || _cm->region_stack_empty(),
3669          "the region stack should have been cleared before remark");
3670   assert(_region_finger == NULL,
3671          "this should be non-null only when a region is being scanned");
3672 
3673   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3674   assert(_task_queues != NULL, "invariant");
3675   assert(_task_queue != NULL, "invariant");
3676   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
3677 
3678   assert(!_claimed,
3679          "only one thread should claim this task at any one time");
3680 
3681   // OK, this doesn't safeguard against all possible scenarios, as it is
3682   // possible for two threads to set the _claimed flag at the same
3683   // time. But it is only for debugging purposes anyway and it will
3684   // catch most problems.
3685   _claimed = true;
3686 
3687   _start_time_ms = os::elapsedVTime() * 1000.0;
3688   statsOnly( _interval_start_time_ms = _start_time_ms );
3689 
3690   double diff_prediction_ms =
3691     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3692   _time_target_ms = time_target_ms - diff_prediction_ms;
3693 
3694   // set up the variables that are used in the work-based scheme to
3695   // call the regular clock method
3696   _words_scanned = 0;
3697   _refs_reached  = 0;
3698   recalculate_limits();
3699 
3700   // clear all flags
3701   clear_has_aborted();
3702   _has_aborted_timed_out = false;
3703   _draining_satb_buffers = false;
3704 
3705   ++_calls;
3706 
3707   if (_cm->verbose_low())
3708     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3709                            "target = %1.2lfms >>>>>>>>>>",
3710                            _task_id, _calls, _time_target_ms);
3711 
3712   // Set up the bitmap and oop closures. Anything that uses them is
3713   // eventually called from this method, so it is OK to allocate these
3714   // statically.
3715   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3716   CMOopClosure    oop_closure(_g1h, _cm, this);
3717   set_oop_closure(&oop_closure);
3718 
3719   if (_cm->has_overflown()) {
3720     // This can happen if the region stack or the mark stack overflows
3721     // during a GC pause and this task, after a yield point,
3722     // restarts. We have to abort as we need to get into the overflow
3723     // protocol which happens right at the end of this task.
3724     set_has_aborted();
3725   }
3726 
3727   // First drain any available SATB buffers. After this, we will not
3728   // look at SATB buffers before the next invocation of this method.
3729   // If enough completed SATB buffers are queued up, the regular clock
3730   // will abort this task so that it restarts.
3731   drain_satb_buffers();
3732   // ...then partially drain the local queue and the global stack
3733   drain_local_queue(true);
3734   drain_global_stack(true);
3735 
3736   // Then totally drain the region stack.  We will not look at
3737   // it again before the next invocation of this method. Entries on
3738   // the region stack are only added during evacuation pauses, for
3739   // which we have to yield. When we do, we abort the task anyway so
3740   // it will look at the region stack again when it restarts.
3741   bitmap_closure.set_scanning_heap_region(false);
3742   drain_region_stack(&bitmap_closure);
3743   // ...then partially drain the local queue and the global stack
3744   drain_local_queue(true);
3745   drain_global_stack(true);
3746 
3747   do {
3748     if (!has_aborted() && _curr_region != NULL) {
3749       // This means that we're already holding on to a region.
3750       assert(_finger != NULL, "if region is not NULL, then the finger "
3751              "should not be NULL either");
3752 
3753       // We might have restarted this task after an evacuation pause
3754       // which might have evacuated the region we're holding on to
3755       // underneath our feet. Let's read its limit again to make sure
3756       // that we do not iterate over a region of the heap that
3757       // contains garbage (update_region_limit() will also move
3758       // _finger to the start of the region if it is found empty).
3759       update_region_limit();
3760       // We will start from _finger not from the start of the region,
3761       // as we might be restarting this task after aborting half-way
3762       // through scanning this region. In this case, _finger points to
3763       // the address where we last found a marked object. If this is a
3764       // fresh region, _finger points to start().
3765       MemRegion mr = MemRegion(_finger, _region_limit);
3766 
3767       if (_cm->verbose_low())
3768         gclog_or_tty->print_cr("[%d] we're scanning part "
3769                                "["PTR_FORMAT", "PTR_FORMAT") "
3770                                "of region "PTR_FORMAT,
3771                                _task_id, _finger, _region_limit, _curr_region);
3772 
3773       // Let's iterate over the bitmap of the part of the
3774       // region that is left.
3775       bitmap_closure.set_scanning_heap_region(true);
3776       if (mr.is_empty() ||
3777           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3778         // We successfully completed iterating over the region. Now,
3779         // let's give up the region.
3780         giveup_current_region();
3781         regular_clock_call();
3782       } else {
3783         assert(has_aborted(), "currently the only way to do so");
3784         // The only way to abort the bitmap iteration is to return
3785         // false from the do_bit() method. However, inside the
3786         // do_bit() method we move the _finger to point to the
3787         // object currently being looked at. So, if we bail out, we
3788         // have definitely set _finger to something non-null.
3789         assert(_finger != NULL, "invariant");
3790 
3791         // Region iteration was actually aborted. So now _finger
3792         // points to the address of the object we last scanned. If we
3793         // leave it there, when we restart this task, we will rescan
3794         // the object. It is easy to avoid this. We move the finger by
3795         // enough to point to the next possible object header (the
3796         // bitmap knows by how much we need to move it as it knows its
3797         // granularity).
3798         assert(_finger < _region_limit, "invariant");
3799         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
3800         // Check if bitmap iteration was aborted while scanning the last object
3801         if (new_finger >= _region_limit) {
3802             giveup_current_region();
3803         } else {
3804             move_finger_to(new_finger);
3805         }
3806       }
3807     }
3808     // At this point we have either completed iterating over the
3809     // region we were holding on to, or we have aborted.
3810 
3811     // We then partially drain the local queue and the global stack.
3812     // (Do we really need this?)
3813     drain_local_queue(true);
3814     drain_global_stack(true);
3815 
3816     // Read the note on the claim_region() method on why it might
3817     // return NULL with potentially more regions available for
3818     // claiming and why we have to check out_of_regions() to determine
3819     // whether we're done or not.
3820     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3821       // We are going to try to claim a new region. We should have
3822       // given up on the previous one.
3823       // Separated the asserts so that we know which one fires.
3824       assert(_curr_region  == NULL, "invariant");
3825       assert(_finger       == NULL, "invariant");
3826       assert(_region_limit == NULL, "invariant");
3827       if (_cm->verbose_low())
3828         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
3829       HeapRegion* claimed_region = _cm->claim_region(_task_id);
3830       if (claimed_region != NULL) {
3831         // Yes, we managed to claim one
3832         statsOnly( ++_regions_claimed );
3833 
3834         if (_cm->verbose_low())
3835           gclog_or_tty->print_cr("[%d] we successfully claimed "
3836                                  "region "PTR_FORMAT,
3837                                  _task_id, claimed_region);
3838 
3839         setup_for_region(claimed_region);
3840         assert(_curr_region == claimed_region, "invariant");
3841       }
3842       // It is important to call the regular clock here. It might take
3843       // a while to claim a region if, for example, we hit a large
3844       // block of empty regions. So we need to call the regular clock
3845       // method once round the loop to make sure it's called
3846       // frequently enough.
3847       regular_clock_call();
3848     }
3849 
3850     if (!has_aborted() && _curr_region == NULL) {
3851       assert(_cm->out_of_regions(),
3852              "at this point we should be out of regions");
3853     }
3854   } while (_curr_region != NULL && !has_aborted());
3855 
3856   if (!has_aborted()) {
3857     // We cannot check whether the global stack is empty, since other
3858     // tasks might be pushing objects to it concurrently. We also cannot
3859     // check if the region stack is empty because if a thread is aborting
3860     // it can push a partially done region back.
3861     assert(_cm->out_of_regions(),
3862            "at this point we should be out of regions");
3863 
3864     if (_cm->verbose_low())
3865       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
3866 
3867     // Try to reduce the number of available SATB buffers so that
3868     // remark has less work to do.
3869     drain_satb_buffers();
3870   }
3871 
3872   // Since we've done everything else, we can now totally drain the
3873   // local queue and global stack.
3874   drain_local_queue(false);
3875   drain_global_stack(false);
3876 
3877   // Attempt at work stealing from other task's queues.
3878   if (!has_aborted()) {
3879     // We have not aborted. This means that we have finished all that
3880     // we could. Let's try to do some stealing...
3881 
3882     // We cannot check whether the global stack is empty, since other
3883     // tasks might be pushing objects to it concurrently. We also cannot
3884     // check if the region stack is empty because if a thread is aborting
3885     // it can push a partially done region back.
3886     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3887            "only way to reach here");
3888 
3889     if (_cm->verbose_low())
3890       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
3891 
3892     while (!has_aborted()) {
3893       oop obj;
3894       statsOnly( ++_steal_attempts );
3895 
3896       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
3897         if (_cm->verbose_medium())
3898           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
3899                                  _task_id, (void*) obj);
3900 
3901         statsOnly( ++_steals );
3902 
3903         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3904                "any stolen object should be marked");
3905         scan_object(obj);
3906 
3907         // And since we're towards the end, let's totally drain the
3908         // local queue and global stack.
3909         drain_local_queue(false);
3910         drain_global_stack(false);
3911       } else {
3912         break;
3913       }
3914     }
3915   }
3916 
3917   // We still haven't aborted. Now, let's try to get into the
3918   // termination protocol.
3919   if (!has_aborted()) {
3920     // We cannot check whether the global stack is empty, since other
3921     // tasks might be concurrently pushing objects on it. We also cannot
3922     // check if the region stack is empty because if a thread is aborting
3923     // it can push a partially done region back.
3924     // Separated the asserts so that we know which one fires.
3925     assert(_cm->out_of_regions(), "only way to reach here");
3926     assert(_task_queue->size() == 0, "only way to reach here");
3927 
3928     if (_cm->verbose_low())
3929       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
3930 
3931     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3932     // The CMTask class also extends the TerminatorTerminator class,
3933     // hence its should_exit_termination() method will also decide
3934     // whether to exit the termination protocol or not.
3935     bool finished = _cm->terminator()->offer_termination(this);
3936     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3937     _termination_time_ms +=
3938       termination_end_time_ms - _termination_start_time_ms;
3939 
3940     if (finished) {
3941       // We're all done.
3942 
3943       if (_task_id == 0) {
3944         // let's allow task 0 to do this
3945         if (concurrent()) {
3946           assert(_cm->concurrent_marking_in_progress(), "invariant");
3947           // we need to set this to false before the next
3948           // safepoint. This way we ensure that the marking phase
3949           // doesn't observe any more heap expansions.
3950           _cm->clear_concurrent_marking_in_progress();
3951         }
3952       }
3953 
3954       // We can now guarantee that the global stack is empty, since
3955       // all other tasks have finished. We separated the guarantees so
3956       // that, if a condition is false, we can immediately find out
3957       // which one.
3958       guarantee(_cm->out_of_regions(), "only way to reach here");
3959       guarantee(_cm->region_stack_empty(), "only way to reach here");
3960       guarantee(_cm->mark_stack_empty(), "only way to reach here");
3961       guarantee(_task_queue->size() == 0, "only way to reach here");
3962       guarantee(!_cm->has_overflown(), "only way to reach here");
3963       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
3964       guarantee(!_cm->region_stack_overflow(), "only way to reach here");
3965 
3966       if (_cm->verbose_low())
3967         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
3968     } else {
3969       // Apparently there's more work to do. Let's abort this task. It
3970       // will restart it and we can hopefully find more things to do.
3971 
3972       if (_cm->verbose_low())
3973         gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
3974 
3975       set_has_aborted();
3976       statsOnly( ++_aborted_termination );
3977     }
3978   }
3979 
3980   // Mainly for debugging purposes to make sure that a pointer to the
3981   // closure which was statically allocated in this frame doesn't
3982   // escape it by accident.
3983   set_oop_closure(NULL);
3984   double end_time_ms = os::elapsedVTime() * 1000.0;
3985   double elapsed_time_ms = end_time_ms - _start_time_ms;
3986   // Update the step history.
3987   _step_times_ms.add(elapsed_time_ms);
3988 
3989   if (has_aborted()) {
3990     // The task was aborted for some reason.
3991 
3992     statsOnly( ++_aborted );
3993 
3994     if (_has_aborted_timed_out) {
3995       double diff_ms = elapsed_time_ms - _time_target_ms;
3996       // Keep statistics of how well we did with respect to hitting
3997       // our target only if we actually timed out (if we aborted for
3998       // other reasons, then the results might get skewed).
3999       _marking_step_diffs_ms.add(diff_ms);
4000     }
4001 
4002     if (_cm->has_overflown()) {
4003       // This is the interesting one. We aborted because a global
4004       // overflow was raised. This means we have to restart the
4005       // marking phase and start iterating over regions. However, in
4006       // order to do this we have to make sure that all tasks stop
4007       // what they are doing and re-initialise in a safe manner. We
4008       // will achieve this with the use of two barrier sync points.
4009 
4010       if (_cm->verbose_low())
4011         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4012 
4013       _cm->enter_first_sync_barrier(_task_id);
4014       // When we exit this sync barrier we know that all tasks have
4015       // stopped doing marking work. So, it's now safe to
4016       // re-initialise our data structures. At the end of this method,
4017       // task 0 will clear the global data structures.
4018 
4019       statsOnly( ++_aborted_overflow );
4020 
4021       // We clear the local state of this task...
4022       clear_region_fields();
4023 
4024       // ...and enter the second barrier.
4025       _cm->enter_second_sync_barrier(_task_id);
4026       // At this point everything has been re-initialised and we're
4027       // ready to restart.
4028     }
4029 
4030     if (_cm->verbose_low()) {
4031       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4032                              "elapsed = %1.2lfms <<<<<<<<<<",
4033                              _task_id, _time_target_ms, elapsed_time_ms);
4034       if (_cm->has_aborted())
4035         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4036                                _task_id);
4037     }
4038   } else {
4039     if (_cm->verbose_low())
4040       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4041                              "elapsed = %1.2lfms <<<<<<<<<<",
4042                              _task_id, _time_target_ms, elapsed_time_ms);
4043   }
4044 
4045   _claimed = false;
4046 }
4047 
4048 CMTask::CMTask(int task_id,
4049                ConcurrentMark* cm,
4050                CMTaskQueue* task_queue,
4051                CMTaskQueueSet* task_queues)
4052   : _g1h(G1CollectedHeap::heap()),
4053     _task_id(task_id), _cm(cm),
4054     _claimed(false),
4055     _nextMarkBitMap(NULL), _hash_seed(17),
4056     _task_queue(task_queue),
4057     _task_queues(task_queues),
4058     _oop_closure(NULL) {
4059   guarantee(task_queue != NULL, "invariant");
4060   guarantee(task_queues != NULL, "invariant");
4061 
4062   statsOnly( _clock_due_to_scanning = 0;
4063              _clock_due_to_marking  = 0 );
4064 
4065   _marking_step_diffs_ms.add(0.5);
4066 }