1 /*
   2  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/symbolTable.hpp"
  27 #include "gc_implementation/g1/concurrentMark.hpp"
  28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  31 #include "gc_implementation/g1/g1RemSet.hpp"
  32 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  33 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
  34 #include "memory/genOopClosures.inline.hpp"
  35 #include "memory/referencePolicy.hpp"
  36 #include "memory/resourceArea.hpp"
  37 #include "oops/oop.inline.hpp"
  38 #include "runtime/handles.inline.hpp"
  39 #include "runtime/java.hpp"
  40 
  41 //
  42 // CMS Bit Map Wrapper
  43 
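     // A CMBitMapRO covers the reserved heap space with one bit per
     // (1 << _shifter) heap words; the constructor below reserves and
     // commits the whole backing store for the bits up front.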
  44 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
  45   _bm((uintptr_t*)NULL,0),
  46   _shifter(shifter) {
  47   _bmStartWord = (HeapWord*)(rs.base());
  48   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  49   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
  50                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  51 
  52   guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  53  // For now we'll just commit all of the bit map up front.
  54   // Later on we'll try to be more parsimonious with swap.
  55   guarantee(_virtual_space.initialize(brs, brs.size()),
  56             "couldn't reserve backing store for CMS bit map");
  57   assert(_virtual_space.committed_size() == brs.size(),
  58          "didn't reserve backing store for all of CMS bit map?");
  59   _bm.set_map((uintptr_t*)_virtual_space.low());
  60   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
  61          _bmWordSize, "inconsistency in bit map sizing");
  62   _bm.set_size(_bmWordSize >> _shifter);
  63 }
  64 
  65 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  66                                                HeapWord* limit) const {
  67   // First we must round addr *up* to a possible object boundary.
  68   addr = (HeapWord*)align_size_up((intptr_t)addr,
  69                                   HeapWordSize << _shifter);
  70   size_t addrOffset = heapWordToOffset(addr);
  71   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  72   size_t limitOffset = heapWordToOffset(limit);
  73   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  74   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  75   assert(nextAddr >= addr, "get_next_one postcondition");
  76   assert(nextAddr == limit || isMarked(nextAddr),
  77          "get_next_one postcondition");
  78   return nextAddr;
  79 }
  80 
  81 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  82                                                  HeapWord* limit) const {
  83   size_t addrOffset = heapWordToOffset(addr);
  84   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  85   size_t limitOffset = heapWordToOffset(limit);
  86   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  87   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  88   assert(nextAddr >= addr, "get_next_one postcondition");
  89   assert(nextAddr == limit || !isMarked(nextAddr),
  90          "get_next_one postcondition");
  91   return nextAddr;
  92 }
  93 
  94 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  95   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  96   return (int) (diff >> _shifter);
  97 }
  98 
  99 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
 100   HeapWord* left  = MAX2(_bmStartWord, mr.start());
 101   HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
 102   if (right > left) {
 103     // Right-open interval [leftOffset, rightOffset).
 104     return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
 105   } else {
 106     return true;
 107   }
 108 }
 109 
 110 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
 111                                              size_t    from_start_index,
 112                                              HeapWord* to_start_word,
 113                                              size_t    word_num) {
 114   _bm.mostly_disjoint_range_union(from_bitmap,
 115                                   from_start_index,
 116                                   heapWordToOffset(to_start_word),
 117                                   word_num);
 118 }
 119 
 120 #ifndef PRODUCT
 121 bool CMBitMapRO::covers(ReservedSpace rs) const {
 122   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 123   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
 124          "size inconsistency");
 125   return _bmStartWord == (HeapWord*)(rs.base()) &&
 126          _bmWordSize  == rs.size()>>LogHeapWordSize;
 127 }
 128 #endif
 129 
 130 void CMBitMap::clearAll() {
 131   _bm.clear();
 132   return;
 133 }
 134 
 135 void CMBitMap::markRange(MemRegion mr) {
 136   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 137   assert(!mr.is_empty(), "unexpected empty region");
 138   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 139           ((HeapWord *) mr.end())),
 140          "markRange memory region end is not card aligned");
 141   // convert address range into offset range
 142   _bm.at_put_range(heapWordToOffset(mr.start()),
 143                    heapWordToOffset(mr.end()), true);
 144 }
 145 
 146 void CMBitMap::clearRange(MemRegion mr) {
 147   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 148   assert(!mr.is_empty(), "unexpected empty region");
 149   // convert address range into offset range
 150   _bm.at_put_range(heapWordToOffset(mr.start()),
 151                    heapWordToOffset(mr.end()), false);
 152 }
 153 
 154 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 155                                             HeapWord* end_addr) {
 156   HeapWord* start = getNextMarkedWordAddress(addr);
 157   start = MIN2(start, end_addr);
 158   HeapWord* end   = getNextUnmarkedWordAddress(start);
 159   end = MIN2(end, end_addr);
 160   assert(start <= end, "Consistency check");
 161   MemRegion mr(start, end);
 162   if (!mr.is_empty()) {
 163     clearRange(mr);
 164   }
 165   return mr;
 166 }
 167 
 168 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 169   _base(NULL), _cm(cm)
 170 #ifdef ASSERT
 171   , _drain_in_progress(false)
 172   , _drain_in_progress_yields(false)
 173 #endif
 174 {}
 175 
 176 void CMMarkStack::allocate(size_t size) {
 177   _base = NEW_C_HEAP_ARRAY(oop, size);
 178   if (_base == NULL)
 179     vm_exit_during_initialization("Failed to allocate "
 180                                   "CM mark stack");
 181   _index = 0;
 182   // QQQQ cast ...
 183   _capacity = (jint) size;
 184   _oops_do_bound = -1;
 185   NOT_PRODUCT(_max_depth = 0);
 186 }
 187 
 188 CMMarkStack::~CMMarkStack() {
 189   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
 190 }
 191 
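     // Lock-free push onto the global mark stack: claim a slot by
     // CAS-ing _index forward, then store the oop into the claimed
     // slot. When the stack is full we just set _overflow instead of
     // growing it.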
 192 void CMMarkStack::par_push(oop ptr) {
 193   while (true) {
 194     if (isFull()) {
 195       _overflow = true;
 196       return;
 197     }
 198     // Otherwise...
 199     jint index = _index;
 200     jint next_index = index+1;
 201     jint res = Atomic::cmpxchg(next_index, &_index, index);
 202     if (res == index) {
 203       _base[index] = ptr;
 204       // Note that we don't maintain this atomically.  We could, but it
 205       // doesn't seem necessary.
 206       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 207       return;
 208     }
 209     // Otherwise, we need to try again.
 210   }
 211 }
 212 
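     // Lock-free bulk push: claim n consecutive slots with a single
     // CAS on _index, then copy the n oops into the claimed range.
     // Sets _overflow if the stack cannot hold all n entries.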
 213 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 214   while (true) {
 215     if (isFull()) {
 216       _overflow = true;
 217       return;
 218     }
 219     // Otherwise...
 220     jint index = _index;
 221     jint next_index = index + n;
 222     if (next_index > _capacity) {
 223       _overflow = true;
 224       return;
 225     }
 226     jint res = Atomic::cmpxchg(next_index, &_index, index);
 227     if (res == index) {
 228       for (int i = 0; i < n; i++) {
 229         int ind = index + i;
 230         assert(ind < _capacity, "By overflow test above.");
 231         _base[ind] = ptr_arr[i];
 232       }
 233       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 234       return;
 235     }
 236     // Otherwise, we need to try again.
 237   }
 238 }
 239 
 240 
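     // Bulk push under ParGCRareEvent_lock (no safepoint check);
     // intended for rare, non-performance-critical callers rather
     // than the hot marking paths.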
 241 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 242   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 243   jint start = _index;
 244   jint next_index = start + n;
 245   if (next_index > _capacity) {
 246     _overflow = true;
 247     return;
 248   }
 249   // Otherwise.
 250   _index = next_index;
 251   for (int i = 0; i < n; i++) {
 252     int ind = start + i;
 253     assert(ind < _capacity, "By overflow test above.");
 254     _base[ind] = ptr_arr[i];
 255   }
 256 }
 257 
 258 
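     // Bulk pop under ParGCRareEvent_lock: transfers up to max oops
     // from the top of the stack into ptr_arr, sets *n to the number
     // actually transferred, and returns false only if the stack was
     // empty.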
 259 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 260   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 261   jint index = _index;
 262   if (index == 0) {
 263     *n = 0;
 264     return false;
 265   } else {
 266     int k = MIN2(max, index);
 267     jint new_ind = index - k;
 268     for (int j = 0; j < k; j++) {
 269       ptr_arr[j] = _base[new_ind + j];
 270     }
 271     _index = new_ind;
 272     *n = k;
 273     return true;
 274   }
 275 }
 276 
 277 
 278 CMRegionStack::CMRegionStack() : _base(NULL) {}
 279 
 280 void CMRegionStack::allocate(size_t size) {
 281   _base = NEW_C_HEAP_ARRAY(MemRegion, size);
 282   if (_base == NULL)
 283     vm_exit_during_initialization("Failed to allocate "
 284                                   "CM region mark stack");
 285   _index = 0;
 286   // QQQQ cast ...
 287   _capacity = (jint) size;
 288 }
 289 
 290 CMRegionStack::~CMRegionStack() {
 291   if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
 292 }
 293 
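     // Lock-free push of a region onto the region stack. Should only
     // be called in tandem with other lock-free pushes / pops; sets
     // _overflow when the stack is full.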
 294 void CMRegionStack::push_lock_free(MemRegion mr) {
 295   assert(mr.word_size() > 0, "Precondition");
 296   while (true) {
 297     jint index = _index;
 298 
 299     if (index >= _capacity) {
 300       _overflow = true;
 301       return;
 302     }
 303     // Otherwise...
 304     jint next_index = index+1;
 305     jint res = Atomic::cmpxchg(next_index, &_index, index);
 306     if (res == index) {
 307       _base[index] = mr;
 308       return;
 309     }
 310     // Otherwise, we need to try again.
 311   }
 312 }
 313 
 314 // Lock-free pop of the region stack. Called during the concurrent
 315 // marking / remark phases. Should only be called in tandem with
 316 // other lock-free pops.
 317 MemRegion CMRegionStack::pop_lock_free() {
 318   while (true) {
 319     jint index = _index;
 320 
 321     if (index == 0) {
 322       return MemRegion();
 323     }
 324     // Otherwise...
 325     jint next_index = index-1;
 326     jint res = Atomic::cmpxchg(next_index, &_index, index);
 327     if (res == index) {
 328       MemRegion mr = _base[next_index];
 329       if (mr.start() != NULL) {
 330         assert(mr.end() != NULL, "invariant");
 331         assert(mr.word_size() > 0, "invariant");
 332         return mr;
 333       } else {
 334         // that entry was invalidated... let's skip it
 335         assert(mr.end() == NULL, "invariant");
 336       }
 337     }
 338     // Otherwise, we need to try again.
 339   }
 340 }
 341 
 342 #if 0
 343 // The routines that manipulate the region stack with a lock are
 344 // not currently used. They should be retained, however, as a
 345 // diagnostic aid.
 346 
 347 void CMRegionStack::push_with_lock(MemRegion mr) {
 348   assert(mr.word_size() > 0, "Precondition");
 349   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 350 
 351   if (isFull()) {
 352     _overflow = true;
 353     return;
 354   }
 355 
 356   _base[_index] = mr;
 357   _index += 1;
 358 }
 359 
 360 MemRegion CMRegionStack::pop_with_lock() {
 361   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 362 
 363   while (true) {
 364     if (_index == 0) {
 365       return MemRegion();
 366     }
 367     _index -= 1;
 368 
 369     MemRegion mr = _base[_index];
 370     if (mr.start() != NULL) {
 371       assert(mr.end() != NULL, "invariant");
 372       assert(mr.word_size() > 0, "invariant");
 373       return mr;
 374     } else {
 375       // that entry was invalidated... let's skip it
 376       assert(mr.end() == NULL, "invariant");
 377     }
 378   }
 379 }
 380 #endif
 381 
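     // Walk the region stack entries up to _oops_do_bound and null
     // out (invalidate) any entry whose region is in the collection
     // set, so that later pops skip it. Returns true if at least one
     // entry was invalidated.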
 382 bool CMRegionStack::invalidate_entries_into_cset() {
 383   bool result = false;
 384   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 385   for (int i = 0; i < _oops_do_bound; ++i) {
 386     MemRegion mr = _base[i];
 387     if (mr.start() != NULL) {
 388       assert(mr.end() != NULL, "invariant");
 389       assert(mr.word_size() > 0, "invariant");
 390       HeapRegion* hr = g1h->heap_region_containing(mr.start());
 391       assert(hr != NULL, "invariant");
 392       if (hr->in_collection_set()) {
 393         // The region points into the collection set
 394         _base[i] = MemRegion();
 395         result = true;
 396       }
 397     } else {
 398       // that entry was invalidated... let's skip it
 399       assert(mr.end() == NULL, "invariant");
 400     }
 401   }
 402   return result;
 403 }
 404 
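     // Drain the mark stack: pop each (grey) object and iterate over
     // its fields with cl. If yield_after is true the drain may yield
     // at a safepoint check and return false, leaving the remaining
     // entries for a later pass.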
 405 template<class OopClosureClass>
 406 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 407   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 408          || SafepointSynchronize::is_at_safepoint(),
 409          "Drain recursion must be yield-safe.");
 410   bool res = true;
 411   debug_only(_drain_in_progress = true);
 412   debug_only(_drain_in_progress_yields = yield_after);
 413   while (!isEmpty()) {
 414     oop newOop = pop();
 415     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 416     assert(newOop->is_oop(), "Expected an oop");
 417     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 418            "only grey objects on this stack");
 419     // iterate over the oops in this oop, marking and pushing
 420     // the ones in the G1 heap.
 421     newOop->oop_iterate(cl);
 422     if (yield_after && _cm->do_yield_check()) {
 423       res = false; break;
 424     }
 425   }
 426   debug_only(_drain_in_progress = false);
 427   return res;
 428 }
 429 
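     // Apply f to the entries currently on the stack, up to the
     // previously recorded _oops_do_bound, and then reset the bound.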
 430 void CMMarkStack::oops_do(OopClosure* f) {
 431   if (_index == 0) return;
 432   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
 433          "Bound must be set.");
 434   for (int i = 0; i < _oops_do_bound; i++) {
 435     f->do_oop(&_base[i]);
 436   }
 437   _oops_do_bound = -1;
 438 }
 439 
 440 bool ConcurrentMark::not_yet_marked(oop obj) const {
 441   return (_g1h->is_obj_ill(obj)
 442           || (_g1h->is_in_permanent(obj)
 443               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
 444 }
 445 
 446 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 447 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 448 #endif // _MSC_VER
 449 
 450 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
 451                                int max_regions) :
 452   _markBitMap1(rs, MinObjAlignment - 1),
 453   _markBitMap2(rs, MinObjAlignment - 1),
 454 
 455   _parallel_marking_threads(0),
 456   _sleep_factor(0.0),
 457   _marking_task_overhead(1.0),
 458   _cleanup_sleep_factor(0.0),
 459   _cleanup_task_overhead(1.0),
 460   _region_bm(max_regions, false /* in_resource_area*/),
 461   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
 462            CardTableModRefBS::card_shift,
 463            false /* in_resource_area*/),
 464   _prevMarkBitMap(&_markBitMap1),
 465   _nextMarkBitMap(&_markBitMap2),
 466   _at_least_one_mark_complete(false),
 467 
 468   _markStack(this),
 469   _regionStack(),
 470   // _finger set in set_non_marking_state
 471 
 472   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
 473   // _active_tasks set in set_non_marking_state
 474   // _tasks set inside the constructor
 475   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
 476   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
 477 
 478   _has_overflown(false),
 479   _concurrent(false),
 480   _has_aborted(false),
 481   _restart_for_overflow(false),
 482   _concurrent_marking_in_progress(false),
 483   _should_gray_objects(false),
 484 
 485   // _verbose_level set below
 486 
 487   _init_times(),
 488   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 489   _cleanup_times(),
 490   _total_counting_time(0.0),
 491   _total_rs_scrub_time(0.0),
 492 
 493   _parallel_workers(NULL)
 494 {
 495   CMVerboseLevel verbose_level =
 496     (CMVerboseLevel) G1MarkingVerboseLevel;
 497   if (verbose_level < no_verbose)
 498     verbose_level = no_verbose;
 499   if (verbose_level > high_verbose)
 500     verbose_level = high_verbose;
 501   _verbose_level = verbose_level;
 502 
 503   if (verbose_low())
 504     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 505                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
 506 
 507   _markStack.allocate(MarkStackSize);
 508   _regionStack.allocate(G1MarkRegionStackSize);
 509 
 510   // Create & start a ConcurrentMark thread.
 511   _cmThread = new ConcurrentMarkThread(this);
 512   assert(cmThread() != NULL, "CM Thread should have been created");
 513   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 514 
 515   _g1h = G1CollectedHeap::heap();
 516   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 517   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
 518   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
 519 
 520   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 521   satb_qs.set_buffer_size(G1SATBBufferSize);
 522 
 523   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
 524   _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
 525   for (int i = 0 ; i < size; i++) {
 526     _par_cleanup_thread_state[i] = new ParCleanupThreadState;
 527   }
 528 
 529   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
 530   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
 531 
 532   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 533   _active_tasks = _max_task_num;
 534   for (int i = 0; i < (int) _max_task_num; ++i) {
 535     CMTaskQueue* task_queue = new CMTaskQueue();
 536     task_queue->initialize();
 537     _task_queues->register_queue(i, task_queue);
 538 
 539     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
 540     _accum_task_vtime[i] = 0.0;
 541   }
 542 
 543   if (ConcGCThreads > ParallelGCThreads) {
 544     vm_exit_during_initialization("Can't have more ConcGCThreads "
 545                                   "than ParallelGCThreads.");
 546   }
 547   if (ParallelGCThreads == 0) {
 548     // if we are not running with any parallel GC threads we will not
 549     // spawn any marking threads either
 550     _parallel_marking_threads =   0;
 551     _sleep_factor             = 0.0;
 552     _marking_task_overhead    = 1.0;
 553   } else {
 554     if (ConcGCThreads > 0) {
 555       // note that ConcGCThreads overrides G1MarkingOverheadPercent
 556       // if both are set
 557 
 558       _parallel_marking_threads = ConcGCThreads;
 559       _sleep_factor             = 0.0;
 560       _marking_task_overhead    = 1.0;
 561     } else if (G1MarkingOverheadPercent > 0) {
 562       // we will calculate the number of parallel marking threads
 563       // based on a target overhead with respect to the soft real-time
 564       // goal
 565 
 566       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 567       double overall_cm_overhead =
 568         (double) MaxGCPauseMillis * marking_overhead /
 569         (double) GCPauseIntervalMillis;
 570       double cpu_ratio = 1.0 / (double) os::processor_count();
 571       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 572       double marking_task_overhead =
 573         overall_cm_overhead / marking_thread_num *
 574                                                 (double) os::processor_count();
 575       double sleep_factor =
 576                          (1.0 - marking_task_overhead) / marking_task_overhead;
 577 
 578       _parallel_marking_threads = (size_t) marking_thread_num;
 579       _sleep_factor             = sleep_factor;
 580       _marking_task_overhead    = marking_task_overhead;
 581     } else {
 582       _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
 583       _sleep_factor             = 0.0;
 584       _marking_task_overhead    = 1.0;
 585     }
 586 
 587     if (parallel_marking_threads() > 1)
 588       _cleanup_task_overhead = 1.0;
 589     else
 590       _cleanup_task_overhead = marking_task_overhead();
 591     _cleanup_sleep_factor =
 592                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 593 
 594 #if 0
 595     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 596     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 597     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 598     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 599     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 600 #endif
 601 
 602     guarantee(parallel_marking_threads() > 0, "peace of mind");
 603     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
 604          (int) _parallel_marking_threads, false, true);
 605     if (_parallel_workers == NULL) {
 606       vm_exit_during_initialization("Failed necessary allocation.");
 607     } else {
 608       _parallel_workers->initialize_workers();
 609     }
 610   }
 611 
 612   // so that the call below can read a sensible value
 613   _heap_start = (HeapWord*) rs.base();
 614   set_non_marking_state();
 615 }
 616 
 617 void ConcurrentMark::update_g1_committed(bool force) {
 618   // If concurrent marking is not in progress, then we do not need to
 619   // update _heap_end. This has a subtle and important
 620   // side-effect. Imagine that two evacuation pauses happen between
 621   // marking completion and remark. The first one can grow the
 622   // heap (hence now the finger is below the heap end). Then, the
 623   // second one could unnecessarily push regions on the region
 624   // stack. This causes the invariant that the region stack is empty
 625   // at the beginning of remark to be false. By ensuring that we do
 626   // not observe heap expansions after marking is complete, we do
 627   // not have this problem.
 628   if (!concurrent_marking_in_progress() && !force)
 629     return;
 630 
 631   MemRegion committed = _g1h->g1_committed();
 632   assert(committed.start() == _heap_start, "start shouldn't change");
 633   HeapWord* new_end = committed.end();
 634   if (new_end > _heap_end) {
 635     // The heap has been expanded.
 636 
 637     _heap_end = new_end;
 638   }
 639   // Notice that the heap can also shrink. However, this only happens
 640   // during a Full GC (at least currently) and the entire marking
 641   // phase will bail out and the task will not be restarted. So, let's
 642   // do nothing.
 643 }
 644 
 645 void ConcurrentMark::reset() {
 646   // Starting values for these two. This should be called in a STW
 647   // phase. CM will be notified of any future g1_committed expansions
 648   // at the end of evacuation pauses, when tasks are
 649   // inactive.
 650   MemRegion committed = _g1h->g1_committed();
 651   _heap_start = committed.start();
 652   _heap_end   = committed.end();
 653 
 654   // Separated the asserts so that we know which one fires.
 655   assert(_heap_start != NULL, "heap bounds should look ok");
 656   assert(_heap_end != NULL, "heap bounds should look ok");
 657   assert(_heap_start < _heap_end, "heap bounds should look ok");
 658 
 659   // reset all the marking data structures and any necessary flags
 660   clear_marking_state();
 661 
 662   if (verbose_low())
 663     gclog_or_tty->print_cr("[global] resetting");
 664 
 665   // We do reset all of them, since different phases will use
 666   // different number of active threads. So, it's easiest to have all
 667   // of them ready.
 668   for (int i = 0; i < (int) _max_task_num; ++i) {
 669     _tasks[i]->reset(_nextMarkBitMap);
 670   }
 671 
 672   // we need this to make sure that the flag is on during the evac
 673   // pause with initial mark piggy-backed
 674   set_concurrent_marking_in_progress();
 675 }
 676 
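     // Set up the terminator and the two overflow barrier syncs for
     // the given number of active tasks, and record whether the
     // upcoming phase is concurrent (marking) or STW (remark).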
 677 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
 678   assert(active_tasks <= _max_task_num, "we should not have more");
 679 
 680   _active_tasks = active_tasks;
 681   // Need to update the three data structures below according to the
 682   // number of active threads for this phase.
 683   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 684   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 685   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 686 
 687   _concurrent = concurrent;
 688   // We propagate this to all tasks, not just the active ones.
 689   for (int i = 0; i < (int) _max_task_num; ++i)
 690     _tasks[i]->set_concurrent(concurrent);
 691 
 692   if (concurrent) {
 693     set_concurrent_marking_in_progress();
 694   } else {
 695     // We currently assume that the concurrent flag has been set to
 696     // false before we start remark. At this point we should also be
 697     // in a STW phase.
 698     assert(!concurrent_marking_in_progress(), "invariant");
 699     assert(_finger == _heap_end, "only way to get here");
 700     update_g1_committed(true);
 701   }
 702 }
 703 
 704 void ConcurrentMark::set_non_marking_state() {
 705   // We set the global marking state to some default values when we're
 706   // not doing marking.
 707   clear_marking_state();
 708   _active_tasks = 0;
 709   clear_concurrent_marking_in_progress();
 710 }
 711 
 712 ConcurrentMark::~ConcurrentMark() {
 713   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
 714   for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
 715   FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
 716                     _par_cleanup_thread_state);
 717 
 718   for (int i = 0; i < (int) _max_task_num; ++i) {
 719     delete _task_queues->queue(i);
 720     delete _tasks[i];
 721   }
 722   delete _task_queues;
 723   FREE_C_HEAP_ARRAY(CMTask*, _tasks);
 724 }
 725 
 726 // This closure is used to mark refs into the g1 generation
 727 // from external roots in the CMS bit map.
 728 // Called at the first checkpoint.
 729 //
 730 
 731 void ConcurrentMark::clearNextBitmap() {
 732   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 733   G1CollectorPolicy* g1p = g1h->g1_policy();
 734 
 735   // Make sure that the concurrent mark thread is still regarded as
 736   // being within the current cycle.
 737   guarantee(cmThread()->during_cycle(), "invariant");
 738 
 739   // We are finishing up the current cycle by clearing the next
 740   // marking bitmap and getting it ready for the next cycle. During
 741   // this time no other cycle can start. So, let's make sure that this
 742   // is the case.
 743   guarantee(!g1h->mark_in_progress(), "invariant");
 744 
 745   // clear the mark bitmap (no grey objects to start with).
 746   // We need to do this in chunks and offer to yield in between
 747   // each chunk.
 748   HeapWord* start  = _nextMarkBitMap->startWord();
 749   HeapWord* end    = _nextMarkBitMap->endWord();
 750   HeapWord* cur    = start;
 751   size_t chunkSize = M;
 752   while (cur < end) {
 753     HeapWord* next = cur + chunkSize;
 754     if (next > end)
 755       next = end;
 756     MemRegion mr(cur,next);
 757     _nextMarkBitMap->clearRange(mr);
 758     cur = next;
 759     do_yield_check();
 760 
 761     // Repeat the asserts from above. We'll do them as asserts here to
 762     // minimize their overhead on the product. However, we'll have
 763     // them as guarantees at the beginning / end of the bitmap
 764     // clearing to get some checking in the product.
 765     assert(cmThread()->during_cycle(), "invariant");
 766     assert(!g1h->mark_in_progress(), "invariant");
 767   }
 768 
 769   // Repeat the asserts from above.
 770   guarantee(cmThread()->during_cycle(), "invariant");
 771   guarantee(!g1h->mark_in_progress(), "invariant");
 772 }
 773 
 774 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 775 public:
 776   bool doHeapRegion(HeapRegion* r) {
 777     if (!r->continuesHumongous()) {
 778       r->note_start_of_marking(true);
 779     }
 780     return false;
 781   }
 782 };
 783 
 784 void ConcurrentMark::checkpointRootsInitialPre() {
 785   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 786   G1CollectorPolicy* g1p = g1h->g1_policy();
 787 
 788   _has_aborted = false;
 789 
 790 #ifndef PRODUCT
 791   if (G1PrintReachableAtInitialMark) {
 792     print_reachable("at-cycle-start",
 793                     true /* use_prev_marking */, true /* all */);
 794   }
 795 #endif
 796 
 797   // Initialise marking structures. This has to be done in a STW phase.
 798   reset();
 799 }
 800 
 801 class CMMarkRootsClosure: public OopsInGenClosure {
 802 private:
 803   ConcurrentMark*  _cm;
 804   G1CollectedHeap* _g1h;
 805   bool             _do_barrier;
 806 
 807 public:
 808   CMMarkRootsClosure(ConcurrentMark* cm,
 809                      G1CollectedHeap* g1h,
 810                      bool do_barrier) : _cm(cm), _g1h(g1h),
 811                                         _do_barrier(do_barrier) { }
 812 
 813   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
 814   virtual void do_oop(      oop* p) { do_oop_work(p); }
 815 
 816   template <class T> void do_oop_work(T* p) {
 817     T heap_oop = oopDesc::load_heap_oop(p);
 818     if (!oopDesc::is_null(heap_oop)) {
 819       oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
 820       assert(obj->is_oop() || obj->mark() == NULL,
 821              "expected an oop, possibly with mark word displaced");
 822       HeapWord* addr = (HeapWord*)obj;
 823       if (_g1h->is_in_g1_reserved(addr)) {
 824         _cm->grayRoot(obj);
 825       }
 826     }
 827     if (_do_barrier) {
 828       assert(!_g1h->is_in_g1_reserved(p),
 829              "Should be called on external roots");
 830       do_barrier(p);
 831     }
 832   }
 833 };
 834 
 835 void ConcurrentMark::checkpointRootsInitialPost() {
 836   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 837 
 838   // For each region note start of marking.
 839   NoteStartOfMarkHRClosure startcl;
 840   g1h->heap_region_iterate(&startcl);
 841 
 842   // Start weak-reference discovery.
 843   ReferenceProcessor* rp = g1h->ref_processor();
 844   rp->verify_no_references_recorded();
 845   rp->enable_discovery(); // enable ("weak") refs discovery
 846   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 847 
 848   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 849   // This is the start of the marking cycle: we expect all threads
 850   // to have SATB queues with active set to false.
 851   satb_mq_set.set_active_all_threads(true, /* new active value */
 852                                      false /* expected_active */);
 853 
 854   // update_g1_committed() will be called at the end of an evac pause
 855   // when marking is on. So, it's also called at the end of the
 856   // initial-mark pause to update the heap end, if the heap expands
 857   // during it. No need to call it here.
 858 }
 859 
 860 // Checkpoint the roots into this generation from outside
 861 // this generation. [Note this initial checkpoint need only
 862 // be approximate -- we'll do a catch up phase subsequently.]
 863 void ConcurrentMark::checkpointRootsInitial() {
 864   assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
 865   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 866 
 867   double start = os::elapsedTime();
 868 
 869   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
 870   g1p->record_concurrent_mark_init_start();
 871   checkpointRootsInitialPre();
 872 
 873   // YSR: when concurrent precleaning is in place, we'll
 874   // need to clear the cached card table here
 875 
 876   ResourceMark rm;
 877   HandleMark  hm;
 878 
 879   g1h->ensure_parsability(false);
 880   g1h->perm_gen()->save_marks();
 881 
 882   CMMarkRootsClosure notOlder(this, g1h, false);
 883   CMMarkRootsClosure older(this, g1h, true);
 884 
 885   g1h->set_marking_started();
 886   g1h->rem_set()->prepare_for_younger_refs_iterate(false);
 887 
 888   g1h->process_strong_roots(true,    // activate StrongRootsScope
 889                             false,   // fake perm gen collection
 890                             SharedHeap::SO_AllClasses,
 891                             &notOlder, // Regular roots
 892                             NULL,     // do not visit active blobs
 893                             &older    // Perm Gen Roots
 894                             );
 895   checkpointRootsInitialPost();
 896 
 897   // Statistics.
 898   double end = os::elapsedTime();
 899   _init_times.add((end - start) * 1000.0);
 900 
 901   g1p->record_concurrent_mark_init_end();
 902 }
 903 
 904 /*
 905    Notice that in the next two methods, we actually leave the STS
 906    during the barrier sync and join it immediately afterwards. If we
 907    do not do this, then the following deadlock can occur: one
 908    thread could be in the barrier sync code, waiting for the other
 909    thread to also sync up, whereas another one could be trying to
 910    yield, while also waiting for the other threads to sync up too.
 911 
 912    Because the thread that does the sync barrier has left the STS, it
 913    is possible that it gets suspended for a Full GC or that an
 914    evacuation pause occurs. This is actually safe, since entering the
 915    sync barrier is one of the last things do_marking_step() does, and
 916    it doesn't manipulate any data structures afterwards.
 917 */
 918 
 919 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
 920   if (verbose_low())
 921     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
 922 
 923   ConcurrentGCThread::stsLeave();
 924   _first_overflow_barrier_sync.enter();
 925   ConcurrentGCThread::stsJoin();
 926   // at this point everyone should have synced up and not be doing any
 927   // more work
 928 
 929   if (verbose_low())
 930     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
 931 
 932   // let task 0 do this
 933   if (task_num == 0) {
 934     // task 0 is responsible for clearing the global data structures
 935     clear_marking_state();
 936 
 937     if (PrintGC) {
 938       gclog_or_tty->date_stamp(PrintGCDateStamps);
 939       gclog_or_tty->stamp(PrintGCTimeStamps);
 940       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 941     }
 942   }
 943 
 944   // after this, each task should reset its own data structures and
 945   // then go into the second barrier
 946 }
 947 
 948 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
 949   if (verbose_low())
 950     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
 951 
 952   ConcurrentGCThread::stsLeave();
 953   _second_overflow_barrier_sync.enter();
 954   ConcurrentGCThread::stsJoin();
 955   // at this point everything should be re-initialised and ready to go
 956 
 957   if (verbose_low())
 958     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
 959 }
 960 
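     // Mark the given root object on the next marking bitmap, if it
     // is not already marked there; called for roots that point into
     // the G1 reserved space during the initial-mark pause.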
 961 void ConcurrentMark::grayRoot(oop p) {
 962   HeapWord* addr = (HeapWord*) p;
 963   // We can't really check against _heap_start and _heap_end, since it
 964   // is possible during an evacuation pause with piggy-backed
 965   // initial-mark that the committed space is expanded during the
 966   // pause without CM observing this change. So the assertion below
 967   // is a bit conservative; but better than nothing.
 968   assert(_g1h->g1_committed().contains(addr),
 969          "address should be within the heap bounds");
 970 
 971   if (!_nextMarkBitMap->isMarked(addr))
 972     _nextMarkBitMap->parMark(addr);
 973 }
 974 
 975 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
 976   // The objects on the region have already been marked "in bulk" by
 977   // the caller. We only need to decide whether to push the region on
 978   // the region stack or not.
 979 
 980   if (!concurrent_marking_in_progress() || !_should_gray_objects)
 981     // We're done with marking and waiting for remark. We do not need to
 982     // push anything else on the region stack.
 983     return;
 984 
 985   HeapWord* finger = _finger;
 986 
 987   if (verbose_low())
 988     gclog_or_tty->print_cr("[global] attempting to push "
 989                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
 990                            PTR_FORMAT, mr.start(), mr.end(), finger);
 991 
 992   if (mr.start() < finger) {
 993     // The finger is always heap region aligned and it is not possible
 994     // for mr to span heap regions.
 995     assert(mr.end() <= finger, "invariant");
 996 
 997     // Separated the asserts so that we know which one fires.
 998     assert(mr.start() <= mr.end(),
 999            "region boundaries should fall within the committed space");
1000     assert(_heap_start <= mr.start(),
1001            "region boundaries should fall within the committed space");
1002     assert(mr.end() <= _heap_end,
1003            "region boundaries should fall within the committed space");
1004     if (verbose_low())
1005       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
1006                              "below the finger, pushing it",
1007                              mr.start(), mr.end());
1008 
1009     if (!region_stack_push_lock_free(mr)) {
1010       if (verbose_low())
1011         gclog_or_tty->print_cr("[global] region stack has overflown.");
1012     }
1013   }
1014 }
1015 
1016 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1017   // The object is not marked by the caller. We need to at least mark
1018   // it and maybe push it on the stack.
1019 
1020   HeapWord* addr = (HeapWord*)p;
1021   if (!_nextMarkBitMap->isMarked(addr)) {
1022     // We definitely need to mark it, irrespective of whether we bail out
1023     // because we're done with marking.
1024     if (_nextMarkBitMap->parMark(addr)) {
1025       if (!concurrent_marking_in_progress() || !_should_gray_objects)
1026         // If we're done with concurrent marking and we're waiting for
1027         // remark, then we're not pushing anything on the stack.
1028         return;
1029 
1030       // No OrderAccess::store_load() is needed. It is implicit in the
1031       // CAS done in parMark(addr) above
1032       HeapWord* finger = _finger;
1033 
1034       if (addr < finger) {
1035         if (!mark_stack_push(oop(addr))) {
1036           if (verbose_low())
1037             gclog_or_tty->print_cr("[global] global stack overflow "
1038                                    "during parMark");
1039         }
1040       }
1041     }
1042   }
1043 }
1044 
1045 class CMConcurrentMarkingTask: public AbstractGangTask {
1046 private:
1047   ConcurrentMark*       _cm;
1048   ConcurrentMarkThread* _cmt;
1049 
1050 public:
1051   void work(int worker_i) {
1052     assert(Thread::current()->is_ConcurrentGC_thread(),
1053            "this should only be done by a conc GC thread");
1054 
1055     double start_vtime = os::elapsedVTime();
1056 
1057     ConcurrentGCThread::stsJoin();
1058 
1059     assert((size_t) worker_i < _cm->active_tasks(), "invariant");
1060     CMTask* the_task = _cm->task(worker_i);
1061     the_task->record_start_time();
1062     if (!_cm->has_aborted()) {
1063       do {
1064         double start_vtime_sec = os::elapsedVTime();
1065         double start_time_sec = os::elapsedTime();
1066         the_task->do_marking_step(10.0);
1067         double end_time_sec = os::elapsedTime();
1068         double end_vtime_sec = os::elapsedVTime();
1069         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1070         double elapsed_time_sec = end_time_sec - start_time_sec;
1071         _cm->clear_has_overflown();
1072 
1073         bool ret = _cm->do_yield_check(worker_i);
1074 
1075         jlong sleep_time_ms;
1076         if (!_cm->has_aborted() && the_task->has_aborted()) {
1077           sleep_time_ms =
1078             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1079           ConcurrentGCThread::stsLeave();
1080           os::sleep(Thread::current(), sleep_time_ms, false);
1081           ConcurrentGCThread::stsJoin();
1082         }
1083         double end_time2_sec = os::elapsedTime();
1084         double elapsed_time2_sec = end_time2_sec - start_time_sec;
1085 
1086 #if 0
1087           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1088                                  "overhead %1.4lf",
1089                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1090                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
1091           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1092                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1093 #endif
1094       } while (!_cm->has_aborted() && the_task->has_aborted());
1095     }
1096     the_task->record_end_time();
1097     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1098 
1099     ConcurrentGCThread::stsLeave();
1100 
1101     double end_vtime = os::elapsedVTime();
1102     _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1103   }
1104 
1105   CMConcurrentMarkingTask(ConcurrentMark* cm,
1106                           ConcurrentMarkThread* cmt) :
1107       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1108 
1109   ~CMConcurrentMarkingTask() { }
1110 };
1111 
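     // Concurrent marking proper: activate the marking tasks and run
     // the CMConcurrentMarkingTask on the parallel marking workers,
     // or serially on the calling thread if there are none.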
1112 void ConcurrentMark::markFromRoots() {
1113   // we might be tempted to assert that:
1114   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1115   //        "inconsistent argument?");
1116   // However that wouldn't be right, because it's possible that
1117   // a safepoint is indeed in progress as a younger generation
1118   // stop-the-world GC happens even as we mark in this generation.
1119 
1120   _restart_for_overflow = false;
1121 
1122   set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
1123 
1124   CMConcurrentMarkingTask markingTask(this, cmThread());
1125   if (parallel_marking_threads() > 0)
1126     _parallel_workers->run_task(&markingTask);
1127   else
1128     markingTask.work(0);
1129   print_stats();
1130 }
1131 
1132 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1133   // world is stopped at this checkpoint
1134   assert(SafepointSynchronize::is_at_safepoint(),
1135          "world should be stopped");
1136   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1137 
1138   // If a full collection has happened, we shouldn't do this.
1139   if (has_aborted()) {
1140     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1141     return;
1142   }
1143 
1144   if (VerifyDuringGC) {
1145     HandleMark hm;  // handle scope
1146     gclog_or_tty->print(" VerifyDuringGC:(before)");
1147     Universe::heap()->prepare_for_verify();
1148     Universe::verify(true, false, true);
1149   }
1150 
1151   G1CollectorPolicy* g1p = g1h->g1_policy();
1152   g1p->record_concurrent_mark_remark_start();
1153 
1154   double start = os::elapsedTime();
1155 
1156   checkpointRootsFinalWork();
1157 
1158   double mark_work_end = os::elapsedTime();
1159 
1160   weakRefsWork(clear_all_soft_refs);
1161 
1162   if (has_overflown()) {
1163     // Oops.  We overflowed.  Restart concurrent marking.
1164     _restart_for_overflow = true;
1165     // Clear the flag. We do not need it any more.
1166     clear_has_overflown();
1167     if (G1TraceMarkStackOverflow)
1168       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1169   } else {
1170     // We're done with marking.
1171     // This is the end of the marking cycle: we expect all threads
1172     // to have SATB queues with active set to true.
1173     JavaThread::satb_mark_queue_set().set_active_all_threads(
1174                                                   false, /* new active value */
1175                                                   true /* expected_active */);
1176 
1177     if (VerifyDuringGC) {
1178       HandleMark hm;  // handle scope
1179       gclog_or_tty->print(" VerifyDuringGC:(after)");
1180       Universe::heap()->prepare_for_verify();
1181       Universe::heap()->verify(/* allow_dirty */      true,
1182                                /* silent */           false,
1183                                /* use_prev_marking */ false);
1184     }
1185   }
1186 
1187 #if VERIFY_OBJS_PROCESSED
1188   _scan_obj_cl.objs_processed = 0;
1189   ThreadLocalObjQueue::objs_enqueued = 0;
1190 #endif
1191 
1192   // Statistics
1193   double now = os::elapsedTime();
1194   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1195   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1196   _remark_times.add((now - start) * 1000.0);
1197 
1198   g1p->record_concurrent_mark_remark_end();
1199 }
1200 
1201 
1202 #define CARD_BM_TEST_MODE 0
1203 
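     // Heap region closure that walks the marking bitmap between
     // top-at-conc-mark-count and next-top-at-mark-start of each
     // region, accumulating live bytes per region and recording live
     // cards in _card_bm and live regions in _region_bm.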
1204 class CalcLiveObjectsClosure: public HeapRegionClosure {
1205 
1206   CMBitMapRO* _bm;
1207   ConcurrentMark* _cm;
1208   bool _changed;
1209   bool _yield;
1210   size_t _words_done;
1211   size_t _tot_live;
1212   size_t _tot_used;
1213   size_t _regions_done;
1214   double _start_vtime_sec;
1215 
1216   BitMap* _region_bm;
1217   BitMap* _card_bm;
1218   intptr_t _bottom_card_num;
1219   bool _final;
1220 
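       // Set the card bitmap bits for the inclusive range of card
       // numbers [start_card_num, last_card_num], offset by the card
       // number of the bottom of the heap.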
1221   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1222     for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1223 #if CARD_BM_TEST_MODE
1224       guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
1225 #else
1226       _card_bm->par_at_put(i - _bottom_card_num, 1);
1227 #endif
1228     }
1229   }
1230 
1231 public:
1232   CalcLiveObjectsClosure(bool final,
1233                          CMBitMapRO *bm, ConcurrentMark *cm,
1234                          BitMap* region_bm, BitMap* card_bm) :
1235     _bm(bm), _cm(cm), _changed(false), _yield(true),
1236     _words_done(0), _tot_live(0), _tot_used(0),
1237     _region_bm(region_bm), _card_bm(card_bm),_final(final),
1238     _regions_done(0), _start_vtime_sec(0.0)
1239   {
1240     _bottom_card_num =
1241       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1242                CardTableModRefBS::card_shift);
1243   }
1244 
1245   // It takes a region that's not empty (i.e., it has at least one
1246   // live object in it) and sets its corresponding bit on the region
1247   // bitmap to 1. If the region is "starts humongous" it will also set
1248   // to 1 the bits on the region bitmap that correspond to its
1249   // associated "continues humongous" regions.
1250   void set_bit_for_region(HeapRegion* hr) {
1251     assert(!hr->continuesHumongous(), "should have filtered those out");
1252 
1253     size_t index = hr->hrs_index();
1254     if (!hr->startsHumongous()) {
1255       // Normal (non-humongous) case: just set the bit.
1256       _region_bm->par_at_put((BitMap::idx_t) index, true);
1257     } else {
1258       // Starts humongous case: calculate how many regions are part of
1259       // this humongous region and then set the bit range. It might
1260       // have been a bit more efficient to look at the object that
1261       // spans these humongous regions to calculate their number from
1262       // the object's size. However, it's a good idea to calculate
1263       // this based on the metadata itself, and not the region
1264       // contents, so that this code is not aware of what goes into
1265       // the humongous regions (in case this changes in the future).
1266       G1CollectedHeap* g1h = G1CollectedHeap::heap();
1267       size_t end_index = index + 1;
1268       while (end_index < g1h->n_regions()) {
1269         HeapRegion* chr = g1h->region_at(end_index);
1270         if (!chr->continuesHumongous()) {
1271           break;
1272         }
1273         end_index += 1;
1274       }
1275       _region_bm->par_at_put_range((BitMap::idx_t) index,
1276                                    (BitMap::idx_t) end_index, true);
1277     }
1278   }
1279 
1280   bool doHeapRegion(HeapRegion* hr) {
1281     if (!_final && _regions_done == 0)
1282       _start_vtime_sec = os::elapsedVTime();
1283 
1284     if (hr->continuesHumongous()) {
1285       // We will ignore these here and process them when their
1286       // associated "starts humongous" region is processed (see
1287       // set_bit_for_region()). Note that we cannot rely on their
1288       // associated "starts humongous" region to have their bit set to
1289       // 1 since, due to the region chunking in the parallel region
1290       // iteration, a "continues humongous" region might be visited
1291       // before its associated "starts humongous".
1292       return false;
1293     }
1294 
1295     HeapWord* nextTop = hr->next_top_at_mark_start();
1296     HeapWord* start   = hr->top_at_conc_mark_count();
1297     assert(hr->bottom() <= start && start <= hr->end() &&
1298            hr->bottom() <= nextTop && nextTop <= hr->end() &&
1299            start <= nextTop,
1300            "Preconditions.");
1301     // Otherwise, record the number of words we'll examine.
1302     size_t words_done = (nextTop - start);
1303     // Find the first marked object at or after "start".
1304     start = _bm->getNextMarkedWordAddress(start, nextTop);
1305     size_t marked_bytes = 0;
1306 
1307     // Below, the term "card num" means the result of shifting an address
1308     // by the card shift -- address 0 corresponds to card number 0.  One
1309     // must subtract the card num of the bottom of the heap to obtain a
1310     // card table index.
1311     // The first card num of the sequence of live cards currently being
1312     // constructed.  -1 ==> no sequence.
1313     intptr_t start_card_num = -1;
1314     // The last card num of the sequence of live cards currently being
1315     // constructed.  -1 ==> no sequence.
1316     intptr_t last_card_num = -1;
1317 
1318     while (start < nextTop) {
1319       if (_yield && _cm->do_yield_check()) {
1320         // We yielded.  It might be for a full collection, in which case
1321         // all bets are off; terminate the traversal.
1322         if (_cm->has_aborted()) {
1323           _changed = false;
1324           return true;
1325         } else {
1326           // Otherwise, it might be a collection pause, and the region
1327           // we're looking at might be in the collection set.  We'll
1328           // abandon this region.
1329           return false;
1330         }
1331       }
1332       oop obj = oop(start);
1333       int obj_sz = obj->size();
1334       // The card num of the start of the current object.
1335       intptr_t obj_card_num =
1336         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1337 
1338       HeapWord* obj_last = start + obj_sz - 1;
1339       intptr_t obj_last_card_num =
1340         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1341 
1342       if (obj_card_num != last_card_num) {
1343         if (start_card_num == -1) {
1344           assert(last_card_num == -1, "Both or neither.");
1345           start_card_num = obj_card_num;
1346         } else {
1347           assert(last_card_num != -1, "Both or neither.");
1348           assert(obj_card_num >= last_card_num, "Inv");
1349           if ((obj_card_num - last_card_num) > 1) {
1350             // Mark the last run, and start a new one.
1351             mark_card_num_range(start_card_num, last_card_num);
1352             start_card_num = obj_card_num;
1353           }
1354         }
1355 #if CARD_BM_TEST_MODE
1356         /*
1357         gclog_or_tty->print_cr("Setting bits from %d/%d.",
1358                                obj_card_num - _bottom_card_num,
1359                                obj_last_card_num - _bottom_card_num);
1360         */
1361         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1362           _card_bm->par_at_put(j - _bottom_card_num, 1);
1363         }
1364 #endif
1365       }
1366       // In any case, we set the last card num.
1367       last_card_num = obj_last_card_num;
1368 
1369       marked_bytes += (size_t)obj_sz * HeapWordSize;
1370       // Find the next marked object after this one.
1371       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1372       _changed = true;
1373     }
1374     // Handle the last range, if any.
1375     if (start_card_num != -1)
1376       mark_card_num_range(start_card_num, last_card_num);
1377     if (_final) {
1378       // Mark the allocated-since-marking portion...
1379       HeapWord* tp = hr->top();
1380       if (nextTop < tp) {
1381         start_card_num =
1382           intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1383         last_card_num =
1384           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1385         mark_card_num_range(start_card_num, last_card_num);
1386         // This definitely means the region has live objects.
1387         set_bit_for_region(hr);
1388       }
1389     }
1390 
1391     hr->add_to_marked_bytes(marked_bytes);
1392     // Update the live region bitmap.
1393     if (marked_bytes > 0) {
1394       set_bit_for_region(hr);
1395     }
1396     hr->set_top_at_conc_mark_count(nextTop);
1397     _tot_live += hr->next_live_bytes();
1398     _tot_used += hr->used();
1399     _words_done = words_done;
1400 
1401     if (!_final) {
1402       ++_regions_done;
1403       if (_regions_done % 10 == 0) {
1404         double end_vtime_sec = os::elapsedVTime();
1405         double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1406         if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1407           jlong sleep_time_ms =
1408             (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1409           os::sleep(Thread::current(), sleep_time_ms, false);
1410           _start_vtime_sec = end_vtime_sec;
1411         }
1412       }
1413     }
1414 
1415     return false;
1416   }
1417 
1418   bool changed() { return _changed;  }
1419   void reset()   { _changed = false; _words_done = 0; }
1420   void no_yield() { _yield = false; }
1421   size_t words_done() { return _words_done; }
1422   size_t tot_live() { return _tot_live; }
1423   size_t tot_used() { return _tot_used; }
1424 };
1425 
1426 
1427 void ConcurrentMark::calcDesiredRegions() {
1428   _region_bm.clear();
1429   _card_bm.clear();
1430   CalcLiveObjectsClosure calccl(false /*final*/,
1431                                 nextMarkBitMap(), this,
1432                                 &_region_bm, &_card_bm);
1433   G1CollectedHeap *g1h = G1CollectedHeap::heap();
1434   g1h->heap_region_iterate(&calccl);
1435 
1436   do {
1437     calccl.reset();
1438     g1h->heap_region_iterate(&calccl);
1439   } while (calccl.changed());
1440 }
1441 
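     // Parallel task that performs the final (stop-the-world) live data
     // count: each worker runs a final CalcLiveObjectsClosure over the
     // regions it claims and records per-worker live and used byte
     // totals, which are summed up via live_bytes() and used_bytes().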
1442 class G1ParFinalCountTask: public AbstractGangTask {
1443 protected:
1444   G1CollectedHeap* _g1h;
1445   CMBitMap* _bm;
1446   size_t _n_workers;
1447   size_t *_live_bytes;
1448   size_t *_used_bytes;
1449   BitMap* _region_bm;
1450   BitMap* _card_bm;
1451 public:
1452   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1453                       BitMap* region_bm, BitMap* card_bm) :
1454     AbstractGangTask("G1 final counting"), _g1h(g1h),
1455     _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
1456   {
1457     if (ParallelGCThreads > 0)
1458       _n_workers = _g1h->workers()->total_workers();
1459     else
1460       _n_workers = 1;
1461     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1462     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1463   }
1464 
1465   ~G1ParFinalCountTask() {
1466     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1467     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1468   }
1469 
1470   void work(int i) {
1471     CalcLiveObjectsClosure calccl(true /*final*/,
1472                                   _bm, _g1h->concurrent_mark(),
1473                                   _region_bm, _card_bm);
1474     calccl.no_yield();
1475     if (G1CollectedHeap::use_parallel_gc_threads()) {
1476       _g1h->heap_region_par_iterate_chunked(&calccl, i,
1477                                             HeapRegion::FinalCountClaimValue);
1478     } else {
1479       _g1h->heap_region_iterate(&calccl);
1480     }
1481     assert(calccl.complete(), "Shouldn't have yielded!");
1482 
1483     assert((size_t) i < _n_workers, "invariant");
1484     _live_bytes[i] = calccl.tot_live();
1485     _used_bytes[i] = calccl.tot_used();
1486   }
1487   size_t live_bytes()  {
1488     size_t live_bytes = 0;
1489     for (size_t i = 0; i < _n_workers; ++i)
1490       live_bytes += _live_bytes[i];
1491     return live_bytes;
1492   }
1493   size_t used_bytes()  {
1494     size_t used_bytes = 0;
1495     for (size_t i = 0; i < _n_workers; ++i)
1496       used_bytes += _used_bytes[i];
1497     return used_bytes;
1498   }
1499 };
1500 
1501 class G1ParNoteEndTask;
1502 
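     // Per-worker closure applied to each heap region at the end of
     // marking: it notes the end of marking for the region, frees the
     // region if it turned out to be completely empty, and accumulates
     // statistics (max live bytes, freed bytes, per-region timing).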
1503 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1504   G1CollectedHeap* _g1;
1505   int _worker_num;
1506   size_t _max_live_bytes;
1507   size_t _regions_claimed;
1508   size_t _freed_bytes;
1509   size_t _cleared_h_regions;
1510   size_t _freed_regions;
1511   UncleanRegionList* _unclean_region_list;
1512   double _claimed_region_time;
1513   double _max_region_time;
1514 
1515 public:
1516   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1517                              UncleanRegionList* list,
1518                              int worker_num);
1519   size_t freed_bytes() { return _freed_bytes; }
1520   size_t cleared_h_regions() { return _cleared_h_regions; }
1521   size_t freed_regions() { return  _freed_regions; }
1522   UncleanRegionList* unclean_region_list() {
1523     return _unclean_region_list;
1524   }
1525 
1526   bool doHeapRegion(HeapRegion *r);
1527 
1528   size_t max_live_bytes() { return _max_live_bytes; }
1529   size_t regions_claimed() { return _regions_claimed; }
1530   double claimed_region_time_sec() { return _claimed_region_time; }
1531   double max_region_time_sec() { return _max_region_time; }
1532 };
1533 
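     // Parallel task that applies G1NoteEndOfConcMarkClosure to the heap
     // regions, finishes the per-thread region freeing work, and gathers
     // the freed/max-live byte totals under ParGCRareEvent_lock.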
1534 class G1ParNoteEndTask: public AbstractGangTask {
1535   friend class G1NoteEndOfConcMarkClosure;
1536 protected:
1537   G1CollectedHeap* _g1h;
1538   size_t _max_live_bytes;
1539   size_t _freed_bytes;
1540   ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
1541 public:
1542   G1ParNoteEndTask(G1CollectedHeap* g1h,
1543                    ConcurrentMark::ParCleanupThreadState**
1544                    par_cleanup_thread_state) :
1545     AbstractGangTask("G1 note end"), _g1h(g1h),
1546     _max_live_bytes(0), _freed_bytes(0),
1547     _par_cleanup_thread_state(par_cleanup_thread_state)
1548   {}
1549 
1550   void work(int i) {
1551     double start = os::elapsedTime();
1552     G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
1553                                            &_par_cleanup_thread_state[i]->list,
1554                                            i);
1555     if (G1CollectedHeap::use_parallel_gc_threads()) {
1556       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1557                                             HeapRegion::NoteEndClaimValue);
1558     } else {
1559       _g1h->heap_region_iterate(&g1_note_end);
1560     }
1561     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1562 
1563     // Now finish up freeing the current thread's regions.
1564     _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
1565                                   g1_note_end.cleared_h_regions(),
1566                                   0, NULL);
1567     {
1568       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1569       _max_live_bytes += g1_note_end.max_live_bytes();
1570       _freed_bytes += g1_note_end.freed_bytes();
1571     }
1572     double end = os::elapsedTime();
1573     if (G1PrintParCleanupStats) {
1574       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1575                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1576                           i, start, end, (end-start)*1000.0,
1577                           g1_note_end.regions_claimed(),
1578                           g1_note_end.claimed_region_time_sec()*1000.0,
1579                           g1_note_end.max_region_time_sec()*1000.0);
1580     }
1581   }
1582   size_t max_live_bytes() { return _max_live_bytes; }
1583   size_t freed_bytes() { return _freed_bytes; }
1584 };
1585 
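     // Parallel task that scrubs the remembered sets using the liveness
     // information recorded in the region and card bitmaps.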
1586 class G1ParScrubRemSetTask: public AbstractGangTask {
1587 protected:
1588   G1RemSet* _g1rs;
1589   BitMap* _region_bm;
1590   BitMap* _card_bm;
1591 public:
1592   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1593                        BitMap* region_bm, BitMap* card_bm) :
1594     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1595     _region_bm(region_bm), _card_bm(card_bm)
1596   {}
1597 
1598   void work(int i) {
1599     if (G1CollectedHeap::use_parallel_gc_threads()) {
1600       _g1rs->scrub_par(_region_bm, _card_bm, i,
1601                        HeapRegion::ScrubRemSetClaimValue);
1602     } else {
1603       _g1rs->scrub(_region_bm, _card_bm);
1604     }
1605   }
1606 
1607 };
1608 
1609 G1NoteEndOfConcMarkClosure::
1610 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1611                            UncleanRegionList* list,
1612                            int worker_num)
1613   : _g1(g1), _worker_num(worker_num),
1614     _max_live_bytes(0), _regions_claimed(0),
1615     _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
1616     _claimed_region_time(0.0), _max_region_time(0.0),
1617     _unclean_region_list(list)
1618 {}
1619 
1620 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
1621   // We use a claim value of zero here because all regions
1622   // were claimed with value 1 in the FinalCount task.
1623   r->reset_gc_time_stamp();
1624   if (!r->continuesHumongous()) {
1625     double start = os::elapsedTime();
1626     _regions_claimed++;
1627     r->note_end_of_marking();
1628     _max_live_bytes += r->max_live_bytes();
1629     _g1->free_region_if_totally_empty_work(r,
1630                                            _freed_bytes,
1631                                            _cleared_h_regions,
1632                                            _freed_regions,
1633                                            _unclean_region_list,
1634                                            true /*par*/);
1635     double region_time = (os::elapsedTime() - start);
1636     _claimed_region_time += region_time;
1637     if (region_time > _max_region_time) _max_region_time = region_time;
1638   }
1639   return false;
1640 }
1641 
1642 void ConcurrentMark::cleanup() {
1643   // world is stopped at this checkpoint
1644   assert(SafepointSynchronize::is_at_safepoint(),
1645          "world should be stopped");
1646   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1647 
1648   // If a full collection has happened, we shouldn't do this.
1649   if (has_aborted()) {
1650     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1651     return;
1652   }
1653 
1654   if (VerifyDuringGC) {
1655     HandleMark hm;  // handle scope
1656     gclog_or_tty->print(" VerifyDuringGC:(before)");
1657     Universe::heap()->prepare_for_verify();
1658     Universe::verify(/* allow dirty  */ true,
1659                      /* silent       */ false,
1660                      /* prev marking */ true);
1661   }
1662 
1663   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1664   g1p->record_concurrent_mark_cleanup_start();
1665 
1666   double start = os::elapsedTime();
1667 
1668   // Do counting once more with the world stopped for good measure.
1669   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1670                                         &_region_bm, &_card_bm);
1671   if (G1CollectedHeap::use_parallel_gc_threads()) {
1672     assert(g1h->check_heap_region_claim_values(
1673                                                HeapRegion::InitialClaimValue),
1674            "sanity check");
1675 
1676     int n_workers = g1h->workers()->total_workers();
1677     g1h->set_par_threads(n_workers);
1678     g1h->workers()->run_task(&g1_par_count_task);
1679     g1h->set_par_threads(0);
1680 
1681     assert(g1h->check_heap_region_claim_values(
1682                                              HeapRegion::FinalCountClaimValue),
1683            "sanity check");
1684   } else {
1685     g1_par_count_task.work(0);
1686   }
1687 
1688   size_t known_garbage_bytes =
1689     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1690 #if 0
1691   gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
1692                          (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
1693                          (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
1694                          (double) known_garbage_bytes / (double) (1024 * 1024));
1695 #endif // 0
1696   g1p->set_known_garbage_bytes(known_garbage_bytes);
1697 
1698   size_t start_used_bytes = g1h->used();
1699   _at_least_one_mark_complete = true;
1700   g1h->set_marking_complete();
1701 
1702   double count_end = os::elapsedTime();
1703   double this_final_counting_time = (count_end - start);
1704   if (G1PrintParCleanupStats) {
1705     gclog_or_tty->print_cr("Cleanup:");
1706     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
1707                            this_final_counting_time*1000.0);
1708   }
1709   _total_counting_time += this_final_counting_time;
1710 
1711   // Install the just-completed "next" mark bitmap as the "prev" bitmap.
1712   swapMarkBitMaps();
1713 
1714   g1h->reset_gc_time_stamp();
1715 
1716   // Note end of marking in all heap regions.
1717   double note_end_start = os::elapsedTime();
1718   G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
1719   if (G1CollectedHeap::use_parallel_gc_threads()) {
1720     int n_workers = g1h->workers()->total_workers();
1721     g1h->set_par_threads(n_workers);
1722     g1h->workers()->run_task(&g1_par_note_end_task);
1723     g1h->set_par_threads(0);
1724 
1725     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1726            "sanity check");
1727   } else {
1728     g1_par_note_end_task.work(0);
1729   }
1730   // Tell the mutators that there might be unclean regions coming...
1731   g1h->set_unclean_regions_coming(true);
1732   double note_end_end = os::elapsedTime();
1733   if (G1PrintParCleanupStats) {
1734     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
1735                            (note_end_end - note_end_start)*1000.0);
1736   }
1737 
1738 
1739   // Scrub the remembered sets BEFORE the call below, since scrubbing
1740   // affects the metric by which we sort the heap regions.
1741   if (G1ScrubRemSets) {
1742     double rs_scrub_start = os::elapsedTime();
1743     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1744     if (G1CollectedHeap::use_parallel_gc_threads()) {
1745       int n_workers = g1h->workers()->total_workers();
1746       g1h->set_par_threads(n_workers);
1747       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1748       g1h->set_par_threads(0);
1749 
1750       assert(g1h->check_heap_region_claim_values(
1751                                             HeapRegion::ScrubRemSetClaimValue),
1752              "sanity check");
1753     } else {
1754       g1_par_scrub_rs_task.work(0);
1755     }
1756 
1757     double rs_scrub_end = os::elapsedTime();
1758     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1759     _total_rs_scrub_time += this_rs_scrub_time;
1760   }
1761 
1762   // this will also free any regions totally full of garbage objects,
1763   // and sort the regions.
1764   g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1765                         g1_par_note_end_task.freed_bytes(),
1766                         g1_par_note_end_task.max_live_bytes());
1767 
1768   // Statistics.
1769   double end = os::elapsedTime();
1770   _cleanup_times.add((end - start) * 1000.0);
1771 
1772   // G1CollectedHeap::heap()->print();
1773   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1774   // G1CollectedHeap::heap()->get_gc_time_stamp());
1775 
1776   if (PrintGC || PrintGCDetails) {
1777     g1h->print_size_transition(gclog_or_tty,
1778                                start_used_bytes,
1779                                g1h->used(),
1780                                g1h->capacity());
1781   }
1782 
1783   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1784   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1785 
1786   // We need to make this count as a "collection" so that any collection pause
1787   // that races with it goes around and waits for completeCleanup to finish.
1788   g1h->increment_total_collections();
1789 
1790   if (VerifyDuringGC) {
1791     HandleMark hm;  // handle scope
1792     gclog_or_tty->print(" VerifyDuringGC:(after)");
1793     Universe::heap()->prepare_for_verify();
1794     Universe::verify(/* allow dirty  */ true,
1795                      /* silent       */ false,
1796                      /* prev marking */ true);
1797   }
1798 }
1799 
1800 void ConcurrentMark::completeCleanup() {
1801   // A full collection intervened.
1802   if (has_aborted()) return;
1803 
1804   int first = 0;
1805   int last = (int)MAX2(ParallelGCThreads, (size_t)1);
1806   for (int t = 0; t < last; t++) {
1807     UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
1808     assert(list->well_formed(), "Inv");
1809     HeapRegion* hd = list->hd();
1810     while (hd != NULL) {
1811       // Clear the remembered set and return the region to the unclean list.
1812       hd->rem_set()->clear();
1813       HeapRegion* next_hd = hd->next_from_unclean_list();
1814       (void)list->pop();
1815       assert(list->hd() == next_hd, "how not?");
1816       _g1h->put_region_on_unclean_list(hd);
1817       if (!hd->isHumongous()) {
1818         // Increment the _free_regions count by one.
1819         _g1h->finish_free_region_work(0, 0, 1, NULL);
1820       }
1821       hd = list->hd();
1822       assert(hd == next_hd, "how not?");
1823     }
1824   }
1825 }
1826 
1827 
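     // Liveness predicate used by reference processing: an object is
     // considered live if it lies outside the G1 reserved heap, or if it
     // is not "ill" (i.e. not dead) according to the marking information.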
1828 class G1CMIsAliveClosure: public BoolObjectClosure {
1829   G1CollectedHeap* _g1;
1830  public:
1831   G1CMIsAliveClosure(G1CollectedHeap* g1) :
1832     _g1(g1)
1833   {}
1834 
1835   void do_object(oop obj) {
1836     assert(false, "not to be invoked");
1837   }
1838   bool do_object_b(oop obj) {
1839     HeapWord* addr = (HeapWord*)obj;
1840     return addr != NULL &&
1841            (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1842   }
1843 };
1844 
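     // Keep-alive closure used during reference processing: objects in
     // the G1 reserved heap that are still "ill" are marked on the given
     // bitmap and pushed on the global mark stack for later scanning.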
1845 class G1CMKeepAliveClosure: public OopClosure {
1846   G1CollectedHeap* _g1;
1847   ConcurrentMark*  _cm;
1848   CMBitMap*        _bitMap;
1849  public:
1850   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1851                        CMBitMap* bitMap) :
1852     _g1(g1), _cm(cm),
1853     _bitMap(bitMap) {}
1854 
1855   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1856   virtual void do_oop(      oop* p) { do_oop_work(p); }
1857 
1858   template <class T> void do_oop_work(T* p) {
1859     oop thisOop = oopDesc::load_decode_heap_oop(p);
1860     HeapWord* addr = (HeapWord*)thisOop;
1861     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
1862       _bitMap->mark(addr);
1863       _cm->mark_stack_push(thisOop);
1864     }
1865   }
1866 };
1867 
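     // Drains the global mark stack with the keep-alive closure so that
     // objects kept alive by reference processing get fully traced.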
1868 class G1CMDrainMarkingStackClosure: public VoidClosure {
1869   CMMarkStack*                  _markStack;
1870   CMBitMap*                     _bitMap;
1871   G1CMKeepAliveClosure*         _oopClosure;
1872  public:
1873   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1874                                G1CMKeepAliveClosure* oopClosure) :
1875     _bitMap(bitMap),
1876     _markStack(markStack),
1877     _oopClosure(oopClosure)
1878   {}
1879 
1880   void do_void() {
1881     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1882   }
1883 };
1884 
1885 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1886   ResourceMark rm;
1887   HandleMark   hm;
1888   G1CollectedHeap* g1h   = G1CollectedHeap::heap();
1889   ReferenceProcessor* rp = g1h->ref_processor();
1890 
1891   // Process weak references.
1892   rp->setup_policy(clear_all_soft_refs);
1893   assert(_markStack.isEmpty(), "mark stack should be empty");
1894 
1895   G1CMIsAliveClosure   g1IsAliveClosure  (g1h);
1896   G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap());
1897   G1CMDrainMarkingStackClosure
1898     g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
1899                                &g1KeepAliveClosure);
1900 
1901   // XXXYYY  Also: copy the parallel ref processing code from CMS.
1902   rp->process_discovered_references(&g1IsAliveClosure,
1903                                     &g1KeepAliveClosure,
1904                                     &g1DrainMarkingStackClosure,
1905                                     NULL);
1906   assert(_markStack.overflow() || _markStack.isEmpty(),
1907          "mark stack should be empty (unless it overflowed)");
1908   if (_markStack.overflow()) {
1909     set_has_overflown();
1910   }
1911 
1912   rp->enqueue_discovered_references();
1913   rp->verify_no_references_recorded();
1914   assert(!rp->discovery_enabled(), "should have been disabled");
1915 
1916   // Now clean up stale oops in SymbolTable and StringTable
1917   SymbolTable::unlink(&g1IsAliveClosure);
1918   StringTable::unlink(&g1IsAliveClosure);
1919 }
1920 
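     // After a successful marking cycle the "next" bitmap holds the
     // up-to-date marking information, so swap it with the "prev" bitmap.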
1921 void ConcurrentMark::swapMarkBitMaps() {
1922   CMBitMapRO* temp = _prevMarkBitMap;
1923   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
1924   _nextMarkBitMap  = (CMBitMap*)  temp;
1925 }
1926 
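     // Parallel remark task: each active worker repeatedly invokes
     // do_marking_step() on its CMTask until the step completes, unless
     // the global mark stack has overflown (in which case remark is
     // abandoned and concurrent marking will be restarted).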
1927 class CMRemarkTask: public AbstractGangTask {
1928 private:
1929   ConcurrentMark *_cm;
1930 
1931 public:
1932   void work(int worker_i) {
1933     // Since all available tasks are actually started, we should
1934     // only proceed if we're supposed to be active.
1935     if ((size_t)worker_i < _cm->active_tasks()) {
1936       CMTask* task = _cm->task(worker_i);
1937       task->record_start_time();
1938       do {
1939         task->do_marking_step(1000000000.0 /* something very large */);
1940       } while (task->has_aborted() && !_cm->has_overflown());
1941       // If we overflow, then we do not want to restart. We instead
1942       // want to abort remark and do concurrent marking again.
1943       task->record_end_time();
1944     }
1945   }
1946 
1947   CMRemarkTask(ConcurrentMark* cm) :
1948     AbstractGangTask("Par Remark"), _cm(cm) { }
1949 };
1950 
1951 void ConcurrentMark::checkpointRootsFinalWork() {
1952   ResourceMark rm;
1953   HandleMark   hm;
1954   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1955 
1956   g1h->ensure_parsability(false);
1957 
1958   if (G1CollectedHeap::use_parallel_gc_threads()) {
1959     G1CollectedHeap::StrongRootsScope srs(g1h);
1960     // this is remark, so we'll use up all available threads
1961     int active_workers = ParallelGCThreads;
1962     set_phase(active_workers, false);
1963 
1964     CMRemarkTask remarkTask(this);
1965     // We will start all available threads, even if we decide that the
1966     // active_workers will be fewer. The extra ones will just bail out
1967     // immediately.
1968     int n_workers = g1h->workers()->total_workers();
1969     g1h->set_par_threads(n_workers);
1970     g1h->workers()->run_task(&remarkTask);
1971     g1h->set_par_threads(0);
1972   } else {
1973     G1CollectedHeap::StrongRootsScope srs(g1h);
1974     // this is remark, so we'll use up all available threads
1975     int active_workers = 1;
1976     set_phase(active_workers, false);
1977 
1978     CMRemarkTask remarkTask(this);
1979     // In the serial case a single call to work() does all the work.
1982     remarkTask.work(0);
1983   }
1984   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1985   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
1986 
1987   print_stats();
1988 
1989   if (!restart_for_overflow())
1990     set_non_marking_state();
1991 
1992 #if VERIFY_OBJS_PROCESSED
1993   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
1994     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
1995                            _scan_obj_cl.objs_processed,
1996                            ThreadLocalObjQueue::objs_enqueued);
1997     guarantee(_scan_obj_cl.objs_processed ==
1998               ThreadLocalObjQueue::objs_enqueued,
1999               "Different number of objs processed and enqueued.");
2000   }
2001 #endif
2002 }
2003 
2004 #ifndef PRODUCT
2005 
2006 class PrintReachableOopClosure: public OopClosure {
2007 private:
2008   G1CollectedHeap* _g1h;
2009   CMBitMapRO*      _bitmap;
2010   outputStream*    _out;
2011   bool             _use_prev_marking;
2012   bool             _all;
2013 
2014 public:
2015   PrintReachableOopClosure(CMBitMapRO*   bitmap,
2016                            outputStream* out,
2017                            bool          use_prev_marking,
2018                            bool          all) :
2019     _g1h(G1CollectedHeap::heap()),
2020     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
2021 
2022   void do_oop(narrowOop* p) { do_oop_work(p); }
2023   void do_oop(      oop* p) { do_oop_work(p); }
2024 
2025   template <class T> void do_oop_work(T* p) {
2026     oop         obj = oopDesc::load_decode_heap_oop(p);
2027     const char* str = NULL;
2028     const char* str2 = "";
2029 
2030     if (obj == NULL) {
2031       str = "";
2032     } else if (!_g1h->is_in_g1_reserved(obj)) {
2033       str = " O";
2034     } else {
2035       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2036       guarantee(hr != NULL, "invariant");
2037       bool over_tams = false;
2038       if (_use_prev_marking) {
2039         over_tams = hr->obj_allocated_since_prev_marking(obj);
2040       } else {
2041         over_tams = hr->obj_allocated_since_next_marking(obj);
2042       }
2043       bool marked = _bitmap->isMarked((HeapWord*) obj);
2044 
2045       if (over_tams) {
2046         str = " >";
2047         if (marked) {
2048           str2 = " AND MARKED";
2049         }
2050       } else if (marked) {
2051         str = " M";
2052       } else {
2053         str = " NOT";
2054       }
2055     }
2056 
2057     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2058                    p, (void*) obj, str, str2);
2059   }
2060 };
2061 
2062 class PrintReachableObjectClosure : public ObjectClosure {
2063 private:
2064   CMBitMapRO*   _bitmap;
2065   outputStream* _out;
2066   bool          _use_prev_marking;
2067   bool          _all;
2068   HeapRegion*   _hr;
2069 
2070 public:
2071   PrintReachableObjectClosure(CMBitMapRO*   bitmap,
2072                               outputStream* out,
2073                               bool          use_prev_marking,
2074                               bool          all,
2075                               HeapRegion*   hr) :
2076     _bitmap(bitmap), _out(out),
2077     _use_prev_marking(use_prev_marking), _all(all), _hr(hr) { }
2078 
2079   void do_object(oop o) {
2080     bool over_tams;
2081     if (_use_prev_marking) {
2082       over_tams = _hr->obj_allocated_since_prev_marking(o);
2083     } else {
2084       over_tams = _hr->obj_allocated_since_next_marking(o);
2085     }
2086     bool marked = _bitmap->isMarked((HeapWord*) o);
2087     bool print_it = _all || over_tams || marked;
2088 
2089     if (print_it) {
2090       _out->print_cr(" "PTR_FORMAT"%s",
2091                      o, (over_tams) ? " >" : (marked) ? " M" : "");
2092       PrintReachableOopClosure oopCl(_bitmap, _out, _use_prev_marking, _all);
2093       o->oop_iterate(&oopCl);
2094     }
2095   }
2096 };
2097 
2098 class PrintReachableRegionClosure : public HeapRegionClosure {
2099 private:
2100   CMBitMapRO*   _bitmap;
2101   outputStream* _out;
2102   bool          _use_prev_marking;
2103   bool          _all;
2104 
2105 public:
2106   bool doHeapRegion(HeapRegion* hr) {
2107     HeapWord* b = hr->bottom();
2108     HeapWord* e = hr->end();
2109     HeapWord* t = hr->top();
2110     HeapWord* p = NULL;
2111     if (_use_prev_marking) {
2112       p = hr->prev_top_at_mark_start();
2113     } else {
2114       p = hr->next_top_at_mark_start();
2115     }
2116     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2117                    "TAMS: "PTR_FORMAT, b, e, t, p);
2118     _out->cr();
2119 
2120     HeapWord* from = b;
2121     HeapWord* to   = t;
2122 
2123     if (to > from) {
2124       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2125       _out->cr();
2126       PrintReachableObjectClosure ocl(_bitmap, _out,
2127                                       _use_prev_marking, _all, hr);
2128       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2129       _out->cr();
2130     }
2131 
2132     return false;
2133   }
2134 
2135   PrintReachableRegionClosure(CMBitMapRO*   bitmap,
2136                               outputStream* out,
2137                               bool          use_prev_marking,
2138                               bool          all) :
2139     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
2140 };
2141 
2142 void ConcurrentMark::print_reachable(const char* str,
2143                                      bool use_prev_marking,
2144                                      bool all) {
2145   gclog_or_tty->cr();
2146   gclog_or_tty->print_cr("== Doing heap dump... ");
2147 
2148   if (G1PrintReachableBaseFile == NULL) {
2149     gclog_or_tty->print_cr("  #### error: no base file defined");
2150     return;
2151   }
2152 
2153   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2154       (JVM_MAXPATHLEN - 1)) {
2155     gclog_or_tty->print_cr("  #### error: file name too long");
2156     return;
2157   }
2158 
2159   char file_name[JVM_MAXPATHLEN];
2160   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2161   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2162 
2163   fileStream fout(file_name);
2164   if (!fout.is_open()) {
2165     gclog_or_tty->print_cr("  #### error: could not open file");
2166     return;
2167   }
2168 
2169   outputStream* out = &fout;
2170 
2171   CMBitMapRO* bitmap = NULL;
2172   if (use_prev_marking) {
2173     bitmap = _prevMarkBitMap;
2174   } else {
2175     bitmap = _nextMarkBitMap;
2176   }
2177 
2178   out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS");
2179   out->cr();
2180 
2181   out->print_cr("--- ITERATING OVER REGIONS");
2182   out->cr();
2183   PrintReachableRegionClosure rcl(bitmap, out, use_prev_marking, all);
2184   _g1h->heap_region_iterate(&rcl);
2185   out->cr();
2186 
2187   gclog_or_tty->print_cr("  done");
2188   gclog_or_tty->flush();
2189 }
2190 
2191 #endif // PRODUCT
2192 
2193 // This note is for drainAllSATBBuffers and the code in between.
2194 // In the future we could reuse a task to do this work during an
2195 // evacuation pause (since now tasks are not active and can be claimed
2196 // during an evacuation pause). This was a late change to the code and
2197 // is currently not being taken advantage of.
2198 
2199 class CMGlobalObjectClosure : public ObjectClosure {
2200 private:
2201   ConcurrentMark* _cm;
2202 
2203 public:
2204   void do_object(oop obj) {
2205     _cm->deal_with_reference(obj);
2206   }
2207 
2208   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2209 };
2210 
2211 void ConcurrentMark::deal_with_reference(oop obj) {
2212   if (verbose_high())
2213     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2214                            (void*) obj);
2215 
2216 
2217   HeapWord* objAddr = (HeapWord*) obj;
2218   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2219   if (_g1h->is_in_g1_reserved(objAddr)) {
2220     assert(obj != NULL, "is_in_g1_reserved should ensure this");
2221     HeapRegion* hr = _g1h->heap_region_containing(obj);
2222     if (_g1h->is_obj_ill(obj, hr)) {
2223       if (verbose_high())
2224         gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2225                                "marked", (void*) obj);
2226 
2227       // we need to mark it first
2228       if (_nextMarkBitMap->parMark(objAddr)) {
2229         // No OrderAccess::store_load() is needed. It is implicit in the
2230         // CAS done in parMark(objAddr) above
2231         HeapWord* finger = _finger;
2232         if (objAddr < finger) {
2233           if (verbose_high())
2234             gclog_or_tty->print_cr("[global] below the global finger "
2235                                    "("PTR_FORMAT"), pushing it", finger);
2236           if (!mark_stack_push(obj)) {
2237             if (verbose_low())
2238               gclog_or_tty->print_cr("[global] global stack overflow during "
2239                                      "deal_with_reference");
2240           }
2241         }
2242       }
2243     }
2244   }
2245 }
2246 
2247 void ConcurrentMark::drainAllSATBBuffers() {
2248   CMGlobalObjectClosure oc(this);
2249   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2250   satb_mq_set.set_closure(&oc);
2251 
2252   while (satb_mq_set.apply_closure_to_completed_buffer()) {
2253     if (verbose_medium())
2254       gclog_or_tty->print_cr("[global] processed an SATB buffer");
2255   }
2256 
2257   // no need to check whether we should do this, as this is only
2258   // called during an evacuation pause
2259   satb_mq_set.iterate_closure_all_threads();
2260 
2261   satb_mq_set.set_closure(NULL);
2262   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2263 }
2264 
2265 void ConcurrentMark::markPrev(oop p) {
2266   // Note we are overriding the read-only view of the prev map here, via
2267   // the cast.
2268   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2269 }
2270 
2271 void ConcurrentMark::clear(oop p) {
2272   assert(p != NULL && p->is_oop(), "expected an oop");
2273   HeapWord* addr = (HeapWord*)p;
2274   assert(addr >= _nextMarkBitMap->startWord() &&
2275          addr < _nextMarkBitMap->endWord(), "in a region");
2276 
2277   _nextMarkBitMap->clear(addr);
2278 }
2279 
2280 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2281   // Note we are overriding the read-only view of the prev map here, via
2282   // the cast.
2283   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2284   _nextMarkBitMap->clearRange(mr);
2285 }
2286 
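     // Claims the region that the global finger currently points into by
     // atomically advancing the finger to the region's end with a CAS.
     // On a successful claim the region is returned if it is non-empty;
     // an empty region yields NULL and the caller is expected to call
     // claim_region() again. On CAS failure the finger is re-read and
     // the claim retried; NULL is also returned once the finger has
     // reached the end of the heap.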
2287 HeapRegion*
2288 ConcurrentMark::claim_region(int task_num) {
2289   // "checkpoint" the finger
2290   HeapWord* finger = _finger;
2291 
2292   // _heap_end will not change underneath our feet; it only changes at
2293   // yield points.
2294   while (finger < _heap_end) {
2295     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2296 
2297     // is the gap between reading the finger and doing the CAS too long?
2298 
2299     HeapRegion* curr_region   = _g1h->heap_region_containing(finger);
2300     HeapWord*   bottom        = curr_region->bottom();
2301     HeapWord*   end           = curr_region->end();
2302     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2303 
2304     if (verbose_low())
2305       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2306                              "["PTR_FORMAT", "PTR_FORMAT"), "
2307                              "limit = "PTR_FORMAT,
2308                              task_num, curr_region, bottom, end, limit);
2309 
2310     HeapWord* res =
2311       (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2312     if (res == finger) {
2313       // we succeeded
2314 
2315       // notice that _finger == end cannot be guaranteed here since
2316       // someone else might have moved the finger even further
2317       assert(_finger >= end, "the finger should have moved forward");
2318 
2319       if (verbose_low())
2320         gclog_or_tty->print_cr("[%d] we were successful with region = "
2321                                PTR_FORMAT, task_num, curr_region);
2322 
2323       if (limit > bottom) {
2324         if (verbose_low())
2325           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2326                                  "returning it ", task_num, curr_region);
2327         return curr_region;
2328       } else {
2329         assert(limit == bottom,
2330                "the region limit should be at bottom");
2331         if (verbose_low())
2332           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2333                                  "returning NULL", task_num, curr_region);
2334         // we return NULL and the caller should try calling
2335         // claim_region() again.
2336         return NULL;
2337       }
2338     } else {
2339       assert(_finger > finger, "the finger should have moved forward");
2340       if (verbose_low())
2341         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2342                                "global finger = "PTR_FORMAT", "
2343                                "our finger = "PTR_FORMAT,
2344                                task_num, _finger, finger);
2345 
2346       // read it again
2347       finger = _finger;
2348     }
2349   }
2350 
2351   return NULL;
2352 }
2353 
2354 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
2355   bool result = false;
2356   for (int i = 0; i < (int)_max_task_num; ++i) {
2357     CMTask* the_task = _tasks[i];
2358     MemRegion mr = the_task->aborted_region();
2359     if (mr.start() != NULL) {
2360       assert(mr.end() != NULL, "invariant");
2361       assert(mr.word_size() > 0, "invariant");
2362       HeapRegion* hr = _g1h->heap_region_containing(mr.start());
2363       assert(hr != NULL, "invariant");
2364       if (hr->in_collection_set()) {
2365         // The region points into the collection set
2366         the_task->set_aborted_region(MemRegion());
2367         result = true;
2368       }
2369     }
2370   }
2371   return result;
2372 }
2373 
2374 bool ConcurrentMark::has_aborted_regions() {
2375   for (int i = 0; i < (int)_max_task_num; ++i) {
2376     CMTask* the_task = _tasks[i];
2377     MemRegion mr = the_task->aborted_region();
2378     if (mr.start() != NULL) {
2379       assert(mr.end() != NULL, "invariant");
2380       assert(mr.word_size() > 0, "invariant");
2381       return true;
2382     }
2383   }
2384   return false;
2385 }
2386 
2387 void ConcurrentMark::oops_do(OopClosure* cl) {
2388   if (_markStack.size() > 0 && verbose_low())
2389     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2390                            "size = %d", _markStack.size());
2391   // we first iterate over the contents of the mark stack...
2392   _markStack.oops_do(cl);
2393 
2394   for (int i = 0; i < (int)_max_task_num; ++i) {
2395     OopTaskQueue* queue = _task_queues->queue((int)i);
2396 
2397     if (queue->size() > 0 && verbose_low())
2398       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2399                              "size = %d", i, queue->size());
2400 
2401     // ...then over the contents of all the task queues.
2402     queue->oops_do(cl);
2403   }
2404 
2405   // Invalidate any entries in the region stack that point into the
2406   // collection set.
2407   if (_regionStack.invalidate_entries_into_cset()) {
2408     // otherwise, any gray objects copied during the evacuation pause
2409     // might not be visited.
2410     assert(_should_gray_objects, "invariant");
2411   }
2412 
2413   // Invalidate any aborted regions, recorded in the individual CM
2414   // tasks, that point into the collection set.
2415   if (invalidate_aborted_regions_in_cset()) {
2416     // otherwise, any gray objects copied during the evacuation pause
2417     // might not be visited.
2418     assert(_should_gray_objects, "invariant");
2419   }
2420 
2421 }
2422 
2423 void ConcurrentMark::clear_marking_state() {
2424   _markStack.setEmpty();
2425   _markStack.clear_overflow();
2426   _regionStack.setEmpty();
2427   _regionStack.clear_overflow();
2428   clear_has_overflown();
2429   _finger = _heap_start;
2430 
2431   for (int i = 0; i < (int)_max_task_num; ++i) {
2432     OopTaskQueue* queue = _task_queues->queue(i);
2433     queue->set_empty();
2434   }
2435 }
2436 
2437 void ConcurrentMark::print_stats() {
2438   if (verbose_stats()) {
2439     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2440     for (size_t i = 0; i < _active_tasks; ++i) {
2441       _tasks[i]->print_stats();
2442       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2443     }
2444   }
2445 }
2446 
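     // Oop closure used when completing marking in the collection set:
     // forwarded objects are followed to their new location; objects in
     // the collection set that are not yet marked are marked on the next
     // bitmap and pushed on a bounded, local mark stack (object arrays
     // are processed in chunks of _array_increment elements); objects
     // outside the collection set are handed to deal_with_reference().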
2447 class CSMarkOopClosure: public OopClosure {
2448   friend class CSMarkBitMapClosure;
2449 
2450   G1CollectedHeap* _g1h;
2451   CMBitMap*        _bm;
2452   ConcurrentMark*  _cm;
2453   oop*             _ms;
2454   jint*            _array_ind_stack;
2455   int              _ms_size;
2456   int              _ms_ind;
2457   int              _array_increment;
2458 
2459   bool push(oop obj, int arr_ind = 0) {
2460     if (_ms_ind == _ms_size) {
2461       gclog_or_tty->print_cr("Mark stack is full.");
2462       return false;
2463     }
2464     _ms[_ms_ind] = obj;
2465     if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
2466     _ms_ind++;
2467     return true;
2468   }
2469 
2470   oop pop() {
2471     if (_ms_ind == 0) return NULL;
2472     else {
2473       _ms_ind--;
2474       return _ms[_ms_ind];
2475     }
2476   }
2477 
2478   template <class T> bool drain() {
2479     while (_ms_ind > 0) {
2480       oop obj = pop();
2481       assert(obj != NULL, "Since index was non-zero.");
2482       if (obj->is_objArray()) {
2483         jint arr_ind = _array_ind_stack[_ms_ind];
2484         objArrayOop aobj = objArrayOop(obj);
2485         jint len = aobj->length();
2486         jint next_arr_ind = arr_ind + _array_increment;
2487         if (next_arr_ind < len) {
2488           push(obj, next_arr_ind);
2489         }
2490         // Now process this chunk of the array.
2491         int lim = MIN2(next_arr_ind, len);
2492         for (int j = arr_ind; j < lim; j++) {
2493           do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2494         }
2495 
2496       } else {
2497         obj->oop_iterate(this);
2498       }
2499       if (abort()) return false;
2500     }
2501     return true;
2502   }
2503 
2504 public:
2505   CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2506     _g1h(G1CollectedHeap::heap()),
2507     _cm(cm),
2508     _bm(cm->nextMarkBitMap()),
2509     _ms_size(ms_size), _ms_ind(0),
2510     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2511     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2512     _array_increment(MAX2(ms_size/8, 16))
2513   {}
2514 
2515   ~CSMarkOopClosure() {
2516     FREE_C_HEAP_ARRAY(oop, _ms);
2517     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2518   }
2519 
2520   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2521   virtual void do_oop(      oop* p) { do_oop_work(p); }
2522 
2523   template <class T> void do_oop_work(T* p) {
2524     T heap_oop = oopDesc::load_heap_oop(p);
2525     if (oopDesc::is_null(heap_oop)) return;
2526     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2527     if (obj->is_forwarded()) {
2528       // If the object has already been forwarded, we have to make sure
2529       // that it's marked.  So follow the forwarding pointer.  Note that
2530       // this does the right thing for self-forwarding pointers in the
2531       // evacuation failure case.
2532       obj = obj->forwardee();
2533     }
2534     HeapRegion* hr = _g1h->heap_region_containing(obj);
2535     if (hr != NULL) {
2536       if (hr->in_collection_set()) {
2537         if (_g1h->is_obj_ill(obj)) {
2538           _bm->mark((HeapWord*)obj);
2539           if (!push(obj)) {
2540             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2541             set_abort();
2542           }
2543         }
2544       } else {
2545         // Outside the collection set; we need to gray it
2546         _cm->deal_with_reference(obj);
2547       }
2548     }
2549   }
2550 };
2551 
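     // Bitmap closure that, for every marked (and not yet forwarded)
     // object, pushes it on the CSMarkOopClosure's stack and drains that
     // stack; it returns false to abort the iteration if the local mark
     // stack fills up.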
2552 class CSMarkBitMapClosure: public BitMapClosure {
2553   G1CollectedHeap* _g1h;
2554   CMBitMap*        _bitMap;
2555   ConcurrentMark*  _cm;
2556   CSMarkOopClosure _oop_cl;
2557 public:
2558   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2559     _g1h(G1CollectedHeap::heap()),
2560     _bitMap(cm->nextMarkBitMap()),
2561     _oop_cl(cm, ms_size)
2562   {}
2563 
2564   ~CSMarkBitMapClosure() {}
2565 
2566   bool do_bit(size_t offset) {
2567     // convert offset into a HeapWord*
2568     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2569     assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
2570            "address out of range");
2571     assert(_bitMap->isMarked(addr), "tautology");
2572     oop obj = oop(addr);
2573     if (!obj->is_forwarded()) {
2574       if (!_oop_cl.push(obj)) return false;
2575       if (UseCompressedOops) {
2576         if (!_oop_cl.drain<narrowOop>()) return false;
2577       } else {
2578         if (!_oop_cl.drain<oop>()) return false;
2579       }
2580     }
2581     // Otherwise...
2582     return true;
2583   }
2584 };
2585 
2586 
2587 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2588   CMBitMap* _bm;
2589   CSMarkBitMapClosure _bit_cl;
2590   enum SomePrivateConstants {
2591     MSSize = 1000
2592   };
2593   bool _completed;
2594 public:
2595   CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2596     _bm(cm->nextMarkBitMap()),
2597     _bit_cl(cm, MSSize),
2598     _completed(true)
2599   {}
2600 
2601   ~CompleteMarkingInCSHRClosure() {}
2602 
2603   bool doHeapRegion(HeapRegion* r) {
2604     if (!r->evacuation_failed()) {
2605       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2606       if (!mr.is_empty()) {
2607         if (!_bm->iterate(&_bit_cl, mr)) {
2608           _completed = false;
2609           return true;
2610         }
2611       }
2612     }
2613     return false;
2614   }
2615 
2616   bool completed() { return _completed; }
2617 };
2618 
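     // Clears the next-bitmap marks over the used portion of regions that
     // did not experience an evacuation failure; applied to the
     // collection set once marking in it has been completed.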
2619 class ClearMarksInHRClosure: public HeapRegionClosure {
2620   CMBitMap* _bm;
2621 public:
2622   ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2623 
2624   bool doHeapRegion(HeapRegion* r) {
2625     if (!r->used_region().is_empty() && !r->evacuation_failed()) {
2626       MemRegion usedMR = r->used_region();
2627       _bm->clearRange(usedMR);
2628     }
2629     return false;
2630   }
2631 };
2632 
2633 void ConcurrentMark::complete_marking_in_collection_set() {
2634   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
2635 
2636   if (!g1h->mark_in_progress()) {
2637     g1h->g1_policy()->record_mark_closure_time(0.0);
2638     return;
2639   }
2640 
2641   int i = 1;
2642   double start = os::elapsedTime();
2643   while (true) {
2644     i++;
2645     CompleteMarkingInCSHRClosure cmplt(this);
2646     g1h->collection_set_iterate(&cmplt);
2647     if (cmplt.completed()) break;
2648   }
2649   double end_time = os::elapsedTime();
2650   double elapsed_time_ms = (end_time - start) * 1000.0;
2651   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2652 
2653   ClearMarksInHRClosure clr(nextMarkBitMap());
2654   g1h->collection_set_iterate(&clr);
2655 }
2656 
2657 // The next two methods deal with the following optimisation. Some
2658 // objects are gray by being marked and located above the finger. If
2659 // they are copied, during an evacuation pause, below the finger then
2660 // they need to be pushed on the stack. The observation is that, if
2661 // there are no regions in the collection set located above the
2662 // finger, then the above cannot happen, hence we do not need to
2663 // explicitly gray any objects when copying them to below the
2664 // finger. The global stack will be scanned to ensure that, if it
2665 // points to objects being copied, it will update their
2666 // location. There is a tricky situation with the gray objects in
2667 // the region stack that are being copied, however. See the comment
2668 // in newCSet().
2669 
2670 void ConcurrentMark::newCSet() {
2671   if (!concurrent_marking_in_progress())
2672     // nothing to do if marking is not in progress
2673     return;
2674 
2675   // find what the lowest finger is among the global and local fingers
2676   _min_finger = _finger;
2677   for (int i = 0; i < (int)_max_task_num; ++i) {
2678     CMTask* task = _tasks[i];
2679     HeapWord* task_finger = task->finger();
2680     if (task_finger != NULL && task_finger < _min_finger)
2681       _min_finger = task_finger;
2682   }
2683 
2684   _should_gray_objects = false;
2685 
2686   // This fixes a very subtle and frustrating bug. It might be the case
2687   // that, during an evacuation pause, heap regions that contain
2688   // objects that are gray (by being in regions contained in the
2689   // region stack) are included in the collection set. Since such gray
2690   // objects will be moved, and because it's not easy to redirect
2691   // region stack entries to point to a new location (because objects
2692   // in one region might be scattered to multiple regions after they
2693   // are copied), one option is to ensure that all marked objects
2694   // copied during a pause are pushed on the stack. Notice, however,
2695   // that this problem can only happen when the region stack is not
2696   // empty during an evacuation pause. So, we make the fix a bit less
2697   // conservative and ensure that regions are pushed on the stack,
2698   // irrespective of whether all collection set regions are below the
2699   // finger, if the region stack is not empty. This is expected to be
2700   // a rare case, so I don't think it's necessary to be smarter about it.
2701   if (!region_stack_empty() || has_aborted_regions())
2702     _should_gray_objects = true;
2703 }
2704 
2705 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
2706   if (!concurrent_marking_in_progress())
2707     return;
2708 
2709   HeapWord* region_end = hr->end();
2710   if (region_end > _min_finger)
2711     _should_gray_objects = true;
2712 }
2713 
2714 // abandon current marking iteration due to a Full GC
2715 void ConcurrentMark::abort() {
2716   // Clear all marks to force marking thread to do nothing
2717   _nextMarkBitMap->clearAll();
2718   // Empty mark stack
2719   clear_marking_state();
2720   for (int i = 0; i < (int)_max_task_num; ++i) {
2721     _tasks[i]->clear_region_fields();
2722     _tasks[i]->clear_aborted_region();
2723   }
2724   _has_aborted = true;
2725 
2726   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2727   satb_mq_set.abandon_partial_marking();
2728   // This can be called either during or outside marking, we'll read
2729   // the expected_active value from the SATB queue set.
2730   satb_mq_set.set_active_all_threads(
2731                                  false, /* new active value */
2732                                  satb_mq_set.is_active() /* expected_active */);
2733 }
2734 
2735 static void print_ms_time_info(const char* prefix, const char* name,
2736                                NumberSeq& ns) {
2737   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2738                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2739   if (ns.num() > 0) {
2740     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2741                            prefix, ns.sd(), ns.maximum());
2742   }
2743 }
2744 
2745 void ConcurrentMark::print_summary_info() {
2746   gclog_or_tty->print_cr(" Concurrent marking:");
2747   print_ms_time_info("  ", "init marks", _init_times);
2748   print_ms_time_info("  ", "remarks", _remark_times);
2749   {
2750     print_ms_time_info("     ", "final marks", _remark_mark_times);
2751     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2752 
2753   }
2754   print_ms_time_info("  ", "cleanups", _cleanup_times);
2755   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
2756                          _total_counting_time,
2757                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2758                           (double)_cleanup_times.num()
2759                          : 0.0));
2760   if (G1ScrubRemSets) {
2761     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2762                            _total_rs_scrub_time,
2763                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2764                             (double)_cleanup_times.num()
2765                            : 0.0));
2766   }
2767   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
2768                          (_init_times.sum() + _remark_times.sum() +
2769                           _cleanup_times.sum())/1000.0);
2770   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
2771                 "(%8.2f s marking, %8.2f s counting).",
2772                 cmThread()->vtime_accum(),
2773                 cmThread()->vtime_mark_accum(),
2774                 cmThread()->vtime_count_accum());
2775 }
2776 
2777 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2778   _parallel_workers->print_worker_threads_on(st);
2779 }
2780 
2781 // Closures
2782 // XXX: there seems to be a lot of code duplication here;
2783 // should refactor and consolidate the shared code.
2784 
2787 
2788 // We take a break if someone is trying to stop the world.
2789 bool ConcurrentMark::do_yield_check(int worker_i) {
2790   if (should_yield()) {
2791     if (worker_i == 0)
2792       _g1h->g1_policy()->record_concurrent_pause();
2793     cmThread()->yield();
2794     if (worker_i == 0)
2795       _g1h->g1_policy()->record_concurrent_pause_end();
2796     return true;
2797   } else {
2798     return false;
2799   }
2800 }
2801 
2802 bool ConcurrentMark::should_yield() {
2803   return cmThread()->should_yield();
2804 }
2805 
2806 bool ConcurrentMark::containing_card_is_marked(void* p) {
2807   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
2808   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
2809 }
2810 
2811 bool ConcurrentMark::containing_cards_are_marked(void* start,
2812                                                  void* last) {
2813   return
2814     containing_card_is_marked(start) &&
2815     containing_card_is_marked(last);
2816 }
2817 
2818 #ifndef PRODUCT
2819 // for debugging purposes
2820 void ConcurrentMark::print_finger() {
2821   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
2822                          _heap_start, _heap_end, _finger);
2823   for (int i = 0; i < (int) _max_task_num; ++i) {
2824     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
2825   }
2826   gclog_or_tty->print_cr("");
2827 }
2828 #endif
2829 
2830 // Closure for iteration over bitmaps
2831 class CMBitMapClosure : public BitMapClosure {
2832 private:
2833   // the bitmap that is being iterated over
2834   CMBitMap*                   _nextMarkBitMap;
2835   ConcurrentMark*             _cm;
2836   CMTask*                     _task;
2837   // true if we're scanning a heap region claimed by the task (so that
2838   // we move the finger along), false if we're not, i.e. currently when
2839   // scanning a heap region popped from the region stack (so that we
2840   // do not move the task finger along; it'd be a mistake if we did so).
2841   bool                        _scanning_heap_region;
2842 
2843 public:
2844   CMBitMapClosure(CMTask *task,
2845                   ConcurrentMark* cm,
2846                   CMBitMap* nextMarkBitMap)
2847     :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2848 
2849   void set_scanning_heap_region(bool scanning_heap_region) {
2850     _scanning_heap_region = scanning_heap_region;
2851   }
2852 
2853   bool do_bit(size_t offset) {
2854     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2855     assert(_nextMarkBitMap->isMarked(addr), "invariant");
2856     assert( addr < _cm->finger(), "invariant");
2857 
2858     if (_scanning_heap_region) {
2859       statsOnly( _task->increase_objs_found_on_bitmap() );
2860       assert(addr >= _task->finger(), "invariant");
2861       // We move that task's local finger along.
2862       _task->move_finger_to(addr);
2863     } else {
2864       // We move the task's region finger along.
2865       _task->move_region_finger_to(addr);
2866     }
2867 
2868     _task->scan_object(oop(addr));
2869     // we only partially drain the local queue and global stack
2870     _task->drain_local_queue(true);
2871     _task->drain_global_stack(true);
2872 
2873     // if the has_aborted flag has been raised, we need to bail out of
2874     // the iteration
2875     return !_task->has_aborted();
2876   }
2877 };
2878 
2879 // Closure for iterating over objects, currently only used for
2880 // processing SATB buffers.
2881 class CMObjectClosure : public ObjectClosure {
2882 private:
2883   CMTask* _task;
2884 
2885 public:
2886   void do_object(oop obj) {
2887     _task->deal_with_reference(obj);
2888   }
2889 
2890   CMObjectClosure(CMTask* task) : _task(task) { }
2891 };
2892 
2893 // Closure for iterating over object fields
2894 class CMOopClosure : public OopClosure {
2895 private:
2896   G1CollectedHeap*   _g1h;
2897   ConcurrentMark*    _cm;
2898   CMTask*            _task;
2899 
2900 public:
2901   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2902   virtual void do_oop(      oop* p) { do_oop_work(p); }
2903 
2904   template <class T> void do_oop_work(T* p) {
2905     assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
2906     assert(!_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(),
2907            "invariant");
2908 
2909     oop obj = oopDesc::load_decode_heap_oop(p);
2910     if (_cm->verbose_high())
2911       gclog_or_tty->print_cr("[%d] we're looking at location "
2912                              "*"PTR_FORMAT" = "PTR_FORMAT,
2913                              _task->task_id(), p, (void*) obj);
2914     _task->deal_with_reference(obj);
2915   }
2916 
2917   CMOopClosure(G1CollectedHeap* g1h,
2918                ConcurrentMark* cm,
2919                CMTask* task)
2920     : _g1h(g1h), _cm(cm), _task(task) { }
2921 };
2922 
2923 void CMTask::setup_for_region(HeapRegion* hr) {
2924   // Separated the asserts so that we know which one fires.
2925   assert(hr != NULL,
2926         "claim_region() should have filtered out continues humongous regions");
2927   assert(!hr->continuesHumongous(),
2928         "claim_region() should have filtered out continues humongous regions");
2929 
2930   if (_cm->verbose_low())
2931     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
2932                            _task_id, hr);
2933 
2934   _curr_region  = hr;
2935   _finger       = hr->bottom();
2936   update_region_limit();
2937 }
2938 
2939 void CMTask::update_region_limit() {
2940   HeapRegion* hr            = _curr_region;
2941   HeapWord* bottom          = hr->bottom();
2942   HeapWord* limit           = hr->next_top_at_mark_start();
2943 
2944   if (limit == bottom) {
2945     if (_cm->verbose_low())
2946       gclog_or_tty->print_cr("[%d] found an empty region "
2947                              "["PTR_FORMAT", "PTR_FORMAT")",
2948                              _task_id, bottom, limit);
2949     // The region was collected underneath our feet.
2950     // We set the finger to bottom to ensure that the bitmap
2951     // iteration that will follow this will not do anything.
2952     // (this condition does not hold when we first set the region up,
2953     // as the region is not supposed to be empty at that point)
2954     _finger = bottom;
2955   } else if (limit >= _region_limit) {
2956     assert(limit >= _finger, "peace of mind");
2957   } else {
2958     assert(limit < _region_limit, "only way to get here");
2959     // This can happen under some pretty unusual circumstances.  An
2960     // evacuation pause empties the region underneath our feet (NTAMS
2961     // at bottom). We then do some allocation in the region (NTAMS
2962     // stays at bottom), followed by the region being used as a GC
2963     // alloc region (NTAMS will move to top() and the objects
2964     // originally below it will be grayed). All objects now marked in
2965     // the region are explicitly grayed, if below the global finger,
2966     // and in fact we do not need to scan anything else. So, we simply
2967     // set _finger to be limit to ensure that the bitmap iteration
2968     // doesn't do anything.
2969     _finger = limit;
2970   }
2971 
2972   _region_limit = limit;
2973 }
2974 
2975 void CMTask::giveup_current_region() {
2976   assert(_curr_region != NULL, "invariant");
2977   if (_cm->verbose_low())
2978     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
2979                            _task_id, _curr_region);
2980   clear_region_fields();
2981 }
2982 
2983 void CMTask::clear_region_fields() {
2984   // Values for these three fields that indicate that we're not
2985   // holding on to a region.
2986   _curr_region   = NULL;
2987   _finger        = NULL;
2988   _region_limit  = NULL;
2989 
2990   _region_finger = NULL;
2991 }
2992 
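     // Re-initialise all per-task state and adopt the given next marking
     // bitmap.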
2993 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2994   guarantee(nextMarkBitMap != NULL, "invariant");
2995 
2996   if (_cm->verbose_low())
2997     gclog_or_tty->print_cr("[%d] resetting", _task_id);
2998 
2999   _nextMarkBitMap                = nextMarkBitMap;
3000   clear_region_fields();
3001   clear_aborted_region();
3002 
3003   _calls                         = 0;
3004   _elapsed_time_ms               = 0.0;
3005   _termination_time_ms           = 0.0;
3006   _termination_start_time_ms     = 0.0;
3007 
3008 #if _MARKING_STATS_
3009   _local_pushes                  = 0;
3010   _local_pops                    = 0;
3011   _local_max_size                = 0;
3012   _objs_scanned                  = 0;
3013   _global_pushes                 = 0;
3014   _global_pops                   = 0;
3015   _global_max_size               = 0;
3016   _global_transfers_to           = 0;
3017   _global_transfers_from         = 0;
3018   _region_stack_pops             = 0;
3019   _regions_claimed               = 0;
3020   _objs_found_on_bitmap          = 0;
3021   _satb_buffers_processed        = 0;
3022   _steal_attempts                = 0;
3023   _steals                        = 0;
3024   _aborted                       = 0;
3025   _aborted_overflow              = 0;
3026   _aborted_cm_aborted            = 0;
3027   _aborted_yield                 = 0;
3028   _aborted_timed_out             = 0;
3029   _aborted_satb                  = 0;
3030   _aborted_termination           = 0;
3031 #endif // _MARKING_STATS_
3032 }
3033 
3034 bool CMTask::should_exit_termination() {
3035   regular_clock_call();
3036   // This is called when we are in the termination protocol. We should
3037   // quit if, for some reason, this task wants to abort or the global
3038   // stack is not empty (this means that we can get work from it).
3039   return !_cm->mark_stack_empty() || has_aborted();
3040 }
3041 
3042 // This determines whether the method below will check both the local
3043 // and global fingers when determining whether to push on the stack a
3044 // gray object (value 1) or whether it will only check the global one
3045 // (value 0). The tradeoffs are that the former will be a bit more
3046 // accurate and possibly push fewer entries on the stack, but it might also be
3047 // a little bit slower.
3048 
3049 #define _CHECK_BOTH_FINGERS_      1
3050 
3051 void CMTask::deal_with_reference(oop obj) {
3052   if (_cm->verbose_high())
3053     gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
3054                            _task_id, (void*) obj);
3055 
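       // This counter, together with _words_scanned, drives the work-based
       // clock scheme described in the comment above do_marking_step().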
3056   ++_refs_reached;
3057 
3058   HeapWord* objAddr = (HeapWord*) obj;
3059   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
3060   if (_g1h->is_in_g1_reserved(objAddr)) {
3061     assert(obj != NULL, "is_in_g1_reserved should ensure this");
3062     HeapRegion* hr =  _g1h->heap_region_containing(obj);
3063     if (_g1h->is_obj_ill(obj, hr)) {
3064       if (_cm->verbose_high())
3065         gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
3066                                _task_id, (void*) obj);
3067 
3068       // we need to mark it first
3069       if (_nextMarkBitMap->parMark(objAddr)) {
3070         // No OrderAccess::store_load() is needed. It is implicit in the
3071         // CAS done in parMark(objAddr) above
3072         HeapWord* global_finger = _cm->finger();
3073 
3074 #if _CHECK_BOTH_FINGERS_
3075         // we will check both the local and global fingers
3076 
3077         if (_finger != NULL && objAddr < _finger) {
3078           if (_cm->verbose_high())
3079             gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
3080                                    "pushing it", _task_id, _finger);
3081           push(obj);
3082         } else if (_curr_region != NULL && objAddr < _region_limit) {
3083           // do nothing
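               // (the object is in the as yet unscanned part of the region
               // this task currently holds, so the task's own bitmap
               // iteration will visit it; marking it was enough)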
3084         } else if (objAddr < global_finger) {
3085           // Notice that the global finger might be moving forward
3086           // concurrently. This is not a problem. In the worst case, we
3087           // mark the object while it is above the global finger and, by
3088           // the time we read the global finger, it has moved forward
3089           // past this object. In this case, the object will probably
3090           // be visited when a task is scanning the region and will also
3091           // be pushed on the stack. So, some duplicate work, but no
3092           // correctness problems.
3093 
3094           if (_cm->verbose_high())
3095             gclog_or_tty->print_cr("[%d] below the global finger "
3096                                    "("PTR_FORMAT"), pushing it",
3097                                    _task_id, global_finger);
3098           push(obj);
3099         } else {
3100           // do nothing
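               // (the object is above the global finger, so whichever task
               // eventually claims the region containing it will find it
               // already marked on the bitmap and scan it then)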
3101         }
3102 #else // _CHECK_BOTH_FINGERS_
3103         // we will only check the global finger
3104 
3105         if (objAddr < global_finger) {
3106           // see long comment above
3107 
3108           if (_cm->verbose_high())
3109             gclog_or_tty->print_cr("[%d] below the global finger "
3110                                    "("PTR_FORMAT"), pushing it",
3111                                    _task_id, global_finger);
3112           push(obj);
3113         }
3114 #endif // _CHECK_BOTH_FINGERS_
3115       }
3116     }
3117   }
3118 }
3119 
3120 void CMTask::push(oop obj) {
3121   HeapWord* objAddr = (HeapWord*) obj;
3122   assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
3123   assert(!_g1h->heap_region_containing(objAddr)->is_on_free_list(),
3124          "invariant");
3125   assert(!_g1h->is_obj_ill(obj), "invariant");
3126   assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
3127 
3128   if (_cm->verbose_high())
3129     gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
3130 
3131   if (!_task_queue->push(obj)) {
3132     // The local task queue looks full. We need to push some entries
3133     // to the global stack.
3134 
3135     if (_cm->verbose_medium())
3136       gclog_or_tty->print_cr("[%d] task queue overflow, "
3137                              "moving entries to the global stack",
3138                              _task_id);
3139     move_entries_to_global_stack();
3140 
3141     // this should succeed since, even if we overflow the global
3142     // stack, we should have definitely removed some entries from the
3143     // local queue. So, there must be space on it.
3144     bool success = _task_queue->push(obj);
3145     assert(success, "invariant");
3146   }
3147 
3148   statsOnly( int tmp_size = _task_queue->size();
3149              if (tmp_size > _local_max_size)
3150                _local_max_size = tmp_size;
3151              ++_local_pushes );
3152 }
3153 
3154 void CMTask::reached_limit() {
3155   assert(_words_scanned >= _words_scanned_limit ||
3156          _refs_reached >= _refs_reached_limit ,
3157          "shouldn't have been called otherwise");
3158   regular_clock_call();
3159 }
3160 
3161 void CMTask::regular_clock_call() {
3162   if (has_aborted())
3163     return;
3164 
3165   // First, we need to recalculate the words scanned and refs reached
3166   // limits for the next clock call.
3167   recalculate_limits();
3168 
3169   // During the regular clock call we do the following
3170 
3171   // (1) If an overflow has been flagged, then we abort.
3172   if (_cm->has_overflown()) {
3173     set_has_aborted();
3174     return;
3175   }
3176 
3177   // If we are not concurrent (i.e. we're doing remark) we don't need
3178   // to check anything else. The other steps are only needed during
3179   // the concurrent marking phase.
3180   if (!concurrent())
3181     return;
3182 
3183   // (2) If marking has been aborted for Full GC, then we also abort.
3184   if (_cm->has_aborted()) {
3185     set_has_aborted();
3186     statsOnly( ++_aborted_cm_aborted );
3187     return;
3188   }
3189 
3190   double curr_time_ms = os::elapsedVTime() * 1000.0;
3191 
3192   // (3) If marking stats are enabled, then we update the clock statistics.
3193 #if _MARKING_STATS_
3194   if (_words_scanned >= _words_scanned_limit)
3195     ++_clock_due_to_scanning;
3196   if (_refs_reached >= _refs_reached_limit)
3197     ++_clock_due_to_marking;
3198 
3199   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3200   _interval_start_time_ms = curr_time_ms;
3201   _all_clock_intervals_ms.add(last_interval_ms);
3202 
3203   if (_cm->verbose_medium()) {
3204     gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3205                            "scanned = %d%s, refs reached = %d%s",
3206                            _task_id, last_interval_ms,
3207                            _words_scanned,
3208                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3209                            _refs_reached,
3210                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3211   }
3212 #endif // _MARKING_STATS_
3213 
3214   // (4) We check whether we should yield. If we have to, then we abort.
3215   if (_cm->should_yield()) {
3216     // We should yield. To do this we abort the task. The caller is
3217     // responsible for yielding.
3218     set_has_aborted();
3219     statsOnly( ++_aborted_yield );
3220     return;
3221   }
3222 
3223   // (5) We check whether we've reached our time quota. If we have,
3224   // then we abort.
3225   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3226   if (elapsed_time_ms > _time_target_ms) {
3227     set_has_aborted();
3228     _has_aborted_timed_out = true;
3229     statsOnly( ++_aborted_timed_out );
3230     return;
3231   }
3232 
3233   // (6) Finally, we check whether there are enough completed SATB
3234   // buffers available for processing. If there are, we abort.
3235   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3236   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3237     if (_cm->verbose_low())
3238       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3239                              _task_id);
3240     // we do need to process SATB buffers, we'll abort and restart
3241     // the marking task to do so
3242     set_has_aborted();
3243     statsOnly( ++_aborted_satb );
3244     return;
3245   }
3246 }
3247 
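     // Push both limits one full period past the current counts, so that
     // the work-based clock fires again after roughly another period's
     // worth of scanning / marking work.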
3248 void CMTask::recalculate_limits() {
3249   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3250   _words_scanned_limit      = _real_words_scanned_limit;
3251 
3252   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3253   _refs_reached_limit       = _real_refs_reached_limit;
3254 }
3255 
3256 void CMTask::decrease_limits() {
3257   // This is called when we believe that we're going to do an infrequent
3258   // operation which will increase the per-byte scanning cost (i.e. move
3259   // entries to/from the global stack). It lowers the scanning limits so
3260   // that the clock method is called earlier.
3261 
3262   if (_cm->verbose_medium())
3263     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3264 
3265   _words_scanned_limit = _real_words_scanned_limit -
3266     3 * words_scanned_period / 4;
3267   _refs_reached_limit  = _real_refs_reached_limit -
3268     3 * refs_reached_period / 4;
3269 }
3270 
3271 void CMTask::move_entries_to_global_stack() {
3272   // local array where we'll store the entries that will be popped
3273   // from the local queue
3274   oop buffer[global_stack_transfer_size];
3275 
3276   int n = 0;
3277   oop obj;
3278   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3279     buffer[n] = obj;
3280     ++n;
3281   }
3282 
3283   if (n > 0) {
3284     // we popped at least one entry from the local queue
3285 
3286     statsOnly( ++_global_transfers_to; _local_pops += n );
3287 
3288     if (!_cm->mark_stack_push(buffer, n)) {
3289       if (_cm->verbose_low())
3290         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
3291       set_has_aborted();
3292     } else {
3293       // the transfer was successful
3294 
3295       if (_cm->verbose_medium())
3296         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3297                                _task_id, n);
3298       statsOnly( int tmp_size = _cm->mark_stack_size();
3299                  if (tmp_size > _global_max_size)
3300                    _global_max_size = tmp_size;
3301                  _global_pushes += n );
3302     }
3303   }
3304 
3305   // this operation was quite expensive, so decrease the limits
3306   decrease_limits();
3307 }
3308 
3309 void CMTask::get_entries_from_global_stack() {
3310   // local array where we'll store the entries that will be popped
3311   // from the global stack.
3312   oop buffer[global_stack_transfer_size];
3313   int n;
3314   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3315   assert(n <= global_stack_transfer_size,
3316          "we should not pop more than the given limit");
3317   if (n > 0) {
3318     // yes, we did actually pop at least one entry
3319 
3320     statsOnly( ++_global_transfers_from; _global_pops += n );
3321     if (_cm->verbose_medium())
3322       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3323                              _task_id, n);
3324     for (int i = 0; i < n; ++i) {
3325       bool success = _task_queue->push(buffer[i]);
3326       // We only call this when the local queue is empty or under a
3327       // given target limit. So, we do not expect this push to fail.
3328       assert(success, "invariant");
3329     }
3330 
3331     statsOnly( int tmp_size = _task_queue->size();
3332                if (tmp_size > _local_max_size)
3333                  _local_max_size = tmp_size;
3334                _local_pushes += n );
3335   }
3336 
3337   // this operation was quite expensive, so decrease the limits
3338   decrease_limits();
3339 }
3340 
3341 void CMTask::drain_local_queue(bool partially) {
3342   if (has_aborted())
3343     return;
3344 
3345   // Decide what the target size is, depending on whether we're going to
3346   // drain it partially (so that other tasks can steal if they run out
3347   // of things to do) or totally (at the very end).
3348   size_t target_size;
3349   if (partially)
3350     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3351   else
3352     target_size = 0;
3353 
3354   if (_task_queue->size() > target_size) {
3355     if (_cm->verbose_high())
3356       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3357                              _task_id, target_size);
3358 
3359     oop obj;
3360     bool ret = _task_queue->pop_local(obj);
3361     while (ret) {
3362       statsOnly( ++_local_pops );
3363 
3364       if (_cm->verbose_high())
3365         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3366                                (void*) obj);
3367 
3368       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3369       assert(!_g1h->heap_region_containing(obj)->is_on_free_list(),
3370              "invariant");
3371 
3372       scan_object(obj);
3373 
3374       if (_task_queue->size() <= target_size || has_aborted())
3375         ret = false;
3376       else
3377         ret = _task_queue->pop_local(obj);
3378     }
3379 
3380     if (_cm->verbose_high())
3381       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3382                              _task_id, _task_queue->size());
3383   }
3384 }
3385 
3386 void CMTask::drain_global_stack(bool partially) {
3387   if (has_aborted())
3388     return;
3389 
3390   // We have a policy to drain the local queue before we attempt to
3391   // drain the global stack.
3392   assert(partially || _task_queue->size() == 0, "invariant");
3393 
3394   // Decide what the target size is, depending on whether we're going to
3395   // drain it partially (so that other tasks can steal if they run out
3396   // of things to do) or totally (at the very end).  Notice that,
3397   // because we move entries from the global stack in chunks or
3398   // because another task might be doing the same, we might in fact
3399   // drop below the target. But, this is not a problem.
3400   size_t target_size;
3401   if (partially)
3402     target_size = _cm->partial_mark_stack_size_target();
3403   else
3404     target_size = 0;
3405 
3406   if (_cm->mark_stack_size() > target_size) {
3407     if (_cm->verbose_low())
3408       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3409                              _task_id, target_size);
3410 
3411     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3412       get_entries_from_global_stack();
3413       drain_local_queue(partially);
3414     }
3415 
3416     if (_cm->verbose_low())
3417       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3418                              _task_id, _cm->mark_stack_size());
3419   }
3420 }
3421 
3422 // SATB Queue has several assumptions on whether to call the par or
3423 // non-par versions of the methods. This is why some of the code is
3424 // replicated. We should really get rid of the single-threaded version
3425 // of the code to simplify things.
3426 void CMTask::drain_satb_buffers() {
3427   if (has_aborted())
3428     return;
3429 
3430   // We set this so that the regular clock knows that we're in the
3431   // middle of draining buffers and doesn't set the abort flag when it
3432   // notices that SATB buffers are available for draining. It'd be
3433   // very counterproductive if it did that. :-)
3434   _draining_satb_buffers = true;
3435 
3436   CMObjectClosure oc(this);
3437   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3438   if (G1CollectedHeap::use_parallel_gc_threads())
3439     satb_mq_set.set_par_closure(_task_id, &oc);
3440   else
3441     satb_mq_set.set_closure(&oc);
3442 
3443   // This keeps claiming and applying the closure to completed buffers
3444   // until we run out of buffers or we need to abort.
3445   if (G1CollectedHeap::use_parallel_gc_threads()) {
3446     while (!has_aborted() &&
3447            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3448       if (_cm->verbose_medium())
3449         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3450       statsOnly( ++_satb_buffers_processed );
3451       regular_clock_call();
3452     }
3453   } else {
3454     while (!has_aborted() &&
3455            satb_mq_set.apply_closure_to_completed_buffer()) {
3456       if (_cm->verbose_medium())
3457         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3458       statsOnly( ++_satb_buffers_processed );
3459       regular_clock_call();
3460     }
3461   }
3462 
3463   if (!concurrent() && !has_aborted()) {
3464     // We should only do this during remark.
3465     if (G1CollectedHeap::use_parallel_gc_threads())
3466       satb_mq_set.par_iterate_closure_all_threads(_task_id);
3467     else
3468       satb_mq_set.iterate_closure_all_threads();
3469   }
3470 
3471   _draining_satb_buffers = false;
3472 
3473   assert(has_aborted() ||
3474          concurrent() ||
3475          satb_mq_set.completed_buffers_num() == 0, "invariant");
3476 
3477   if (G1CollectedHeap::use_parallel_gc_threads())
3478     satb_mq_set.set_par_closure(_task_id, NULL);
3479   else
3480     satb_mq_set.set_closure(NULL);
3481 
3482   // again, this was a potentially expensive operation, decrease the
3483   // limits to get the regular clock call early
3484   decrease_limits();
3485 }
3486 
3487 void CMTask::drain_region_stack(BitMapClosure* bc) {
3488   if (has_aborted())
3489     return;
3490 
3491   assert(_region_finger == NULL,
3492          "it should be NULL when we're not scanning a region");
3493 
3494   if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
3495     if (_cm->verbose_low())
3496       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3497                              _task_id, _cm->region_stack_size());
3498 
3499     MemRegion mr;
3500 
3501     if (!_aborted_region.is_empty()) {
3502       mr = _aborted_region;
3503       _aborted_region = MemRegion();
3504 
3505       if (_cm->verbose_low())
3506         gclog_or_tty->print_cr("[%d] scanning aborted region [ " PTR_FORMAT ", " PTR_FORMAT " )",
3507                              _task_id, mr.start(), mr.end());
3508     } else {
3509       mr = _cm->region_stack_pop_lock_free();
3510       // it returns MemRegion() if the pop fails
3511       statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3512     }
3513 
3514     while (mr.start() != NULL) {
3515       if (_cm->verbose_medium())
3516         gclog_or_tty->print_cr("[%d] we are scanning region "
3517                                "["PTR_FORMAT", "PTR_FORMAT")",
3518                                _task_id, mr.start(), mr.end());
3519 
3520       assert(mr.end() <= _cm->finger(),
3521              "otherwise the region shouldn't be on the stack");
3522       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3523       if (_nextMarkBitMap->iterate(bc, mr)) {
3524         assert(!has_aborted(),
3525                "cannot abort the task without aborting the bitmap iteration");
3526 
3527         // We finished iterating over the region without aborting.
3528         regular_clock_call();
3529         if (has_aborted())
3530           mr = MemRegion();
3531         else {
3532           mr = _cm->region_stack_pop_lock_free();
3533           // it returns MemRegion() if the pop fails
3534           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3535         }
3536       } else {
3537         assert(has_aborted(), "currently the only way to do so");
3538 
3539         // The only way to abort the bitmap iteration is to return
3540         // false from the do_bit() method. However, inside the
3541         // do_bit() method we move the _region_finger to point to the
3542         // object currently being looked at. So, if we bail out, we
3543         // have definitely set _region_finger to something non-null.
3544         assert(_region_finger != NULL, "invariant");
3545 
3546         // Make sure that any previously aborted region has been
3547         // cleared.
3548         assert(_aborted_region.is_empty(), "aborted region not cleared");
3549 
3550         // The iteration was actually aborted. So now _region_finger
3551         // points to the address of the object we last scanned. If we
3552         // leave it there, when we restart this task, we will rescan
3553         // the object. It is easy to avoid this. We move the finger by
3554         // enough to point to the next possible object header (the
3555         // bitmap knows by how much we need to move it as it knows its
3556         // granularity).
3557         MemRegion newRegion =
3558           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3559 
3560         if (!newRegion.is_empty()) {
3561           if (_cm->verbose_low()) {
3562             gclog_or_tty->print_cr("[%d] recording unscanned region"
3563                                    "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
3564                                    _task_id,
3565                                    newRegion.start(), newRegion.end());
3566           }
3567           // Now record the part of the region we didn't scan to
3568           // make sure this task scans it later.
3569           _aborted_region = newRegion;
3570         }
3571         // break from while
3572         mr = MemRegion();
3573       }
3574       _region_finger = NULL;
3575     }
3576 
3577     if (_cm->verbose_low())
3578       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3579                              _task_id, _cm->region_stack_size());
3580   }
3581 }
3582 
3583 void CMTask::print_stats() {
3584   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3585                          _task_id, _calls);
3586   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3587                          _elapsed_time_ms, _termination_time_ms);
3588   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3589                          _step_times_ms.num(), _step_times_ms.avg(),
3590                          _step_times_ms.sd());
3591   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3592                          _step_times_ms.maximum(), _step_times_ms.sum());
3593 
3594 #if _MARKING_STATS_
3595   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3596                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3597                          _all_clock_intervals_ms.sd());
3598   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3599                          _all_clock_intervals_ms.maximum(),
3600                          _all_clock_intervals_ms.sum());
3601   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
3602                          _clock_due_to_scanning, _clock_due_to_marking);
3603   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
3604                          _objs_scanned, _objs_found_on_bitmap);
3605   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
3606                          _local_pushes, _local_pops, _local_max_size);
3607   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
3608                          _global_pushes, _global_pops, _global_max_size);
3609   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
3610                          _global_transfers_to,_global_transfers_from);
3611   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
3612                          _regions_claimed, _region_stack_pops);
3613   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
3614   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
3615                          _steal_attempts, _steals);
3616   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
3617   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
3618                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3619   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
3620                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3621 #endif // _MARKING_STATS_
3622 }
3623 
3624 /*****************************************************************************
3625 
3626     The do_marking_step(time_target_ms) method is the building block
3627     of the parallel marking framework. It can be called in parallel
3628     with other invocations of do_marking_step() on different tasks
3629     (but only one per task, obviously) and concurrently with the
3630     mutator threads, or during remark, hence it eliminates the need
3631     for two versions of the code. When called during remark, it will
3632     pick up from where the task left off during the concurrent marking
3633     phase. Interestingly, tasks are also claimable during evacuation
3634     pauses, since do_marking_step() ensures that it aborts before
3635     it needs to yield.
3636 
3637     The data structures that it uses to do marking work are the
3638     following:
3639 
3640       (1) Marking Bitmap. If there are gray objects that appear only
3641       on the bitmap (this happens either when dealing with an overflow
3642       or when the initial marking phase has simply marked the roots
3643       and didn't push them on the stack), then tasks claim heap
3644       regions whose bitmap they then scan to find gray objects. A
3645       global finger indicates where the end of the last claimed region
3646       is. A local finger indicates how far into the region a task has
3647       scanned. The two fingers are used to determine how to gray an
3648       object (i.e. whether simply marking it is OK, as it will be
3649       visited by a task in the future, or whether it needs to be also
3650       pushed on a stack).
3651 
3652       (2) Local Queue. The local queue of the task which is accessed
3653       reasonably efficiently by the task. Other tasks can steal from
3654       it when they run out of work. Throughout the marking phase, a
3655       task attempts to keep its local queue short but not totally
3656       empty, so that entries are available for stealing by other
3657       tasks. Only when there is no more work will a task totally
3658       drain its local queue.
3659 
3660       (3) Global Mark Stack. This handles local queue overflow. During
3661       marking only sets of entries are moved between it and the local
3662       queues, as access to it requires a mutex and more fine-grained
3663       interaction with it might cause contention. If it
3664       overflows, then the marking phase should restart and iterate
3665       over the bitmap to identify gray objects. Throughout the marking
3666       phase, tasks attempt to keep the global mark stack at a small
3667       length but not totally empty, so that entries are available for
3668       popping by other tasks. Only when there is no more work will
3669       tasks totally drain the global mark stack.
3670 
3671       (4) Global Region Stack. Entries on it correspond to areas of
3672       the bitmap that need to be scanned since they contain gray
3673       objects. Pushes on the region stack only happen during
3674       evacuation pauses and typically correspond to areas covered by
3675       GC LABs. If it overflows, then the marking phase should restart
3676       and iterate over the bitmap to identify gray objects. Tasks will
3677       try to totally drain the region stack as soon as possible.
3678 
3679       (5) SATB Buffer Queue. This is where completed SATB buffers are
3680       made available. Buffers are regularly removed from this queue
3681       and scanned for roots, so that the queue doesn't get too
3682       long. During remark, all completed buffers are processed, as
3683       well as the filled-in parts of any uncompleted buffers.
3684 
3685     The do_marking_step() method tries to abort when the time target
3686     has been reached. There are a few other cases when the
3687     do_marking_step() method also aborts:
3688 
3689       (1) When the marking phase has been aborted (after a Full GC).
3690 
3691       (2) When a global overflow (either on the global stack or the
3692       region stack) has been triggered. Before the task aborts, it
3693       will actually sync up with the other tasks to ensure that all
3694       the marking data structures (local queues, stacks, fingers etc.)
3695       are re-initialised so that when do_marking_step() completes,
3696       the marking phase can immediately restart.
3697 
3698       (3) When enough completed SATB buffers are available. The
3699       do_marking_step() method only tries to drain SATB buffers right
3700       at the beginning. So, if enough buffers are available, the
3701       marking step aborts and the SATB buffers are processed at
3702       the beginning of the next invocation.
3703 
3704       (4) To yield. When we have to yield, we abort and do the yield
3705       right at the end of do_marking_step(). This saves us a lot of
3706       hassle as, by yielding, we might allow a Full GC. If this
3707       happens, then objects will be compacted underneath our feet, the
3708       heap might shrink, etc. We avoid having to check for all this by
3709       just aborting and doing the yield right at the end.
3710 
3711     From the above it follows that the do_marking_step() method should
3712     be called in a loop (or, otherwise, regularly) until it completes.
3713 
3714     If a marking step completes without its has_aborted() flag being
3715     true, it means it has completed the current marking phase (and
3716     also all other marking tasks have done so and have all synced up).
3717 
3718     A method called regular_clock_call() is invoked "regularly" (in
3719     sub-ms intervals) throughout marking. It is this clock method that
3720     checks all the abort conditions which were mentioned above and
3721     decides when the task should abort. A work-based scheme is used to
3722     trigger this clock method: it is called when the number of object
3723     words scanned or the number of references visited by the marking
3724     phase reaches a given limit. Additional invocations of the clock
3725     method have been planted in a few other strategic places
3726     too. The initial reason for the clock method was to avoid calling
3727     vtime too regularly, as it is quite expensive. So, once it was in
3728     place, it was natural to piggy-back all the other conditions on it
3729     too and not constantly check them throughout the code.
3730 
3731  *****************************************************************************/
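     // A minimal sketch of the driver loop implied by the comment above
     // (illustrative only; target_ms stands for whatever time target the
     // caller picks, and the real callers also handle yielding and the
     // overflow protocol):
     //
     //   do {
     //     task->do_marking_step(target_ms);
     //     // ...yield / synchronize with the other tasks as needed...
     //   } while (task->has_aborted());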
3732 
3733 void CMTask::do_marking_step(double time_target_ms) {
3734   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3735   assert(concurrent() == _cm->concurrent(), "they should be the same");
3736 
3737   assert(concurrent() || _cm->region_stack_empty(),
3738          "the region stack should have been cleared before remark");
3739   assert(concurrent() || !_cm->has_aborted_regions(),
3740          "aborted regions should have been cleared before remark");
3741   assert(_region_finger == NULL,
3742          "this should be non-null only when a region is being scanned");
3743 
3744   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3745   assert(_task_queues != NULL, "invariant");
3746   assert(_task_queue != NULL, "invariant");
3747   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
3748 
3749   assert(!_claimed,
3750          "only one thread should claim this task at any one time");
3751 
3752   // OK, this doesn't safeguard against all possible scenarios, as it is
3753   // possible for two threads to set the _claimed flag at the same
3754   // time. But it is only for debugging purposes anyway and it will
3755   // catch most problems.
3756   _claimed = true;
3757 
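       // os::elapsedVTime() returns this thread's consumed CPU (virtual)
       // time in seconds, hence the conversions to ms here and below.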
3758   _start_time_ms = os::elapsedVTime() * 1000.0;
3759   statsOnly( _interval_start_time_ms = _start_time_ms );
3760 
3761   double diff_prediction_ms =
3762     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3763   _time_target_ms = time_target_ms - diff_prediction_ms;
3764 
3765   // set up the variables that are used in the work-based scheme to
3766   // call the regular clock method
3767   _words_scanned = 0;
3768   _refs_reached  = 0;
3769   recalculate_limits();
3770 
3771   // clear all flags
3772   clear_has_aborted();
3773   _has_aborted_timed_out = false;
3774   _draining_satb_buffers = false;
3775 
3776   ++_calls;
3777 
3778   if (_cm->verbose_low())
3779     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3780                            "target = %1.2lfms >>>>>>>>>>",
3781                            _task_id, _calls, _time_target_ms);
3782 
3783   // Set up the bitmap and oop closures. Anything that uses them is
3784   // eventually called from this method, so it is OK to allocate these
3785   // on this method's stack frame.
3786   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3787   CMOopClosure    oop_closure(_g1h, _cm, this);
3788   set_oop_closure(&oop_closure);
3789 
3790   if (_cm->has_overflown()) {
3791     // This can happen if the region stack or the mark stack overflows
3792     // during a GC pause and this task, after a yield point,
3793     // restarts. We have to abort as we need to get into the overflow
3794     // protocol which happens right at the end of this task.
3795     set_has_aborted();
3796   }
3797 
3798   // First drain any available SATB buffers. After this, we will not
3799   // look at SATB buffers before the next invocation of this method.
3800   // If enough completed SATB buffers are queued up, the regular clock
3801   // will abort this task so that it restarts.
3802   drain_satb_buffers();
3803   // ...then partially drain the local queue and the global stack
3804   drain_local_queue(true);
3805   drain_global_stack(true);
3806 
3807   // Then totally drain the region stack.  We will not look at
3808   // it again before the next invocation of this method. Entries on
3809   // the region stack are only added during evacuation pauses, for
3810   // which we have to yield. When we do, we abort the task anyway so
3811   // it will look at the region stack again when it restarts.
3812   bitmap_closure.set_scanning_heap_region(false);
3813   drain_region_stack(&bitmap_closure);
3814   // ...then partially drain the local queue and the global stack
3815   drain_local_queue(true);
3816   drain_global_stack(true);
3817 
3818   do {
3819     if (!has_aborted() && _curr_region != NULL) {
3820       // This means that we're already holding on to a region.
3821       assert(_finger != NULL, "if region is not NULL, then the finger "
3822              "should not be NULL either");
3823 
3824       // We might have restarted this task after an evacuation pause
3825       // which might have evacuated the region we're holding on to
3826       // underneath our feet. Let's read its limit again to make sure
3827       // that we do not iterate over a region of the heap that
3828       // contains garbage (update_region_limit() will also move
3829       // _finger to the start of the region if it is found empty).
3830       update_region_limit();
3831       // We will start from _finger not from the start of the region,
3832       // as we might be restarting this task after aborting half-way
3833       // through scanning this region. In this case, _finger points to
3834       // the address where we last found a marked object. If this is a
3835       // fresh region, _finger points to the region's bottom().
3836       MemRegion mr = MemRegion(_finger, _region_limit);
3837 
3838       if (_cm->verbose_low())
3839         gclog_or_tty->print_cr("[%d] we're scanning part "
3840                                "["PTR_FORMAT", "PTR_FORMAT") "
3841                                "of region "PTR_FORMAT,
3842                                _task_id, _finger, _region_limit, _curr_region);
3843 
3844       // Let's iterate over the bitmap of the part of the
3845       // region that is left.
3846       bitmap_closure.set_scanning_heap_region(true);
3847       if (mr.is_empty() ||
3848           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3849         // We successfully completed iterating over the region. Now,
3850         // let's give up the region.
3851         giveup_current_region();
3852         regular_clock_call();
3853       } else {
3854         assert(has_aborted(), "currently the only way to do so");
3855         // The only way to abort the bitmap iteration is to return
3856         // false from the do_bit() method. However, inside the
3857         // do_bit() method we move the _finger to point to the
3858         // object currently being looked at. So, if we bail out, we
3859         // have definitely set _finger to something non-null.
3860         assert(_finger != NULL, "invariant");
3861 
3862         // Region iteration was actually aborted. So now _finger
3863         // points to the address of the object we last scanned. If we
3864         // leave it there, when we restart this task, we will rescan
3865         // the object. It is easy to avoid this. We move the finger by
3866         // enough to point to the next possible object header (the
3867         // bitmap knows by how much we need to move it as it knows its
3868         // granularity).
3869         assert(_finger < _region_limit, "invariant");
3870         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
3871         // Check if bitmap iteration was aborted while scanning the last object
3872         if (new_finger >= _region_limit) {
3873             giveup_current_region();
3874         } else {
3875             move_finger_to(new_finger);
3876         }
3877       }
3878     }
3879     // At this point we have either completed iterating over the
3880     // region we were holding on to, or we have aborted.
3881 
3882     // We then partially drain the local queue and the global stack.
3883     // (Do we really need this?)
3884     drain_local_queue(true);
3885     drain_global_stack(true);
3886 
3887     // Read the note on the claim_region() method on why it might
3888     // return NULL with potentially more regions available for
3889     // claiming and why we have to check out_of_regions() to determine
3890     // whether we're done or not.
3891     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3892       // We are going to try to claim a new region. We should have
3893       // given up on the previous one.
3894       // Separated the asserts so that we know which one fires.
3895       assert(_curr_region  == NULL, "invariant");
3896       assert(_finger       == NULL, "invariant");
3897       assert(_region_limit == NULL, "invariant");
3898       if (_cm->verbose_low())
3899         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
3900       HeapRegion* claimed_region = _cm->claim_region(_task_id);
3901       if (claimed_region != NULL) {
3902         // Yes, we managed to claim one
3903         statsOnly( ++_regions_claimed );
3904 
3905         if (_cm->verbose_low())
3906           gclog_or_tty->print_cr("[%d] we successfully claimed "
3907                                  "region "PTR_FORMAT,
3908                                  _task_id, claimed_region);
3909 
3910         setup_for_region(claimed_region);
3911         assert(_curr_region == claimed_region, "invariant");
3912       }
3913       // It is important to call the regular clock here. It might take
3914       // a while to claim a region if, for example, we hit a large
3915       // block of empty regions. So we need to call the regular clock
3916       // method once round the loop to make sure it's called
3917       // frequently enough.
3918       regular_clock_call();
3919     }
3920 
3921     if (!has_aborted() && _curr_region == NULL) {
3922       assert(_cm->out_of_regions(),
3923              "at this point we should be out of regions");
3924     }
3925   } while ( _curr_region != NULL && !has_aborted());
3926 
3927   if (!has_aborted()) {
3928     // We cannot check whether the global stack is empty, since other
3929     // tasks might be pushing objects to it concurrently. We also cannot
3930     // check if the region stack is empty because if a thread is aborting
3931     // it can push a partially done region back.
3932     assert(_cm->out_of_regions(),
3933            "at this point we should be out of regions");
3934 
3935     if (_cm->verbose_low())
3936       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
3937 
3938     // Try to reduce the number of available SATB buffers so that
3939     // remark has less work to do.
3940     drain_satb_buffers();
3941   }
3942 
3943   // Since we've done everything else, we can now totally drain the
3944   // local queue and global stack.
3945   drain_local_queue(false);
3946   drain_global_stack(false);
3947 
3948   // Attempt at work stealing from other tasks' queues.
3949   if (!has_aborted()) {
3950     // We have not aborted. This means that we have finished all that
3951     // we could. Let's try to do some stealing...
3952 
3953     // We cannot check whether the global stack is empty, since other
3954     // tasks might be pushing objects to it concurrently. We also cannot
3955     // check if the region stack is empty because if a thread is aborting
3956     // it can push a partially done region back.
3957     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3958            "only way to reach here");
3959 
3960     if (_cm->verbose_low())
3961       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
3962 
3963     while (!has_aborted()) {
3964       oop obj;
3965       statsOnly( ++_steal_attempts );
3966 
3967       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
3968         if (_cm->verbose_medium())
3969           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
3970                                  _task_id, (void*) obj);
3971 
3972         statsOnly( ++_steals );
3973 
3974         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3975                "any stolen object should be marked");
3976         scan_object(obj);
3977 
3978         // And since we're towards the end, let's totally drain the
3979         // local queue and global stack.
3980         drain_local_queue(false);
3981         drain_global_stack(false);
3982       } else {
3983         break;
3984       }
3985     }
3986   }
3987 
3988   // We still haven't aborted. Now, let's try to get into the
3989   // termination protocol.
3990   if (!has_aborted()) {
3991     // We cannot check whether the global stack is empty, since other
3992     // tasks might be concurrently pushing objects on it. We also cannot
3993     // check if the region stack is empty because if a thread is aborting
3994     // it can push a partially done region back.
3995     // Separated the asserts so that we know which one fires.
3996     assert(_cm->out_of_regions(), "only way to reach here");
3997     assert(_task_queue->size() == 0, "only way to reach here");
3998 
3999     if (_cm->verbose_low())
4000       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4001 
4002     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4003     // The CMTask class also extends the TerminatorTerminator class,
4004     // hence its should_exit_termination() method will also decide
4005     // whether to exit the termination protocol or not.
4006     bool finished = _cm->terminator()->offer_termination(this);
4007     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4008     _termination_time_ms +=
4009       termination_end_time_ms - _termination_start_time_ms;
4010 
4011     if (finished) {
4012       // We're all done.
4013 
4014       if (_task_id == 0) {
4015         // let's allow task 0 to do this
4016         if (concurrent()) {
4017           assert(_cm->concurrent_marking_in_progress(), "invariant");
4018           // we need to set this to false before the next
4019           // safepoint. This way we ensure that the marking phase
4020           // doesn't observe any more heap expansions.
4021           _cm->clear_concurrent_marking_in_progress();
4022         }
4023       }
4024 
4025       // We can now guarantee that the global stack is empty, since
4026       // all other tasks have finished. We separated the guarantees so
4027       // that, if a condition is false, we can immediately find out
4028       // which one.
4029       guarantee(_cm->out_of_regions(), "only way to reach here");
4030       guarantee(_aborted_region.is_empty(), "only way to reach here");
4031       guarantee(_cm->region_stack_empty(), "only way to reach here");
4032       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4033       guarantee(_task_queue->size() == 0, "only way to reach here");
4034       guarantee(!_cm->has_overflown(), "only way to reach here");
4035       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4036       guarantee(!_cm->region_stack_overflow(), "only way to reach here");
4037 
4038       if (_cm->verbose_low())
4039         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4040     } else {
4041       // Apparently there's more work to do. Let's abort this task. The
4042       // caller will restart it and we can hopefully find more things to do.
4043 
4044       if (_cm->verbose_low())
4045         gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
4046 
4047       set_has_aborted();
4048       statsOnly( ++_aborted_termination );
4049     }
4050   }
4051 
4052   // Mainly for debugging purposes to make sure that a pointer to the
4053   // closure which was stack-allocated in this frame doesn't
4054   // escape it by accident.
4055   set_oop_closure(NULL);
4056   double end_time_ms = os::elapsedVTime() * 1000.0;
4057   double elapsed_time_ms = end_time_ms - _start_time_ms;
4058   // Update the step history.
4059   _step_times_ms.add(elapsed_time_ms);
4060 
4061   if (has_aborted()) {
4062     // The task was aborted for some reason.
4063 
4064     statsOnly( ++_aborted );
4065 
4066     if (_has_aborted_timed_out) {
4067       double diff_ms = elapsed_time_ms - _time_target_ms;
4068       // Keep statistics of how well we did with respect to hitting
4069       // our target only if we actually timed out (if we aborted for
4070       // other reasons, then the results might get skewed).
4071       _marking_step_diffs_ms.add(diff_ms);
4072     }
4073 
4074     if (_cm->has_overflown()) {
4075       // This is the interesting one. We aborted because a global
4076       // overflow was raised. This means we have to restart the
4077       // marking phase and start iterating over regions. However, in
4078       // order to do this we have to make sure that all tasks stop
4079       // what they are doing and re-initialise in a safe manner. We
4080       // will achieve this with the use of two barrier sync points.
4081 
4082       if (_cm->verbose_low())
4083         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4084 
4085       _cm->enter_first_sync_barrier(_task_id);
4086       // When we exit this sync barrier we know that all tasks have
4087       // stopped doing marking work. So, it's now safe to
4088       // re-initialise our data structures. At the end of this method,
4089       // task 0 will clear the global data structures.
4090 
4091       statsOnly( ++_aborted_overflow );
4092 
4093       // We clear the local state of this task...
4094       clear_region_fields();
4095 
4096       // ...and enter the second barrier.
4097       _cm->enter_second_sync_barrier(_task_id);
4098       // At this point everything has been re-initialised and we're
4099       // ready to restart.
4100     }
4101 
4102     if (_cm->verbose_low()) {
4103       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4104                              "elapsed = %1.2lfms <<<<<<<<<<",
4105                              _task_id, _time_target_ms, elapsed_time_ms);
4106       if (_cm->has_aborted())
4107         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4108                                _task_id);
4109     }
4110   } else {
4111     if (_cm->verbose_low())
4112       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4113                              "elapsed = %1.2lfms <<<<<<<<<<",
4114                              _task_id, _time_target_ms, elapsed_time_ms);
4115   }
4116 
4117   _claimed = false;
4118 }
4119 
4120 CMTask::CMTask(int task_id,
4121                ConcurrentMark* cm,
4122                CMTaskQueue* task_queue,
4123                CMTaskQueueSet* task_queues)
4124   : _g1h(G1CollectedHeap::heap()),
4125     _task_id(task_id), _cm(cm),
4126     _claimed(false),
4127     _nextMarkBitMap(NULL), _hash_seed(17),
4128     _task_queue(task_queue),
4129     _task_queues(task_queues),
4130     _oop_closure(NULL),
4131     _aborted_region(MemRegion()) {
4132   guarantee(task_queue != NULL, "invariant");
4133   guarantee(task_queues != NULL, "invariant");
4134 
4135   statsOnly( _clock_due_to_scanning = 0;
4136              _clock_due_to_marking  = 0 );
4137 
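       // Seed the step-time-difference sequence so that the first call to
       // get_new_prediction() in do_marking_step() has a sample to work from.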
4138   _marking_step_diffs_ms.add(0.5);
4139 }