1 /*
   2  * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "incls/_precompiled.incl"
  26 #include "incls/_concurrentMark.cpp.incl"
  27 
  28 //
  29 // CMS Bit Map Wrapper
  30 
  31 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
  32   _bm((uintptr_t*)NULL,0),
  33   _shifter(shifter) {
  34   _bmStartWord = (HeapWord*)(rs.base());
  35   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  36   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
  37                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  38 
  39   guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  40   // For now we'll just commit all of the bit map up front.
  41   // Later on we'll try to be more parsimonious with swap.
  42   guarantee(_virtual_space.initialize(brs, brs.size()),
  43             "couldn't reserve backing store for CMS bit map");
  44   assert(_virtual_space.committed_size() == brs.size(),
  45          "didn't reserve backing store for all of CMS bit map?");
  46   _bm.set_map((uintptr_t*)_virtual_space.low());
  47   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
  48          _bmWordSize, "inconsistency in bit map sizing");
  49   _bm.set_size(_bmWordSize >> _shifter);
  50 }
  51 
  52 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  53                                                HeapWord* limit) const {
  54   // First we must round addr *up* to a possible object boundary.
  55   addr = (HeapWord*)align_size_up((intptr_t)addr,
  56                                   HeapWordSize << _shifter);
  57   size_t addrOffset = heapWordToOffset(addr);
  58   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  59   size_t limitOffset = heapWordToOffset(limit);
  60   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  61   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  62   assert(nextAddr >= addr, "get_next_one postcondition");
  63   assert(nextAddr == limit || isMarked(nextAddr),
  64          "get_next_one postcondition");
  65   return nextAddr;
  66 }
  67 
  68 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  69                                                  HeapWord* limit) const {
  70   size_t addrOffset = heapWordToOffset(addr);
  71   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  72   size_t limitOffset = heapWordToOffset(limit);
  73   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  74   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  75   assert(nextAddr >= addr, "get_next_one postcondition");
  76   assert(nextAddr == limit || !isMarked(nextAddr),
  77          "get_next_one postcondition");
  78   return nextAddr;
  79 }
  80 
  81 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  82   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  83   return (int) (diff >> _shifter);
  84 }
  85 
  86 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
  87   HeapWord* left  = MAX2(_bmStartWord, mr.start());
  88   HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
  89   if (right > left) {
  90     // Right-open interval [leftOffset, rightOffset).
  91     return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
  92   } else {
  93     return true;
  94   }
  95 }
  96 
  97 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
  98                                              size_t    from_start_index,
  99                                              HeapWord* to_start_word,
 100                                              size_t    word_num) {
 101   _bm.mostly_disjoint_range_union(from_bitmap,
 102                                   from_start_index,
 103                                   heapWordToOffset(to_start_word),
 104                                   word_num);
 105 }
 106 
 107 #ifndef PRODUCT
 108 bool CMBitMapRO::covers(ReservedSpace rs) const {
 109   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 110   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
 111          "size inconsistency");
 112   return _bmStartWord == (HeapWord*)(rs.base()) &&
 113          _bmWordSize  == rs.size()>>LogHeapWordSize;
 114 }
 115 #endif
 116 
 117 void CMBitMap::clearAll() {
 118   _bm.clear();
 119   return;
 120 }
 121 
 122 void CMBitMap::markRange(MemRegion mr) {
 123   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 124   assert(!mr.is_empty(), "unexpected empty region");
 125   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 126           ((HeapWord *) mr.end())),
 127          "markRange memory region end is not card aligned");
 128   // convert address range into offset range
 129   _bm.at_put_range(heapWordToOffset(mr.start()),
 130                    heapWordToOffset(mr.end()), true);
 131 }
 132 
 133 void CMBitMap::clearRange(MemRegion mr) {
 134   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 135   assert(!mr.is_empty(), "unexpected empty region");
 136   // convert address range into offset range
 137   _bm.at_put_range(heapWordToOffset(mr.start()),
 138                    heapWordToOffset(mr.end()), false);
 139 }
 140 
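     // Find the first contiguous run of marked words at or after addr,
     // clipped to end_addr, clear the corresponding bits, and return the
     // run as a (possibly empty) MemRegion.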
 141 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 142                                             HeapWord* end_addr) {
 143   HeapWord* start = getNextMarkedWordAddress(addr);
 144   start = MIN2(start, end_addr);
 145   HeapWord* end   = getNextUnmarkedWordAddress(start);
 146   end = MIN2(end, end_addr);
 147   assert(start <= end, "Consistency check");
 148   MemRegion mr(start, end);
 149   if (!mr.is_empty()) {
 150     clearRange(mr);
 151   }
 152   return mr;
 153 }
 154 
 155 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 156   _base(NULL), _cm(cm)
 157 #ifdef ASSERT
 158   , _drain_in_progress(false)
 159   , _drain_in_progress_yields(false)
 160 #endif
 161 {}
 162 
 163 void CMMarkStack::allocate(size_t size) {
 164   _base = NEW_C_HEAP_ARRAY(oop, size);
 165   if (_base == NULL)
 166     vm_exit_during_initialization("Failed to allocate "
 167                                   "CM mark stack");
 168   _index = 0;
 169   // QQQQ cast ...
 170   _capacity = (jint) size;
 171   _oops_do_bound = -1;
 172   NOT_PRODUCT(_max_depth = 0);
 173 }
 174 
 175 CMMarkStack::~CMMarkStack() {
 176   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
 177 }
 178 
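     // Lock-free push: claim a slot by advancing _index with a CAS,
     // retrying on contention, then store the entry into the claimed slot.
     // Sets _overflow instead if the stack is full.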
 179 void CMMarkStack::par_push(oop ptr) {
 180   while (true) {
 181     if (isFull()) {
 182       _overflow = true;
 183       return;
 184     }
 185     // Otherwise...
 186     jint index = _index;
 187     jint next_index = index+1;
 188     jint res = Atomic::cmpxchg(next_index, &_index, index);
 189     if (res == index) {
 190       _base[index] = ptr;
 191       // Note that we don't maintain this atomically.  We could, but it
 192       // doesn't seem necessary.
 193       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 194       return;
 195     }
 196     // Otherwise, we need to try again.
 197   }
 198 }
 199 
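     // Lock-free bulk push: claim n consecutive slots with a single CAS on
     // _index (setting _overflow and giving up if they would not fit), then
     // copy the n entries into the claimed slots.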
 200 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 201   while (true) {
 202     if (isFull()) {
 203       _overflow = true;
 204       return;
 205     }
 206     // Otherwise...
 207     jint index = _index;
 208     jint next_index = index + n;
 209     if (next_index > _capacity) {
 210       _overflow = true;
 211       return;
 212     }
 213     jint res = Atomic::cmpxchg(next_index, &_index, index);
 214     if (res == index) {
 215       for (int i = 0; i < n; i++) {
 216         int ind = index + i;
 217         assert(ind < _capacity, "By overflow test above.");
 218         _base[ind] = ptr_arr[i];
 219       }
 220       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 221       return;
 222     }
 223     // Otherwise, we need to try again.
 224   }
 225 }
 226 
 227 
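     // Bulk push of n entries performed while holding ParGCRareEvent_lock;
     // sets _overflow and pushes nothing if the entries would not fit.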
 228 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 229   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 230   jint start = _index;
 231   jint next_index = start + n;
 232   if (next_index > _capacity) {
 233     _overflow = true;
 234     return;
 235   }
 236   // Otherwise.
 237   _index = next_index;
 238   for (int i = 0; i < n; i++) {
 239     int ind = start + i;
 240     assert(ind < _capacity, "By overflow test above.");
 241     _base[ind] = ptr_arr[i];
 242   }
 243 }
 244 
 245 
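     // Pop up to max entries into ptr_arr while holding ParGCRareEvent_lock.
     // Stores the number of entries popped in *n and returns false if the
     // stack was empty.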
 246 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 247   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 248   jint index = _index;
 249   if (index == 0) {
 250     *n = 0;
 251     return false;
 252   } else {
 253     int k = MIN2(max, index);
 254     jint new_ind = index - k;
 255     for (int j = 0; j < k; j++) {
 256       ptr_arr[j] = _base[new_ind + j];
 257     }
 258     _index = new_ind;
 259     *n = k;
 260     return true;
 261   }
 262 }
 263 
 264 
 265 CMRegionStack::CMRegionStack() : _base(NULL) {}
 266 
 267 void CMRegionStack::allocate(size_t size) {
 268   _base = NEW_C_HEAP_ARRAY(MemRegion, size);
 269   if (_base == NULL)
 270     vm_exit_during_initialization("Failed to allocate "
 271                                   "CM region mark stack");
 272   _index = 0;
 273   // QQQQ cast ...
 274   _capacity = (jint) size;
 275 }
 276 
 277 CMRegionStack::~CMRegionStack() {
 278   if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
 279 }
 280 
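     // Lock-free push onto the region stack; mirrors the mark stack's CAS
     // protocol: claim a slot by advancing _index, then store the region.
     // Sets _overflow if the stack is full.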
 281 void CMRegionStack::push_lock_free(MemRegion mr) {
 282   assert(mr.word_size() > 0, "Precondition");
 283   while (true) {
 284     jint index = _index;
 285 
 286     if (index >= _capacity) {
 287       _overflow = true;
 288       return;
 289     }
 290     // Otherwise...
 291     jint next_index = index+1;
 292     jint res = Atomic::cmpxchg(next_index, &_index, index);
 293     if (res == index) {
 294       _base[index] = mr;
 295       return;
 296     }
 297     // Otherwise, we need to try again.
 298   }
 299 }
 300 
 301 // Lock-free pop of the region stack. Called during the concurrent
 302 // marking / remark phases. Should only be called in tandem with
 303 // other lock-free pops.
 304 MemRegion CMRegionStack::pop_lock_free() {
 305   while (true) {
 306     jint index = _index;
 307 
 308     if (index == 0) {
 309       return MemRegion();
 310     }
 311     // Otherwise...
 312     jint next_index = index-1;
 313     jint res = Atomic::cmpxchg(next_index, &_index, index);
 314     if (res == index) {
 315       MemRegion mr = _base[next_index];
 316       if (mr.start() != NULL) {
 317         assert(mr.end() != NULL, "invariant");
 318         assert(mr.word_size() > 0, "invariant");
 319         return mr;
 320       } else {
 321         // that entry was invalidated... let's skip it
 322         assert(mr.end() == NULL, "invariant");
 323       }
 324     }
 325     // Otherwise, we need to try again.
 326   }
 327 }
 328 
 329 #if 0
 330 // The routines that manipulate the region stack with a lock are
 331 // not currently used. They should be retained, however, as a
 332 // diagnostic aid.
 333 
 334 void CMRegionStack::push_with_lock(MemRegion mr) {
 335   assert(mr.word_size() > 0, "Precondition");
 336   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 337 
 338   if (isFull()) {
 339     _overflow = true;
 340     return;
 341   }
 342 
 343   _base[_index] = mr;
 344   _index += 1;
 345 }
 346 
 347 MemRegion CMRegionStack::pop_with_lock() {
 348   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 349 
 350   while (true) {
 351     if (_index == 0) {
 352       return MemRegion();
 353     }
 354     _index -= 1;
 355 
 356     MemRegion mr = _base[_index];
 357     if (mr.start() != NULL) {
 358       assert(mr.end() != NULL, "invariant");
 359       assert(mr.word_size() > 0, "invariant");
 360       return mr;
 361     } else {
 362       // that entry was invalidated... let's skip it
 363       assert(mr.end() == NULL, "invariant");
 364     }
 365   }
 366 }
 367 #endif
 368 
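     // Walk the entries below _oops_do_bound and replace any region that now
     // points into the collection set with an empty MemRegion; the lock-free
     // pop above recognizes and skips such invalidated entries. Returns true
     // if at least one entry was invalidated.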
 369 bool CMRegionStack::invalidate_entries_into_cset() {
 370   bool result = false;
 371   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 372   for (int i = 0; i < _oops_do_bound; ++i) {
 373     MemRegion mr = _base[i];
 374     if (mr.start() != NULL) {
 375       assert(mr.end() != NULL, "invariant");
 376       assert(mr.word_size() > 0, "invariant");
 377       HeapRegion* hr = g1h->heap_region_containing(mr.start());
 378       assert(hr != NULL, "invariant");
 379       if (hr->in_collection_set()) {
 380         // The region points into the collection set
 381         _base[i] = MemRegion();
 382         result = true;
 383       }
 384     } else {
 385       // that entry was invalidated... let's skip it
 386       assert(mr.end() == NULL, "invariant");
 387     }
 388   }
 389   return result;
 390 }
 391 
 392 template<class OopClosureClass>
 393 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 394   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 395          || SafepointSynchronize::is_at_safepoint(),
 396          "Drain recursion must be yield-safe.");
 397   bool res = true;
 398   debug_only(_drain_in_progress = true);
 399   debug_only(_drain_in_progress_yields = yield_after);
 400   while (!isEmpty()) {
 401     oop newOop = pop();
 402     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 403     assert(newOop->is_oop(), "Expected an oop");
 404     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 405            "only grey objects on this stack");
 406     // iterate over the oops in this oop, marking and pushing
 407     // the ones in the G1 heap.
 408     newOop->oop_iterate(cl);
 409     if (yield_after && _cm->do_yield_check()) {
 410       res = false; break;
 411     }
 412   }
 413   debug_only(_drain_in_progress = false);
 414   return res;
 415 }
 416 
 417 void CMMarkStack::oops_do(OopClosure* f) {
 418   if (_index == 0) return;
 419   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
 420          "Bound must be set.");
 421   for (int i = 0; i < _oops_do_bound; i++) {
 422     f->do_oop(&_base[i]);
 423   }
 424   _oops_do_bound = -1;
 425 }
 426 
 427 bool ConcurrentMark::not_yet_marked(oop obj) const {
 428   return (_g1h->is_obj_ill(obj)
 429           || (_g1h->is_in_permanent(obj)
 430               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
 431 }
 432 
 433 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 434 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 435 #endif // _MSC_VER
 436 
 437 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
 438                                int max_regions) :
 439   _markBitMap1(rs, MinObjAlignment - 1),
 440   _markBitMap2(rs, MinObjAlignment - 1),
 441 
 442   _parallel_marking_threads(0),
 443   _sleep_factor(0.0),
 444   _marking_task_overhead(1.0),
 445   _cleanup_sleep_factor(0.0),
 446   _cleanup_task_overhead(1.0),
 447   _region_bm(max_regions, false /* in_resource_area*/),
 448   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
 449            CardTableModRefBS::card_shift,
 450            false /* in_resource_area*/),
 451   _prevMarkBitMap(&_markBitMap1),
 452   _nextMarkBitMap(&_markBitMap2),
 453   _at_least_one_mark_complete(false),
 454 
 455   _markStack(this),
 456   _regionStack(),
 457   // _finger set in set_non_marking_state
 458 
 459   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
 460   // _active_tasks set in set_non_marking_state
 461   // _tasks set inside the constructor
 462   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
 463   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
 464 
 465   _has_overflown(false),
 466   _concurrent(false),
 467   _has_aborted(false),
 468   _restart_for_overflow(false),
 469   _concurrent_marking_in_progress(false),
 470   _should_gray_objects(false),
 471 
 472   // _verbose_level set below
 473 
 474   _init_times(),
 475   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 476   _cleanup_times(),
 477   _total_counting_time(0.0),
 478   _total_rs_scrub_time(0.0),
 479 
 480   _parallel_workers(NULL)
 481 {
 482   CMVerboseLevel verbose_level =
 483     (CMVerboseLevel) G1MarkingVerboseLevel;
 484   if (verbose_level < no_verbose)
 485     verbose_level = no_verbose;
 486   if (verbose_level > high_verbose)
 487     verbose_level = high_verbose;
 488   _verbose_level = verbose_level;
 489 
 490   if (verbose_low())
 491     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 492                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
 493 
 494   _markStack.allocate(MarkStackSize);
 495   _regionStack.allocate(G1MarkRegionStackSize);
 496 
 497   // Create & start a ConcurrentMark thread.
 498   _cmThread = new ConcurrentMarkThread(this);
 499   assert(cmThread() != NULL, "CM Thread should have been created");
 500   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 501 
 502   _g1h = G1CollectedHeap::heap();
 503   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 504   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
 505   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
 506 
 507   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 508   satb_qs.set_buffer_size(G1SATBBufferSize);
 509 
 510   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
 511   _par_cleanup_thread_state = NEW_C_HEAP_ARRAY(ParCleanupThreadState*, size);
 512   for (int i = 0 ; i < size; i++) {
 513     _par_cleanup_thread_state[i] = new ParCleanupThreadState;
 514   }
 515 
 516   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
 517   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
 518 
 519   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 520   _active_tasks = _max_task_num;
 521   for (int i = 0; i < (int) _max_task_num; ++i) {
 522     CMTaskQueue* task_queue = new CMTaskQueue();
 523     task_queue->initialize();
 524     _task_queues->register_queue(i, task_queue);
 525 
 526     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
 527     _accum_task_vtime[i] = 0.0;
 528   }
 529 
 530   if (ConcGCThreads > ParallelGCThreads) {
 531     vm_exit_during_initialization("Can't have more ConcGCThreads "
 532                                   "than ParallelGCThreads.");
 533   }
 534   if (ParallelGCThreads == 0) {
 535     // if we are not running with any parallel GC threads we will not
 536     // spawn any marking threads either
 537     _parallel_marking_threads =   0;
 538     _sleep_factor             = 0.0;
 539     _marking_task_overhead    = 1.0;
 540   } else {
 541     if (ConcGCThreads > 0) {
 542       // notice that ConcGCThreads overrides G1MarkingOverheadPercent
 543       // if both are set
 544 
 545       _parallel_marking_threads = ConcGCThreads;
 546       _sleep_factor             = 0.0;
 547       _marking_task_overhead    = 1.0;
 548     } else if (G1MarkingOverheadPercent > 0) {
 549       // we will calculate the number of parallel marking threads
 550       // based on a target overhead with respect to the soft real-time
 551       // goal
 552 
 553       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 554       double overall_cm_overhead =
 555         (double) MaxGCPauseMillis * marking_overhead /
 556         (double) GCPauseIntervalMillis;
 557       double cpu_ratio = 1.0 / (double) os::processor_count();
 558       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 559       double marking_task_overhead =
 560         overall_cm_overhead / marking_thread_num *
 561                                                 (double) os::processor_count();
 562       double sleep_factor =
 563                          (1.0 - marking_task_overhead) / marking_task_overhead;
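           // Roughly: overall_cm_overhead is the marking budget as a
           // fraction of total machine capacity, marking_thread_num is the
           // number of threads needed to consume that budget, each thread's
           // duty cycle is marking_task_overhead, and sleep_factor is the
           // sleep-to-work time ratio that produces that duty cycle.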
 564 
 565       _parallel_marking_threads = (size_t) marking_thread_num;
 566       _sleep_factor             = sleep_factor;
 567       _marking_task_overhead    = marking_task_overhead;
 568     } else {
 569       _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
 570       _sleep_factor             = 0.0;
 571       _marking_task_overhead    = 1.0;
 572     }
 573 
 574     if (parallel_marking_threads() > 1)
 575       _cleanup_task_overhead = 1.0;
 576     else
 577       _cleanup_task_overhead = marking_task_overhead();
 578     _cleanup_sleep_factor =
 579                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 580 
 581 #if 0
 582     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 583     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 584     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 585     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 586     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 587 #endif
 588 
 589     guarantee(parallel_marking_threads() > 0, "peace of mind");
 590     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
 591          (int) _parallel_marking_threads, false, true);
 592     if (_parallel_workers == NULL) {
 593       vm_exit_during_initialization("Failed necessary allocation.");
 594     } else {
 595       _parallel_workers->initialize_workers();
 596     }
 597   }
 598 
 599   // so that the call below can read a sensible value
 600   _heap_start = (HeapWord*) rs.base();
 601   set_non_marking_state();
 602 }
 603 
 604 void ConcurrentMark::update_g1_committed(bool force) {
 605   // If concurrent marking is not in progress, then we do not need to
 606   // update _heap_end. This has a subtle and important
 607   // side-effect. Imagine that two evacuation pauses happen between
 608   // marking completion and remark. The first one can grow the
 609   // heap (hence now the finger is below the heap end). Then, the
 610   // second one could unnecessarily push regions on the region
 611   // stack. This causes the invariant that the region stack is empty
 612   // at the beginning of remark to be false. By ensuring that we do
 613   // not observe heap expansions after marking is complete, we avoid
 614   // this problem.
 615   if (!concurrent_marking_in_progress() && !force)
 616     return;
 617 
 618   MemRegion committed = _g1h->g1_committed();
 619   assert(committed.start() == _heap_start, "start shouldn't change");
 620   HeapWord* new_end = committed.end();
 621   if (new_end > _heap_end) {
 622     // The heap has been expanded.
 623 
 624     _heap_end = new_end;
 625   }
 626   // Notice that the heap can also shrink. However, this only happens
 627   // during a Full GC (at least currently) and the entire marking
 628   // phase will bail out and the task will not be restarted. So, let's
 629   // do nothing.
 630 }
 631 
 632 void ConcurrentMark::reset() {
 633   // Starting values for these two. This should be called in a STW
 634   // phase. CM will be notified of any future g1_committed
 635   // expansions at the end of evacuation pauses, when tasks are
 636   // inactive.
 637   MemRegion committed = _g1h->g1_committed();
 638   _heap_start = committed.start();
 639   _heap_end   = committed.end();
 640 
 641   // Separated the asserts so that we know which one fires.
 642   assert(_heap_start != NULL, "heap bounds should look ok");
 643   assert(_heap_end != NULL, "heap bounds should look ok");
 644   assert(_heap_start < _heap_end, "heap bounds should look ok");
 645 
 646   // reset all the marking data structures and any necessary flags
 647   clear_marking_state();
 648 
 649   if (verbose_low())
 650     gclog_or_tty->print_cr("[global] resetting");
 651 
 652   // We do reset all of them, since different phases will use
 653   // different number of active threads. So, it's easiest to have all
 654   // of them ready.
 655   for (int i = 0; i < (int) _max_task_num; ++i) {
 656     _tasks[i]->reset(_nextMarkBitMap);
 657   }
 658 
 659   // we need this to make sure that the flag is on during the evac
 660   // pause with initial mark piggy-backed
 661   set_concurrent_marking_in_progress();
 662 }
 663 
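     // Prepare for a phase (concurrent marking or remark) with the given
     // number of active tasks: update the terminator and the two overflow
     // barrier syncs for that task count, and propagate the concurrent
     // flag to every task.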
 664 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
 665   assert(active_tasks <= _max_task_num, "we should not have more");
 666 
 667   _active_tasks = active_tasks;
 668   // Need to update the three data structures below according to the
 669   // number of active threads for this phase.
 670   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 671   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 672   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 673 
 674   _concurrent = concurrent;
 675   // We propagate this to all tasks, not just the active ones.
 676   for (int i = 0; i < (int) _max_task_num; ++i)
 677     _tasks[i]->set_concurrent(concurrent);
 678 
 679   if (concurrent) {
 680     set_concurrent_marking_in_progress();
 681   } else {
 682     // We currently assume that the concurrent flag has been set to
 683     // false before we start remark. At this point we should also be
 684     // in a STW phase.
 685     assert(!concurrent_marking_in_progress(), "invariant");
 686     assert(_finger == _heap_end, "only way to get here");
 687     update_g1_committed(true);
 688   }
 689 }
 690 
 691 void ConcurrentMark::set_non_marking_state() {
 692   // We set the global marking state to some default values when we're
 693   // not doing marking.
 694   clear_marking_state();
 695   _active_tasks = 0;
 696   clear_concurrent_marking_in_progress();
 697 }
 698 
 699 ConcurrentMark::~ConcurrentMark() {
 700   int size = (int) MAX2(ParallelGCThreads, (size_t)1);
 701   for (int i = 0; i < size; i++) delete _par_cleanup_thread_state[i];
 702   FREE_C_HEAP_ARRAY(ParCleanupThreadState*,
 703                     _par_cleanup_thread_state);
 704 
 705   for (int i = 0; i < (int) _max_task_num; ++i) {
 706     delete _task_queues->queue(i);
 707     delete _tasks[i];
 708   }
 709   delete _task_queues;
 710   FREE_C_HEAP_ARRAY(CMTask*, _tasks);
 711 }
 712 
 713 // This closure is used to mark refs into the g1 generation
 714 // from external roots in the CMS bit map.
 715 // Called at the first checkpoint.
 716 //
 717 
 718 void ConcurrentMark::clearNextBitmap() {
 719   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 720   G1CollectorPolicy* g1p = g1h->g1_policy();
 721 
 722   // Make sure that the concurrent mark thread still appears to be
 723   // within the current cycle.
 724   guarantee(cmThread()->during_cycle(), "invariant");
 725 
 726   // We are finishing up the current cycle by clearing the next
 727   // marking bitmap and getting it ready for the next cycle. During
 728   // this time no other cycle can start. So, let's make sure that this
 729   // is the case.
 730   guarantee(!g1h->mark_in_progress(), "invariant");
 731 
 732   // clear the mark bitmap (no grey objects to start with).
 733   // We need to do this in chunks and offer to yield in between
 734   // each chunk.
 735   HeapWord* start  = _nextMarkBitMap->startWord();
 736   HeapWord* end    = _nextMarkBitMap->endWord();
 737   HeapWord* cur    = start;
 738   size_t chunkSize = M;
 739   while (cur < end) {
 740     HeapWord* next = cur + chunkSize;
 741     if (next > end)
 742       next = end;
 743     MemRegion mr(cur,next);
 744     _nextMarkBitMap->clearRange(mr);
 745     cur = next;
 746     do_yield_check();
 747 
 748     // Repeat the asserts from above. We'll do them as asserts here to
 749     // minimize their overhead on the product. However, we'll have
 750     // them as guarantees at the beginning / end of the bitmap
 751     // clearing to get some checking in the product.
 752     assert(cmThread()->during_cycle(), "invariant");
 753     assert(!g1h->mark_in_progress(), "invariant");
 754   }
 755 
 756   // Repeat the asserts from above.
 757   guarantee(cmThread()->during_cycle(), "invariant");
 758   guarantee(!g1h->mark_in_progress(), "invariant");
 759 }
 760 
 761 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 762 public:
 763   bool doHeapRegion(HeapRegion* r) {
 764     if (!r->continuesHumongous()) {
 765       r->note_start_of_marking(true);
 766     }
 767     return false;
 768   }
 769 };
 770 
 771 void ConcurrentMark::checkpointRootsInitialPre() {
 772   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 773   G1CollectorPolicy* g1p = g1h->g1_policy();
 774 
 775   _has_aborted = false;
 776 
 777 #ifndef PRODUCT
 778   if (G1PrintReachableAtInitialMark) {
 779     print_reachable("at-cycle-start",
 780                     true /* use_prev_marking */, true /* all */);
 781   }
 782 #endif
 783 
 784   // Initialise marking structures. This has to be done in a STW phase.
 785   reset();
 786 }
 787 
 788 class CMMarkRootsClosure: public OopsInGenClosure {
 789 private:
 790   ConcurrentMark*  _cm;
 791   G1CollectedHeap* _g1h;
 792   bool             _do_barrier;
 793 
 794 public:
 795   CMMarkRootsClosure(ConcurrentMark* cm,
 796                      G1CollectedHeap* g1h,
 797                      bool do_barrier) : _cm(cm), _g1h(g1h),
 798                                         _do_barrier(do_barrier) { }
 799 
 800   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
 801   virtual void do_oop(      oop* p) { do_oop_work(p); }
 802 
 803   template <class T> void do_oop_work(T* p) {
 804     T heap_oop = oopDesc::load_heap_oop(p);
 805     if (!oopDesc::is_null(heap_oop)) {
 806       oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
 807       assert(obj->is_oop() || obj->mark() == NULL,
 808              "expected an oop, possibly with mark word displaced");
 809       HeapWord* addr = (HeapWord*)obj;
 810       if (_g1h->is_in_g1_reserved(addr)) {
 811         _cm->grayRoot(obj);
 812       }
 813     }
 814     if (_do_barrier) {
 815       assert(!_g1h->is_in_g1_reserved(p),
 816              "Should be called on external roots");
 817       do_barrier(p);
 818     }
 819   }
 820 };
 821 
 822 void ConcurrentMark::checkpointRootsInitialPost() {
 823   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 824 
 825   // For each region note start of marking.
 826   NoteStartOfMarkHRClosure startcl;
 827   g1h->heap_region_iterate(&startcl);
 828 
 829   // Start weak-reference discovery.
 830   ReferenceProcessor* rp = g1h->ref_processor();
 831   rp->verify_no_references_recorded();
 832   rp->enable_discovery(); // enable ("weak") refs discovery
 833   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 834 
 835   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 836   // This is the start of the marking cycle; we expect all
 837   // threads to have SATB queues with active set to false.
 838   satb_mq_set.set_active_all_threads(true, /* new active value */
 839                                      false /* expected_active */);
 840 
 841   // update_g1_committed() will be called at the end of an evac pause
 842   // when marking is on. So, it's also called at the end of the
 843   // initial-mark pause to update the heap end, if the heap expands
 844   // during it. No need to call it here.
 845 }
 846 
 847 // Checkpoint the roots into this generation from outside
 848 // this generation. [Note this initial checkpoint need only
 849 // be approximate -- we'll do a catch up phase subsequently.]
 850 void ConcurrentMark::checkpointRootsInitial() {
 851   assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
 852   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 853 
 854   double start = os::elapsedTime();
 855 
 856   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
 857   g1p->record_concurrent_mark_init_start();
 858   checkpointRootsInitialPre();
 859 
 860   // YSR: when concurrent precleaning is in place, we'll
 861   // need to clear the cached card table here
 862 
 863   ResourceMark rm;
 864   HandleMark  hm;
 865 
 866   g1h->ensure_parsability(false);
 867   g1h->perm_gen()->save_marks();
 868 
 869   CMMarkRootsClosure notOlder(this, g1h, false);
 870   CMMarkRootsClosure older(this, g1h, true);
 871 
 872   g1h->set_marking_started();
 873   g1h->rem_set()->prepare_for_younger_refs_iterate(false);
 874 
 875   g1h->process_strong_roots(true,    // activate StrongRootsScope
 876                             false,   // fake perm gen collection
 877                             SharedHeap::SO_AllClasses,
 878                             &notOlder, // Regular roots
 879                             NULL,     // do not visit active blobs
 880                             &older    // Perm Gen Roots
 881                             );
 882   checkpointRootsInitialPost();
 883 
 884   // Statistics.
 885   double end = os::elapsedTime();
 886   _init_times.add((end - start) * 1000.0);
 887 
 888   g1p->record_concurrent_mark_init_end();
 889 }
 890 
 891 /*
 892    Notice that in the next two methods, we actually leave the STS
 893    during the barrier sync and join it immediately afterwards. If we
 894    do not do this, then the following deadlock can occur: one
 895    thread could be in the barrier sync code, waiting for the other
 896    thread to also sync up, whereas another one could be trying to
 897    yield, while also waiting for the other threads to sync up too.
 898 
 899    Because the thread that does the sync barrier has left the STS, it
 900    is possible for a Full GC or an evacuation pause to occur while
 901    it is suspended. This is actually safe, since entering the sync
 902    barrier is one of the last things do_marking_step() does, and it
 903    doesn't manipulate any data structures afterwards.
 904 */
 905 
 906 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
 907   if (verbose_low())
 908     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
 909 
 910   ConcurrentGCThread::stsLeave();
 911   _first_overflow_barrier_sync.enter();
 912   ConcurrentGCThread::stsJoin();
 913   // at this point everyone should have synced up and not be doing any
 914   // more work
 915 
 916   if (verbose_low())
 917     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
 918 
 919   // let task 0 do this
 920   if (task_num == 0) {
 921     // task 0 is responsible for clearing the global data structures
 922     clear_marking_state();
 923 
 924     if (PrintGC) {
 925       gclog_or_tty->date_stamp(PrintGCDateStamps);
 926       gclog_or_tty->stamp(PrintGCTimeStamps);
 927       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 928     }
 929   }
 930 
 931   // after this, each task should reset its own data structures and
 932   // then go into the second barrier
 933 }
 934 
 935 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
 936   if (verbose_low())
 937     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
 938 
 939   ConcurrentGCThread::stsLeave();
 940   _second_overflow_barrier_sync.enter();
 941   ConcurrentGCThread::stsJoin();
 942   // at this point everything should be re-initialised and ready to go
 943 
 944   if (verbose_low())
 945     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
 946 }
 947 
 948 void ConcurrentMark::grayRoot(oop p) {
 949   HeapWord* addr = (HeapWord*) p;
 950   // We can't really check against _heap_start and _heap_end, since it
 951   // is possible during an evacuation pause with piggy-backed
 952   // initial-mark that the committed space is expanded during the
 953   // pause without CM observing this change. So the assertion below
 954   // is a bit conservative, but better than nothing.
 955   assert(_g1h->g1_committed().contains(addr),
 956          "address should be within the heap bounds");
 957 
 958   if (!_nextMarkBitMap->isMarked(addr))
 959     _nextMarkBitMap->parMark(addr);
 960 }
 961 
 962 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
 963   // The objects on the region have already been marked "in bulk" by
 964   // the caller. We only need to decide whether to push the region on
 965   // the region stack or not.
 966 
 967   if (!concurrent_marking_in_progress() || !_should_gray_objects)
 968     // We're done with marking and waiting for remark. We do not need to
 969     // push anything else on the region stack.
 970     return;
 971 
 972   HeapWord* finger = _finger;
 973 
 974   if (verbose_low())
 975     gclog_or_tty->print_cr("[global] attempting to push "
 976                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
 977                            PTR_FORMAT, mr.start(), mr.end(), finger);
 978 
 979   if (mr.start() < finger) {
 980     // The finger is always heap region aligned and it is not possible
 981     // for mr to span heap regions.
 982     assert(mr.end() <= finger, "invariant");
 983 
 984     // Separated the asserts so that we know which one fires.
 985     assert(mr.start() <= mr.end(),
 986            "region boundaries should fall within the committed space");
 987     assert(_heap_start <= mr.start(),
 988            "region boundaries should fall within the committed space");
 989     assert(mr.end() <= _heap_end,
 990            "region boundaries should fall within the committed space");
 991     if (verbose_low())
 992       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
 993                              "below the finger, pushing it",
 994                              mr.start(), mr.end());
 995 
 996     if (!region_stack_push_lock_free(mr)) {
 997       if (verbose_low())
 998         gclog_or_tty->print_cr("[global] region stack has overflown.");
 999     }
1000   }
1001 }
1002 
1003 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1004   // The object is not marked by the caller. We need to at least mark
1005   // it and maybe push it on the stack.
1006 
1007   HeapWord* addr = (HeapWord*)p;
1008   if (!_nextMarkBitMap->isMarked(addr)) {
1009     // We definitely need to mark it, irrespective whether we bail out
1010     // because we're done with marking.
1011     if (_nextMarkBitMap->parMark(addr)) {
1012       if (!concurrent_marking_in_progress() || !_should_gray_objects)
1013         // If we're done with concurrent marking and we're waiting for
1014         // remark, then we're not pushing anything on the stack.
1015         return;
1016 
1017       // No OrderAccess::store_load() is needed. It is implicit in the
1018       // CAS done in parMark(addr) above
1019       HeapWord* finger = _finger;
1020 
1021       if (addr < finger) {
1022         if (!mark_stack_push(oop(addr))) {
1023           if (verbose_low())
1024             gclog_or_tty->print_cr("[global] global stack overflow "
1025                                    "during parMark");
1026         }
1027       }
1028     }
1029   }
1030 }
1031 
1032 class CMConcurrentMarkingTask: public AbstractGangTask {
1033 private:
1034   ConcurrentMark*       _cm;
1035   ConcurrentMarkThread* _cmt;
1036 
1037 public:
1038   void work(int worker_i) {
1039     assert(Thread::current()->is_ConcurrentGC_thread(),
1040            "this should only be done by a conc GC thread");
1041 
1042     double start_vtime = os::elapsedVTime();
1043 
1044     ConcurrentGCThread::stsJoin();
1045 
1046     assert((size_t) worker_i < _cm->active_tasks(), "invariant");
1047     CMTask* the_task = _cm->task(worker_i);
1048     the_task->record_start_time();
1049     if (!_cm->has_aborted()) {
1050       do {
1051         double start_vtime_sec = os::elapsedVTime();
1052         double start_time_sec = os::elapsedTime();
1053         the_task->do_marking_step(10.0);
1054         double end_time_sec = os::elapsedTime();
1055         double end_vtime_sec = os::elapsedVTime();
1056         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1057         double elapsed_time_sec = end_time_sec - start_time_sec;
1058         _cm->clear_has_overflown();
1059 
1060         bool ret = _cm->do_yield_check(worker_i);
1061 
1062         jlong sleep_time_ms;
1063         if (!_cm->has_aborted() && the_task->has_aborted()) {
1064           sleep_time_ms =
1065             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1066           ConcurrentGCThread::stsLeave();
1067           os::sleep(Thread::current(), sleep_time_ms, false);
1068           ConcurrentGCThread::stsJoin();
1069         }
1070         double end_time2_sec = os::elapsedTime();
1071         double elapsed_time2_sec = end_time2_sec - start_time_sec;
1072 
1073 #if 0
1074           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1075                                  "overhead %1.4lf",
1076                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1077                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
1078           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1079                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1080 #endif
1081       } while (!_cm->has_aborted() && the_task->has_aborted());
1082     }
1083     the_task->record_end_time();
1084     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1085 
1086     ConcurrentGCThread::stsLeave();
1087 
1088     double end_vtime = os::elapsedVTime();
1089     _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1090   }
1091 
1092   CMConcurrentMarkingTask(ConcurrentMark* cm,
1093                           ConcurrentMarkThread* cmt) :
1094       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1095 
1096   ~CMConcurrentMarkingTask() { }
1097 };
1098 
1099 void ConcurrentMark::markFromRoots() {
1100   // we might be tempted to assert that:
1101   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1102   //        "inconsistent argument?");
1103   // However that wouldn't be right, because it's possible that
1104   // a safepoint is indeed in progress as a younger generation
1105   // stop-the-world GC happens even as we mark in this generation.
1106 
1107   _restart_for_overflow = false;
1108 
1109   set_phase(MAX2((size_t) 1, parallel_marking_threads()), true);
1110 
1111   CMConcurrentMarkingTask markingTask(this, cmThread());
1112   if (parallel_marking_threads() > 0)
1113     _parallel_workers->run_task(&markingTask);
1114   else
1115     markingTask.work(0);
1116   print_stats();
1117 }
1118 
1119 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1120   // world is stopped at this checkpoint
1121   assert(SafepointSynchronize::is_at_safepoint(),
1122          "world should be stopped");
1123   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1124 
1125   // If a full collection has happened, we shouldn't do this.
1126   if (has_aborted()) {
1127     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1128     return;
1129   }
1130 
1131   if (VerifyDuringGC) {
1132     HandleMark hm;  // handle scope
1133     gclog_or_tty->print(" VerifyDuringGC:(before)");
1134     Universe::heap()->prepare_for_verify();
1135     Universe::verify(true, false, true);
1136   }
1137 
1138   G1CollectorPolicy* g1p = g1h->g1_policy();
1139   g1p->record_concurrent_mark_remark_start();
1140 
1141   double start = os::elapsedTime();
1142 
1143   checkpointRootsFinalWork();
1144 
1145   double mark_work_end = os::elapsedTime();
1146 
1147   weakRefsWork(clear_all_soft_refs);
1148 
1149   if (has_overflown()) {
1150     // Oops.  We overflowed.  Restart concurrent marking.
1151     _restart_for_overflow = true;
1152     // Clear the flag. We do not need it any more.
1153     clear_has_overflown();
1154     if (G1TraceMarkStackOverflow)
1155       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1156   } else {
1157     // We're done with marking.
1158     // This is the end of the marking cycle; we expect all
1159     // threads to have SATB queues with active set to true.
1160     JavaThread::satb_mark_queue_set().set_active_all_threads(
1161                                                   false, /* new active value */
1162                                                   true /* expected_active */);
1163 
1164     if (VerifyDuringGC) {
1165       HandleMark hm;  // handle scope
1166       gclog_or_tty->print(" VerifyDuringGC:(after)");
1167       Universe::heap()->prepare_for_verify();
1168       Universe::heap()->verify(/* allow_dirty */      true,
1169                                /* silent */           false,
1170                                /* use_prev_marking */ false);
1171     }
1172   }
1173 
1174 #if VERIFY_OBJS_PROCESSED
1175   _scan_obj_cl.objs_processed = 0;
1176   ThreadLocalObjQueue::objs_enqueued = 0;
1177 #endif
1178 
1179   // Statistics
1180   double now = os::elapsedTime();
1181   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1182   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1183   _remark_times.add((now - start) * 1000.0);
1184 
1185   g1p->record_concurrent_mark_remark_end();
1186 }
1187 
1188 
1189 #define CARD_BM_TEST_MODE 0
1190 
1191 class CalcLiveObjectsClosure: public HeapRegionClosure {
1192 
1193   CMBitMapRO* _bm;
1194   ConcurrentMark* _cm;
1195   bool _changed;
1196   bool _yield;
1197   size_t _words_done;
1198   size_t _tot_live;
1199   size_t _tot_used;
1200   size_t _regions_done;
1201   double _start_vtime_sec;
1202 
1203   BitMap* _region_bm;
1204   BitMap* _card_bm;
1205   intptr_t _bottom_card_num;
1206   bool _final;
1207 
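       // Mark the cards in the inclusive range [start_card_num,
       // last_card_num] in the card bitmap; indices are relative to the
       // card number of the bottom of the heap.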
1208   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1209     for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1210 #if CARD_BM_TEST_MODE
1211       guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
1212 #else
1213       _card_bm->par_at_put(i - _bottom_card_num, 1);
1214 #endif
1215     }
1216   }
1217 
1218 public:
1219   CalcLiveObjectsClosure(bool final,
1220                          CMBitMapRO *bm, ConcurrentMark *cm,
1221                          BitMap* region_bm, BitMap* card_bm) :
1222     _bm(bm), _cm(cm), _changed(false), _yield(true),
1223     _words_done(0), _tot_live(0), _tot_used(0),
1224     _region_bm(region_bm), _card_bm(card_bm),_final(final),
1225     _regions_done(0), _start_vtime_sec(0.0)
1226   {
1227     _bottom_card_num =
1228       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1229                CardTableModRefBS::card_shift);
1230   }
1231 
1232   // It takes a region that's not empty (i.e., it has at least one
1233   // live object in it) and sets its corresponding bit on the region
1234   // bitmap to 1. If the region is "starts humongous" it will also set
1235   // to 1 the bits on the region bitmap that correspond to its
1236   // associated "continues humongous" regions.
1237   void set_bit_for_region(HeapRegion* hr) {
1238     assert(!hr->continuesHumongous(), "should have filtered those out");
1239 
1240     size_t index = hr->hrs_index();
1241     if (!hr->startsHumongous()) {
1242       // Normal (non-humongous) case: just set the bit.
1243       _region_bm->par_at_put((BitMap::idx_t) index, true);
1244     } else {
1245       // Starts humongous case: calculate how many regions are part of
1246       // this humongous region and then set the bit range. It might
1247       // have been a bit more efficient to look at the object that
1248       // spans these humongous regions to calculate their number from
1249       // the object's size. However, it's a good idea to calculate
1250       // this based on the metadata itself, and not the region
1251       // contents, so that this code is not aware of what goes into
1252       // the humongous regions (in case this changes in the future).
1253       G1CollectedHeap* g1h = G1CollectedHeap::heap();
1254       size_t end_index = index + 1;
1255       while (end_index < g1h->n_regions()) {
1256         HeapRegion* chr = g1h->region_at(end_index);
1257         if (!chr->continuesHumongous()) {
1258           break;
1259         }
1260         end_index += 1;
1261       }
1262       _region_bm->par_at_put_range((BitMap::idx_t) index,
1263                                    (BitMap::idx_t) end_index, true);
1264     }
1265   }
1266 
1267   bool doHeapRegion(HeapRegion* hr) {
1268     if (!_final && _regions_done == 0)
1269       _start_vtime_sec = os::elapsedVTime();
1270 
1271     if (hr->continuesHumongous()) {
1272       // We will ignore these here and process them when their
1273       // associated "starts humongous" region is processed (see
1274       // set_bit_for_heap_region()). Note that we cannot rely on their
1275       // associated "starts humongous" region to have their bit set to
1276       // 1 since, due to the region chunking in the parallel region
1277       // iteration, a "continues humongous" region might be visited
1278       // before its associated "starts humongous".
1279       return false;
1280     }
1281 
1282     HeapWord* nextTop = hr->next_top_at_mark_start();
1283     HeapWord* start   = hr->top_at_conc_mark_count();
1284     assert(hr->bottom() <= start && start <= hr->end() &&
1285            hr->bottom() <= nextTop && nextTop <= hr->end() &&
1286            start <= nextTop,
1287            "Preconditions.");
1288     // Otherwise, record the number of words we'll examine.
1289     size_t words_done = (nextTop - start);
1290     // Find the first marked object at or after "start".
1291     start = _bm->getNextMarkedWordAddress(start, nextTop);
1292     size_t marked_bytes = 0;
1293 
1294     // Below, the term "card num" means the result of shifting an address
1295     // by the card shift -- address 0 corresponds to card number 0.  One
1296     // must subtract the card num of the bottom of the heap to obtain a
1297     // card table index.
1298     // The first card num of the sequence of live cards currently being
1299     // constructed.  -1 ==> no sequence.
1300     intptr_t start_card_num = -1;
1301     // The last card num of the sequence of live cards currently being
1302     // constructed.  -1 ==> no sequence.
1303     intptr_t last_card_num = -1;
1304 
1305     while (start < nextTop) {
1306       if (_yield && _cm->do_yield_check()) {
1307         // We yielded.  It might be for a full collection, in which case
1308         // all bets are off; terminate the traversal.
1309         if (_cm->has_aborted()) {
1310           _changed = false;
1311           return true;
1312         } else {
1313           // Otherwise, it might be a collection pause, and the region
1314           // we're looking at might be in the collection set.  We'll
1315           // abandon this region.
1316           return false;
1317         }
1318       }
1319       oop obj = oop(start);
1320       int obj_sz = obj->size();
1321       // The card num of the start of the current object.
1322       intptr_t obj_card_num =
1323         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1324 
1325       HeapWord* obj_last = start + obj_sz - 1;
1326       intptr_t obj_last_card_num =
1327         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1328 
1329       if (obj_card_num != last_card_num) {
1330         if (start_card_num == -1) {
1331           assert(last_card_num == -1, "Both or neither.");
1332           start_card_num = obj_card_num;
1333         } else {
1334           assert(last_card_num != -1, "Both or neither.");
1335           assert(obj_card_num >= last_card_num, "Inv");
1336           if ((obj_card_num - last_card_num) > 1) {
1337             // Mark the last run, and start a new one.
1338             mark_card_num_range(start_card_num, last_card_num);
1339             start_card_num = obj_card_num;
1340           }
1341         }
1342 #if CARD_BM_TEST_MODE
1343         /*
1344         gclog_or_tty->print_cr("Setting bits from %d/%d.",
1345                                obj_card_num - _bottom_card_num,
1346                                obj_last_card_num - _bottom_card_num);
1347         */
1348         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1349           _card_bm->par_at_put(j - _bottom_card_num, 1);
1350         }
1351 #endif
1352       }
1353       // In any case, we set the last card num.
1354       last_card_num = obj_last_card_num;
1355 
1356       marked_bytes += (size_t)obj_sz * HeapWordSize;
1357       // Find the next marked object after this one.
1358       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1359       _changed = true;
1360     }
1361     // Handle the last range, if any.
1362     if (start_card_num != -1)
1363       mark_card_num_range(start_card_num, last_card_num);
1364     if (_final) {
1365       // Mark the allocated-since-marking portion...
1366       HeapWord* tp = hr->top();
1367       if (nextTop < tp) {
1368         start_card_num =
1369           intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1370         last_card_num =
1371           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1372         mark_card_num_range(start_card_num, last_card_num);
1373         // This definitely means the region has live objects.
1374         set_bit_for_region(hr);
1375       }
1376     }
1377 
1378     hr->add_to_marked_bytes(marked_bytes);
1379     // Update the live region bitmap.
1380     if (marked_bytes > 0) {
1381       set_bit_for_region(hr);
1382     }
1383     hr->set_top_at_conc_mark_count(nextTop);
1384     _tot_live += hr->next_live_bytes();
1385     _tot_used += hr->used();
1386     _words_done = words_done;
1387 
1388     if (!_final) {
1389       ++_regions_done;
1390       if (_regions_done % 10 == 0) {
1391         double end_vtime_sec = os::elapsedVTime();
1392         double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1393         if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1394           jlong sleep_time_ms =
1395             (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1396           os::sleep(Thread::current(), sleep_time_ms, false);
1397           _start_vtime_sec = end_vtime_sec;
1398         }
1399       }
1400     }
1401 
1402     return false;
1403   }
1404 
1405   bool changed() { return _changed;  }
1406   void reset()   { _changed = false; _words_done = 0; }
1407   void no_yield() { _yield = false; }
1408   size_t words_done() { return _words_done; }
1409   size_t tot_live() { return _tot_live; }
1410   size_t tot_used() { return _tot_used; }
1411 };
1412 
1413 
1414 void ConcurrentMark::calcDesiredRegions() {
1415   _region_bm.clear();
1416   _card_bm.clear();
1417   CalcLiveObjectsClosure calccl(false /*final*/,
1418                                 nextMarkBitMap(), this,
1419                                 &_region_bm, &_card_bm);
1420   G1CollectedHeap *g1h = G1CollectedHeap::heap();
1421   g1h->heap_region_iterate(&calccl);
1422 
1423   do {
1424     calccl.reset();
1425     g1h->heap_region_iterate(&calccl);
1426   } while (calccl.changed());
1427 }
1428 
1429 class G1ParFinalCountTask: public AbstractGangTask {
1430 protected:
1431   G1CollectedHeap* _g1h;
1432   CMBitMap* _bm;
1433   size_t _n_workers;
1434   size_t *_live_bytes;
1435   size_t *_used_bytes;
1436   BitMap* _region_bm;
1437   BitMap* _card_bm;
1438 public:
1439   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1440                       BitMap* region_bm, BitMap* card_bm) :
1441     AbstractGangTask("G1 final counting"), _g1h(g1h),
1442     _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
1443   {
1444     if (ParallelGCThreads > 0)
1445       _n_workers = _g1h->workers()->total_workers();
1446     else
1447       _n_workers = 1;
1448     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1449     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1450   }
1451 
1452   ~G1ParFinalCountTask() {
1453     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1454     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1455   }
1456 
1457   void work(int i) {
1458     CalcLiveObjectsClosure calccl(true /*final*/,
1459                                   _bm, _g1h->concurrent_mark(),
1460                                   _region_bm, _card_bm);
1461     calccl.no_yield();
1462     if (G1CollectedHeap::use_parallel_gc_threads()) {
1463       _g1h->heap_region_par_iterate_chunked(&calccl, i,
1464                                             HeapRegion::FinalCountClaimValue);
1465     } else {
1466       _g1h->heap_region_iterate(&calccl);
1467     }
1468     assert(calccl.complete(), "Shouldn't have yielded!");
1469 
1470     assert((size_t) i < _n_workers, "invariant");
1471     _live_bytes[i] = calccl.tot_live();
1472     _used_bytes[i] = calccl.tot_used();
1473   }
1474   size_t live_bytes()  {
1475     size_t live_bytes = 0;
1476     for (size_t i = 0; i < _n_workers; ++i)
1477       live_bytes += _live_bytes[i];
1478     return live_bytes;
1479   }
1480   size_t used_bytes()  {
1481     size_t used_bytes = 0;
1482     for (size_t i = 0; i < _n_workers; ++i)
1483       used_bytes += _used_bytes[i];
1484     return used_bytes;
1485   }
1486 };
1487 
1488 class G1ParNoteEndTask;
1489 
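// Per-worker closure applied to each region after marking: it notes the
// end of marking for the region, accumulates the regions' maximum live
// bytes, frees the region if it turned out to be completely empty, and
// records per-region timing statistics.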
1490 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1491   G1CollectedHeap* _g1;
1492   int _worker_num;
1493   size_t _max_live_bytes;
1494   size_t _regions_claimed;
1495   size_t _freed_bytes;
1496   size_t _cleared_h_regions;
1497   size_t _freed_regions;
1498   UncleanRegionList* _unclean_region_list;
1499   double _claimed_region_time;
1500   double _max_region_time;
1501 
1502 public:
1503   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1504                              UncleanRegionList* list,
1505                              int worker_num);
1506   size_t freed_bytes() { return _freed_bytes; }
1507   size_t cleared_h_regions() { return _cleared_h_regions; }
1508   size_t freed_regions() { return  _freed_regions; }
1509   UncleanRegionList* unclean_region_list() {
1510     return _unclean_region_list;
1511   }
1512 
1513   bool doHeapRegion(HeapRegion *r);
1514 
1515   size_t max_live_bytes() { return _max_live_bytes; }
1516   size_t regions_claimed() { return _regions_claimed; }
1517   double claimed_region_time_sec() { return _claimed_region_time; }
1518   double max_region_time_sec() { return _max_region_time; }
1519 };
1520 
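// Parallel task that applies G1NoteEndOfConcMarkClosure to the heap
// regions and then finishes each worker's free-region work, accumulating
// the freed and maximum-live byte totals under ParGCRareEvent_lock.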
1521 class G1ParNoteEndTask: public AbstractGangTask {
1522   friend class G1NoteEndOfConcMarkClosure;
1523 protected:
1524   G1CollectedHeap* _g1h;
1525   size_t _max_live_bytes;
1526   size_t _freed_bytes;
1527   ConcurrentMark::ParCleanupThreadState** _par_cleanup_thread_state;
1528 public:
1529   G1ParNoteEndTask(G1CollectedHeap* g1h,
1530                    ConcurrentMark::ParCleanupThreadState**
1531                    par_cleanup_thread_state) :
1532     AbstractGangTask("G1 note end"), _g1h(g1h),
1533     _max_live_bytes(0), _freed_bytes(0),
1534     _par_cleanup_thread_state(par_cleanup_thread_state)
1535   {}
1536 
1537   void work(int i) {
1538     double start = os::elapsedTime();
1539     G1NoteEndOfConcMarkClosure g1_note_end(_g1h,
1540                                            &_par_cleanup_thread_state[i]->list,
1541                                            i);
1542     if (G1CollectedHeap::use_parallel_gc_threads()) {
1543       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1544                                             HeapRegion::NoteEndClaimValue);
1545     } else {
1546       _g1h->heap_region_iterate(&g1_note_end);
1547     }
1548     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1549 
1550     // Now finish up freeing the current thread's regions.
1551     _g1h->finish_free_region_work(g1_note_end.freed_bytes(),
1552                                   g1_note_end.cleared_h_regions(),
1553                                   0, NULL);
1554     {
1555       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1556       _max_live_bytes += g1_note_end.max_live_bytes();
1557       _freed_bytes += g1_note_end.freed_bytes();
1558     }
1559     double end = os::elapsedTime();
1560     if (G1PrintParCleanupStats) {
1561       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1562                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1563                           i, start, end, (end-start)*1000.0,
1564                           g1_note_end.regions_claimed(),
1565                           g1_note_end.claimed_region_time_sec()*1000.0,
1566                           g1_note_end.max_region_time_sec()*1000.0);
1567     }
1568   }
1569   size_t max_live_bytes() { return _max_live_bytes; }
1570   size_t freed_bytes() { return _freed_bytes; }
1571 };
1572 
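// Parallel task that scrubs the remembered sets, using the region and card
// liveness bitmaps produced by the counting passes.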
1573 class G1ParScrubRemSetTask: public AbstractGangTask {
1574 protected:
1575   G1RemSet* _g1rs;
1576   BitMap* _region_bm;
1577   BitMap* _card_bm;
1578 public:
1579   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1580                        BitMap* region_bm, BitMap* card_bm) :
1581     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1582     _region_bm(region_bm), _card_bm(card_bm)
1583   {}
1584 
1585   void work(int i) {
1586     if (G1CollectedHeap::use_parallel_gc_threads()) {
1587       _g1rs->scrub_par(_region_bm, _card_bm, i,
1588                        HeapRegion::ScrubRemSetClaimValue);
1589     } else {
1590       _g1rs->scrub(_region_bm, _card_bm);
1591     }
1592   }
1593 
1594 };
1595 
1596 G1NoteEndOfConcMarkClosure::
1597 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1598                            UncleanRegionList* list,
1599                            int worker_num)
1600   : _g1(g1), _worker_num(worker_num),
1601     _max_live_bytes(0), _regions_claimed(0),
1602     _freed_bytes(0), _cleared_h_regions(0), _freed_regions(0),
1603     _claimed_region_time(0.0), _max_region_time(0.0),
1604     _unclean_region_list(list)
1605 {}
1606 
1607 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *r) {
1608   // We use a claim value of zero here because all regions
1609   // were claimed with value 1 in the FinalCount task.
1610   r->reset_gc_time_stamp();
1611   if (!r->continuesHumongous()) {
1612     double start = os::elapsedTime();
1613     _regions_claimed++;
1614     r->note_end_of_marking();
1615     _max_live_bytes += r->max_live_bytes();
1616     _g1->free_region_if_totally_empty_work(r,
1617                                            _freed_bytes,
1618                                            _cleared_h_regions,
1619                                            _freed_regions,
1620                                            _unclean_region_list,
1621                                            true /*par*/);
1622     double region_time = (os::elapsedTime() - start);
1623     _claimed_region_time += region_time;
1624     if (region_time > _max_region_time) _max_region_time = region_time;
1625   }
1626   return false;
1627 }
1628 
1629 void ConcurrentMark::cleanup() {
1630   // world is stopped at this checkpoint
1631   assert(SafepointSynchronize::is_at_safepoint(),
1632          "world should be stopped");
1633   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1634 
1635   // If a full collection has happened, we shouldn't do this.
1636   if (has_aborted()) {
1637     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1638     return;
1639   }
1640 
1641   if (VerifyDuringGC) {
1642     HandleMark hm;  // handle scope
1643     gclog_or_tty->print(" VerifyDuringGC:(before)");
1644     Universe::heap()->prepare_for_verify();
1645     Universe::verify(/* allow dirty  */ true,
1646                      /* silent       */ false,
1647                      /* prev marking */ true);
1648   }
1649 
1650   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1651   g1p->record_concurrent_mark_cleanup_start();
1652 
1653   double start = os::elapsedTime();
1654 
1655   // Do counting once more with the world stopped for good measure.
1656   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1657                                         &_region_bm, &_card_bm);
1658   if (G1CollectedHeap::use_parallel_gc_threads()) {
1659     assert(g1h->check_heap_region_claim_values(
1660                                                HeapRegion::InitialClaimValue),
1661            "sanity check");
1662 
1663     int n_workers = g1h->workers()->total_workers();
1664     g1h->set_par_threads(n_workers);
1665     g1h->workers()->run_task(&g1_par_count_task);
1666     g1h->set_par_threads(0);
1667 
1668     assert(g1h->check_heap_region_claim_values(
1669                                              HeapRegion::FinalCountClaimValue),
1670            "sanity check");
1671   } else {
1672     g1_par_count_task.work(0);
1673   }
1674 
1675   size_t known_garbage_bytes =
1676     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1677 #if 0
1678   gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
1679                          (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
1680                          (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
1681                          (double) known_garbage_bytes / (double) (1024 * 1024));
1682 #endif // 0
1683   g1p->set_known_garbage_bytes(known_garbage_bytes);
1684 
1685   size_t start_used_bytes = g1h->used();
1686   _at_least_one_mark_complete = true;
1687   g1h->set_marking_complete();
1688 
1689   double count_end = os::elapsedTime();
1690   double this_final_counting_time = (count_end - start);
1691   if (G1PrintParCleanupStats) {
1692     gclog_or_tty->print_cr("Cleanup:");
1693     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
1694                            this_final_counting_time*1000.0);
1695   }
1696   _total_counting_time += this_final_counting_time;
1697 
1698   // Install newly created mark bitMap as "prev".
1699   swapMarkBitMaps();
1700 
1701   g1h->reset_gc_time_stamp();
1702 
1703   // Note end of marking in all heap regions.
1704   double note_end_start = os::elapsedTime();
1705   G1ParNoteEndTask g1_par_note_end_task(g1h, _par_cleanup_thread_state);
1706   if (G1CollectedHeap::use_parallel_gc_threads()) {
1707     int n_workers = g1h->workers()->total_workers();
1708     g1h->set_par_threads(n_workers);
1709     g1h->workers()->run_task(&g1_par_note_end_task);
1710     g1h->set_par_threads(0);
1711 
1712     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1713            "sanity check");
1714   } else {
1715     g1_par_note_end_task.work(0);
1716   }
1717   // Tell the mutators that there might be unclean regions coming...
1718   g1h->set_unclean_regions_coming(true);
1719   double note_end_end = os::elapsedTime();
1720   if (G1PrintParCleanupStats) {
1721     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
1722                            (note_end_end - note_end_start)*1000.0);
1723   }
1724 
1725 
1726   // Now scrub the remembered sets. Note: this must happen before the call
1727   // below, since it affects the metric by which we sort the heap regions.
1728   if (G1ScrubRemSets) {
1729     double rs_scrub_start = os::elapsedTime();
1730     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1731     if (G1CollectedHeap::use_parallel_gc_threads()) {
1732       int n_workers = g1h->workers()->total_workers();
1733       g1h->set_par_threads(n_workers);
1734       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1735       g1h->set_par_threads(0);
1736 
1737       assert(g1h->check_heap_region_claim_values(
1738                                             HeapRegion::ScrubRemSetClaimValue),
1739              "sanity check");
1740     } else {
1741       g1_par_scrub_rs_task.work(0);
1742     }
1743 
1744     double rs_scrub_end = os::elapsedTime();
1745     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1746     _total_rs_scrub_time += this_rs_scrub_time;
1747   }
1748 
1749   // this will also free any regions totally full of garbage objects,
1750   // and sort the regions.
1751   g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1752                         g1_par_note_end_task.freed_bytes(),
1753                         g1_par_note_end_task.max_live_bytes());
1754 
1755   // Statistics.
1756   double end = os::elapsedTime();
1757   _cleanup_times.add((end - start) * 1000.0);
1758 
1759   // G1CollectedHeap::heap()->print();
1760   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1761   // G1CollectedHeap::heap()->get_gc_time_stamp());
1762 
1763   if (PrintGC || PrintGCDetails) {
1764     g1h->print_size_transition(gclog_or_tty,
1765                                start_used_bytes,
1766                                g1h->used(),
1767                                g1h->capacity());
1768   }
1769 
1770   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1771   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1772 
1773   // We need to make this be a "collection" so any collection pause that
1774   // races with it goes around and waits for completeCleanup to finish.
1775   g1h->increment_total_collections();
1776 
1777   if (VerifyDuringGC) {
1778     HandleMark hm;  // handle scope
1779     gclog_or_tty->print(" VerifyDuringGC:(after)");
1780     Universe::heap()->prepare_for_verify();
1781     Universe::verify(/* allow dirty  */ true,
1782                      /* silent       */ false,
1783                      /* prev marking */ true);
1784   }
1785 }
1786 
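// Complete the cleanup work left over from the cleanup pause: walk each
// worker's list of unclean regions, clear their remembered sets and hand
// the regions back to the heap's unclean list / free region accounting.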
1787 void ConcurrentMark::completeCleanup() {
1788   // A full collection intervened.
1789   if (has_aborted()) return;
1790 
1791   int first = 0;
1792   int last = (int)MAX2(ParallelGCThreads, (size_t)1);
1793   for (int t = 0; t < last; t++) {
1794     UncleanRegionList* list = &_par_cleanup_thread_state[t]->list;
1795     assert(list->well_formed(), "Inv");
1796     HeapRegion* hd = list->hd();
1797     while (hd != NULL) {
1798       // Now finish up the other stuff.
1799       hd->rem_set()->clear();
1800       HeapRegion* next_hd = hd->next_from_unclean_list();
1801       (void)list->pop();
1802       assert(list->hd() == next_hd, "how not?");
1803       _g1h->put_region_on_unclean_list(hd);
1804       if (!hd->isHumongous()) {
1805         // Increment the _free_regions count by 1.
1806         _g1h->finish_free_region_work(0, 0, 1, NULL);
1807       }
1808       hd = list->hd();
1809       assert(hd == next_hd, "how not?");
1810     }
1811   }
1812 }
1813 
1814 
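// Liveness predicate used during reference processing: an object is
// considered live if it is non-NULL and either lies outside the G1
// reserved space or is not "ill" (i.e. not known to be dead).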
1815 class G1CMIsAliveClosure: public BoolObjectClosure {
1816   G1CollectedHeap* _g1;
1817  public:
1818   G1CMIsAliveClosure(G1CollectedHeap* g1) :
1819     _g1(g1)
1820   {}
1821 
1822   void do_object(oop obj) {
1823     assert(false, "not to be invoked");
1824   }
1825   bool do_object_b(oop obj) {
1826     HeapWord* addr = (HeapWord*)obj;
1827     return addr != NULL &&
1828            (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1829   }
1830 };
1831 
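// Keep-alive closure used during reference processing: referents that are
// in the heap and not yet known to be live are marked on the given bitmap
// and pushed on the global mark stack so they get traced.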
1832 class G1CMKeepAliveClosure: public OopClosure {
1833   G1CollectedHeap* _g1;
1834   ConcurrentMark*  _cm;
1835   CMBitMap*        _bitMap;
1836  public:
1837   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1838                        CMBitMap* bitMap) :
1839     _g1(g1), _cm(cm),
1840     _bitMap(bitMap) {}
1841 
1842   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1843   virtual void do_oop(      oop* p) { do_oop_work(p); }
1844 
1845   template <class T> void do_oop_work(T* p) {
1846     oop thisOop = oopDesc::load_decode_heap_oop(p);
1847     HeapWord* addr = (HeapWord*)thisOop;
1848     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(thisOop)) {
1849       _bitMap->mark(addr);
1850       _cm->mark_stack_push(thisOop);
1851     }
1852   }
1853 };
1854 
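// Drains the global mark stack during reference processing, applying the
// keep-alive closure to the objects popped from it.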
1855 class G1CMDrainMarkingStackClosure: public VoidClosure {
1856   CMMarkStack*                  _markStack;
1857   CMBitMap*                     _bitMap;
1858   G1CMKeepAliveClosure*         _oopClosure;
1859  public:
1860   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1861                                G1CMKeepAliveClosure* oopClosure) :
1862     _bitMap(bitMap),
1863     _markStack(markStack),
1864     _oopClosure(oopClosure)
1865   {}
1866 
1867   void do_void() {
1868     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1869   }
1870 };
1871 
1872 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
1873   ResourceMark rm;
1874   HandleMark   hm;
1875   G1CollectedHeap* g1h   = G1CollectedHeap::heap();
1876   ReferenceProcessor* rp = g1h->ref_processor();
1877 
1878   // Process weak references.
1879   rp->setup_policy(clear_all_soft_refs);
1880   assert(_markStack.isEmpty(), "mark stack should be empty");
1881 
1882   G1CMIsAliveClosure   g1IsAliveClosure  (g1h);
1883   G1CMKeepAliveClosure g1KeepAliveClosure(g1h, this, nextMarkBitMap());
1884   G1CMDrainMarkingStackClosure
1885     g1DrainMarkingStackClosure(nextMarkBitMap(), &_markStack,
1886                                &g1KeepAliveClosure);
1887 
1888   // XXXYYY  Also: copy the parallel ref processing code from CMS.
1889   rp->process_discovered_references(&g1IsAliveClosure,
1890                                     &g1KeepAliveClosure,
1891                                     &g1DrainMarkingStackClosure,
1892                                     NULL);
1893   assert(_markStack.overflow() || _markStack.isEmpty(),
1894          "mark stack should be empty (unless it overflowed)");
1895   if (_markStack.overflow()) {
1896     set_has_overflown();
1897   }
1898 
1899   rp->enqueue_discovered_references();
1900   rp->verify_no_references_recorded();
1901   assert(!rp->discovery_enabled(), "should have been disabled");
1902 
1903   // Now clean up stale oops in SymbolTable and StringTable
1904   SymbolTable::unlink(&g1IsAliveClosure);
1905   StringTable::unlink(&g1IsAliveClosure);
1906 }
1907 
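// The "next" bitmap now holds the results of the completed marking, so it
// becomes the "prev" bitmap; the old "prev" bitmap will be reused as the
// "next" bitmap for the following cycle.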
1908 void ConcurrentMark::swapMarkBitMaps() {
1909   CMBitMapRO* temp = _prevMarkBitMap;
1910   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
1911   _nextMarkBitMap  = (CMBitMap*)  temp;
1912 }
1913 
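// Parallel remark task: each active worker repeatedly calls its CMTask's
// do_marking_step() until the step completes or the global mark stack
// overflows (in which case remark is abandoned and marking restarts).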
1914 class CMRemarkTask: public AbstractGangTask {
1915 private:
1916   ConcurrentMark *_cm;
1917 
1918 public:
1919   void work(int worker_i) {
1920     // Since all available tasks are actually started, we should
1921     // only proceed if we're supposed to be active.
1922     if ((size_t)worker_i < _cm->active_tasks()) {
1923       CMTask* task = _cm->task(worker_i);
1924       task->record_start_time();
1925       do {
1926         task->do_marking_step(1000000000.0 /* something very large */);
1927       } while (task->has_aborted() && !_cm->has_overflown());
1928       // If we overflow, then we do not want to restart. We instead
1929       // want to abort remark and do concurrent marking again.
1930       task->record_end_time();
1931     }
1932   }
1933 
1934   CMRemarkTask(ConcurrentMark* cm) :
1935     AbstractGangTask("Par Remark"), _cm(cm) { }
1936 };
1937 
1938 void ConcurrentMark::checkpointRootsFinalWork() {
1939   ResourceMark rm;
1940   HandleMark   hm;
1941   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1942 
1943   g1h->ensure_parsability(false);
1944 
1945   if (G1CollectedHeap::use_parallel_gc_threads()) {
1946     G1CollectedHeap::StrongRootsScope srs(g1h);
1947     // this is remark, so we'll use up all available threads
1948     int active_workers = ParallelGCThreads;
1949     set_phase(active_workers, false);
1950 
1951     CMRemarkTask remarkTask(this);
1952     // We will start all available threads, even if we decide that the
1953     // active_workers will be fewer. The extra ones will just bail out
1954     // immediately.
1955     int n_workers = g1h->workers()->total_workers();
1956     g1h->set_par_threads(n_workers);
1957     g1h->workers()->run_task(&remarkTask);
1958     g1h->set_par_threads(0);
1959   } else {
1960     G1CollectedHeap::StrongRootsScope srs(g1h);
1961     // this is remark, so we'll use up all available threads
1962     int active_workers = 1;
1963     set_phase(active_workers, false);
1964 
1965     CMRemarkTask remarkTask(this);
1966     // We will start all available threads, even if we decide that the
1967     // active_workers will be fewer. The extra ones will just bail out
1968     // immediately.
1969     remarkTask.work(0);
1970   }
1971   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1972   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
1973 
1974   print_stats();
1975 
1976   if (!restart_for_overflow())
1977     set_non_marking_state();
1978 
1979 #if VERIFY_OBJS_PROCESSED
1980   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
1981     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
1982                            _scan_obj_cl.objs_processed,
1983                            ThreadLocalObjQueue::objs_enqueued);
1984     guarantee(_scan_obj_cl.objs_processed ==
1985               ThreadLocalObjQueue::objs_enqueued,
1986               "Different number of objs processed and enqueued.");
1987   }
1988 #endif
1989 }
1990 
1991 #ifndef PRODUCT
1992 
1993 class PrintReachableOopClosure: public OopClosure {
1994 private:
1995   G1CollectedHeap* _g1h;
1996   CMBitMapRO*      _bitmap;
1997   outputStream*    _out;
1998   bool             _use_prev_marking;
1999   bool             _all;
2000 
2001 public:
2002   PrintReachableOopClosure(CMBitMapRO*   bitmap,
2003                            outputStream* out,
2004                            bool          use_prev_marking,
2005                            bool          all) :
2006     _g1h(G1CollectedHeap::heap()),
2007     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
2008 
2009   void do_oop(narrowOop* p) { do_oop_work(p); }
2010   void do_oop(      oop* p) { do_oop_work(p); }
2011 
2012   template <class T> void do_oop_work(T* p) {
2013     oop         obj = oopDesc::load_decode_heap_oop(p);
2014     const char* str = NULL;
2015     const char* str2 = "";
2016 
2017     if (obj == NULL) {
2018       str = "";
2019     } else if (!_g1h->is_in_g1_reserved(obj)) {
2020       str = " O";
2021     } else {
2022       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2023       guarantee(hr != NULL, "invariant");
2024       bool over_tams = false;
2025       if (_use_prev_marking) {
2026         over_tams = hr->obj_allocated_since_prev_marking(obj);
2027       } else {
2028         over_tams = hr->obj_allocated_since_next_marking(obj);
2029       }
2030       bool marked = _bitmap->isMarked((HeapWord*) obj);
2031 
2032       if (over_tams) {
2033         str = " >";
2034         if (marked) {
2035           str2 = " AND MARKED";
2036         }
2037       } else if (marked) {
2038         str = " M";
2039       } else {
2040         str = " NOT";
2041       }
2042     }
2043 
2044     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2045                    p, (void*) obj, str, str2);
2046   }
2047 };
2048 
2049 class PrintReachableObjectClosure : public ObjectClosure {
2050 private:
2051   CMBitMapRO*   _bitmap;
2052   outputStream* _out;
2053   bool          _use_prev_marking;
2054   bool          _all;
2055   HeapRegion*   _hr;
2056 
2057 public:
2058   PrintReachableObjectClosure(CMBitMapRO*   bitmap,
2059                               outputStream* out,
2060                               bool          use_prev_marking,
2061                               bool          all,
2062                               HeapRegion*   hr) :
2063     _bitmap(bitmap), _out(out),
2064     _use_prev_marking(use_prev_marking), _all(all), _hr(hr) { }
2065 
2066   void do_object(oop o) {
2067     bool over_tams;
2068     if (_use_prev_marking) {
2069       over_tams = _hr->obj_allocated_since_prev_marking(o);
2070     } else {
2071       over_tams = _hr->obj_allocated_since_next_marking(o);
2072     }
2073     bool marked = _bitmap->isMarked((HeapWord*) o);
2074     bool print_it = _all || over_tams || marked;
2075 
2076     if (print_it) {
2077       _out->print_cr(" "PTR_FORMAT"%s",
2078                      o, (over_tams) ? " >" : (marked) ? " M" : "");
2079       PrintReachableOopClosure oopCl(_bitmap, _out, _use_prev_marking, _all);
2080       o->oop_iterate(&oopCl);
2081     }
2082   }
2083 };
2084 
2085 class PrintReachableRegionClosure : public HeapRegionClosure {
2086 private:
2087   CMBitMapRO*   _bitmap;
2088   outputStream* _out;
2089   bool          _use_prev_marking;
2090   bool          _all;
2091 
2092 public:
2093   bool doHeapRegion(HeapRegion* hr) {
2094     HeapWord* b = hr->bottom();
2095     HeapWord* e = hr->end();
2096     HeapWord* t = hr->top();
2097     HeapWord* p = NULL;
2098     if (_use_prev_marking) {
2099       p = hr->prev_top_at_mark_start();
2100     } else {
2101       p = hr->next_top_at_mark_start();
2102     }
2103     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2104                    "TAMS: "PTR_FORMAT, b, e, t, p);
2105     _out->cr();
2106 
2107     HeapWord* from = b;
2108     HeapWord* to   = t;
2109 
2110     if (to > from) {
2111       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2112       _out->cr();
2113       PrintReachableObjectClosure ocl(_bitmap, _out,
2114                                       _use_prev_marking, _all, hr);
2115       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2116       _out->cr();
2117     }
2118 
2119     return false;
2120   }
2121 
2122   PrintReachableRegionClosure(CMBitMapRO*   bitmap,
2123                               outputStream* out,
2124                               bool          use_prev_marking,
2125                               bool          all) :
2126     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
2127 };
2128 
2129 void ConcurrentMark::print_reachable(const char* str,
2130                                      bool use_prev_marking,
2131                                      bool all) {
2132   gclog_or_tty->cr();
2133   gclog_or_tty->print_cr("== Doing heap dump... ");
2134 
2135   if (G1PrintReachableBaseFile == NULL) {
2136     gclog_or_tty->print_cr("  #### error: no base file defined");
2137     return;
2138   }
2139 
2140   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2141       (JVM_MAXPATHLEN - 1)) {
2142     gclog_or_tty->print_cr("  #### error: file name too long");
2143     return;
2144   }
2145 
2146   char file_name[JVM_MAXPATHLEN];
2147   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2148   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2149 
2150   fileStream fout(file_name);
2151   if (!fout.is_open()) {
2152     gclog_or_tty->print_cr("  #### error: could not open file");
2153     return;
2154   }
2155 
2156   outputStream* out = &fout;
2157 
2158   CMBitMapRO* bitmap = NULL;
2159   if (use_prev_marking) {
2160     bitmap = _prevMarkBitMap;
2161   } else {
2162     bitmap = _nextMarkBitMap;
2163   }
2164 
2165   out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS");
2166   out->cr();
2167 
2168   out->print_cr("--- ITERATING OVER REGIONS");
2169   out->cr();
2170   PrintReachableRegionClosure rcl(bitmap, out, use_prev_marking, all);
2171   _g1h->heap_region_iterate(&rcl);
2172   out->cr();
2173 
2174   gclog_or_tty->print_cr("  done");
2175   gclog_or_tty->flush();
2176 }
2177 
2178 #endif // PRODUCT
2179 
2180 // This note is for drainAllSATBBuffers and the code in between.
2181 // In the future we could reuse a task to do this work during an
2182 // evacuation pause (since now tasks are not active and can be claimed
2183 // during an evacuation pause). This was a late change to the code and
2184 // is currently not being taken advantage of.
2185 
2186 class CMGlobalObjectClosure : public ObjectClosure {
2187 private:
2188   ConcurrentMark* _cm;
2189 
2190 public:
2191   void do_object(oop obj) {
2192     _cm->deal_with_reference(obj);
2193   }
2194 
2195   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2196 };
2197 
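// Marks the given object on the "next" bitmap if it is in the heap and not
// already known to be live; if it also lies below the global finger it is
// pushed on the global mark stack so that it will be scanned.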
2198 void ConcurrentMark::deal_with_reference(oop obj) {
2199   if (verbose_high())
2200     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2201                            (void*) obj);
2202 
2203 
2204   HeapWord* objAddr = (HeapWord*) obj;
2205   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2206   if (_g1h->is_in_g1_reserved(objAddr)) {
2207     assert(obj != NULL, "is_in_g1_reserved should ensure this");
2208     HeapRegion* hr = _g1h->heap_region_containing(obj);
2209     if (_g1h->is_obj_ill(obj, hr)) {
2210       if (verbose_high())
2211         gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2212                                "marked", (void*) obj);
2213 
2214       // we need to mark it first
2215       if (_nextMarkBitMap->parMark(objAddr)) {
2216         // No OrderAccess::store_load() is needed. It is implicit in the
2217         // CAS done in parMark(objAddr) above
2218         HeapWord* finger = _finger;
2219         if (objAddr < finger) {
2220           if (verbose_high())
2221             gclog_or_tty->print_cr("[global] below the global finger "
2222                                    "("PTR_FORMAT"), pushing it", finger);
2223           if (!mark_stack_push(obj)) {
2224             if (verbose_low())
2225               gclog_or_tty->print_cr("[global] global stack overflow during "
2226                                      "deal_with_reference");
2227           }
2228         }
2229       }
2230     }
2231   }
2232 }
2233 
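// Applies the global closure to all completed SATB buffers and then to the
// active per-thread buffers.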
2234 void ConcurrentMark::drainAllSATBBuffers() {
2235   CMGlobalObjectClosure oc(this);
2236   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2237   satb_mq_set.set_closure(&oc);
2238 
2239   while (satb_mq_set.apply_closure_to_completed_buffer()) {
2240     if (verbose_medium())
2241       gclog_or_tty->print_cr("[global] processed an SATB buffer");
2242   }
2243 
2244   // no need to check whether we should do this, as this is only
2245   // called during an evacuation pause
2246   satb_mq_set.iterate_closure_all_threads();
2247 
2248   satb_mq_set.set_closure(NULL);
2249   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2250 }
2251 
2252 void ConcurrentMark::markPrev(oop p) {
2253   // Note we are overriding the read-only view of the prev map here, via
2254   // the cast.
2255   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2256 }
2257 
2258 void ConcurrentMark::clear(oop p) {
2259   assert(p != NULL && p->is_oop(), "expected an oop");
2260   HeapWord* addr = (HeapWord*)p;
2261   assert(addr >= _nextMarkBitMap->startWord() &&
2262          addr < _nextMarkBitMap->endWord(), "in a region");
2263 
2264   _nextMarkBitMap->clear(addr);
2265 }
2266 
2267 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2268   // Note we are overriding the read-only view of the prev map here, via
2269   // the cast.
2270   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2271   _nextMarkBitMap->clearRange(mr);
2272 }
2273 
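// Claims a region for a marking task by CASing the global finger forward
// to the end of the region it currently points into. Returns the region if
// it has something to scan; returns NULL if the claimed region was empty
// (the caller should retry) or the finger has reached the end of the heap.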
2274 HeapRegion*
2275 ConcurrentMark::claim_region(int task_num) {
2276   // "checkpoint" the finger
2277   HeapWord* finger = _finger;
2278 
2279   // _heap_end will not change underneath our feet; it only changes at
2280   // yield points.
2281   while (finger < _heap_end) {
2282     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2283 
2284     // is the gap between reading the finger and doing the CAS too long?
2285 
2286     HeapRegion* curr_region   = _g1h->heap_region_containing(finger);
2287     HeapWord*   bottom        = curr_region->bottom();
2288     HeapWord*   end           = curr_region->end();
2289     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2290 
2291     if (verbose_low())
2292       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2293                              "["PTR_FORMAT", "PTR_FORMAT"), "
2294                              "limit = "PTR_FORMAT,
2295                              task_num, curr_region, bottom, end, limit);
2296 
2297     HeapWord* res =
2298       (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2299     if (res == finger) {
2300       // we succeeded
2301 
2302       // notice that _finger == end cannot be guaranteed here since
2303       // someone else might have moved the finger even further
2304       assert(_finger >= end, "the finger should have moved forward");
2305 
2306       if (verbose_low())
2307         gclog_or_tty->print_cr("[%d] we were successful with region = "
2308                                PTR_FORMAT, task_num, curr_region);
2309 
2310       if (limit > bottom) {
2311         if (verbose_low())
2312           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2313                                  "returning it ", task_num, curr_region);
2314         return curr_region;
2315       } else {
2316         assert(limit == bottom,
2317                "the region limit should be at bottom");
2318         if (verbose_low())
2319           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2320                                  "returning NULL", task_num, curr_region);
2321         // we return NULL and the caller should try calling
2322         // claim_region() again.
2323         return NULL;
2324       }
2325     } else {
2326       assert(_finger > finger, "the finger should have moved forward");
2327       if (verbose_low())
2328         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2329                                "global finger = "PTR_FORMAT", "
2330                                "our finger = "PTR_FORMAT,
2331                                task_num, _finger, finger);
2332 
2333       // read it again
2334       finger = _finger;
2335     }
2336   }
2337 
2338   return NULL;
2339 }
2340 
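// Drops any per-task aborted regions that lie in the collection set (their
// objects are about to be moved); returns true if any were dropped.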
2341 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
2342   bool result = false;
2343   for (int i = 0; i < (int)_max_task_num; ++i) {
2344     CMTask* the_task = _tasks[i];
2345     MemRegion mr = the_task->aborted_region();
2346     if (mr.start() != NULL) {
2347       assert(mr.end() != NULL, "invariant");
2348       assert(mr.word_size() > 0, "invariant");
2349       HeapRegion* hr = _g1h->heap_region_containing(mr.start());
2350       assert(hr != NULL, "invariant");
2351       if (hr->in_collection_set()) {
2352         // The region points into the collection set
2353         the_task->set_aborted_region(MemRegion());
2354         result = true;
2355       }
2356     }
2357   }
2358   return result;
2359 }
2360 
2361 bool ConcurrentMark::has_aborted_regions() {
2362   for (int i = 0; i < (int)_max_task_num; ++i) {
2363     CMTask* the_task = _tasks[i];
2364     MemRegion mr = the_task->aborted_region();
2365     if (mr.start() != NULL) {
2366       assert(mr.end() != NULL, "invariant");
2367       assert(mr.word_size() > 0, "invariant");
2368       return true;
2369     }
2370   }
2371   return false;
2372 }
2373 
2374 void ConcurrentMark::oops_do(OopClosure* cl) {
2375   if (_markStack.size() > 0 && verbose_low())
2376     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2377                            "size = %d", _markStack.size());
2378   // we first iterate over the contents of the mark stack...
2379   _markStack.oops_do(cl);
2380 
2381   for (int i = 0; i < (int)_max_task_num; ++i) {
2382     OopTaskQueue* queue = _task_queues->queue((int)i);
2383 
2384     if (queue->size() > 0 && verbose_low())
2385       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2386                              "size = %d", i, queue->size());
2387 
2388     // ...then over the contents of the all the task queues.
2389     queue->oops_do(cl);
2390   }
2391 
2392   // Invalidate any entries in the region stack that point into
2393   // the collection set.
2394   if (_regionStack.invalidate_entries_into_cset()) {
2395     // otherwise, any gray objects copied during the evacuation pause
2396     // might not be visited.
2397     assert(_should_gray_objects, "invariant");
2398   }
2399 
2400   // Invalidate any aborted regions, recorded in the individual CM
2401   // tasks, that point into the collection set.
2402   if (invalidate_aborted_regions_in_cset()) {
2403     // otherwise, any gray objects copied during the evacuation pause
2404     // might not be visited.
2405     assert(_should_gray_objects, "invariant");
2406   }
2407 
2408 }
2409 
2410 void ConcurrentMark::clear_marking_state() {
2411   _markStack.setEmpty();
2412   _markStack.clear_overflow();
2413   _regionStack.setEmpty();
2414   _regionStack.clear_overflow();
2415   clear_has_overflown();
2416   _finger = _heap_start;
2417 
2418   for (int i = 0; i < (int)_max_task_num; ++i) {
2419     OopTaskQueue* queue = _task_queues->queue(i);
2420     queue->set_empty();
2421   }
2422 }
2423 
2424 void ConcurrentMark::print_stats() {
2425   if (verbose_stats()) {
2426     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2427     for (size_t i = 0; i < _active_tasks; ++i) {
2428       _tasks[i]->print_stats();
2429       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2430     }
2431   }
2432 }
2433 
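// Marks objects reachable from the collection set using a small explicit
// mark stack; object arrays are processed in chunks of _array_increment
// elements so that large arrays do not exhaust the stack. References that
// point outside the collection set are handed to
// ConcurrentMark::deal_with_reference() to be grayed.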
2434 class CSMarkOopClosure: public OopClosure {
2435   friend class CSMarkBitMapClosure;
2436 
2437   G1CollectedHeap* _g1h;
2438   CMBitMap*        _bm;
2439   ConcurrentMark*  _cm;
2440   oop*             _ms;
2441   jint*            _array_ind_stack;
2442   int              _ms_size;
2443   int              _ms_ind;
2444   int              _array_increment;
2445 
2446   bool push(oop obj, int arr_ind = 0) {
2447     if (_ms_ind == _ms_size) {
2448       gclog_or_tty->print_cr("Mark stack is full.");
2449       return false;
2450     }
2451     _ms[_ms_ind] = obj;
2452     if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
2453     _ms_ind++;
2454     return true;
2455   }
2456 
2457   oop pop() {
2458     if (_ms_ind == 0) return NULL;
2459     else {
2460       _ms_ind--;
2461       return _ms[_ms_ind];
2462     }
2463   }
2464 
2465   template <class T> bool drain() {
2466     while (_ms_ind > 0) {
2467       oop obj = pop();
2468       assert(obj != NULL, "Since index was non-zero.");
2469       if (obj->is_objArray()) {
2470         jint arr_ind = _array_ind_stack[_ms_ind];
2471         objArrayOop aobj = objArrayOop(obj);
2472         jint len = aobj->length();
2473         jint next_arr_ind = arr_ind + _array_increment;
2474         if (next_arr_ind < len) {
2475           push(obj, next_arr_ind);
2476         }
2477         // Now process this portion of this one.
2478         int lim = MIN2(next_arr_ind, len);
2479         for (int j = arr_ind; j < lim; j++) {
2480           do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2481         }
2482 
2483       } else {
2484         obj->oop_iterate(this);
2485       }
2486       if (abort()) return false;
2487     }
2488     return true;
2489   }
2490 
2491 public:
2492   CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2493     _g1h(G1CollectedHeap::heap()),
2494     _cm(cm),
2495     _bm(cm->nextMarkBitMap()),
2496     _ms_size(ms_size), _ms_ind(0),
2497     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2498     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2499     _array_increment(MAX2(ms_size/8, 16))
2500   {}
2501 
2502   ~CSMarkOopClosure() {
2503     FREE_C_HEAP_ARRAY(oop, _ms);
2504     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2505   }
2506 
2507   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2508   virtual void do_oop(      oop* p) { do_oop_work(p); }
2509 
2510   template <class T> void do_oop_work(T* p) {
2511     T heap_oop = oopDesc::load_heap_oop(p);
2512     if (oopDesc::is_null(heap_oop)) return;
2513     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2514     if (obj->is_forwarded()) {
2515       // If the object has already been forwarded, we have to make sure
2516       // that it's marked.  So follow the forwarding pointer.  Note that
2517       // this does the right thing for self-forwarding pointers in the
2518       // evacuation failure case.
2519       obj = obj->forwardee();
2520     }
2521     HeapRegion* hr = _g1h->heap_region_containing(obj);
2522     if (hr != NULL) {
2523       if (hr->in_collection_set()) {
2524         if (_g1h->is_obj_ill(obj)) {
2525           _bm->mark((HeapWord*)obj);
2526           if (!push(obj)) {
2527             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2528             set_abort();
2529           }
2530         }
2531       } else {
2532         // Outside the collection set; we need to gray it
2533         _cm->deal_with_reference(obj);
2534       }
2535     }
2536   }
2537 };
2538 
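// Applied to each marked bit of a collection-set region: objects that have
// not been forwarded are pushed onto the CSMarkOopClosure's stack, which is
// then drained; returns false (aborting the iteration) if the stack fills.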
2539 class CSMarkBitMapClosure: public BitMapClosure {
2540   G1CollectedHeap* _g1h;
2541   CMBitMap*        _bitMap;
2542   ConcurrentMark*  _cm;
2543   CSMarkOopClosure _oop_cl;
2544 public:
2545   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2546     _g1h(G1CollectedHeap::heap()),
2547     _bitMap(cm->nextMarkBitMap()),
2548     _oop_cl(cm, ms_size)
2549   {}
2550 
2551   ~CSMarkBitMapClosure() {}
2552 
2553   bool do_bit(size_t offset) {
2554     // convert offset into a HeapWord*
2555     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2556     assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
2557            "address out of range");
2558     assert(_bitMap->isMarked(addr), "tautology");
2559     oop obj = oop(addr);
2560     if (!obj->is_forwarded()) {
2561       if (!_oop_cl.push(obj)) return false;
2562       if (UseCompressedOops) {
2563         if (!_oop_cl.drain<narrowOop>()) return false;
2564       } else {
2565         if (!_oop_cl.drain<oop>()) return false;
2566       }
2567     }
2568     // Otherwise...
2569     return true;
2570   }
2571 };
2572 
2573 
2574 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2575   CMBitMap* _bm;
2576   CSMarkBitMapClosure _bit_cl;
2577   enum SomePrivateConstants {
2578     MSSize = 1000
2579   };
2580   bool _completed;
2581 public:
2582   CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2583     _bm(cm->nextMarkBitMap()),
2584     _bit_cl(cm, MSSize),
2585     _completed(true)
2586   {}
2587 
2588   ~CompleteMarkingInCSHRClosure() {}
2589 
2590   bool doHeapRegion(HeapRegion* r) {
2591     if (!r->evacuation_failed()) {
2592       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2593       if (!mr.is_empty()) {
2594         if (!_bm->iterate(&_bit_cl, mr)) {
2595           _completed = false;
2596           return true;
2597         }
2598       }
2599     }
2600     return false;
2601   }
2602 
2603   bool completed() { return _completed; }
2604 };
2605 
2606 class ClearMarksInHRClosure: public HeapRegionClosure {
2607   CMBitMap* _bm;
2608 public:
2609   ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2610 
2611   bool doHeapRegion(HeapRegion* r) {
2612     if (!r->used_region().is_empty() && !r->evacuation_failed()) {
2613       MemRegion usedMR = r->used_region();
2614       _bm->clearRange(usedMR);
2615     }
2616     return false;
2617   }
2618 };
2619 
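// Finishes marking the live objects within the collection-set regions. The
// bitmap iteration is retried until it completes without running out of
// mark-stack space, after which the "next" bitmap is cleared for those
// regions.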
2620 void ConcurrentMark::complete_marking_in_collection_set() {
2621   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
2622 
2623   if (!g1h->mark_in_progress()) {
2624     g1h->g1_policy()->record_mark_closure_time(0.0);
2625     return;
2626   }
2627 
2628   int i = 1;
2629   double start = os::elapsedTime();
2630   while (true) {
2631     i++;
2632     CompleteMarkingInCSHRClosure cmplt(this);
2633     g1h->collection_set_iterate(&cmplt);
2634     if (cmplt.completed()) break;
2635   }
2636   double end_time = os::elapsedTime();
2637   double elapsed_time_ms = (end_time - start) * 1000.0;
2638   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2639 
2640   ClearMarksInHRClosure clr(nextMarkBitMap());
2641   g1h->collection_set_iterate(&clr);
2642 }
2643 
2644 // The next two methods deal with the following optimisation. Some
2645 // objects are gray by being marked and located above the finger. If
2646 // they are copied, during an evacuation pause, below the finger then
2647 // they need to be pushed on the stack. The observation is that, if
2648 // there are no regions in the collection set located above the
2649 // finger, then the above cannot happen, hence we do not need to
2650 // explicitly gray any objects when copying them to below the
2651 // finger. The global stack will be scanned to ensure that, if it
2652 // points to objects being copied, it will update their
2653 // location. There is a tricky situation with the gray objects in
2654 // the region stack that are being copied, however. See the comment in
2655 // newCSet().
2656 
2657 void ConcurrentMark::newCSet() {
2658   if (!concurrent_marking_in_progress())
2659     // nothing to do if marking is not in progress
2660     return;
2661 
2662   // find what the lowest finger is among the global and local fingers
2663   _min_finger = _finger;
2664   for (int i = 0; i < (int)_max_task_num; ++i) {
2665     CMTask* task = _tasks[i];
2666     HeapWord* task_finger = task->finger();
2667     if (task_finger != NULL && task_finger < _min_finger)
2668       _min_finger = task_finger;
2669   }
2670 
2671   _should_gray_objects = false;
2672 
2673   // This fixes a very subtle and frustrating bug. It might be the case
2674   // that, during an evacuation pause, heap regions that contain
2675   // objects that are gray (by being in regions contained in the
2676   // region stack) are included in the collection set. Since such gray
2677   // objects will be moved, and because it's not easy to redirect
2678   // region stack entries to point to a new location (because objects
2679   // in one region might be scattered to multiple regions after they
2680   // are copied), one option is to ensure that all marked objects
2681   // copied during a pause are pushed on the stack. Notice, however,
2682   // that this problem can only happen when the region stack is not
2683   // empty during an evacuation pause. So, we make the fix a bit less
2684   // conservative and ensure that regions are pushed on the stack,
2685   // irrespective of whether all collection set regions are below the
2686   // finger, if the region stack is not empty. This is expected to be
2687   // a rare case, so I don't think it's necessary to be smarter about it.
2688   if (!region_stack_empty() || has_aborted_regions())
2689     _should_gray_objects = true;
2690 }
2691 
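// Called as each region is added to the collection set: if the region
// extends above the lowest finger, objects copied out of it may be gray,
// so they will need to be grayed explicitly.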
2692 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
2693   if (!concurrent_marking_in_progress())
2694     return;
2695 
2696   HeapWord* region_end = hr->end();
2697   if (region_end > _min_finger)
2698     _should_gray_objects = true;
2699 }
2700 
2701 // abandon current marking iteration due to a Full GC
2702 void ConcurrentMark::abort() {
2703   // Clear all marks to force marking thread to do nothing
2704   _nextMarkBitMap->clearAll();
2705   // Empty mark stack
2706   clear_marking_state();
2707   for (int i = 0; i < (int)_max_task_num; ++i) {
2708     _tasks[i]->clear_region_fields();
2709     _tasks[i]->clear_aborted_region();
2710   }
2711   _has_aborted = true;
2712 
2713   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2714   satb_mq_set.abandon_partial_marking();
2715   // This can be called either during or outside marking, we'll read
2716   // the expected_active value from the SATB queue set.
2717   satb_mq_set.set_active_all_threads(
2718                                  false, /* new active value */
2719                                  satb_mq_set.is_active() /* expected_active */);
2720 }
2721 
2722 static void print_ms_time_info(const char* prefix, const char* name,
2723                                NumberSeq& ns) {
2724   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
2725                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
2726   if (ns.num() > 0) {
2727     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
2728                            prefix, ns.sd(), ns.maximum());
2729   }
2730 }
2731 
2732 void ConcurrentMark::print_summary_info() {
2733   gclog_or_tty->print_cr(" Concurrent marking:");
2734   print_ms_time_info("  ", "init marks", _init_times);
2735   print_ms_time_info("  ", "remarks", _remark_times);
2736   {
2737     print_ms_time_info("     ", "final marks", _remark_mark_times);
2738     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
2739 
2740   }
2741   print_ms_time_info("  ", "cleanups", _cleanup_times);
2742   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
2743                          _total_counting_time,
2744                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
2745                           (double)_cleanup_times.num()
2746                          : 0.0));
2747   if (G1ScrubRemSets) {
2748     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
2749                            _total_rs_scrub_time,
2750                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
2751                             (double)_cleanup_times.num()
2752                            : 0.0));
2753   }
2754   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
2755                          (_init_times.sum() + _remark_times.sum() +
2756                           _cleanup_times.sum())/1000.0);
2757   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
2758                 "(%8.2f s marking, %8.2f s counting).",
2759                 cmThread()->vtime_accum(),
2760                 cmThread()->vtime_mark_accum(),
2761                 cmThread()->vtime_count_accum());
2762 }
2763 
2764 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
2765   _parallel_workers->print_worker_threads_on(st);
2766 }
2767 
2768 // Closures
2769 // XXX: there seems to be a lot of code duplication here;
2770 // should refactor and consolidate the shared code.
2771 
2772 // This closure is used to mark refs into the CMS generation in
2773 // the CMS bit map. Called at the first checkpoint.
2774 
2775 // We take a break if someone is trying to stop the world.
2776 bool ConcurrentMark::do_yield_check(int worker_i) {
2777   if (should_yield()) {
2778     if (worker_i == 0)
2779       _g1h->g1_policy()->record_concurrent_pause();
2780     cmThread()->yield();
2781     if (worker_i == 0)
2782       _g1h->g1_policy()->record_concurrent_pause_end();
2783     return true;
2784   } else {
2785     return false;
2786   }
2787 }
2788 
2789 bool ConcurrentMark::should_yield() {
2790   return cmThread()->should_yield();
2791 }
2792 
2793 bool ConcurrentMark::containing_card_is_marked(void* p) {
2794   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
2795   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
2796 }
2797 
2798 bool ConcurrentMark::containing_cards_are_marked(void* start,
2799                                                  void* last) {
2800   return
2801     containing_card_is_marked(start) &&
2802     containing_card_is_marked(last);
2803 }
2804 
2805 #ifndef PRODUCT
2806 // for debugging purposes
2807 void ConcurrentMark::print_finger() {
2808   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
2809                          _heap_start, _heap_end, _finger);
2810   for (int i = 0; i < (int) _max_task_num; ++i) {
2811     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
2812   }
2813   gclog_or_tty->print_cr("");
2814 }
2815 #endif
2816 
2817 // Closure for iteration over bitmaps
2818 class CMBitMapClosure : public BitMapClosure {
2819 private:
2820   // the bitmap that is being iterated over
2821   CMBitMap*                   _nextMarkBitMap;
2822   ConcurrentMark*             _cm;
2823   CMTask*                     _task;
2824   // true if we're scanning a heap region claimed by the task (so that
2825   // we move the finger along), false if we're not, i.e. currently when
2826   // scanning a heap region popped from the region stack (so that we
2827   // do not move the task finger along; it'd be a mistake if we did so).
2828   bool                        _scanning_heap_region;
2829 
2830 public:
2831   CMBitMapClosure(CMTask *task,
2832                   ConcurrentMark* cm,
2833                   CMBitMap* nextMarkBitMap)
2834     :  _task(task), _cm(cm), _nextMarkBitMap(nextMarkBitMap) { }
2835 
2836   void set_scanning_heap_region(bool scanning_heap_region) {
2837     _scanning_heap_region = scanning_heap_region;
2838   }
2839 
2840   bool do_bit(size_t offset) {
2841     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
2842     assert(_nextMarkBitMap->isMarked(addr), "invariant");
2843     assert( addr < _cm->finger(), "invariant");
2844 
2845     if (_scanning_heap_region) {
2846       statsOnly( _task->increase_objs_found_on_bitmap() );
2847       assert(addr >= _task->finger(), "invariant");
2848       // We move the task's local finger along.
2849       _task->move_finger_to(addr);
2850     } else {
2851       // We move the task's region finger along.
2852       _task->move_region_finger_to(addr);
2853     }
2854 
2855     _task->scan_object(oop(addr));
2856     // we only partially drain the local queue and global stack
2857     _task->drain_local_queue(true);
2858     _task->drain_global_stack(true);
2859 
2860     // if the has_aborted flag has been raised, we need to bail out of
2861     // the iteration
2862     return !_task->has_aborted();
2863   }
2864 };
2865 
2866 // Closure for iterating over objects, currently only used for
2867 // processing SATB buffers.
2868 class CMObjectClosure : public ObjectClosure {
2869 private:
2870   CMTask* _task;
2871 
2872 public:
2873   void do_object(oop obj) {
2874     _task->deal_with_reference(obj);
2875   }
2876 
2877   CMObjectClosure(CMTask* task) : _task(task) { }
2878 };
2879 
2880 // Closure for iterating over object fields
2881 class CMOopClosure : public OopClosure {
2882 private:
2883   G1CollectedHeap*   _g1h;
2884   ConcurrentMark*    _cm;
2885   CMTask*            _task;
2886 
2887 public:
2888   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2889   virtual void do_oop(      oop* p) { do_oop_work(p); }
2890 
2891   template <class T> void do_oop_work(T* p) {
2892     assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
2893     assert(!_g1h->heap_region_containing((HeapWord*) p)->is_on_free_list(),
2894            "invariant");
2895 
2896     oop obj = oopDesc::load_decode_heap_oop(p);
2897     if (_cm->verbose_high())
2898       gclog_or_tty->print_cr("[%d] we're looking at location "
2899                              "*"PTR_FORMAT" = "PTR_FORMAT,
2900                              _task->task_id(), p, (void*) obj);
2901     _task->deal_with_reference(obj);
2902   }
2903 
2904   CMOopClosure(G1CollectedHeap* g1h,
2905                ConcurrentMark* cm,
2906                CMTask* task)
2907     : _g1h(g1h), _cm(cm), _task(task) { }
2908 };
2909 
2910 void CMTask::setup_for_region(HeapRegion* hr) {
2911   // Separated the asserts so that we know which one fires.
2912   assert(hr != NULL,
2913         "claim_region() should have filtered out continues humongous regions");
2914   assert(!hr->continuesHumongous(),
2915         "claim_region() should have filtered out continues humongous regions");
2916 
2917   if (_cm->verbose_low())
2918     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
2919                            _task_id, hr);
2920 
2921   _curr_region  = hr;
2922   _finger       = hr->bottom();
2923   update_region_limit();
2924 }
2925 
2926 void CMTask::update_region_limit() {
2927   HeapRegion* hr            = _curr_region;
2928   HeapWord* bottom          = hr->bottom();
2929   HeapWord* limit           = hr->next_top_at_mark_start();
2930 
2931   if (limit == bottom) {
2932     if (_cm->verbose_low())
2933       gclog_or_tty->print_cr("[%d] found an empty region "
2934                              "["PTR_FORMAT", "PTR_FORMAT")",
2935                              _task_id, bottom, limit);
2936     // The region was collected underneath our feet.
2937     // We set the finger to bottom to ensure that the bitmap
2938     // iteration that will follow this will not do anything.
2939     // (this is not a condition that holds when we set the region up,
2940     // as the region is not supposed to be empty in the first place)
2941     _finger = bottom;
2942   } else if (limit >= _region_limit) {
2943     assert(limit >= _finger, "peace of mind");
2944   } else {
2945     assert(limit < _region_limit, "only way to get here");
2946     // This can happen under some pretty unusual circumstances.  An
2947     // evacuation pause empties the region underneath our feet (NTAMS
2948     // at bottom). We then do some allocation in the region (NTAMS
2949     // stays at bottom), followed by the region being used as a GC
2950     // alloc region (NTAMS will move to top() and the objects
2951     // originally below it will be grayed). All objects now marked in
2952     // the region that are below the global finger are explicitly
2953     // grayed, so in fact there is nothing else to scan. We simply
2954     // set _finger to be limit to ensure that the bitmap iteration
2955     // doesn't do anything.
2956     _finger = limit;
2957   }
2958 
2959   _region_limit = limit;
2960 }
2961 
2962 void CMTask::giveup_current_region() {
2963   assert(_curr_region != NULL, "invariant");
2964   if (_cm->verbose_low())
2965     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
2966                            _task_id, _curr_region);
2967   clear_region_fields();
2968 }
2969 
2970 void CMTask::clear_region_fields() {
2971   // Values for these three fields that indicate that we're not
2972   // holding on to a region.
2973   _curr_region   = NULL;
2974   _finger        = NULL;
2975   _region_limit  = NULL;
2976 
2977   _region_finger = NULL;
2978 }
2979 
2980 void CMTask::reset(CMBitMap* nextMarkBitMap) {
2981   guarantee(nextMarkBitMap != NULL, "invariant");
2982 
2983   if (_cm->verbose_low())
2984     gclog_or_tty->print_cr("[%d] resetting", _task_id);
2985 
2986   _nextMarkBitMap                = nextMarkBitMap;
2987   clear_region_fields();
2988   clear_aborted_region();
2989 
2990   _calls                         = 0;
2991   _elapsed_time_ms               = 0.0;
2992   _termination_time_ms           = 0.0;
2993   _termination_start_time_ms     = 0.0;
2994 
2995 #if _MARKING_STATS_
2996   _local_pushes                  = 0;
2997   _local_pops                    = 0;
2998   _local_max_size                = 0;
2999   _objs_scanned                  = 0;
3000   _global_pushes                 = 0;
3001   _global_pops                   = 0;
3002   _global_max_size               = 0;
3003   _global_transfers_to           = 0;
3004   _global_transfers_from         = 0;
3005   _region_stack_pops             = 0;
3006   _regions_claimed               = 0;
3007   _objs_found_on_bitmap          = 0;
3008   _satb_buffers_processed        = 0;
3009   _steal_attempts                = 0;
3010   _steals                        = 0;
3011   _aborted                       = 0;
3012   _aborted_overflow              = 0;
3013   _aborted_cm_aborted            = 0;
3014   _aborted_yield                 = 0;
3015   _aborted_timed_out             = 0;
3016   _aborted_satb                  = 0;
3017   _aborted_termination           = 0;
3018 #endif // _MARKING_STATS_
3019 }
3020 
3021 bool CMTask::should_exit_termination() {
3022   regular_clock_call();
3023   // This is called when we are in the termination protocol. We should
3024   // quit if, for some reason, this task wants to abort or the global
3025   // stack is not empty (this means that we can get work from it).
3026   return !_cm->mark_stack_empty() || has_aborted();
3027 }
3028 
3029 // This determines whether the method below will check both the local
3030 // and global fingers when determining whether to push on the stack a
3031 // gray object (value 1) or whether it will only check the global one
3032 // (value 0). The tradeoffs are that the former will be a bit more
3033 // accurate and possibly push less on the stack, but it might also be
3034 // a little bit slower.
3035 
3036 #define _CHECK_BOTH_FINGERS_      1
3037 
3038 void CMTask::deal_with_reference(oop obj) {
3039   if (_cm->verbose_high())
3040     gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
3041                            _task_id, (void*) obj);
3042 
3043   ++_refs_reached;
3044 
3045   HeapWord* objAddr = (HeapWord*) obj;
3046   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
3047   if (_g1h->is_in_g1_reserved(objAddr)) {
3048     assert(obj != NULL, "is_in_g1_reserved should ensure this");
3049     HeapRegion* hr =  _g1h->heap_region_containing(obj);
3050     if (_g1h->is_obj_ill(obj, hr)) {
3051       if (_cm->verbose_high())
3052         gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
3053                                _task_id, (void*) obj);
3054 
3055       // we need to mark it first
3056       if (_nextMarkBitMap->parMark(objAddr)) {
3057         // No OrderAccess::store_load() is needed. It is implicit in the
3058         // CAS done in parMark(objAddr) above
3059         HeapWord* global_finger = _cm->finger();
3060 
3061 #if _CHECK_BOTH_FINGERS_
3062         // we will check both the local and global fingers
3063 
3064         if (_finger != NULL && objAddr < _finger) {
3065           if (_cm->verbose_high())
3066             gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
3067                                    "pushing it", _task_id, _finger);
3068           push(obj);
3069         } else if (_curr_region != NULL && objAddr < _region_limit) {
3070           // do nothing
3071         } else if (objAddr < global_finger) {
3072           // Notice that the global finger might be moving forward
3073           // concurrently. This is not a problem. In the worst case, we
3074           // mark the object while it is above the global finger and, by
3075           // the time we read the global finger, it has moved forward
3076           // past this object. In this case, the object will probably
3077           // be visited when a task is scanning the region and will also
3078           // be pushed on the stack. So, some duplicate work, but no
3079           // correctness problems.
3080 
3081           if (_cm->verbose_high())
3082             gclog_or_tty->print_cr("[%d] below the global finger "
3083                                    "("PTR_FORMAT"), pushing it",
3084                                    _task_id, global_finger);
3085           push(obj);
3086         } else {
3087           // do nothing
3088         }
3089 #else // _CHECK_BOTH_FINGERS_
3090         // we will only check the global finger
3091 
3092         if (objAddr < global_finger) {
3093           // see long comment above
3094 
3095           if (_cm->verbose_high())
3096             gclog_or_tty->print_cr("[%d] below the global finger "
3097                                    "("PTR_FORMAT"), pushing it",
3098                                    _task_id, global_finger);
3099           push(obj);
3100         }
3101 #endif // _CHECK_BOTH_FINGERS_
3102       }
3103     }
3104   }
3105 }
3106 
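     // Push a marked (gray) object on the local task queue, moving a batch of
     // entries to the global mark stack first if the local queue is full.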
3107 void CMTask::push(oop obj) {
3108   HeapWord* objAddr = (HeapWord*) obj;
3109   assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
3110   assert(!_g1h->heap_region_containing(objAddr)->is_on_free_list(),
3111          "invariant");
3112   assert(!_g1h->is_obj_ill(obj), "invariant");
3113   assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
3114 
3115   if (_cm->verbose_high())
3116     gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
3117 
3118   if (!_task_queue->push(obj)) {
3119     // The local task queue looks full. We need to push some entries
3120     // to the global stack.
3121 
3122     if (_cm->verbose_medium())
3123       gclog_or_tty->print_cr("[%d] task queue overflow, "
3124                              "moving entries to the global stack",
3125                              _task_id);
3126     move_entries_to_global_stack();
3127 
3128     // this should succeed since, even if we overflow the global
3129     // stack, we should have definitely removed some entries from the
3130     // local queue. So, there must be space on it.
3131     bool success = _task_queue->push(obj);
3132     assert(success, "invariant");
3133   }
3134 
3135   statsOnly( int tmp_size = _task_queue->size();
3136              if (tmp_size > _local_max_size)
3137                _local_max_size = tmp_size;
3138              ++_local_pushes );
3139 }
3140 
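     // Called once the work-based counters (words scanned or refs reached)
     // hit their current limits; it simply invokes the regular clock so that
     // all the abort conditions are re-checked.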
3141 void CMTask::reached_limit() {
3142   assert(_words_scanned >= _words_scanned_limit ||
3143          _refs_reached >= _refs_reached_limit ,
3144          "shouldn't have been called otherwise");
3145   regular_clock_call();
3146 }
3147 
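     // The "clock" of the work-based scheme described in the comment block
     // preceding do_marking_step(): it recalculates the scanning limits and
     // then checks, in order, every condition that may require this task to
     // abort.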
3148 void CMTask::regular_clock_call() {
3149   if (has_aborted())
3150     return;
3151 
3152   // First, we need to recalculate the words scanned and refs reached
3153   // limits for the next clock call.
3154   recalculate_limits();
3155 
3156   // During the regular clock call we do the following:
3157 
3158   // (1) If an overflow has been flagged, then we abort.
3159   if (_cm->has_overflown()) {
3160     set_has_aborted();
3161     return;
3162   }
3163 
3164   // If we are not concurrent (i.e. we're doing remark) we don't need
3165   // to check anything else. The other steps are only needed during
3166   // the concurrent marking phase.
3167   if (!concurrent())
3168     return;
3169 
3170   // (2) If marking has been aborted for Full GC, then we also abort.
3171   if (_cm->has_aborted()) {
3172     set_has_aborted();
3173     statsOnly( ++_aborted_cm_aborted );
3174     return;
3175   }
3176 
3177   double curr_time_ms = os::elapsedVTime() * 1000.0;
3178 
3179   // (3) If marking stats are enabled, then we update the clock statistics.
3180 #if _MARKING_STATS_
3181   if (_words_scanned >= _words_scanned_limit)
3182     ++_clock_due_to_scanning;
3183   if (_refs_reached >= _refs_reached_limit)
3184     ++_clock_due_to_marking;
3185 
3186   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3187   _interval_start_time_ms = curr_time_ms;
3188   _all_clock_intervals_ms.add(last_interval_ms);
3189 
3190   if (_cm->verbose_medium()) {
3191     gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3192                            "scanned = %d%s, refs reached = %d%s",
3193                            _task_id, last_interval_ms,
3194                            _words_scanned,
3195                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3196                            _refs_reached,
3197                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3198   }
3199 #endif // _MARKING_STATS_
3200 
3201   // (4) We check whether we should yield. If we have to, then we abort.
3202   if (_cm->should_yield()) {
3203     // We should yield. To do this we abort the task. The caller is
3204     // responsible for yielding.
3205     set_has_aborted();
3206     statsOnly( ++_aborted_yield );
3207     return;
3208   }
3209 
3210   // (5) We check whether we've reached our time quota. If we have,
3211   // then we abort.
3212   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3213   if (elapsed_time_ms > _time_target_ms) {
3214     set_has_aborted();
3215     _has_aborted_timed_out = true;
3216     statsOnly( ++_aborted_timed_out );
3217     return;
3218   }
3219 
3220   // (6) Finally, we check whether there are enough completed SATB
3221   // buffers available for processing. If there are, we abort.
3222   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3223   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3224     if (_cm->verbose_low())
3225       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3226                              _task_id);
3227     // we do need to process SATB buffers, so we'll abort and restart
3228     // the marking task to do so
3229     set_has_aborted();
3230     statsOnly( ++_aborted_satb );
3231     return;
3232   }
3233 }
3234 
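     // Move both the real and the effective limits one full period ahead of
     // the current counters.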
3235 void CMTask::recalculate_limits() {
3236   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3237   _words_scanned_limit      = _real_words_scanned_limit;
3238 
3239   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3240   _refs_reached_limit       = _real_refs_reached_limit;
3241 }
3242 
3243 void CMTask::decrease_limits() {
3244   // This is called when we believe that we're about to do an infrequent
3245   // operation which will increase the per-byte scanned cost (i.e. move
3246   // entries to/from the global stack). It decreases the scanning
3247   // limits so that the clock is called earlier.
3248 
3249   if (_cm->verbose_medium())
3250     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3251 
3252   _words_scanned_limit = _real_words_scanned_limit -
3253     3 * words_scanned_period / 4;
3254   _refs_reached_limit  = _real_refs_reached_limit -
3255     3 * refs_reached_period / 4;
3256 }
3257 
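     // Move up to global_stack_transfer_size entries from the local queue to
     // the global mark stack, setting the abort flag if the global stack
     // overflows.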
3258 void CMTask::move_entries_to_global_stack() {
3259   // local array where we'll store the entries that will be popped
3260   // from the local queue
3261   oop buffer[global_stack_transfer_size];
3262 
3263   int n = 0;
3264   oop obj;
3265   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3266     buffer[n] = obj;
3267     ++n;
3268   }
3269 
3270   if (n > 0) {
3271     // we popped at least one entry from the local queue
3272 
3273     statsOnly( ++_global_transfers_to; _local_pops += n );
3274 
3275     if (!_cm->mark_stack_push(buffer, n)) {
3276       if (_cm->verbose_low())
3277         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
3278       set_has_aborted();
3279     } else {
3280       // the transfer was successful
3281 
3282       if (_cm->verbose_medium())
3283         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3284                                _task_id, n);
3285       statsOnly( int tmp_size = _cm->mark_stack_size();
3286                  if (tmp_size > _global_max_size)
3287                    _global_max_size = tmp_size;
3288                  _global_pushes += n );
3289     }
3290   }
3291 
3292   // this operation was quite expensive, so decrease the limits
3293   decrease_limits();
3294 }
3295 
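     // Pop up to global_stack_transfer_size entries from the global mark
     // stack and push them on the local task queue.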
3296 void CMTask::get_entries_from_global_stack() {
3297   // local array where we'll store the entries that will be popped
3298   // from the global stack.
3299   oop buffer[global_stack_transfer_size];
3300   int n;
3301   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3302   assert(n <= global_stack_transfer_size,
3303          "we should not pop more than the given limit");
3304   if (n > 0) {
3305     // yes, we did actually pop at least one entry
3306 
3307     statsOnly( ++_global_transfers_from; _global_pops += n );
3308     if (_cm->verbose_medium())
3309       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3310                              _task_id, n);
3311     for (int i = 0; i < n; ++i) {
3312       bool success = _task_queue->push(buffer[i]);
3313       // We only call this when the local queue is empty or under a
3314       // given target limit. So, we do not expect this push to fail.
3315       assert(success, "invariant");
3316     }
3317 
3318     statsOnly( int tmp_size = _task_queue->size();
3319                if (tmp_size > _local_max_size)
3320                  _local_max_size = tmp_size;
3321                _local_pushes += n );
3322   }
3323 
3324   // this operation was quite expensive, so decrease the limits
3325   decrease_limits();
3326 }
3327 
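     // Scan objects popped from the local queue until its size drops to the
     // target: roughly a third of the queue's capacity (capped by
     // GCDrainStackTargetSize) when draining partially, or zero when draining
     // totally.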
3328 void CMTask::drain_local_queue(bool partially) {
3329   if (has_aborted())
3330     return;
3331 
3332   // Decide what the target size is, depending on whether we're going to
3333   // drain it partially (so that other tasks can steal if they run out
3334   // of things to do) or totally (at the very end).
3335   size_t target_size;
3336   if (partially)
3337     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3338   else
3339     target_size = 0;
3340 
3341   if (_task_queue->size() > target_size) {
3342     if (_cm->verbose_high())
3343       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3344                              _task_id, target_size);
3345 
3346     oop obj;
3347     bool ret = _task_queue->pop_local(obj);
3348     while (ret) {
3349       statsOnly( ++_local_pops );
3350 
3351       if (_cm->verbose_high())
3352         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3353                                (void*) obj);
3354 
3355       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3356       assert(!_g1h->heap_region_containing(obj)->is_on_free_list(),
3357              "invariant");
3358 
3359       scan_object(obj);
3360 
3361       if (_task_queue->size() <= target_size || has_aborted())
3362         ret = false;
3363       else
3364         ret = _task_queue->pop_local(obj);
3365     }
3366 
3367     if (_cm->verbose_high())
3368       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3369                              _task_id, _task_queue->size());
3370   }
3371 }
3372 
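     // Repeatedly transfer entries from the global mark stack to the local
     // queue (draining the local queue as we go) until the global stack drops
     // to the target size.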
3373 void CMTask::drain_global_stack(bool partially) {
3374   if (has_aborted())
3375     return;
3376 
3377   // We have a policy to drain the local queue before we attempt to
3378   // drain the global stack.
3379   assert(partially || _task_queue->size() == 0, "invariant");
3380 
3381   // Decide what the target size is, depending on whether we're going to
3382   // drain it partially (so that other tasks can steal if they run out
3383   // of things to do) or totally (at the very end).  Notice that,
3384   // because we move entries from the global stack in chunks or
3385   // because another task might be doing the same, we might in fact
3386   // drop below the target. But, this is not a problem.
3387   size_t target_size;
3388   if (partially)
3389     target_size = _cm->partial_mark_stack_size_target();
3390   else
3391     target_size = 0;
3392 
3393   if (_cm->mark_stack_size() > target_size) {
3394     if (_cm->verbose_low())
3395       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3396                              _task_id, target_size);
3397 
3398     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3399       get_entries_from_global_stack();
3400       drain_local_queue(partially);
3401     }
3402 
3403     if (_cm->verbose_low())
3404       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3405                              _task_id, _cm->mark_stack_size());
3406   }
3407 }
3408 
3409 // The SATB queue code makes several assumptions about whether to call the
3410 // par or non-par versions of the methods. This is why some of the code is
3411 // replicated. We should really get rid of the single-threaded version
3412 // of the code to simplify things.
3413 void CMTask::drain_satb_buffers() {
3414   if (has_aborted())
3415     return;
3416 
3417   // We set this so that the regular clock knows that we're in the
3418   // middle of draining buffers and doesn't set the abort flag when it
3419   // notices that SATB buffers are available for draining. It'd be
3420   // very counterproductive if it did that. :-)
3421   _draining_satb_buffers = true;
3422 
3423   CMObjectClosure oc(this);
3424   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3425   if (G1CollectedHeap::use_parallel_gc_threads())
3426     satb_mq_set.set_par_closure(_task_id, &oc);
3427   else
3428     satb_mq_set.set_closure(&oc);
3429 
3430   // This keeps claiming and applying the closure to completed buffers
3431   // until we run out of buffers or we need to abort.
3432   if (G1CollectedHeap::use_parallel_gc_threads()) {
3433     while (!has_aborted() &&
3434            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3435       if (_cm->verbose_medium())
3436         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3437       statsOnly( ++_satb_buffers_processed );
3438       regular_clock_call();
3439     }
3440   } else {
3441     while (!has_aborted() &&
3442            satb_mq_set.apply_closure_to_completed_buffer()) {
3443       if (_cm->verbose_medium())
3444         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3445       statsOnly( ++_satb_buffers_processed );
3446       regular_clock_call();
3447     }
3448   }
3449 
3450   if (!concurrent() && !has_aborted()) {
3451     // We should only do this during remark.
3452     if (G1CollectedHeap::use_parallel_gc_threads())
3453       satb_mq_set.par_iterate_closure_all_threads(_task_id);
3454     else
3455       satb_mq_set.iterate_closure_all_threads();
3456   }
3457 
3458   _draining_satb_buffers = false;
3459 
3460   assert(has_aborted() ||
3461          concurrent() ||
3462          satb_mq_set.completed_buffers_num() == 0, "invariant");
3463 
3464   if (G1CollectedHeap::use_parallel_gc_threads())
3465     satb_mq_set.set_par_closure(_task_id, NULL);
3466   else
3467     satb_mq_set.set_closure(NULL);
3468 
3469   // again, this was a potentially expensive operation, decrease the
3470   // limits to get the regular clock call early
3471   decrease_limits();
3472 }
3473 
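     // Pop regions off the region stack (starting with any previously
     // aborted region) and iterate the marking bitmap over each of them with
     // the given closure; if an iteration aborts, the unscanned remainder is
     // recorded in _aborted_region so that it is rescanned later.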
3474 void CMTask::drain_region_stack(BitMapClosure* bc) {
3475   if (has_aborted())
3476     return;
3477 
3478   assert(_region_finger == NULL,
3479          "it should be NULL when we're not scanning a region");
3480 
3481   if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
3482     if (_cm->verbose_low())
3483       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3484                              _task_id, _cm->region_stack_size());
3485 
3486     MemRegion mr;
3487 
3488     if (!_aborted_region.is_empty()) {
3489       mr = _aborted_region;
3490       _aborted_region = MemRegion();
3491 
3492       if (_cm->verbose_low())
3493         gclog_or_tty->print_cr("[%d] scanning aborted region [ " PTR_FORMAT ", " PTR_FORMAT " )",
3494                              _task_id, mr.start(), mr.end());
3495     } else {
3496       mr = _cm->region_stack_pop_lock_free();
3497       // it returns MemRegion() if the pop fails
3498       statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3499     }
3500 
3501     while (mr.start() != NULL) {
3502       if (_cm->verbose_medium())
3503         gclog_or_tty->print_cr("[%d] we are scanning region "
3504                                "["PTR_FORMAT", "PTR_FORMAT")",
3505                                _task_id, mr.start(), mr.end());
3506 
3507       assert(mr.end() <= _cm->finger(),
3508              "otherwise the region shouldn't be on the stack");
3509       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3510       if (_nextMarkBitMap->iterate(bc, mr)) {
3511         assert(!has_aborted(),
3512                "cannot abort the task without aborting the bitmap iteration");
3513 
3514         // We finished iterating over the region without aborting.
3515         regular_clock_call();
3516         if (has_aborted())
3517           mr = MemRegion();
3518         else {
3519           mr = _cm->region_stack_pop_lock_free();
3520           // it returns MemRegion() if the pop fails
3521           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3522         }
3523       } else {
3524         assert(has_aborted(), "currently the only way to do so");
3525 
3526         // The only way to abort the bitmap iteration is to return
3527         // false from the do_bit() method. However, inside the
3528         // do_bit() method we move the _region_finger to point to the
3529         // object currently being looked at. So, if we bail out, we
3530         // have definitely set _region_finger to something non-null.
3531         assert(_region_finger != NULL, "invariant");
3532 
3533         // Make sure that any previously aborted region has been
3534         // cleared.
3535         assert(_aborted_region.is_empty(), "aborted region not cleared");
3536 
3537         // The iteration was actually aborted. So now _region_finger
3538         // points to the address of the object we last scanned. If we
3539         // leave it there, when we restart this task, we will rescan
3540         // the object. It is easy to avoid this. We move the finger by
3541         // enough to point to the next possible object header (the
3542         // bitmap knows by how much we need to move it as it knows its
3543         // granularity).
3544         MemRegion newRegion =
3545           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3546 
3547         if (!newRegion.is_empty()) {
3548           if (_cm->verbose_low()) {
3549             gclog_or_tty->print_cr("[%d] recording unscanned region "
3550                                    "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
3551                                    _task_id,
3552                                    newRegion.start(), newRegion.end());
3553           }
3554           // Now record the part of the region we didn't scan to
3555           // make sure this task scans it later.
3556           _aborted_region = newRegion;
3557         }
3558         // break from while
3559         mr = MemRegion();
3560       }
3561       _region_finger = NULL;
3562     }
3563 
3564     if (_cm->verbose_low())
3565       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3566                              _task_id, _cm->region_stack_size());
3567   }
3568 }
3569 
3570 void CMTask::print_stats() {
3571   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3572                          _task_id, _calls);
3573   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3574                          _elapsed_time_ms, _termination_time_ms);
3575   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3576                          _step_times_ms.num(), _step_times_ms.avg(),
3577                          _step_times_ms.sd());
3578   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3579                          _step_times_ms.maximum(), _step_times_ms.sum());
3580 
3581 #if _MARKING_STATS_
3582   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3583                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3584                          _all_clock_intervals_ms.sd());
3585   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3586                          _all_clock_intervals_ms.maximum(),
3587                          _all_clock_intervals_ms.sum());
3588   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
3589                          _clock_due_to_scanning, _clock_due_to_marking);
3590   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
3591                          _objs_scanned, _objs_found_on_bitmap);
3592   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
3593                          _local_pushes, _local_pops, _local_max_size);
3594   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
3595                          _global_pushes, _global_pops, _global_max_size);
3596   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
3597                          _global_transfers_to,_global_transfers_from);
3598   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
3599                          _regions_claimed, _region_stack_pops);
3600   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
3601   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
3602                          _steal_attempts, _steals);
3603   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
3604   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
3605                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3606   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
3607                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3608 #endif // _MARKING_STATS_
3609 }
3610 
3611 /*****************************************************************************
3612 
3613     The do_marking_step(time_target_ms) method is the building block
3614     of the parallel marking framework. It can be called in parallel
3615     with other invocations of do_marking_step() on different tasks
3616     (but only one per task, obviously) and concurrently with the
3617     mutator threads, or during remark, hence it eliminates the need
3618     for two versions of the code. When called during remark, it will
3619     pick up from where the task left off during the concurrent marking
3620     phase. Interestingly, tasks are also claimable during evacuation
3621     pauses, since do_marking_step() ensures that it aborts before
3622     it needs to yield.
3623 
3624     The data structures that it uses to do marking work are the
3625     following:
3626 
3627       (1) Marking Bitmap. If there are gray objects that appear only
3628       on the bitmap (this happens either when dealing with an overflow
3629       or when the initial marking phase has simply marked the roots
3630       and didn't push them on the stack), then tasks claim heap
3631       regions whose bitmap they then scan to find gray objects. A
3632       global finger indicates where the end of the last claimed region
3633       is. A local finger indicates how far into the region a task has
3634       scanned. The two fingers are used to determine how to gray an
3635       object (i.e. whether simply marking it is OK, as it will be
3636       visited by a task in the future, or whether it needs to be also
3637       pushed on a stack).
3638 
3639       (2) Local Queue. The local queue of the task which is accessed
3640       reasonably efficiently by the task. Other tasks can steal from
3641       it when they run out of work. Throughout the marking phase, a
3642       task attempts to keep its local queue short but not totally
3643       empty, so that entries are available for stealing by other
3644       tasks. Only when there is no more work will a task totally
3645       drain its local queue.
3646 
3647       (3) Global Mark Stack. This handles local queue overflow. During
3648       marking only sets of entries are moved between it and the local
3649       queues, as access to it requires a mutex and more fine-grained
3650       interaction with it might cause contention. If it
3651       overflows, then the marking phase should restart and iterate
3652       over the bitmap to identify gray objects. Throughout the marking
3653       phase, tasks attempt to keep the global mark stack at a small
3654       length but not totally empty, so that entries are available for
3655       popping by other tasks. Only when there is no more work will tasks
3656       totally drain the global mark stack.
3657 
3658       (4) Global Region Stack. Entries on it correspond to areas of
3659       the bitmap that need to be scanned since they contain gray
3660       objects. Pushes on the region stack only happen during
3661       evacuation pauses and typically correspond to areas covered by
3662       GC LABs. If it overflows, then the marking phase should restart
3663       and iterate over the bitmap to identify gray objects. Tasks will
3664       try to totally drain the region stack as soon as possible.
3665 
3666       (5) SATB Buffer Queue. This is where completed SATB buffers are
3667       made available. Buffers are regularly removed from this queue
3668       and scanned for roots, so that the queue doesn't get too
3669       long. During remark, all completed buffers are processed, as
3670       well as the filled-in parts of any uncompleted buffers.
3671 
3672     The do_marking_step() method tries to abort when the time target
3673     has been reached. There are a few other cases when the
3674     do_marking_step() method also aborts:
3675 
3676       (1) When the marking phase has been aborted (after a Full GC).
3677 
3678       (2) When a global overflow (either on the global stack or the
3679       region stack) has been triggered. Before the task aborts, it
3680       will actually sync up with the other tasks to ensure that all
3681       the marking data structures (local queues, stacks, fingers etc.)
3682       are re-initialised so that when do_marking_step() completes,
3683       the marking phase can immediately restart.
3684 
3685       (3) When enough completed SATB buffers are available. The
3686       do_marking_step() method only tries to drain SATB buffers right
3687       at the beginning. So, if enough buffers are available, the
3688       marking step aborts and the SATB buffers are processed at
3689       the beginning of the next invocation.
3690 
3691       (4) To yield. When we have to yield, we abort and yield
3692       right at the end of do_marking_step(). This saves us from a lot
3693       of hassle as, by yielding, we might allow a Full GC. If this
3694       happens then objects will be compacted underneath our feet, the
3695       heap might shrink, etc. We save checking for this by just
3696       aborting and doing the yield right at the end.
3697 
3698     From the above it follows that the do_marking_step() method should
3699     be called in a loop (or, otherwise, regularly) until it completes.
3700 
3701     If a marking step completes without its has_aborted() flag being
3702     true, it means it has completed the current marking phase (and
3703     also all other marking tasks have done so and have all synced up).
3704 
3705     A method called regular_clock_call() is invoked "regularly" (in
3706     sub-ms intervals) throughout marking. It is this clock method that
3707     checks all the abort conditions which were mentioned above and
3708     decides when the task should abort. A work-based scheme is used to
3709     trigger this clock method: when the number of object words the
3710     marking phase has scanned or the number of references the marking
3711     phase has visited reaches a given limit. Additional invocations of
3712     the clock method have been planted in a few other strategic places
3713     too. The initial reason for the clock method was to avoid calling
3714     vtime too regularly, as it is quite expensive. So, once it was in
3715     place, it was natural to piggy-back all the other conditions on it
3716     too and not constantly check them throughout the code.
3717 
3718  *****************************************************************************/
3719 
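     // A minimal sketch (not the actual caller) of how a marking thread is
     // expected to drive do_marking_step(), per the comment above: keep
     // calling it until it completes without aborting. "task" and "cm" are
     // stand-ins for a CMTask* and the ConcurrentMark instance.
     //
     //   do {
     //     task->do_marking_step(target_ms);
     //     // ...yield and/or run the overflow protocol between calls...
     //   } while (task->has_aborted() && !cm->has_aborted());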
3720 void CMTask::do_marking_step(double time_target_ms) {
3721   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
3722   assert(concurrent() == _cm->concurrent(), "they should be the same");
3723 
3724   assert(concurrent() || _cm->region_stack_empty(),
3725          "the region stack should have been cleared before remark");
3726   assert(concurrent() || !_cm->has_aborted_regions(),
3727          "aborted regions should have been cleared before remark");
3728   assert(_region_finger == NULL,
3729          "this should be non-null only when a region is being scanned");
3730 
3731   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
3732   assert(_task_queues != NULL, "invariant");
3733   assert(_task_queue != NULL, "invariant");
3734   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
3735 
3736   assert(!_claimed,
3737          "only one thread should claim this task at any one time");
3738 
3739   // OK, this doesn't safeguard against all possible scenarios, as it is
3740   // possible for two threads to set the _claimed flag at the same
3741   // time. But it is only for debugging purposes anyway and it will
3742   // catch most problems.
3743   _claimed = true;
3744 
3745   _start_time_ms = os::elapsedVTime() * 1000.0;
3746   statsOnly( _interval_start_time_ms = _start_time_ms );
3747 
3748   double diff_prediction_ms =
3749     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
3750   _time_target_ms = time_target_ms - diff_prediction_ms;
3751 
3752   // set up the variables that are used in the work-based scheme to
3753   // call the regular clock method
3754   _words_scanned = 0;
3755   _refs_reached  = 0;
3756   recalculate_limits();
3757 
3758   // clear all flags
3759   clear_has_aborted();
3760   _has_aborted_timed_out = false;
3761   _draining_satb_buffers = false;
3762 
3763   ++_calls;
3764 
3765   if (_cm->verbose_low())
3766     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
3767                            "target = %1.2lfms >>>>>>>>>>",
3768                            _task_id, _calls, _time_target_ms);
3769 
3770   // Set up the bitmap and oop closures. Anything that uses them is
3771   // eventually called from this method, so it is OK to allocate these
3772   // statically.
3773   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
3774   CMOopClosure    oop_closure(_g1h, _cm, this);
3775   set_oop_closure(&oop_closure);
3776 
3777   if (_cm->has_overflown()) {
3778     // This can happen if the region stack or the mark stack overflows
3779     // during a GC pause and this task, after a yield point,
3780     // restarts. We have to abort as we need to get into the overflow
3781     // protocol which happens right at the end of this task.
3782     set_has_aborted();
3783   }
3784 
3785   // First drain any available SATB buffers. After this, we will not
3786   // look at SATB buffers before the next invocation of this method.
3787   // If enough completed SATB buffers are queued up, the regular clock
3788   // will abort this task so that it restarts.
3789   drain_satb_buffers();
3790   // ...then partially drain the local queue and the global stack
3791   drain_local_queue(true);
3792   drain_global_stack(true);
3793 
3794   // Then totally drain the region stack.  We will not look at
3795   // it again before the next invocation of this method. Entries on
3796   // the region stack are only added during evacuation pauses, for
3797   // which we have to yield. When we do, we abort the task anyway so
3798   // it will look at the region stack again when it restarts.
3799   bitmap_closure.set_scanning_heap_region(false);
3800   drain_region_stack(&bitmap_closure);
3801   // ...then partially drain the local queue and the global stack
3802   drain_local_queue(true);
3803   drain_global_stack(true);
3804 
3805   do {
3806     if (!has_aborted() && _curr_region != NULL) {
3807       // This means that we're already holding on to a region.
3808       assert(_finger != NULL, "if region is not NULL, then the finger "
3809              "should not be NULL either");
3810 
3811       // We might have restarted this task after an evacuation pause
3812       // which might have evacuated the region we're holding on to
3813       // underneath our feet. Let's read its limit again to make sure
3814       // that we do not iterate over a region of the heap that
3815       // contains garbage (update_region_limit() will also move
3816       // _finger to the start of the region if it is found empty).
3817       update_region_limit();
3818       // We will start from _finger not from the start of the region,
3819       // as we might be restarting this task after aborting half-way
3820       // through scanning this region. In this case, _finger points to
3821       // the address where we last found a marked object. If this is a
3822       // fresh region, _finger points to start().
3823       MemRegion mr = MemRegion(_finger, _region_limit);
3824 
3825       if (_cm->verbose_low())
3826         gclog_or_tty->print_cr("[%d] we're scanning part "
3827                                "["PTR_FORMAT", "PTR_FORMAT") "
3828                                "of region "PTR_FORMAT,
3829                                _task_id, _finger, _region_limit, _curr_region);
3830 
3831       // Let's iterate over the bitmap of the part of the
3832       // region that is left.
3833       bitmap_closure.set_scanning_heap_region(true);
3834       if (mr.is_empty() ||
3835           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
3836         // We successfully completed iterating over the region. Now,
3837         // let's give up the region.
3838         giveup_current_region();
3839         regular_clock_call();
3840       } else {
3841         assert(has_aborted(), "currently the only way to do so");
3842         // The only way to abort the bitmap iteration is to return
3843         // false from the do_bit() method. However, inside the
3844         // do_bit() method we move the _finger to point to the
3845         // object currently being looked at. So, if we bail out, we
3846         // have definitely set _finger to something non-null.
3847         assert(_finger != NULL, "invariant");
3848 
3849         // Region iteration was actually aborted. So now _finger
3850         // points to the address of the object we last scanned. If we
3851         // leave it there, when we restart this task, we will rescan
3852         // the object. It is easy to avoid this. We move the finger by
3853         // enough to point to the next possible object header (the
3854         // bitmap knows by how much we need to move it as it knows its
3855         // granularity).
3856         assert(_finger < _region_limit, "invariant");
3857         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
3858         // Check if bitmap iteration was aborted while scanning the last object
3859         if (new_finger >= _region_limit) {
3860             giveup_current_region();
3861         } else {
3862             move_finger_to(new_finger);
3863         }
3864       }
3865     }
3866     // At this point we have either completed iterating over the
3867     // region we were holding on to, or we have aborted.
3868 
3869     // We then partially drain the local queue and the global stack.
3870     // (Do we really need this?)
3871     drain_local_queue(true);
3872     drain_global_stack(true);
3873 
3874     // Read the note on the claim_region() method on why it might
3875     // return NULL with potentially more regions available for
3876     // claiming and why we have to check out_of_regions() to determine
3877     // whether we're done or not.
3878     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
3879       // We are going to try to claim a new region. We should have
3880       // given up on the previous one.
3881       // Separated the asserts so that we know which one fires.
3882       assert(_curr_region  == NULL, "invariant");
3883       assert(_finger       == NULL, "invariant");
3884       assert(_region_limit == NULL, "invariant");
3885       if (_cm->verbose_low())
3886         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
3887       HeapRegion* claimed_region = _cm->claim_region(_task_id);
3888       if (claimed_region != NULL) {
3889         // Yes, we managed to claim one
3890         statsOnly( ++_regions_claimed );
3891 
3892         if (_cm->verbose_low())
3893           gclog_or_tty->print_cr("[%d] we successfully claimed "
3894                                  "region "PTR_FORMAT,
3895                                  _task_id, claimed_region);
3896 
3897         setup_for_region(claimed_region);
3898         assert(_curr_region == claimed_region, "invariant");
3899       }
3900       // It is important to call the regular clock here. It might take
3901       // a while to claim a region if, for example, we hit a large
3902       // block of empty regions. So we need to call the regular clock
3903       // method once round the loop to make sure it's called
3904       // frequently enough.
3905       regular_clock_call();
3906     }
3907 
3908     if (!has_aborted() && _curr_region == NULL) {
3909       assert(_cm->out_of_regions(),
3910              "at this point we should be out of regions");
3911     }
3912   } while ( _curr_region != NULL && !has_aborted());
3913 
3914   if (!has_aborted()) {
3915     // We cannot check whether the global stack is empty, since other
3916     // tasks might be pushing objects to it concurrently. We also cannot
3917     // check if the region stack is empty because if a thread is aborting
3918     // it can push a partially done region back.
3919     assert(_cm->out_of_regions(),
3920            "at this point we should be out of regions");
3921 
3922     if (_cm->verbose_low())
3923       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
3924 
3925     // Try to reduce the number of available SATB buffers so that
3926     // remark has less work to do.
3927     drain_satb_buffers();
3928   }
3929 
3930   // Since we've done everything else, we can now totally drain the
3931   // local queue and global stack.
3932   drain_local_queue(false);
3933   drain_global_stack(false);
3934 
3935   // Attempt work stealing from other tasks' queues.
3936   if (!has_aborted()) {
3937     // We have not aborted. This means that we have finished all that
3938     // we could. Let's try to do some stealing...
3939 
3940     // We cannot check whether the global stack is empty, since other
3941     // tasks might be pushing objects to it concurrently. We also cannot
3942     // check if the region stack is empty because if a thread is aborting
3943     // it can push a partially done region back.
3944     assert(_cm->out_of_regions() && _task_queue->size() == 0,
3945            "only way to reach here");
3946 
3947     if (_cm->verbose_low())
3948       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
3949 
3950     while (!has_aborted()) {
3951       oop obj;
3952       statsOnly( ++_steal_attempts );
3953 
3954       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
3955         if (_cm->verbose_medium())
3956           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
3957                                  _task_id, (void*) obj);
3958 
3959         statsOnly( ++_steals );
3960 
3961         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
3962                "any stolen object should be marked");
3963         scan_object(obj);
3964 
3965         // And since we're towards the end, let's totally drain the
3966         // local queue and global stack.
3967         drain_local_queue(false);
3968         drain_global_stack(false);
3969       } else {
3970         break;
3971       }
3972     }
3973   }
3974 
3975   // We still haven't aborted. Now, let's try to get into the
3976   // termination protocol.
3977   if (!has_aborted()) {
3978     // We cannot check whether the global stack is empty, since other
3979     // tasks might be concurrently pushing objects on it. We also cannot
3980     // check if the region stack is empty because if a thread is aborting
3981     // it can push a partially done region back.
3982     // Separated the asserts so that we know which one fires.
3983     assert(_cm->out_of_regions(), "only way to reach here");
3984     assert(_task_queue->size() == 0, "only way to reach here");
3985 
3986     if (_cm->verbose_low())
3987       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
3988 
3989     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
3990     // The CMTask class also extends the TerminatorTerminator class,
3991     // hence its should_exit_termination() method will also decide
3992     // whether to exit the termination protocol or not.
3993     bool finished = _cm->terminator()->offer_termination(this);
3994     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
3995     _termination_time_ms +=
3996       termination_end_time_ms - _termination_start_time_ms;
3997 
3998     if (finished) {
3999       // We're all done.
4000 
4001       if (_task_id == 0) {
4002         // let's allow task 0 to do this
4003         if (concurrent()) {
4004           assert(_cm->concurrent_marking_in_progress(), "invariant");
4005           // we need to set this to false before the next
4006           // safepoint. This way we ensure that the marking phase
4007           // doesn't observe any more heap expansions.
4008           _cm->clear_concurrent_marking_in_progress();
4009         }
4010       }
4011 
4012       // We can now guarantee that the global stack is empty, since
4013       // all other tasks have finished. We separated the guarantees so
4014       // that, if a condition is false, we can immediately find out
4015       // which one.
4016       guarantee(_cm->out_of_regions(), "only way to reach here");
4017       guarantee(_aborted_region.is_empty(), "only way to reach here");
4018       guarantee(_cm->region_stack_empty(), "only way to reach here");
4019       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4020       guarantee(_task_queue->size() == 0, "only way to reach here");
4021       guarantee(!_cm->has_overflown(), "only way to reach here");
4022       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4023       guarantee(!_cm->region_stack_overflow(), "only way to reach here");
4024 
4025       if (_cm->verbose_low())
4026         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4027     } else {
4028       // Apparently there's more work to do. Let's abort this task. The
4029       // caller will restart it and we can hopefully find more things to do.
4030 
4031       if (_cm->verbose_low())
4032         gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
4033 
4034       set_has_aborted();
4035       statsOnly( ++_aborted_termination );
4036     }
4037   }
4038 
4039   // Mainly for debugging purposes to make sure that a pointer to the
4040   // closure which was statically allocated in this frame doesn't
4041   // escape it by accident.
4042   set_oop_closure(NULL);
4043   double end_time_ms = os::elapsedVTime() * 1000.0;
4044   double elapsed_time_ms = end_time_ms - _start_time_ms;
4045   // Update the step history.
4046   _step_times_ms.add(elapsed_time_ms);
4047 
4048   if (has_aborted()) {
4049     // The task was aborted for some reason.
4050 
4051     statsOnly( ++_aborted );
4052 
4053     if (_has_aborted_timed_out) {
4054       double diff_ms = elapsed_time_ms - _time_target_ms;
4055       // Keep statistics of how well we did with respect to hitting
4056       // our target only if we actually timed out (if we aborted for
4057       // other reasons, then the results might get skewed).
4058       _marking_step_diffs_ms.add(diff_ms);
4059     }
4060 
4061     if (_cm->has_overflown()) {
4062       // This is the interesting one. We aborted because a global
4063       // overflow was raised. This means we have to restart the
4064       // marking phase and start iterating over regions. However, in
4065       // order to do this we have to make sure that all tasks stop
4066       // what they are doing and re-initialise in a safe manner. We
4067       // will achieve this with the use of two barrier sync points.
4068 
4069       if (_cm->verbose_low())
4070         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4071 
4072       _cm->enter_first_sync_barrier(_task_id);
4073       // When we exit this sync barrier we know that all tasks have
4074       // stopped doing marking work. So, it's now safe to
4075       // re-initialise our data structures. At the end of this method,
4076       // task 0 will clear the global data structures.
4077 
4078       statsOnly( ++_aborted_overflow );
4079 
4080       // We clear the local state of this task...
4081       clear_region_fields();
4082 
4083       // ...and enter the second barrier.
4084       _cm->enter_second_sync_barrier(_task_id);
4085       // At this point everything has been re-initialised and we're
4086       // ready to restart.
4087     }
4088 
4089     if (_cm->verbose_low()) {
4090       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4091                              "elapsed = %1.2lfms <<<<<<<<<<",
4092                              _task_id, _time_target_ms, elapsed_time_ms);
4093       if (_cm->has_aborted())
4094         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4095                                _task_id);
4096     }
4097   } else {
4098     if (_cm->verbose_low())
4099       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4100                              "elapsed = %1.2lfms <<<<<<<<<<",
4101                              _task_id, _time_target_ms, elapsed_time_ms);
4102   }
4103 
4104   _claimed = false;
4105 }
4106 
4107 CMTask::CMTask(int task_id,
4108                ConcurrentMark* cm,
4109                CMTaskQueue* task_queue,
4110                CMTaskQueueSet* task_queues)
4111   : _g1h(G1CollectedHeap::heap()),
4112     _task_id(task_id), _cm(cm),
4113     _claimed(false),
4114     _nextMarkBitMap(NULL), _hash_seed(17),
4115     _task_queue(task_queue),
4116     _task_queues(task_queues),
4117     _oop_closure(NULL),
4118     _aborted_region(MemRegion()) {
4119   guarantee(task_queue != NULL, "invariant");
4120   guarantee(task_queues != NULL, "invariant");
4121 
4122   statsOnly( _clock_due_to_scanning = 0;
4123              _clock_due_to_marking  = 0 );
4124 
4125   _marking_step_diffs_ms.add(0.5);
4126 }