1 /*
   2  * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/symbolTable.hpp"
  27 #include "gc_implementation/g1/concurrentMark.hpp"
  28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  31 #include "gc_implementation/g1/g1RemSet.hpp"
  32 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  33 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
  34 #include "gc_implementation/shared/vmGCOperations.hpp"
  35 #include "memory/genOopClosures.inline.hpp"
  36 #include "memory/referencePolicy.hpp"
  37 #include "memory/resourceArea.hpp"
  38 #include "oops/oop.inline.hpp"
  39 #include "runtime/handles.inline.hpp"
  40 #include "runtime/java.hpp"
  41 
  42 //
  43 // CMS Bit Map Wrapper
  44 
  45 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
  46   _bm((uintptr_t*)NULL,0),
  47   _shifter(shifter) {
  48   _bmStartWord = (HeapWord*)(rs.base());
  49   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  50   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
  51                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  52 
  53   guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  54   // For now we'll just commit all of the bit map up front.
  55   // Later on we'll try to be more parsimonious with swap.
  56   guarantee(_virtual_space.initialize(brs, brs.size()),
  57             "couldn't reserve backing store for CMS bit map");
  58   assert(_virtual_space.committed_size() == brs.size(),
  59          "didn't reserve backing store for all of CMS bit map?");
  60   _bm.set_map((uintptr_t*)_virtual_space.low());
  61   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
  62          _bmWordSize, "inconsistency in bit map sizing");
  63   _bm.set_size(_bmWordSize >> _shifter);
  64 }
  65 
  66 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  67                                                HeapWord* limit) const {
  68   // First we must round addr *up* to a possible object boundary.
  69   addr = (HeapWord*)align_size_up((intptr_t)addr,
  70                                   HeapWordSize << _shifter);
  71   size_t addrOffset = heapWordToOffset(addr);
  72   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  73   size_t limitOffset = heapWordToOffset(limit);
  74   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  75   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  76   assert(nextAddr >= addr, "get_next_one postcondition");
  77   assert(nextAddr == limit || isMarked(nextAddr),
  78          "get_next_one postcondition");
  79   return nextAddr;
  80 }
  81 
  82 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  83                                                  HeapWord* limit) const {
  84   size_t addrOffset = heapWordToOffset(addr);
  85   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  86   size_t limitOffset = heapWordToOffset(limit);
  87   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  88   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  89   assert(nextAddr >= addr, "get_next_one postcondition");
  90   assert(nextAddr == limit || !isMarked(nextAddr),
  91          "get_next_one postcondition");
  92   return nextAddr;
  93 }
  94 
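     // Converts a difference expressed in heap words into the corresponding
     // difference in bit map offsets. The difference must be a multiple of
     // the granularity implied by _shifter.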
  95 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  96   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  97   return (int) (diff >> _shifter);
  98 }
  99 
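     // Applies the closure to each marked bit in the intersection of mr and
     // the space covered by this bit map. Returns false if the closure
     // aborted the iteration, true otherwise (including when the
     // intersection is empty).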
 100 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
 101   HeapWord* left  = MAX2(_bmStartWord, mr.start());
 102   HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
 103   if (right > left) {
 104     // Right-open interval [leftOffset, rightOffset).
 105     return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
 106   } else {
 107     return true;
 108   }
 109 }
 110 
 111 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
 112                                              size_t    from_start_index,
 113                                              HeapWord* to_start_word,
 114                                              size_t    word_num) {
 115   _bm.mostly_disjoint_range_union(from_bitmap,
 116                                   from_start_index,
 117                                   heapWordToOffset(to_start_word),
 118                                   word_num);
 119 }
 120 
 121 #ifndef PRODUCT
 122 bool CMBitMapRO::covers(ReservedSpace rs) const {
 123   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 124   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
 125          "size inconsistency");
 126   return _bmStartWord == (HeapWord*)(rs.base()) &&
 127          _bmWordSize  == rs.size()>>LogHeapWordSize;
 128 }
 129 #endif
 130 
 131 void CMBitMap::clearAll() {
 132   _bm.clear();
 133   return;
 134 }
 135 
 136 void CMBitMap::markRange(MemRegion mr) {
 137   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 138   assert(!mr.is_empty(), "unexpected empty region");
 139   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 140           ((HeapWord *) mr.end())),
 141          "markRange memory region end is not card aligned");
 142   // convert address range into offset range
 143   _bm.at_put_range(heapWordToOffset(mr.start()),
 144                    heapWordToOffset(mr.end()), true);
 145 }
 146 
 147 void CMBitMap::clearRange(MemRegion mr) {
 148   mr = mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 149   assert(!mr.is_empty(), "unexpected empty region");
 150   // convert address range into offset range
 151   _bm.at_put_range(heapWordToOffset(mr.start()),
 152                    heapWordToOffset(mr.end()), false);
 153 }
 154 
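     // Finds the first maximal run of marked words beginning at or after
     // addr, clipped to end_addr, clears it from the bit map and returns it
     // as a MemRegion (which may be empty).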
 155 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 156                                             HeapWord* end_addr) {
 157   HeapWord* start = getNextMarkedWordAddress(addr);
 158   start = MIN2(start, end_addr);
 159   HeapWord* end   = getNextUnmarkedWordAddress(start);
 160   end = MIN2(end, end_addr);
 161   assert(start <= end, "Consistency check");
 162   MemRegion mr(start, end);
 163   if (!mr.is_empty()) {
 164     clearRange(mr);
 165   }
 166   return mr;
 167 }
 168 
 169 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 170   _base(NULL), _cm(cm)
 171 #ifdef ASSERT
 172   , _drain_in_progress(false)
 173   , _drain_in_progress_yields(false)
 174 #endif
 175 {}
 176 
 177 void CMMarkStack::allocate(size_t size) {
 178   _base = NEW_C_HEAP_ARRAY(oop, size);
 179   if (_base == NULL)
 180     vm_exit_during_initialization("Failed to allocate "
 181                                   "CM mark stack");
 182   _index = 0;
 183   // QQQQ cast ...
 184   _capacity = (jint) size;
 185   _oops_do_bound = -1;
 186   NOT_PRODUCT(_max_depth = 0);
 187 }
 188 
 189 CMMarkStack::~CMMarkStack() {
 190   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
 191 }
 192 
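     // Lock-free push of a single entry. The CAS on _index is retried until
     // it either succeeds or the stack is found to be full, in which case
     // the overflow flag is set and the entry is dropped.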
 193 void CMMarkStack::par_push(oop ptr) {
 194   while (true) {
 195     if (isFull()) {
 196       _overflow = true;
 197       return;
 198     }
 199     // Otherwise...
 200     jint index = _index;
 201     jint next_index = index+1;
 202     jint res = Atomic::cmpxchg(next_index, &_index, index);
 203     if (res == index) {
 204       _base[index] = ptr;
 205       // Note that we don't maintain this atomically.  We could, but it
 206       // doesn't seem necessary.
 207       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 208       return;
 209     }
 210     // Otherwise, we need to try again.
 211   }
 212 }
 213 
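     // Lock-free push of n entries from ptr_arr. If they do not all fit,
     // the overflow flag is set and nothing is pushed.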
 214 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 215   while (true) {
 216     if (isFull()) {
 217       _overflow = true;
 218       return;
 219     }
 220     // Otherwise...
 221     jint index = _index;
 222     jint next_index = index + n;
 223     if (next_index > _capacity) {
 224       _overflow = true;
 225       return;
 226     }
 227     jint res = Atomic::cmpxchg(next_index, &_index, index);
 228     if (res == index) {
 229       for (int i = 0; i < n; i++) {
 230         int ind = index + i;
 231         assert(ind < _capacity, "By overflow test above.");
 232         _base[ind] = ptr_arr[i];
 233       }
 234       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 235       return;
 236     }
 237     // Otherwise, we need to try again.
 238   }
 239 }
 240 
 241 
 242 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 243   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 244   jint start = _index;
 245   jint next_index = start + n;
 246   if (next_index > _capacity) {
 247     _overflow = true;
 248     return;
 249   }
 250   // Otherwise.
 251   _index = next_index;
 252   for (int i = 0; i < n; i++) {
 253     int ind = start + i;
 254     assert(ind < _capacity, "By overflow test above.");
 255     _base[ind] = ptr_arr[i];
 256   }
 257 }
 258 
 259 
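     // Pops up to max entries into ptr_arr and records the number actually
     // popped in *n. Returns false if the stack was empty. Serialized with
     // par_push_arr via the ParGCRareEvent_lock.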
 260 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 261   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 262   jint index = _index;
 263   if (index == 0) {
 264     *n = 0;
 265     return false;
 266   } else {
 267     int k = MIN2(max, index);
 268     jint new_ind = index - k;
 269     for (int j = 0; j < k; j++) {
 270       ptr_arr[j] = _base[new_ind + j];
 271     }
 272     _index = new_ind;
 273     *n = k;
 274     return true;
 275   }
 276 }
 277 
 278 
 279 CMRegionStack::CMRegionStack() : _base(NULL) {}
 280 
 281 void CMRegionStack::allocate(size_t size) {
 282   _base = NEW_C_HEAP_ARRAY(MemRegion, size);
 283   if (_base == NULL)
 284     vm_exit_during_initialization("Failed to allocate "
 285                                   "CM region mark stack");
 286   _index = 0;
 287   // QQQQ cast ...
 288   _capacity = (jint) size;
 289 }
 290 
 291 CMRegionStack::~CMRegionStack() {
 292   if (_base != NULL) FREE_C_HEAP_ARRAY(MemRegion, _base);
 293 }
 294 
 295 void CMRegionStack::push_lock_free(MemRegion mr) {
 296   assert(mr.word_size() > 0, "Precondition");
 297   while (true) {
 298     jint index = _index;
 299 
 300     if (index >= _capacity) {
 301       _overflow = true;
 302       return;
 303     }
 304     // Otherwise...
 305     jint next_index = index+1;
 306     jint res = Atomic::cmpxchg(next_index, &_index, index);
 307     if (res == index) {
 308       _base[index] = mr;
 309       return;
 310     }
 311     // Otherwise, we need to try again.
 312   }
 313 }
 314 
 315 // Lock-free pop of the region stack. Called during the concurrent
 316 // marking / remark phases. Should only be called in tandem with
 317 // other lock-free pops.
 318 MemRegion CMRegionStack::pop_lock_free() {
 319   while (true) {
 320     jint index = _index;
 321 
 322     if (index == 0) {
 323       return MemRegion();
 324     }
 325     // Otherwise...
 326     jint next_index = index-1;
 327     jint res = Atomic::cmpxchg(next_index, &_index, index);
 328     if (res == index) {
 329       MemRegion mr = _base[next_index];
 330       if (mr.start() != NULL) {
 331         assert(mr.end() != NULL, "invariant");
 332         assert(mr.word_size() > 0, "invariant");
 333         return mr;
 334       } else {
 335         // that entry was invalidated... let's skip it
 336         assert(mr.end() == NULL, "invariant");
 337       }
 338     }
 339     // Otherwise, we need to try again.
 340   }
 341 }
 342 
 343 #if 0
 344 // The routines that manipulate the region stack with a lock are
 345 // not currently used. They should be retained, however, as a
 346 // diagnostic aid.
 347 
 348 void CMRegionStack::push_with_lock(MemRegion mr) {
 349   assert(mr.word_size() > 0, "Precondition");
 350   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 351 
 352   if (isFull()) {
 353     _overflow = true;
 354     return;
 355   }
 356 
 357   _base[_index] = mr;
 358   _index += 1;
 359 }
 360 
 361 MemRegion CMRegionStack::pop_with_lock() {
 362   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 363 
 364   while (true) {
 365     if (_index == 0) {
 366       return MemRegion();
 367     }
 368     _index -= 1;
 369 
 370     MemRegion mr = _base[_index];
 371     if (mr.start() != NULL) {
 372       assert(mr.end() != NULL, "invariant");
 373       assert(mr.word_size() > 0, "invariant");
 374       return mr;
 375     } else {
 376       // that entry was invalidated... let's skip it
 377       assert(mr.end() == NULL, "invariant");
 378     }
 379   }
 380 }
 381 #endif
 382 
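     // Replaces with an empty MemRegion every entry (below the oops_do
     // bound) that points into the current collection set. Returns true if
     // at least one entry was invalidated.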
 383 bool CMRegionStack::invalidate_entries_into_cset() {
 384   bool result = false;
 385   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 386   for (int i = 0; i < _oops_do_bound; ++i) {
 387     MemRegion mr = _base[i];
 388     if (mr.start() != NULL) {
 389       assert(mr.end() != NULL, "invariant");
 390       assert(mr.word_size() > 0, "invariant");
 391       HeapRegion* hr = g1h->heap_region_containing(mr.start());
 392       assert(hr != NULL, "invariant");
 393       if (hr->in_collection_set()) {
 394         // The region points into the collection set
 395         _base[i] = MemRegion();
 396         result = true;
 397       }
 398     } else {
 399       // that entry was invalidated... let's skip it
 400       assert(mr.end() == NULL, "invariant");
 401     }
 402   }
 403   return result;
 404 }
 405 
 406 template<class OopClosureClass>
 407 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 408   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 409          || SafepointSynchronize::is_at_safepoint(),
 410          "Drain recursion must be yield-safe.");
 411   bool res = true;
 412   debug_only(_drain_in_progress = true);
 413   debug_only(_drain_in_progress_yields = yield_after);
 414   while (!isEmpty()) {
 415     oop newOop = pop();
 416     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 417     assert(newOop->is_oop(), "Expected an oop");
 418     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 419            "only grey objects on this stack");
 420     // iterate over the oops in this oop, marking and pushing
 421     // the ones in the G1 heap.
 422     newOop->oop_iterate(cl);
 423     if (yield_after && _cm->do_yield_check()) {
 424       res = false; break;
 425     }
 426   }
 427   debug_only(_drain_in_progress = false);
 428   return res;
 429 }
 430 
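     // Applies f to the entries below the previously recorded oops_do
     // bound, then resets the bound. Does nothing if the stack is empty.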
 431 void CMMarkStack::oops_do(OopClosure* f) {
 432   if (_index == 0) return;
 433   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
 434          "Bound must be set.");
 435   for (int i = 0; i < _oops_do_bound; i++) {
 436     f->do_oop(&_base[i]);
 437   }
 438   _oops_do_bound = -1;
 439 }
 440 
 441 bool ConcurrentMark::not_yet_marked(oop obj) const {
 442   return (_g1h->is_obj_ill(obj)
 443           || (_g1h->is_in_permanent(obj)
 444               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
 445 }
 446 
 447 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 448 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 449 #endif // _MSC_VER
 450 
 451 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
 452                                int max_regions) :
 453   _markBitMap1(rs, MinObjAlignment - 1),
 454   _markBitMap2(rs, MinObjAlignment - 1),
 455 
 456   _parallel_marking_threads(0),
 457   _sleep_factor(0.0),
 458   _marking_task_overhead(1.0),
 459   _cleanup_sleep_factor(0.0),
 460   _cleanup_task_overhead(1.0),
 461   _cleanup_list("Cleanup List"),
 462   _region_bm(max_regions, false /* in_resource_area*/),
 463   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
 464            CardTableModRefBS::card_shift,
 465            false /* in_resource_area*/),
 466   _prevMarkBitMap(&_markBitMap1),
 467   _nextMarkBitMap(&_markBitMap2),
 468   _at_least_one_mark_complete(false),
 469 
 470   _markStack(this),
 471   _regionStack(),
 472   // _finger set in set_non_marking_state
 473 
 474   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
 475   // _active_tasks set in set_non_marking_state
 476   // _tasks set inside the constructor
 477   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
 478   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
 479 
 480   _has_overflown(false),
 481   _concurrent(false),
 482   _has_aborted(false),
 483   _restart_for_overflow(false),
 484   _concurrent_marking_in_progress(false),
 485   _should_gray_objects(false),
 486 
 487   // _verbose_level set below
 488 
 489   _init_times(),
 490   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 491   _cleanup_times(),
 492   _total_counting_time(0.0),
 493   _total_rs_scrub_time(0.0),
 494 
 495   _parallel_workers(NULL)
 496 {
 497   CMVerboseLevel verbose_level =
 498     (CMVerboseLevel) G1MarkingVerboseLevel;
 499   if (verbose_level < no_verbose)
 500     verbose_level = no_verbose;
 501   if (verbose_level > high_verbose)
 502     verbose_level = high_verbose;
 503   _verbose_level = verbose_level;
 504 
 505   if (verbose_low())
 506     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 507                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
 508 
 509   _markStack.allocate(MarkStackSize);
 510   _regionStack.allocate(G1MarkRegionStackSize);
 511 
 512   // Create & start a ConcurrentMark thread.
 513   _cmThread = new ConcurrentMarkThread(this);
 514   assert(cmThread() != NULL, "CM Thread should have been created");
 515   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 516 
 517   _g1h = G1CollectedHeap::heap();
 518   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 519   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
 520   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
 521 
 522   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 523   satb_qs.set_buffer_size(G1SATBBufferSize);
 524 
 525   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
 526   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
 527 
 528   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 529   _active_tasks = _max_task_num;
 530   for (int i = 0; i < (int) _max_task_num; ++i) {
 531     CMTaskQueue* task_queue = new CMTaskQueue();
 532     task_queue->initialize();
 533     _task_queues->register_queue(i, task_queue);
 534 
 535     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
 536     _accum_task_vtime[i] = 0.0;
 537   }
 538 
 539   if (ConcGCThreads > ParallelGCThreads) {
 540     vm_exit_during_initialization("Can't have more ConcGCThreads "
 541                                   "than ParallelGCThreads.");
 542   }
 543   if (ParallelGCThreads == 0) {
 544     // if we are not running with any parallel GC threads we will not
 545     // spawn any marking threads either
 546     _parallel_marking_threads =   0;
 547     _sleep_factor             = 0.0;
 548     _marking_task_overhead    = 1.0;
 549   } else {
 550     if (ConcGCThreads > 0) {
 551       // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
 552       // if both are set
 553 
 554       _parallel_marking_threads = ConcGCThreads;
 555       _sleep_factor             = 0.0;
 556       _marking_task_overhead    = 1.0;
 557     } else if (G1MarkingOverheadPercent > 0) {
 558       // we will calculate the number of parallel marking threads
 559       // based on a target overhead with respect to the soft real-time
 560       // goal
 561 
 562       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 563       double overall_cm_overhead =
 564         (double) MaxGCPauseMillis * marking_overhead /
 565         (double) GCPauseIntervalMillis;
 566       double cpu_ratio = 1.0 / (double) os::processor_count();
 567       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 568       double marking_task_overhead =
 569         overall_cm_overhead / marking_thread_num *
 570                                                 (double) os::processor_count();
 571       double sleep_factor =
 572                          (1.0 - marking_task_overhead) / marking_task_overhead;
 573 
 574       _parallel_marking_threads = (size_t) marking_thread_num;
 575       _sleep_factor             = sleep_factor;
 576       _marking_task_overhead    = marking_task_overhead;
 577     } else {
 578       _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
 579       _sleep_factor             = 0.0;
 580       _marking_task_overhead    = 1.0;
 581     }
 582 
 583     if (parallel_marking_threads() > 1)
 584       _cleanup_task_overhead = 1.0;
 585     else
 586       _cleanup_task_overhead = marking_task_overhead();
 587     _cleanup_sleep_factor =
 588                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 589 
 590 #if 0
 591     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 592     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 593     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 594     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 595     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 596 #endif
 597 
 598     guarantee(parallel_marking_threads() > 0, "peace of mind");
 599     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
 600          (int) _parallel_marking_threads, false, true);
 601     if (_parallel_workers == NULL) {
 602       vm_exit_during_initialization("Failed necessary allocation.");
 603     } else {
 604       _parallel_workers->initialize_workers();
 605     }
 606   }
 607 
 608   // so that the call below can read a sensible value
 609   _heap_start = (HeapWord*) rs.base();
 610   set_non_marking_state();
 611 }
 612 
 613 void ConcurrentMark::update_g1_committed(bool force) {
 614   // If concurrent marking is not in progress, then we do not need to
 615   // update _heap_end. This has a subtle and important
 616   // side-effect. Imagine that two evacuation pauses happen between
 617   // marking completion and remark. The first one can grow the
 618   // heap (hence now the finger is below the heap end). Then, the
 619   // second one could unnecessarily push regions on the region
 620   // stack. This causes the invariant that the region stack is empty
 621   // at the beginning of remark to be false. By ensuring that we do
 622   // not observe heap expansions after marking is complete, then we do
 623   // not have this problem.
 624   if (!concurrent_marking_in_progress() && !force)
 625     return;
 626 
 627   MemRegion committed = _g1h->g1_committed();
 628   assert(committed.start() == _heap_start, "start shouldn't change");
 629   HeapWord* new_end = committed.end();
 630   if (new_end > _heap_end) {
 631     // The heap has been expanded.
 632 
 633     _heap_end = new_end;
 634   }
 635   // Notice that the heap can also shrink. However, this only happens
 636   // during a Full GC (at least currently) and the entire marking
 637   // phase will bail out and the task will not be restarted. So, let's
 638   // do nothing.
 639 }
 640 
 641 void ConcurrentMark::reset() {
 642   // Starting values for these two. This should be called in a STW
 643   // phase. CM will be notified of any future g1_committed expansions;
 644   // these will happen at the end of evacuation pauses, when tasks are
 645   // inactive.
 646   MemRegion committed = _g1h->g1_committed();
 647   _heap_start = committed.start();
 648   _heap_end   = committed.end();
 649 
 650   // Separated the asserts so that we know which one fires.
 651   assert(_heap_start != NULL, "heap bounds should look ok");
 652   assert(_heap_end != NULL, "heap bounds should look ok");
 653   assert(_heap_start < _heap_end, "heap bounds should look ok");
 654 
 655   // reset all the marking data structures and any necessary flags
 656   clear_marking_state();
 657 
 658   if (verbose_low())
 659     gclog_or_tty->print_cr("[global] resetting");
 660 
 661   // We do reset all of them, since different phases will use
 662   // different number of active threads. So, it's easiest to have all
 663   // of them ready.
 664   for (int i = 0; i < (int) _max_task_num; ++i) {
 665     _tasks[i]->reset(_nextMarkBitMap);
 666   }
 667 
 668   // we need this to make sure that the flag is on during the evac
 669   // pause with initial mark piggy-backed
 670   set_concurrent_marking_in_progress();
 671 }
 672 
 673 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
 674   assert(active_tasks <= _max_task_num, "we should not have more");
 675 
 676   _active_tasks = active_tasks;
 677   // Need to update the three data structures below according to the
 678   // number of active threads for this phase.
 679   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 680   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 681   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 682 
 683   _concurrent = concurrent;
 684   // We propagate this to all tasks, not just the active ones.
 685   for (int i = 0; i < (int) _max_task_num; ++i)
 686     _tasks[i]->set_concurrent(concurrent);
 687 
 688   if (concurrent) {
 689     set_concurrent_marking_in_progress();
 690   } else {
 691     // We currently assume that the concurrent flag has been set to
 692     // false before we start remark. At this point we should also be
 693     // in a STW phase.
 694     assert(!concurrent_marking_in_progress(), "invariant");
 695     assert(_finger == _heap_end, "only way to get here");
 696     update_g1_committed(true);
 697   }
 698 }
 699 
 700 void ConcurrentMark::set_non_marking_state() {
 701   // We set the global marking state to some default values when we're
 702   // not doing marking.
 703   clear_marking_state();
 704   _active_tasks = 0;
 705   clear_concurrent_marking_in_progress();
 706 }
 707 
 708 ConcurrentMark::~ConcurrentMark() {
 709   for (int i = 0; i < (int) _max_task_num; ++i) {
 710     delete _task_queues->queue(i);
 711     delete _tasks[i];
 712   }
 713   delete _task_queues;
 714   FREE_C_HEAP_ARRAY(CMTask*, _tasks);
 715 }
 716 
 717 // This closure is used to mark refs into the g1 generation
 718 // from external roots in the CMS bit map.
 719 // Called at the first checkpoint.
 720 //
 721 
 722 void ConcurrentMark::clearNextBitmap() {
 723   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 724   G1CollectorPolicy* g1p = g1h->g1_policy();
 725 
 726   // Make sure that the concurrent mark thread looks to still be in
 727   // the current cycle.
 728   guarantee(cmThread()->during_cycle(), "invariant");
 729 
 730   // We are finishing up the current cycle by clearing the next
 731   // marking bitmap and getting it ready for the next cycle. During
 732   // this time no other cycle can start. So, let's make sure that this
 733   // is the case.
 734   guarantee(!g1h->mark_in_progress(), "invariant");
 735 
 736   // clear the mark bitmap (no grey objects to start with).
 737   // We need to do this in chunks and offer to yield in between
 738   // each chunk.
 739   HeapWord* start  = _nextMarkBitMap->startWord();
 740   HeapWord* end    = _nextMarkBitMap->endWord();
 741   HeapWord* cur    = start;
 742   size_t chunkSize = M;
 743   while (cur < end) {
 744     HeapWord* next = cur + chunkSize;
 745     if (next > end)
 746       next = end;
 747     MemRegion mr(cur,next);
 748     _nextMarkBitMap->clearRange(mr);
 749     cur = next;
 750     do_yield_check();
 751 
 752     // Repeat the asserts from above. We'll do them as asserts here to
 753     // minimize their overhead on the product. However, we'll have
 754     // them as guarantees at the beginning / end of the bitmap
 755     // clearing to get some checking in the product.
 756     assert(cmThread()->during_cycle(), "invariant");
 757     assert(!g1h->mark_in_progress(), "invariant");
 758   }
 759 
 760   // Repeat the asserts from above.
 761   guarantee(cmThread()->during_cycle(), "invariant");
 762   guarantee(!g1h->mark_in_progress(), "invariant");
 763 }
 764 
 765 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 766 public:
 767   bool doHeapRegion(HeapRegion* r) {
 768     if (!r->continuesHumongous()) {
 769       r->note_start_of_marking(true);
 770     }
 771     return false;
 772   }
 773 };
 774 
 775 void ConcurrentMark::checkpointRootsInitialPre() {
 776   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 777   G1CollectorPolicy* g1p = g1h->g1_policy();
 778 
 779   _has_aborted = false;
 780 
 781 #ifndef PRODUCT
 782   if (G1PrintReachableAtInitialMark) {
 783     print_reachable("at-cycle-start",
 784                     true /* use_prev_marking */, true /* all */);
 785   }
 786 #endif
 787 
 788   // Initialise marking structures. This has to be done in a STW phase.
 789   reset();
 790 }
 791 
 792 class CMMarkRootsClosure: public OopsInGenClosure {
 793 private:
 794   ConcurrentMark*  _cm;
 795   G1CollectedHeap* _g1h;
 796   bool             _do_barrier;
 797 
 798 public:
 799   CMMarkRootsClosure(ConcurrentMark* cm,
 800                      G1CollectedHeap* g1h,
 801                      bool do_barrier) : _cm(cm), _g1h(g1h),
 802                                         _do_barrier(do_barrier) { }
 803 
 804   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
 805   virtual void do_oop(      oop* p) { do_oop_work(p); }
 806 
 807   template <class T> void do_oop_work(T* p) {
 808     T heap_oop = oopDesc::load_heap_oop(p);
 809     if (!oopDesc::is_null(heap_oop)) {
 810       oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
 811       assert(obj->is_oop() || obj->mark() == NULL,
 812              "expected an oop, possibly with mark word displaced");
 813       HeapWord* addr = (HeapWord*)obj;
 814       if (_g1h->is_in_g1_reserved(addr)) {
 815         _cm->grayRoot(obj);
 816       }
 817     }
 818     if (_do_barrier) {
 819       assert(!_g1h->is_in_g1_reserved(p),
 820              "Should be called on external roots");
 821       do_barrier(p);
 822     }
 823   }
 824 };
 825 
 826 void ConcurrentMark::checkpointRootsInitialPost() {
 827   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 828 
 829   // If we force an overflow during remark, the remark operation will
 830   // actually abort and we'll restart concurrent marking. If we always
 831   // force an overflow during remark we'll never actually complete the
 832   // marking phase. So, we initialize this here, at the start of the
 833   // cycle, so that the remaining overflow number will decrease at
 834   // every remark and we'll eventually not need to cause one.
 835   force_overflow_stw()->init();
 836 
 837   // For each region note start of marking.
 838   NoteStartOfMarkHRClosure startcl;
 839   g1h->heap_region_iterate(&startcl);
 840 
 841   // Start weak-reference discovery.
 842   ReferenceProcessor* rp = g1h->ref_processor();
 843   rp->verify_no_references_recorded();
 844   rp->enable_discovery(); // enable ("weak") refs discovery
 845   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 846 
 847   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 848   // This is the start of the marking cycle; we expect all
 849   // threads to have SATB queues with active set to false.
 850   satb_mq_set.set_active_all_threads(true, /* new active value */
 851                                      false /* expected_active */);
 852 
 853   // update_g1_committed() will be called at the end of an evac pause
 854   // when marking is on. So, it's also called at the end of the
 855   // initial-mark pause to update the heap end, if the heap expands
 856   // during it. No need to call it here.
 857 }
 858 
 859 // Checkpoint the roots into this generation from outside
 860 // this generation. [Note this initial checkpoint need only
 861 // be approximate -- we'll do a catch up phase subsequently.]
 862 void ConcurrentMark::checkpointRootsInitial() {
 863   assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
 864   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 865 
 866   double start = os::elapsedTime();
 867 
 868   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
 869   g1p->record_concurrent_mark_init_start();
 870   checkpointRootsInitialPre();
 871 
 872   // YSR: when concurrent precleaning is in place, we'll
 873   // need to clear the cached card table here
 874 
 875   ResourceMark rm;
 876   HandleMark  hm;
 877 
 878   g1h->ensure_parsability(false);
 879   g1h->perm_gen()->save_marks();
 880 
 881   CMMarkRootsClosure notOlder(this, g1h, false);
 882   CMMarkRootsClosure older(this, g1h, true);
 883 
 884   g1h->set_marking_started();
 885   g1h->rem_set()->prepare_for_younger_refs_iterate(false);
 886 
 887   g1h->process_strong_roots(true,    // activate StrongRootsScope
 888                             false,   // fake perm gen collection
 889                             SharedHeap::SO_AllClasses,
 890                             &notOlder, // Regular roots
 891                             NULL,     // do not visit active blobs
 892                             &older    // Perm Gen Roots
 893                             );
 894   checkpointRootsInitialPost();
 895 
 896   // Statistics.
 897   double end = os::elapsedTime();
 898   _init_times.add((end - start) * 1000.0);
 899 
 900   g1p->record_concurrent_mark_init_end();
 901 }
 902 
 903 /*
 904  * Notice that in the next two methods, we actually leave the STS
 905  * during the barrier sync and join it immediately afterwards. If we
 906  * do not do this, the following deadlock can occur: one thread could
 907  * be in the barrier sync code, waiting for the other thread to also
 908  * sync up, whereas another one could be trying to yield, while also
 909  * waiting for the other threads to sync up too.
 910  *
 911  * Note, however, that this code is also used during remark and in
 912  * this case we should not attempt to leave / enter the STS, otherwise
 913  * we'll either hit an assert (debug / fastdebug) or deadlock
 914  * (product). So we should only leave / enter the STS if we are
 915  * operating concurrently.
 916  *
 917  * Because the thread that does the sync barrier has left the STS, it
 918  * is possible for it to be suspended for a Full GC or for an evacuation
 919  * pause to occur. This is actually safe, since entering the sync
 920  * barrier is one of the last things do_marking_step() does, and it
 921  * doesn't manipulate any data structures afterwards.
 922  */
 923 
 924 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
 925   if (verbose_low())
 926     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
 927 
 928   if (concurrent()) {
 929     ConcurrentGCThread::stsLeave();
 930   }
 931   _first_overflow_barrier_sync.enter();
 932   if (concurrent()) {
 933     ConcurrentGCThread::stsJoin();
 934   }
 935   // at this point everyone should have synced up and not be doing any
 936   // more work
 937 
 938   if (verbose_low())
 939     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
 940 
 941   // let task 0 do this
 942   if (task_num == 0) {
 943     // task 0 is responsible for clearing the global data structures
 944     // We should be here because of an overflow. During STW we should
 945     // not clear the overflow flag since we rely on it being true when
 946     // we exit this method to abort the pause and restart concurrent
 947     // marking.
 948     clear_marking_state(concurrent() /* clear_overflow */);
 949     force_overflow()->update();
 950 
 951     if (PrintGC) {
 952       gclog_or_tty->date_stamp(PrintGCDateStamps);
 953       gclog_or_tty->stamp(PrintGCTimeStamps);
 954       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 955     }
 956   }
 957 
 958   // after this, each task should reset its own data structures and
 959   // then go into the second barrier
 960 }
 961 
 962 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
 963   if (verbose_low())
 964     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
 965 
 966   if (concurrent()) {
 967     ConcurrentGCThread::stsLeave();
 968   }
 969   _second_overflow_barrier_sync.enter();
 970   if (concurrent()) {
 971     ConcurrentGCThread::stsJoin();
 972   }
 973   // at this point everything should be re-initialised and ready to go
 974 
 975   if (verbose_low())
 976     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
 977 }
 978 
 979 #ifndef PRODUCT
 980 void ForceOverflowSettings::init() {
 981   _num_remaining = G1ConcMarkForceOverflow;
 982   _force = false;
 983   update();
 984 }
 985 
 986 void ForceOverflowSettings::update() {
 987   if (_num_remaining > 0) {
 988     _num_remaining -= 1;
 989     _force = true;
 990   } else {
 991     _force = false;
 992   }
 993 }
 994 
 995 bool ForceOverflowSettings::should_force() {
 996   if (_force) {
 997     _force = false;
 998     return true;
 999   } else {
1000     return false;
1001   }
1002 }
1003 #endif // !PRODUCT
1004 
1005 void ConcurrentMark::grayRoot(oop p) {
1006   HeapWord* addr = (HeapWord*) p;
1007   // We can't really check against _heap_start and _heap_end, since it
1008   // is possible during an evacuation pause with piggy-backed
1009   // initial-mark that the committed space is expanded during the
1010   // pause without CM observing this change. So the assertion below
1011   // is a bit conservative, but better than nothing.
1012   assert(_g1h->g1_committed().contains(addr),
1013          "address should be within the heap bounds");
1014 
1015   if (!_nextMarkBitMap->isMarked(addr))
1016     _nextMarkBitMap->parMark(addr);
1017 }
1018 
1019 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
1020   // The objects on the region have already been marked "in bulk" by
1021   // the caller. We only need to decide whether to push the region on
1022   // the region stack or not.
1023 
1024   if (!concurrent_marking_in_progress() || !_should_gray_objects)
1025     // We're done with marking and waiting for remark. We do not need to
1026     // push anything else on the region stack.
1027     return;
1028 
1029   HeapWord* finger = _finger;
1030 
1031   if (verbose_low())
1032     gclog_or_tty->print_cr("[global] attempting to push "
1033                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
1034                            PTR_FORMAT, mr.start(), mr.end(), finger);
1035 
1036   if (mr.start() < finger) {
1037     // The finger is always heap region aligned and it is not possible
1038     // for mr to span heap regions.
1039     assert(mr.end() <= finger, "invariant");
1040 
1041     // Separated the asserts so that we know which one fires.
1042     assert(mr.start() <= mr.end(),
1043            "region boundaries should fall within the committed space");
1044     assert(_heap_start <= mr.start(),
1045            "region boundaries should fall within the committed space");
1046     assert(mr.end() <= _heap_end,
1047            "region boundaries should fall within the committed space");
1048     if (verbose_low())
1049       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
1050                              "below the finger, pushing it",
1051                              mr.start(), mr.end());
1052 
1053     if (!region_stack_push_lock_free(mr)) {
1054       if (verbose_low())
1055         gclog_or_tty->print_cr("[global] region stack has overflown.");
1056     }
1057   }
1058 }
1059 
1060 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1061   // The object is not marked by the caller. We need to at least mark
1062   // it and maybe push in on the stack.
1063 
1064   HeapWord* addr = (HeapWord*)p;
1065   if (!_nextMarkBitMap->isMarked(addr)) {
1066     // We definitely need to mark it, irrespective whether we bail out
1067     // because we're done with marking.
1068     if (_nextMarkBitMap->parMark(addr)) {
1069       if (!concurrent_marking_in_progress() || !_should_gray_objects)
1070         // If we're done with concurrent marking and we're waiting for
1071         // remark, then we're not pushing anything on the stack.
1072         return;
1073 
1074       // No OrderAccess:store_load() is needed. It is implicit in the
1075       // CAS done in parMark(addr) above
1076       HeapWord* finger = _finger;
1077 
1078       if (addr < finger) {
1079         if (!mark_stack_push(oop(addr))) {
1080           if (verbose_low())
1081             gclog_or_tty->print_cr("[global] global stack overflow "
1082                                    "during parMark");
1083         }
1084       }
1085     }
1086   }
1087 }
1088 
1089 class CMConcurrentMarkingTask: public AbstractGangTask {
1090 private:
1091   ConcurrentMark*       _cm;
1092   ConcurrentMarkThread* _cmt;
1093 
1094 public:
1095   void work(int worker_i) {
1096     assert(Thread::current()->is_ConcurrentGC_thread(),
1097            "this should only be done by a conc GC thread");
1098     ResourceMark rm;
1099 
1100     double start_vtime = os::elapsedVTime();
1101 
1102     ConcurrentGCThread::stsJoin();
1103 
1104     assert((size_t) worker_i < _cm->active_tasks(), "invariant");
1105     CMTask* the_task = _cm->task(worker_i);
1106     the_task->record_start_time();
1107     if (!_cm->has_aborted()) {
1108       do {
1109         double start_vtime_sec = os::elapsedVTime();
1110         double start_time_sec = os::elapsedTime();
1111         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1112 
1113         the_task->do_marking_step(mark_step_duration_ms,
1114                                   true /* do_stealing    */,
1115                                   true /* do_termination */);
1116 
1117         double end_time_sec = os::elapsedTime();
1118         double end_vtime_sec = os::elapsedVTime();
1119         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1120         double elapsed_time_sec = end_time_sec - start_time_sec;
1121         _cm->clear_has_overflown();
1122 
1123         bool ret = _cm->do_yield_check(worker_i);
1124 
1125         jlong sleep_time_ms;
1126         if (!_cm->has_aborted() && the_task->has_aborted()) {
1127           sleep_time_ms =
1128             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1129           ConcurrentGCThread::stsLeave();
1130           os::sleep(Thread::current(), sleep_time_ms, false);
1131           ConcurrentGCThread::stsJoin();
1132         }
1133         double end_time2_sec = os::elapsedTime();
1134         double elapsed_time2_sec = end_time2_sec - start_time_sec;
1135 
1136 #if 0
1137           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1138                                  "overhead %1.4lf",
1139                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1140                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
1141           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1142                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1143 #endif
1144       } while (!_cm->has_aborted() && the_task->has_aborted());
1145     }
1146     the_task->record_end_time();
1147     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1148 
1149     ConcurrentGCThread::stsLeave();
1150 
1151     double end_vtime = os::elapsedVTime();
1152     _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1153   }
1154 
1155   CMConcurrentMarkingTask(ConcurrentMark* cm,
1156                           ConcurrentMarkThread* cmt) :
1157       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1158 
1159   ~CMConcurrentMarkingTask() { }
1160 };
1161 
1162 void ConcurrentMark::markFromRoots() {
1163   // we might be tempted to assert that:
1164   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1165   //        "inconsistent argument?");
1166   // However that wouldn't be right, because it's possible that
1167   // a safepoint is indeed in progress as a younger generation
1168   // stop-the-world GC happens even as we mark in this generation.
1169 
1170   _restart_for_overflow = false;
1171 
1172   size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
1173   force_overflow_conc()->init();
1174   set_phase(active_workers, true /* concurrent */);
1175 
1176   CMConcurrentMarkingTask markingTask(this, cmThread());
1177   if (parallel_marking_threads() > 0)
1178     _parallel_workers->run_task(&markingTask);
1179   else
1180     markingTask.work(0);
1181   print_stats();
1182 }
1183 
1184 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1185   // world is stopped at this checkpoint
1186   assert(SafepointSynchronize::is_at_safepoint(),
1187          "world should be stopped");
1188   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1189 
1190   // If a full collection has happened, we shouldn't do this.
1191   if (has_aborted()) {
1192     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1193     return;
1194   }
1195 
1196   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1197 
1198   if (VerifyDuringGC) {
1199     HandleMark hm;  // handle scope
1200     gclog_or_tty->print(" VerifyDuringGC:(before)");
1201     Universe::heap()->prepare_for_verify();
1202     Universe::verify(true, false, true);
1203   }
1204 
1205   G1CollectorPolicy* g1p = g1h->g1_policy();
1206   g1p->record_concurrent_mark_remark_start();
1207 
1208   double start = os::elapsedTime();
1209 
1210   checkpointRootsFinalWork();
1211 
1212   double mark_work_end = os::elapsedTime();
1213 
1214   weakRefsWork(clear_all_soft_refs);
1215 
1216   if (has_overflown()) {
1217     // Oops.  We overflowed.  Restart concurrent marking.
1218     _restart_for_overflow = true;
1219     // Clear the flag. We do not need it any more.
1220     clear_has_overflown();
1221     if (G1TraceMarkStackOverflow)
1222       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1223   } else {
1224     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1225     // We're done with marking.
1226     // This is the end of the marking cycle; we expect all
1227     // threads to have SATB queues with active set to true.
1228     satb_mq_set.set_active_all_threads(false, /* new active value */
1229                                        true /* expected_active */);
1230 
1231     if (VerifyDuringGC) {
1232       HandleMark hm;  // handle scope
1233       gclog_or_tty->print(" VerifyDuringGC:(after)");
1234       Universe::heap()->prepare_for_verify();
1235       Universe::heap()->verify(/* allow_dirty */      true,
1236                                /* silent */           false,
1237                                /* use_prev_marking */ false);
1238     }
1239     assert(!restart_for_overflow(), "sanity");
1240   }
1241 
1242   // Reset the marking state if marking completed
1243   if (!restart_for_overflow()) {
1244     set_non_marking_state();
1245   }
1246 
1247 #if VERIFY_OBJS_PROCESSED
1248   _scan_obj_cl.objs_processed = 0;
1249   ThreadLocalObjQueue::objs_enqueued = 0;
1250 #endif
1251 
1252   // Statistics
1253   double now = os::elapsedTime();
1254   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1255   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1256   _remark_times.add((now - start) * 1000.0);
1257 
1258   g1p->record_concurrent_mark_remark_end();
1259 }
1260 
1261 #define CARD_BM_TEST_MODE 0
1262 
1263 class CalcLiveObjectsClosure: public HeapRegionClosure {
1264 
1265   CMBitMapRO* _bm;
1266   ConcurrentMark* _cm;
1267   bool _changed;
1268   bool _yield;
1269   size_t _words_done;
1270   size_t _tot_live;
1271   size_t _tot_used;
1272   size_t _regions_done;
1273   double _start_vtime_sec;
1274 
1275   BitMap* _region_bm;
1276   BitMap* _card_bm;
1277   intptr_t _bottom_card_num;
1278   bool _final;
1279 
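       // Marks in the card bit map every card in the inclusive range
       // [start_card_num, last_card_num]; card numbers are translated into
       // bit indices relative to the card number of the heap bottom.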
1280   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1281     for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1282 #if CARD_BM_TEST_MODE
1283       guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
1284 #else
1285       _card_bm->par_at_put(i - _bottom_card_num, 1);
1286 #endif
1287     }
1288   }
1289 
1290 public:
1291   CalcLiveObjectsClosure(bool final,
1292                          CMBitMapRO *bm, ConcurrentMark *cm,
1293                          BitMap* region_bm, BitMap* card_bm) :
1294     _bm(bm), _cm(cm), _changed(false), _yield(true),
1295     _words_done(0), _tot_live(0), _tot_used(0),
1296     _region_bm(region_bm), _card_bm(card_bm),_final(final),
1297     _regions_done(0), _start_vtime_sec(0.0)
1298   {
1299     _bottom_card_num =
1300       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1301                CardTableModRefBS::card_shift);
1302   }
1303 
1304   // It takes a region that's not empty (i.e., it has at least one
1305   // live object in it) and sets its corresponding bit on the region
1306   // bitmap to 1. If the region is "starts humongous" it will also set
1307   // to 1 the bits on the region bitmap that correspond to its
1308   // associated "continues humongous" regions.
1309   void set_bit_for_region(HeapRegion* hr) {
1310     assert(!hr->continuesHumongous(), "should have filtered those out");
1311 
1312     size_t index = hr->hrs_index();
1313     if (!hr->startsHumongous()) {
1314       // Normal (non-humongous) case: just set the bit.
1315       _region_bm->par_at_put((BitMap::idx_t) index, true);
1316     } else {
1317       // Starts humongous case: calculate how many regions are part of
1318       // this humongous region and then set the bit range. It might
1319       // have been a bit more efficient to look at the object that
1320       // spans these humongous regions to calculate their number from
1321       // the object's size. However, it's a good idea to calculate
1322       // this based on the metadata itself, and not the region
1323       // contents, so that this code is not aware of what goes into
1324       // the humongous regions (in case this changes in the future).
1325       G1CollectedHeap* g1h = G1CollectedHeap::heap();
1326       size_t end_index = index + 1;
1327       while (end_index < g1h->n_regions()) {
1328         HeapRegion* chr = g1h->region_at(end_index);
1329         if (!chr->continuesHumongous()) {
1330           break;
1331         }
1332         end_index += 1;
1333       }
1334       _region_bm->par_at_put_range((BitMap::idx_t) index,
1335                                    (BitMap::idx_t) end_index, true);
1336     }
1337   }
1338 
1339   bool doHeapRegion(HeapRegion* hr) {
1340     if (!_final && _regions_done == 0)
1341       _start_vtime_sec = os::elapsedVTime();
1342 
1343     if (hr->continuesHumongous()) {
1344       // We will ignore these here and process them when their
1345       // associated "starts humongous" region is processed (see
1346       // set_bit_for_heap_region()). Note that we cannot rely on their
1347       // associated "starts humongous" region to have their bit set to
1348       // 1 since, due to the region chunking in the parallel region
1349       // iteration, a "continues humongous" region might be visited
1350       // before its associated "starts humongous".
1351       return false;
1352     }
1353 
1354     HeapWord* nextTop = hr->next_top_at_mark_start();
1355     HeapWord* start   = hr->top_at_conc_mark_count();
1356     assert(hr->bottom() <= start && start <= hr->end() &&
1357            hr->bottom() <= nextTop && nextTop <= hr->end() &&
1358            start <= nextTop,
1359            "Preconditions.");
1360     // Otherwise, record the number of words we'll examine.
1361     size_t words_done = (nextTop - start);
1362     // Find the first marked object at or after "start".
1363     start = _bm->getNextMarkedWordAddress(start, nextTop);
1364     size_t marked_bytes = 0;
1365 
1366     // Below, the term "card num" means the result of shifting an address
1367     // by the card shift -- address 0 corresponds to card number 0.  One
1368     // must subtract the card num of the bottom of the heap to obtain a
1369     // card table index.
1370     // The first card num of the sequence of live cards currently being
1371     // constructed.  -1 ==> no sequence.
1372     intptr_t start_card_num = -1;
1373     // The last card num of the sequence of live cards currently being
1374     // constructed.  -1 ==> no sequence.
1375     intptr_t last_card_num = -1;
1376 
1377     while (start < nextTop) {
1378       if (_yield && _cm->do_yield_check()) {
1379         // We yielded.  It might be for a full collection, in which case
1380         // all bets are off; terminate the traversal.
1381         if (_cm->has_aborted()) {
1382           _changed = false;
1383           return true;
1384         } else {
1385           // Otherwise, it might be a collection pause, and the region
1386           // we're looking at might be in the collection set.  We'll
1387           // abandon this region.
1388           return false;
1389         }
1390       }
1391       oop obj = oop(start);
1392       int obj_sz = obj->size();
1393       // The card num of the start of the current object.
1394       intptr_t obj_card_num =
1395         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1396 
1397       HeapWord* obj_last = start + obj_sz - 1;
1398       intptr_t obj_last_card_num =
1399         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1400 
1401       if (obj_card_num != last_card_num) {
1402         if (start_card_num == -1) {
1403           assert(last_card_num == -1, "Both or neither.");
1404           start_card_num = obj_card_num;
1405         } else {
1406           assert(last_card_num != -1, "Both or neither.");
1407           assert(obj_card_num >= last_card_num, "Inv");
1408           if ((obj_card_num - last_card_num) > 1) {
1409             // Mark the last run, and start a new one.
1410             mark_card_num_range(start_card_num, last_card_num);
1411             start_card_num = obj_card_num;
1412           }
1413         }
1414 #if CARD_BM_TEST_MODE
1415         /*
1416         gclog_or_tty->print_cr("Setting bits from %d/%d.",
1417                                obj_card_num - _bottom_card_num,
1418                                obj_last_card_num - _bottom_card_num);
1419         */
1420         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1421           _card_bm->par_at_put(j - _bottom_card_num, 1);
1422         }
1423 #endif
1424       }
1425       // In any case, we set the last card num.
1426       last_card_num = obj_last_card_num;
1427 
1428       marked_bytes += (size_t)obj_sz * HeapWordSize;
1429       // Find the next marked object after this one.
1430       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1431       _changed = true;
1432     }
1433     // Handle the last range, if any.
1434     if (start_card_num != -1)
1435       mark_card_num_range(start_card_num, last_card_num);
1436     if (_final) {
1437       // Mark the allocated-since-marking portion...
1438       HeapWord* tp = hr->top();
1439       if (nextTop < tp) {
1440         start_card_num =
1441           intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1442         last_card_num =
1443           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1444         mark_card_num_range(start_card_num, last_card_num);
1445         // This definitely means the region has live objects.
1446         set_bit_for_region(hr);
1447       }
1448     }
1449 
1450     hr->add_to_marked_bytes(marked_bytes);
1451     // Update the live region bitmap.
1452     if (marked_bytes > 0) {
1453       set_bit_for_region(hr);
1454     }
1455     hr->set_top_at_conc_mark_count(nextTop);
1456     _tot_live += hr->next_live_bytes();
1457     _tot_used += hr->used();
1458     _words_done = words_done;
1459 
1460     if (!_final) {
1461       ++_regions_done;
1462       if (_regions_done % 10 == 0) {
1463         double end_vtime_sec = os::elapsedVTime();
1464         double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1465         if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1466           jlong sleep_time_ms =
1467             (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1468           os::sleep(Thread::current(), sleep_time_ms, false);
1469           _start_vtime_sec = end_vtime_sec;
1470         }
1471       }
1472     }
1473 
1474     return false;
1475   }
1476 
1477   bool changed() { return _changed;  }
1478   void reset()   { _changed = false; _words_done = 0; }
1479   void no_yield() { _yield = false; }
1480   size_t words_done() { return _words_done; }
1481   size_t tot_live() { return _tot_live; }
1482   size_t tot_used() { return _tot_used; }
1483 };
1484 
1485 
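// Computes a preliminary (non-final) liveness estimate for every region:
// the regions are walked with CalcLiveObjectsClosure, and the walk is
// repeated until an iteration completes without the closure having to
// yield part-way through (i.e. until nothing changed).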
1486 void ConcurrentMark::calcDesiredRegions() {
1487   _region_bm.clear();
1488   _card_bm.clear();
1489   CalcLiveObjectsClosure calccl(false /*final*/,
1490                                 nextMarkBitMap(), this,
1491                                 &_region_bm, &_card_bm);
1492   G1CollectedHeap *g1h = G1CollectedHeap::heap();
1493   g1h->heap_region_iterate(&calccl);
1494 
1495   do {
1496     calccl.reset();
1497     g1h->heap_region_iterate(&calccl);
1498   } while (calccl.changed());
1499 }
1500 
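// Final counting task, run during cleanup with the world stopped: each
// worker applies CalcLiveObjectsClosure (in final mode, with yielding
// disabled) to the regions it claims and records its live/used byte
// totals, which live_bytes()/used_bytes() then sum across all workers.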
1501 class G1ParFinalCountTask: public AbstractGangTask {
1502 protected:
1503   G1CollectedHeap* _g1h;
1504   CMBitMap* _bm;
1505   size_t _n_workers;
1506   size_t *_live_bytes;
1507   size_t *_used_bytes;
1508   BitMap* _region_bm;
1509   BitMap* _card_bm;
1510 public:
1511   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1512                       BitMap* region_bm, BitMap* card_bm) :
1513     AbstractGangTask("G1 final counting"), _g1h(g1h),
1514     _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
1515   {
1516     if (ParallelGCThreads > 0)
1517       _n_workers = _g1h->workers()->total_workers();
1518     else
1519       _n_workers = 1;
1520     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1521     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1522   }
1523 
1524   ~G1ParFinalCountTask() {
1525     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1526     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1527   }
1528 
1529   void work(int i) {
1530     CalcLiveObjectsClosure calccl(true /*final*/,
1531                                   _bm, _g1h->concurrent_mark(),
1532                                   _region_bm, _card_bm);
1533     calccl.no_yield();
1534     if (G1CollectedHeap::use_parallel_gc_threads()) {
1535       _g1h->heap_region_par_iterate_chunked(&calccl, i,
1536                                             HeapRegion::FinalCountClaimValue);
1537     } else {
1538       _g1h->heap_region_iterate(&calccl);
1539     }
1540     assert(calccl.complete(), "Shouldn't have yielded!");
1541 
1542     assert((size_t) i < _n_workers, "invariant");
1543     _live_bytes[i] = calccl.tot_live();
1544     _used_bytes[i] = calccl.tot_used();
1545   }
1546   size_t live_bytes()  {
1547     size_t live_bytes = 0;
1548     for (size_t i = 0; i < _n_workers; ++i)
1549       live_bytes += _live_bytes[i];
1550     return live_bytes;
1551   }
1552   size_t used_bytes()  {
1553     size_t used_bytes = 0;
1554     for (size_t i = 0; i < _n_workers; ++i)
1555       used_bytes += _used_bytes[i];
1556     return used_bytes;
1557   }
1558 };
1559 
1560 class G1ParNoteEndTask;
1561 
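// Per-region "note end of marking" closure: it resets the region's GC time
// stamp, records the newly computed liveness, frees the region if it turned
// out to be completely empty (via the local cleanup list), and gathers
// per-worker timing statistics.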
1562 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1563   G1CollectedHeap* _g1;
1564   int _worker_num;
1565   size_t _max_live_bytes;
1566   size_t _regions_claimed;
1567   size_t _freed_bytes;
1568   FreeRegionList* _local_cleanup_list;
1569   HumongousRegionSet* _humongous_proxy_set;
1570   HRRSCleanupTask* _hrrs_cleanup_task;
1571   double _claimed_region_time;
1572   double _max_region_time;
1573 
1574 public:
1575   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1576                              int worker_num,
1577                              FreeRegionList* local_cleanup_list,
1578                              HumongousRegionSet* humongous_proxy_set,
1579                              HRRSCleanupTask* hrrs_cleanup_task);
1580   size_t freed_bytes() { return _freed_bytes; }
1581 
1582   bool doHeapRegion(HeapRegion *r);
1583 
1584   size_t max_live_bytes() { return _max_live_bytes; }
1585   size_t regions_claimed() { return _regions_claimed; }
1586   double claimed_region_time_sec() { return _claimed_region_time; }
1587   double max_region_time_sec() { return _max_region_time; }
1588 };
1589 
1590 class G1ParNoteEndTask: public AbstractGangTask {
1591   friend class G1NoteEndOfConcMarkClosure;
1592 
1593 protected:
1594   G1CollectedHeap* _g1h;
1595   size_t _max_live_bytes;
1596   size_t _freed_bytes;
1597   FreeRegionList* _cleanup_list;
1598 
1599 public:
1600   G1ParNoteEndTask(G1CollectedHeap* g1h,
1601                    FreeRegionList* cleanup_list) :
1602     AbstractGangTask("G1 note end"), _g1h(g1h),
1603     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1604 
1605   void work(int i) {
1606     double start = os::elapsedTime();
1607     FreeRegionList local_cleanup_list("Local Cleanup List");
1608     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1609     HRRSCleanupTask hrrs_cleanup_task;
1610     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
1611                                            &humongous_proxy_set,
1612                                            &hrrs_cleanup_task);
1613     if (G1CollectedHeap::use_parallel_gc_threads()) {
1614       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1615                                             HeapRegion::NoteEndClaimValue);
1616     } else {
1617       _g1h->heap_region_iterate(&g1_note_end);
1618     }
1619     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1620 
1621     // Now update the lists
1622     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1623                                             NULL /* free_list */,
1624                                             &humongous_proxy_set,
1625                                             true /* par */);
1626     {
1627       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1628       _max_live_bytes += g1_note_end.max_live_bytes();
1629       _freed_bytes += g1_note_end.freed_bytes();
1630 
1631       _cleanup_list->add_as_tail(&local_cleanup_list);
1632       assert(local_cleanup_list.is_empty(), "post-condition");
1633 
1634       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1635     }
1636     double end = os::elapsedTime();
1637     if (G1PrintParCleanupStats) {
1638       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1639                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1640                           i, start, end, (end-start)*1000.0,
1641                           g1_note_end.regions_claimed(),
1642                           g1_note_end.claimed_region_time_sec()*1000.0,
1643                           g1_note_end.max_region_time_sec()*1000.0);
1644     }
1645   }
1646   size_t max_live_bytes() { return _max_live_bytes; }
1647   size_t freed_bytes() { return _freed_bytes; }
1648 };
1649 
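// Scrubs the remembered sets using the region and card liveness bitmaps
// computed during counting, so that entries referring to dead regions or
// dead cards are dropped.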
1650 class G1ParScrubRemSetTask: public AbstractGangTask {
1651 protected:
1652   G1RemSet* _g1rs;
1653   BitMap* _region_bm;
1654   BitMap* _card_bm;
1655 public:
1656   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1657                        BitMap* region_bm, BitMap* card_bm) :
1658     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1659     _region_bm(region_bm), _card_bm(card_bm)
1660   {}
1661 
1662   void work(int i) {
1663     if (G1CollectedHeap::use_parallel_gc_threads()) {
1664       _g1rs->scrub_par(_region_bm, _card_bm, i,
1665                        HeapRegion::ScrubRemSetClaimValue);
1666     } else {
1667       _g1rs->scrub(_region_bm, _card_bm);
1668     }
1669   }
1670 
1671 };
1672 
1673 G1NoteEndOfConcMarkClosure::
1674 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1675                            int worker_num,
1676                            FreeRegionList* local_cleanup_list,
1677                            HumongousRegionSet* humongous_proxy_set,
1678                            HRRSCleanupTask* hrrs_cleanup_task)
1679   : _g1(g1), _worker_num(worker_num),
1680     _max_live_bytes(0), _regions_claimed(0),
1681     _freed_bytes(0),
1682     _claimed_region_time(0.0), _max_region_time(0.0),
1683     _local_cleanup_list(local_cleanup_list),
1684     _humongous_proxy_set(humongous_proxy_set),
1685     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1686 
1687 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) {
1688   // We use a claim value of zero here because all regions
1689   // were claimed with value 1 in the FinalCount task.
1690   hr->reset_gc_time_stamp();
1691   if (!hr->continuesHumongous()) {
1692     double start = os::elapsedTime();
1693     _regions_claimed++;
1694     hr->note_end_of_marking();
1695     _max_live_bytes += hr->max_live_bytes();
1696     _g1->free_region_if_empty(hr,
1697                               &_freed_bytes,
1698                               _local_cleanup_list,
1699                               _humongous_proxy_set,
1700                               _hrrs_cleanup_task,
1701                               true /* par */);
1702     double region_time = (os::elapsedTime() - start);
1703     _claimed_region_time += region_time;
1704     if (region_time > _max_region_time) _max_region_time = region_time;
1705   }
1706   return false;
1707 }
1708 
1709 void ConcurrentMark::cleanup() {
1710   // world is stopped at this checkpoint
1711   assert(SafepointSynchronize::is_at_safepoint(),
1712          "world should be stopped");
1713   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1714 
1715   // If a full collection has happened, we shouldn't do this.
1716   if (has_aborted()) {
1717     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1718     return;
1719   }
1720 
1721   g1h->verify_region_sets_optional();
1722 
1723   if (VerifyDuringGC) {
1724     HandleMark hm;  // handle scope
1725     gclog_or_tty->print(" VerifyDuringGC:(before)");
1726     Universe::heap()->prepare_for_verify();
1727     Universe::verify(/* allow dirty  */ true,
1728                      /* silent       */ false,
1729                      /* prev marking */ true);
1730   }
1731 
1732   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1733   g1p->record_concurrent_mark_cleanup_start();
1734 
1735   double start = os::elapsedTime();
1736 
1737   HeapRegionRemSet::reset_for_cleanup_tasks();
1738 
1739   // Do counting once more with the world stopped for good measure.
1740   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1741                                         &_region_bm, &_card_bm);
1742   if (G1CollectedHeap::use_parallel_gc_threads()) {
1743     assert(g1h->check_heap_region_claim_values(
1744                                                HeapRegion::InitialClaimValue),
1745            "sanity check");
1746 
1747     int n_workers = g1h->workers()->total_workers();
1748     g1h->set_par_threads(n_workers);
1749     g1h->workers()->run_task(&g1_par_count_task);
1750     g1h->set_par_threads(0);
1751 
1752     assert(g1h->check_heap_region_claim_values(
1753                                              HeapRegion::FinalCountClaimValue),
1754            "sanity check");
1755   } else {
1756     g1_par_count_task.work(0);
1757   }
1758 
1759   size_t known_garbage_bytes =
1760     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1761 #if 0
1762   gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
1763                          (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
1764                          (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
1765                          (double) known_garbage_bytes / (double) (1024 * 1024));
1766 #endif // 0
1767   g1p->set_known_garbage_bytes(known_garbage_bytes);
1768 
1769   size_t start_used_bytes = g1h->used();
1770   _at_least_one_mark_complete = true;
1771   g1h->set_marking_complete();
1772 
1773   double count_end = os::elapsedTime();
1774   double this_final_counting_time = (count_end - start);
1775   if (G1PrintParCleanupStats) {
1776     gclog_or_tty->print_cr("Cleanup:");
1777     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
1778                            this_final_counting_time*1000.0);
1779   }
1780   _total_counting_time += this_final_counting_time;
1781 
1782   if (G1PrintRegionLivenessInfo) {
1783     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1784     _g1h->heap_region_iterate(&cl);
1785   }
1786 
1787   // Install newly created mark bitMap as "prev".
1788   swapMarkBitMaps();
1789 
1790   g1h->reset_gc_time_stamp();
1791 
1792   // Note end of marking in all heap regions.
1793   double note_end_start = os::elapsedTime();
1794   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1795   if (G1CollectedHeap::use_parallel_gc_threads()) {
1796     int n_workers = g1h->workers()->total_workers();
1797     g1h->set_par_threads(n_workers);
1798     g1h->workers()->run_task(&g1_par_note_end_task);
1799     g1h->set_par_threads(0);
1800 
1801     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1802            "sanity check");
1803   } else {
1804     g1_par_note_end_task.work(0);
1805   }
1806 
1807   if (!cleanup_list_is_empty()) {
1808     // The cleanup list is not empty, so we'll have to process it
1809     // concurrently. Notify anyone else that might be wanting free
1810     // regions that there will be more free regions coming soon.
1811     g1h->set_free_regions_coming();
1812   }
1813   double note_end_end = os::elapsedTime();
1814   if (G1PrintParCleanupStats) {
1815     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
1816                            (note_end_end - note_end_start)*1000.0);
1817   }
1818 
1819 
1820   // If enabled, scrub the remembered sets before the cleanup-end call
1821   // below, since it affects the metric by which we sort the heap regions.
1822   if (G1ScrubRemSets) {
1823     double rs_scrub_start = os::elapsedTime();
1824     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1825     if (G1CollectedHeap::use_parallel_gc_threads()) {
1826       int n_workers = g1h->workers()->total_workers();
1827       g1h->set_par_threads(n_workers);
1828       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1829       g1h->set_par_threads(0);
1830 
1831       assert(g1h->check_heap_region_claim_values(
1832                                             HeapRegion::ScrubRemSetClaimValue),
1833              "sanity check");
1834     } else {
1835       g1_par_scrub_rs_task.work(0);
1836     }
1837 
1838     double rs_scrub_end = os::elapsedTime();
1839     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1840     _total_rs_scrub_time += this_rs_scrub_time;
1841   }
1842 
1843   // this will also free any regions totally full of garbage objects,
1844   // and sort the regions.
1845   g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1846                         g1_par_note_end_task.freed_bytes(),
1847                         g1_par_note_end_task.max_live_bytes());
1848 
1849   // Statistics.
1850   double end = os::elapsedTime();
1851   _cleanup_times.add((end - start) * 1000.0);
1852 
1853   // G1CollectedHeap::heap()->print();
1854   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1855   // G1CollectedHeap::heap()->get_gc_time_stamp());
1856 
1857   if (PrintGC || PrintGCDetails) {
1858     g1h->print_size_transition(gclog_or_tty,
1859                                start_used_bytes,
1860                                g1h->used(),
1861                                g1h->capacity());
1862   }
1863 
1864   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1865   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1866 
1867   // We need to make this count as a "collection" so that any collection
1868   // pause racing with it goes around and waits for completeCleanup to finish.
1869   g1h->increment_total_collections();
1870 
1871   if (VerifyDuringGC) {
1872     HandleMark hm;  // handle scope
1873     gclog_or_tty->print(" VerifyDuringGC:(after)");
1874     Universe::heap()->prepare_for_verify();
1875     Universe::verify(/* allow dirty  */ true,
1876                      /* silent       */ false,
1877                      /* prev marking */ true);
1878   }
1879 
1880   g1h->verify_region_sets_optional();
1881 }
1882 
1883 void ConcurrentMark::completeCleanup() {
1884   if (has_aborted()) return;
1885 
1886   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1887 
1888   _cleanup_list.verify_optional();
1889   FreeRegionList tmp_free_list("Tmp Free List");
1890 
1891   if (G1ConcRegionFreeingVerbose) {
1892     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1893                            "cleanup list has "SIZE_FORMAT" entries",
1894                            _cleanup_list.length());
1895   }
1896 
1897   // No one else should be accessing the _cleanup_list at this point,
1898   // so it's not necessary to take any locks.
1899   while (!_cleanup_list.is_empty()) {
1900     HeapRegion* hr = _cleanup_list.remove_head();
1901     assert(hr != NULL, "the list was not empty");
1902     hr->par_clear();
1903     tmp_free_list.add_as_tail(hr);
1904 
1905     // Instead of adding one region at a time to the secondary_free_list,
1906     // we accumulate them in the local list and move them a few at a
1907     // time. This also cuts down on the number of notify_all() calls
1908     // we do during this process. We'll also append the local list when
1909     // _cleanup_list is empty (which means we just removed the last
1910     // region from the _cleanup_list).
1911     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1912         _cleanup_list.is_empty()) {
1913       if (G1ConcRegionFreeingVerbose) {
1914         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1915                                "appending "SIZE_FORMAT" entries to the "
1916                                "secondary_free_list, clean list still has "
1917                                SIZE_FORMAT" entries",
1918                                tmp_free_list.length(),
1919                                _cleanup_list.length());
1920       }
1921 
1922       {
1923         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1924         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
1925         SecondaryFreeList_lock->notify_all();
1926       }
1927 
1928       if (G1StressConcRegionFreeing) {
1929         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1930           os::sleep(Thread::current(), (jlong) 1, false);
1931         }
1932       }
1933     }
1934   }
1935   assert(tmp_free_list.is_empty(), "post-condition");
1936 }
1937 
1938 // Support closures for reference processing in G1
1939 
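// An object is considered alive here if it lies outside the G1 reserved
// heap (e.g. a perm-gen object) or if it is not "ill", i.e. it is either
// marked on the bitmap or was allocated since marking started.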
1940 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1941   HeapWord* addr = (HeapWord*)obj;
1942   return addr != NULL &&
1943          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1944 }
1945 
1946 class G1CMKeepAliveClosure: public OopClosure {
1947   G1CollectedHeap* _g1;
1948   ConcurrentMark*  _cm;
1949   CMBitMap*        _bitMap;
1950  public:
1951   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1952                        CMBitMap* bitMap) :
1953     _g1(g1), _cm(cm),
1954     _bitMap(bitMap) {}
1955 
1956   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1957   virtual void do_oop(      oop* p) { do_oop_work(p); }
1958 
1959   template <class T> void do_oop_work(T* p) {
1960     oop obj = oopDesc::load_decode_heap_oop(p);
1961     HeapWord* addr = (HeapWord*)obj;
1962 
1963     if (_cm->verbose_high())
1964       gclog_or_tty->print_cr("\t[0] we're looking at location "
1965                                "*"PTR_FORMAT" = "PTR_FORMAT,
1966                                p, (void*) obj);
1967 
1968     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
1969       _bitMap->mark(addr);
1970       _cm->mark_stack_push(obj);
1971     }
1972   }
1973 };
1974 
1975 class G1CMDrainMarkingStackClosure: public VoidClosure {
1976   CMMarkStack*                  _markStack;
1977   CMBitMap*                     _bitMap;
1978   G1CMKeepAliveClosure*         _oopClosure;
1979  public:
1980   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1981                                G1CMKeepAliveClosure* oopClosure) :
1982     _bitMap(bitMap),
1983     _markStack(markStack),
1984     _oopClosure(oopClosure)
1985   {}
1986 
1987   void do_void() {
1988     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1989   }
1990 };
1991 
1992 // 'Keep Alive' closure used by parallel reference processing.
1993 // An instance of this closure is used in the parallel reference processing
1994 // code rather than an instance of G1CMKeepAliveClosure. We could have used
1995 // the G1CMKeepAliveClosure as it is MT-safe. Also, reference objects are
1996 // only placed on the discovered ref lists once, so we can mark and push
1997 // them without checking whether they have already been marked. Using the
1998 // G1CMKeepAliveClosure would mean, however, having all the worker threads
1999 // operating on the global mark stack. This means that an individual
2000 // worker would be doing lock-free pushes while it processes its own
2001 // discovered ref list followed by drain call. If the discovered ref lists
2002 // are unbalanced then this could cause interference with the other
2003 // workers. Using a CMTask (and its embedded local data structures)
2004 // avoids that potential interference.
2005 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
2006   ConcurrentMark*  _cm;
2007   CMTask*          _task;
2008   CMBitMap*        _bitMap;
2009   int              _ref_counter_limit;
2010   int              _ref_counter;
2011  public:
2012   G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm,
2013                                   CMTask* task,
2014                                   CMBitMap* bitMap) :
2015     _cm(cm), _task(task), _bitMap(bitMap),
2016     _ref_counter_limit(G1RefProcDrainInterval)
2017   {
2018     assert(_ref_counter_limit > 0, "sanity");
2019     _ref_counter = _ref_counter_limit;
2020   }
2021 
2022   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2023   virtual void do_oop(      oop* p) { do_oop_work(p); }
2024 
2025   template <class T> void do_oop_work(T* p) {
2026     if (!_cm->has_overflown()) {
2027       oop obj = oopDesc::load_decode_heap_oop(p);
2028       if (_cm->verbose_high())
2029         gclog_or_tty->print_cr("\t[%d] we're looking at location "
2030                                "*"PTR_FORMAT" = "PTR_FORMAT,
2031                                _task->task_id(), p, (void*) obj);
2032 
2033       _task->deal_with_reference(obj);
2034       _ref_counter--;
2035 
2036       if (_ref_counter == 0) {
2037         // We have dealt with _ref_counter_limit references, pushing them and objects
2038         // reachable from them on to the local stack (and possibly the global stack).
2039         // Call do_marking_step() to process these entries. We call the routine in a
2040         // loop, which we'll exit if there's nothing more to do (i.e. we're done
2041         // with the entries that we've pushed as a result of the deal_with_reference
2042         // calls above) or we overflow.
2043         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2044         // while there may still be some work to do. (See the comment at the
2045         // beginning of CMTask::do_marking_step() for those conditions - one of which
2046         // is reaching the specified time target.) It is only when
2047         // CMTask::do_marking_step() returns without setting the has_aborted() flag
2048         // that the marking has completed.
2049         do {
2050           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2051           _task->do_marking_step(mark_step_duration_ms,
2052                                  false /* do_stealing    */,
2053                                  false /* do_termination */);
2054         } while (_task->has_aborted() && !_cm->has_overflown());
2055         _ref_counter = _ref_counter_limit;
2056       }
2057     } else {
2058        if (_cm->verbose_high())
2059          gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2060     }
2061   }
2062 };
2063 
2064 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2065   ConcurrentMark* _cm;
2066   CMTask* _task;
2067  public:
2068   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2069     _cm(cm), _task(task)
2070   {}
2071 
2072   void do_void() {
2073     do {
2074       if (_cm->verbose_high())
2075         gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step", _task->task_id());
2076 
2077       // We call CMTask::do_marking_step() to completely drain the local and
2078       // global marking stacks. The routine is called in a loop, which we'll
2079       // exit if there's nothing more to do (i.e. we've completely drained the
2080       // entries that were pushed as a result of applying the
2081       // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2082       // lists above) or we overflow the global marking stack.
2083       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2084       // while there may still be some work to do. (See the comment at the
2085       // beginning of CMTask::do_marking_step() for those conditions - one of which
2086       // is reaching the specified time target.) It is only when
2087       // CMTask::do_marking_step() returns without setting the has_aborted() flag
2088       // that the marking has completed.
2089 
2090       _task->do_marking_step(1000000000.0 /* something very large */,
2091                              true /* do_stealing    */,
2092                              true /* do_termination */);
2093     } while (_task->has_aborted() && !_cm->has_overflown());
2094   }
2095 };
2096 
2097 // Implementation of AbstractRefProcTaskExecutor for G1
2098 class G1RefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2099 private:
2100   G1CollectedHeap* _g1h;
2101   ConcurrentMark*  _cm;
2102   CMBitMap*        _bitmap;
2103   WorkGang*        _workers;
2104   int              _active_workers;
2105 
2106 public:
2107   G1RefProcTaskExecutor(G1CollectedHeap* g1h,
2108                         ConcurrentMark* cm,
2109                         CMBitMap* bitmap,
2110                         WorkGang* workers,
2111                         int n_workers) :
2112     _g1h(g1h), _cm(cm), _bitmap(bitmap),
2113     _workers(workers), _active_workers(n_workers)
2114   { }
2115 
2116   // Executes the given task using concurrent marking worker threads.
2117   virtual void execute(ProcessTask& task);
2118   virtual void execute(EnqueueTask& task);
2119 };
2120 
2121 class G1RefProcTaskProxy: public AbstractGangTask {
2122   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2123   ProcessTask&     _proc_task;
2124   G1CollectedHeap* _g1h;
2125   ConcurrentMark*  _cm;
2126   CMBitMap*        _bitmap;
2127 
2128 public:
2129   G1RefProcTaskProxy(ProcessTask& proc_task,
2130                      G1CollectedHeap* g1h,
2131                      ConcurrentMark* cm,
2132                      CMBitMap* bitmap) :
2133     AbstractGangTask("Process reference objects in parallel"),
2134     _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap)
2135   {}
2136 
2137   virtual void work(int i) {
2138     CMTask* marking_task = _cm->task(i);
2139     G1CMIsAliveClosure g1_is_alive(_g1h);
2140     G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap);
2141     G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2142 
2143     _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2144   }
2145 };
2146 
2147 void G1RefProcTaskExecutor::execute(ProcessTask& proc_task) {
2148   assert(_workers != NULL, "Need parallel worker threads.");
2149 
2150   G1RefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap);
2151 
2152   // We need to reset the phase for each task execution so that
2153   // the termination protocol of CMTask::do_marking_step works.
2154   _cm->set_phase(_active_workers, false /* concurrent */);
2155   _g1h->set_par_threads(_active_workers);
2156   _workers->run_task(&proc_task_proxy);
2157   _g1h->set_par_threads(0);
2158 }
2159 
2160 class G1RefEnqueueTaskProxy: public AbstractGangTask {
2161   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2162   EnqueueTask& _enq_task;
2163 
2164 public:
2165   G1RefEnqueueTaskProxy(EnqueueTask& enq_task) :
2166     AbstractGangTask("Enqueue reference objects in parallel"),
2167     _enq_task(enq_task)
2168   { }
2169 
2170   virtual void work(int i) {
2171     _enq_task.work(i);
2172   }
2173 };
2174 
2175 void G1RefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2176   assert(_workers != NULL, "Need parallel worker threads.");
2177 
2178   G1RefEnqueueTaskProxy enq_task_proxy(enq_task);
2179 
2180   _g1h->set_par_threads(_active_workers);
2181   _workers->run_task(&enq_task_proxy);
2182   _g1h->set_par_threads(0);
2183 }
2184 
2185 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2186   ResourceMark rm;
2187   HandleMark   hm;
2188   G1CollectedHeap* g1h   = G1CollectedHeap::heap();
2189   ReferenceProcessor* rp = g1h->ref_processor();
2190 
2191   // See the comment in G1CollectedHeap::ref_processing_init()
2192   // about how reference processing currently works in G1.
2193 
2194   // Process weak references.
2195   rp->setup_policy(clear_all_soft_refs);
2196   assert(_markStack.isEmpty(), "mark stack should be empty");
2197 
2198   G1CMIsAliveClosure   g1_is_alive(g1h);
2199   G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
2200   G1CMDrainMarkingStackClosure
2201     g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
2202   // We use the work gang from the G1CollectedHeap and we utilize all
2203   // the worker threads.
2204   int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1;
2205   active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
2206 
2207   G1RefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(),
2208                                           g1h->workers(), active_workers);
2209 
2210 
2211   if (rp->processing_is_mt()) {
2212     // Set the degree of MT here.  If the discovery is done MT, there
2213     // may have been a different number of threads doing the discovery
2214     // and a different number of discovered lists may have Ref objects.
2215     // That is OK as long as the Reference lists are balanced (see
2216     // balance_all_queues() and balance_queues()).
2217     rp->set_active_mt_degree(active_workers);
2218 
2219     rp->process_discovered_references(&g1_is_alive,
2220                                       &g1_keep_alive,
2221                                       &g1_drain_mark_stack,
2222                                       &par_task_executor);
2223 
2224     // The work routines of the parallel keep_alive and drain_marking_stack
2225     // will set the has_overflown flag if we overflow the global marking
2226     // stack.
2227   } else {
2228     rp->process_discovered_references(&g1_is_alive,
2229                                       &g1_keep_alive,
2230                                       &g1_drain_mark_stack,
2231                                       NULL);
2232 
2233   }
2234 
2235   assert(_markStack.overflow() || _markStack.isEmpty(),
2236       "mark stack should be empty (unless it overflowed)");
2237   if (_markStack.overflow()) {
2238     // Should have been done already when we tried to push an
2239     // entry on to the global mark stack. But let's do it again.
2240     set_has_overflown();
2241   }
2242 
2243   if (rp->processing_is_mt()) {
2244     assert(rp->num_q() == active_workers, "active workers and ref queues should match");
2245     rp->enqueue_discovered_references(&par_task_executor);
2246   } else {
2247     rp->enqueue_discovered_references();
2248   }
2249 
2250   rp->verify_no_references_recorded();
2251   assert(!rp->discovery_enabled(), "should have been disabled");
2252 
2253   // Now clean up stale oops in StringTable
2254   StringTable::unlink(&g1_is_alive);
2255   // Clean up unreferenced symbols in symbol table.
2256   SymbolTable::unlink();
2257 }
2258 
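// After marking completes, the "next" bitmap holds the freshly computed
// liveness information, so it becomes the "prev" bitmap; the old "prev"
// bitmap will be cleared and reused as "next" for the following cycle.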
2259 void ConcurrentMark::swapMarkBitMaps() {
2260   CMBitMapRO* temp = _prevMarkBitMap;
2261   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2262   _nextMarkBitMap  = (CMBitMap*)  temp;
2263 }
2264 
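// Remark task: each worker whose id is below the number of active tasks
// runs its CMTask's do_marking_step() with an effectively unlimited time
// target (stealing and termination enabled) until the task completes or
// the global mark stack overflows, in which case remark is abandoned and
// concurrent marking will be restarted.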
2265 class CMRemarkTask: public AbstractGangTask {
2266 private:
2267   ConcurrentMark *_cm;
2268 
2269 public:
2270   void work(int worker_i) {
2271     // Since all available tasks are actually started, we should
2272     // only proceed if we're supposed to be active.
2273     if ((size_t)worker_i < _cm->active_tasks()) {
2274       CMTask* task = _cm->task(worker_i);
2275       task->record_start_time();
2276       do {
2277         task->do_marking_step(1000000000.0 /* something very large */,
2278                               true /* do_stealing    */,
2279                               true /* do_termination */);
2280       } while (task->has_aborted() && !_cm->has_overflown());
2281       // If we overflow, then we do not want to restart. We instead
2282       // want to abort remark and do concurrent marking again.
2283       task->record_end_time();
2284     }
2285   }
2286 
2287   CMRemarkTask(ConcurrentMark* cm) :
2288     AbstractGangTask("Par Remark"), _cm(cm) { }
2289 };
2290 
2291 void ConcurrentMark::checkpointRootsFinalWork() {
2292   ResourceMark rm;
2293   HandleMark   hm;
2294   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2295 
2296   g1h->ensure_parsability(false);
2297 
2298   if (G1CollectedHeap::use_parallel_gc_threads()) {
2299     G1CollectedHeap::StrongRootsScope srs(g1h);
2300     // this is remark, so we'll use up all available threads
2301     int active_workers = ParallelGCThreads;
2302     set_phase(active_workers, false /* concurrent */);
2303 
2304     CMRemarkTask remarkTask(this);
2305     // We will start all available threads, even if we decide that the
2306     // active_workers will be fewer. The extra ones will just bail out
2307     // immediately.
2308     int n_workers = g1h->workers()->total_workers();
2309     g1h->set_par_threads(n_workers);
2310     g1h->workers()->run_task(&remarkTask);
2311     g1h->set_par_threads(0);
2312   } else {
2313     G1CollectedHeap::StrongRootsScope srs(g1h);
2314     // this is remark, so we'll use up all available threads
2315     int active_workers = 1;
2316     set_phase(active_workers, false /* concurrent */);
2317 
2318     CMRemarkTask remarkTask(this);
2319     // We will start all available threads, even if we decide that the
2320     // active_workers will be fewer. The extra ones will just bail out
2321     // immediately.
2322     remarkTask.work(0);
2323   }
2324   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2325   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2326 
2327   print_stats();
2328 
2329 #if VERIFY_OBJS_PROCESSED
2330   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2331     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2332                            _scan_obj_cl.objs_processed,
2333                            ThreadLocalObjQueue::objs_enqueued);
2334     guarantee(_scan_obj_cl.objs_processed ==
2335               ThreadLocalObjQueue::objs_enqueued,
2336               "Different number of objs processed and enqueued.");
2337   }
2338 #endif
2339 }
2340 
2341 #ifndef PRODUCT
2342 
2343 class PrintReachableOopClosure: public OopClosure {
2344 private:
2345   G1CollectedHeap* _g1h;
2346   CMBitMapRO*      _bitmap;
2347   outputStream*    _out;
2348   bool             _use_prev_marking;
2349   bool             _all;
2350 
2351 public:
2352   PrintReachableOopClosure(CMBitMapRO*   bitmap,
2353                            outputStream* out,
2354                            bool          use_prev_marking,
2355                            bool          all) :
2356     _g1h(G1CollectedHeap::heap()),
2357     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
2358 
2359   void do_oop(narrowOop* p) { do_oop_work(p); }
2360   void do_oop(      oop* p) { do_oop_work(p); }
2361 
2362   template <class T> void do_oop_work(T* p) {
2363     oop         obj = oopDesc::load_decode_heap_oop(p);
2364     const char* str = NULL;
2365     const char* str2 = "";
2366 
2367     if (obj == NULL) {
2368       str = "";
2369     } else if (!_g1h->is_in_g1_reserved(obj)) {
2370       str = " O";
2371     } else {
2372       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2373       guarantee(hr != NULL, "invariant");
2374       bool over_tams = false;
2375       if (_use_prev_marking) {
2376         over_tams = hr->obj_allocated_since_prev_marking(obj);
2377       } else {
2378         over_tams = hr->obj_allocated_since_next_marking(obj);
2379       }
2380       bool marked = _bitmap->isMarked((HeapWord*) obj);
2381 
2382       if (over_tams) {
2383         str = " >";
2384         if (marked) {
2385           str2 = " AND MARKED";
2386         }
2387       } else if (marked) {
2388         str = " M";
2389       } else {
2390         str = " NOT";
2391       }
2392     }
2393 
2394     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2395                    p, (void*) obj, str, str2);
2396   }
2397 };
2398 
2399 class PrintReachableObjectClosure : public ObjectClosure {
2400 private:
2401   CMBitMapRO*   _bitmap;
2402   outputStream* _out;
2403   bool          _use_prev_marking;
2404   bool          _all;
2405   HeapRegion*   _hr;
2406 
2407 public:
2408   PrintReachableObjectClosure(CMBitMapRO*   bitmap,
2409                               outputStream* out,
2410                               bool          use_prev_marking,
2411                               bool          all,
2412                               HeapRegion*   hr) :
2413     _bitmap(bitmap), _out(out),
2414     _use_prev_marking(use_prev_marking), _all(all), _hr(hr) { }
2415 
2416   void do_object(oop o) {
2417     bool over_tams;
2418     if (_use_prev_marking) {
2419       over_tams = _hr->obj_allocated_since_prev_marking(o);
2420     } else {
2421       over_tams = _hr->obj_allocated_since_next_marking(o);
2422     }
2423     bool marked = _bitmap->isMarked((HeapWord*) o);
2424     bool print_it = _all || over_tams || marked;
2425 
2426     if (print_it) {
2427       _out->print_cr(" "PTR_FORMAT"%s",
2428                      o, (over_tams) ? " >" : (marked) ? " M" : "");
2429       PrintReachableOopClosure oopCl(_bitmap, _out, _use_prev_marking, _all);
2430       o->oop_iterate(&oopCl);
2431     }
2432   }
2433 };
2434 
2435 class PrintReachableRegionClosure : public HeapRegionClosure {
2436 private:
2437   CMBitMapRO*   _bitmap;
2438   outputStream* _out;
2439   bool          _use_prev_marking;
2440   bool          _all;
2441 
2442 public:
2443   bool doHeapRegion(HeapRegion* hr) {
2444     HeapWord* b = hr->bottom();
2445     HeapWord* e = hr->end();
2446     HeapWord* t = hr->top();
2447     HeapWord* p = NULL;
2448     if (_use_prev_marking) {
2449       p = hr->prev_top_at_mark_start();
2450     } else {
2451       p = hr->next_top_at_mark_start();
2452     }
2453     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2454                    "TAMS: "PTR_FORMAT, b, e, t, p);
2455     _out->cr();
2456 
2457     HeapWord* from = b;
2458     HeapWord* to   = t;
2459 
2460     if (to > from) {
2461       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2462       _out->cr();
2463       PrintReachableObjectClosure ocl(_bitmap, _out,
2464                                       _use_prev_marking, _all, hr);
2465       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2466       _out->cr();
2467     }
2468 
2469     return false;
2470   }
2471 
2472   PrintReachableRegionClosure(CMBitMapRO*   bitmap,
2473                               outputStream* out,
2474                               bool          use_prev_marking,
2475                               bool          all) :
2476     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
2477 };
2478 
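// Dumps, for every region, the objects that are above TAMS and/or marked
// (or all objects if 'all' is true), together with the locations they
// reference, to a file named "<G1PrintReachableBaseFile>.<str>".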
2479 void ConcurrentMark::print_reachable(const char* str,
2480                                      bool use_prev_marking,
2481                                      bool all) {
2482   gclog_or_tty->cr();
2483   gclog_or_tty->print_cr("== Doing heap dump... ");
2484 
2485   if (G1PrintReachableBaseFile == NULL) {
2486     gclog_or_tty->print_cr("  #### error: no base file defined");
2487     return;
2488   }
2489 
2490   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2491       (JVM_MAXPATHLEN - 1)) {
2492     gclog_or_tty->print_cr("  #### error: file name too long");
2493     return;
2494   }
2495 
2496   char file_name[JVM_MAXPATHLEN];
2497   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2498   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2499 
2500   fileStream fout(file_name);
2501   if (!fout.is_open()) {
2502     gclog_or_tty->print_cr("  #### error: could not open file");
2503     return;
2504   }
2505 
2506   outputStream* out = &fout;
2507 
2508   CMBitMapRO* bitmap = NULL;
2509   if (use_prev_marking) {
2510     bitmap = _prevMarkBitMap;
2511   } else {
2512     bitmap = _nextMarkBitMap;
2513   }
2514 
2515   out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS");
2516   out->cr();
2517 
2518   out->print_cr("--- ITERATING OVER REGIONS");
2519   out->cr();
2520   PrintReachableRegionClosure rcl(bitmap, out, use_prev_marking, all);
2521   _g1h->heap_region_iterate(&rcl);
2522   out->cr();
2523 
2524   gclog_or_tty->print_cr("  done");
2525   gclog_or_tty->flush();
2526 }
2527 
2528 #endif // PRODUCT
2529 
2530 // This note is for drainAllSATBBuffers and the code in between.
2531 // In the future we could reuse a task to do this work during an
2532 // evacuation pause (since now tasks are not active and can be claimed
2533 // during an evacuation pause). This was a late change to the code and
2534 // is currently not being taken advantage of.
2535 
2536 class CMGlobalObjectClosure : public ObjectClosure {
2537 private:
2538   ConcurrentMark* _cm;
2539 
2540 public:
2541   void do_object(oop obj) {
2542     _cm->deal_with_reference(obj);
2543   }
2544 
2545   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2546 };
2547 
2548 void ConcurrentMark::deal_with_reference(oop obj) {
2549   if (verbose_high())
2550     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2551                            (void*) obj);
2552 
2553 
2554   HeapWord* objAddr = (HeapWord*) obj;
2555   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2556   if (_g1h->is_in_g1_reserved(objAddr)) {
2557     assert(obj != NULL, "is_in_g1_reserved should ensure this");
2558     HeapRegion* hr = _g1h->heap_region_containing(obj);
2559     if (_g1h->is_obj_ill(obj, hr)) {
2560       if (verbose_high())
2561         gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2562                                "marked", (void*) obj);
2563 
2564       // we need to mark it first
2565       if (_nextMarkBitMap->parMark(objAddr)) {
2566         // No OrderAccess::store_load() is needed. It is implicit in the
2567         // CAS done in parMark(objAddr) above
2568         HeapWord* finger = _finger;
2569         if (objAddr < finger) {
2570           if (verbose_high())
2571             gclog_or_tty->print_cr("[global] below the global finger "
2572                                    "("PTR_FORMAT"), pushing it", finger);
2573           if (!mark_stack_push(obj)) {
2574             if (verbose_low())
2575               gclog_or_tty->print_cr("[global] global stack overflow during "
2576                                      "deal_with_reference");
2577           }
2578         }
2579       }
2580     }
2581   }
2582 }
2583 
2584 void ConcurrentMark::drainAllSATBBuffers() {
2585   CMGlobalObjectClosure oc(this);
2586   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2587   satb_mq_set.set_closure(&oc);
2588 
2589   while (satb_mq_set.apply_closure_to_completed_buffer()) {
2590     if (verbose_medium())
2591       gclog_or_tty->print_cr("[global] processed an SATB buffer");
2592   }
2593 
2594   // no need to check whether we should do this, as this is only
2595   // called during an evacuation pause
2596   satb_mq_set.iterate_closure_all_threads();
2597 
2598   satb_mq_set.set_closure(NULL);
2599   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2600 }
2601 
2602 void ConcurrentMark::markPrev(oop p) {
2603   // Note we are overriding the read-only view of the prev map here, via
2604   // the cast.
2605   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2606 }
2607 
2608 void ConcurrentMark::clear(oop p) {
2609   assert(p != NULL && p->is_oop(), "expected an oop");
2610   HeapWord* addr = (HeapWord*)p;
2611   assert(addr >= _nextMarkBitMap->startWord() &&
2612          addr < _nextMarkBitMap->endWord(), "in a region");
2613 
2614   _nextMarkBitMap->clear(addr);
2615 }
2616 
2617 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2618   // Note we are overriding the read-only view of the prev map here, via
2619   // the cast.
2620   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2621   _nextMarkBitMap->clearRange(mr);
2622 }
2623 
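// Claims the region containing the global finger by CAS-ing the finger
// forward to that region's end. On success the region is returned if it
// has anything to scan (NULL otherwise, and the caller should retry); on
// CAS failure the finger is re-read and the claim attempt repeated.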
2624 HeapRegion*
2625 ConcurrentMark::claim_region(int task_num) {
2626   // "checkpoint" the finger
2627   HeapWord* finger = _finger;
2628 
2629   // _heap_end will not change underneath our feet; it only changes at
2630   // yield points.
2631   while (finger < _heap_end) {
2632     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2633 
2634     // is the gap between reading the finger and doing the CAS too long?
2635 
2636     HeapRegion* curr_region   = _g1h->heap_region_containing(finger);
2637     HeapWord*   bottom        = curr_region->bottom();
2638     HeapWord*   end           = curr_region->end();
2639     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2640 
2641     if (verbose_low())
2642       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2643                              "["PTR_FORMAT", "PTR_FORMAT"), "
2644                              "limit = "PTR_FORMAT,
2645                              task_num, curr_region, bottom, end, limit);
2646 
2647     HeapWord* res =
2648       (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2649     if (res == finger) {
2650       // we succeeded
2651 
2652       // notice that _finger == end cannot be guaranteed here, since
2653       // someone else might have moved the finger even further.
2654       assert(_finger >= end, "the finger should have moved forward");
2655 
2656       if (verbose_low())
2657         gclog_or_tty->print_cr("[%d] we were successful with region = "
2658                                PTR_FORMAT, task_num, curr_region);
2659 
2660       if (limit > bottom) {
2661         if (verbose_low())
2662           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2663                                  "returning it ", task_num, curr_region);
2664         return curr_region;
2665       } else {
2666         assert(limit == bottom,
2667                "the region limit should be at bottom");
2668         if (verbose_low())
2669           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2670                                  "returning NULL", task_num, curr_region);
2671         // we return NULL and the caller should try calling
2672         // claim_region() again.
2673         return NULL;
2674       }
2675     } else {
2676       assert(_finger > finger, "the finger should have moved forward");
2677       if (verbose_low())
2678         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2679                                "global finger = "PTR_FORMAT", "
2680                                "our finger = "PTR_FORMAT,
2681                                task_num, _finger, finger);
2682 
2683       // read it again
2684       finger = _finger;
2685     }
2686   }
2687 
2688   return NULL;
2689 }
2690 
2691 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
2692   bool result = false;
2693   for (int i = 0; i < (int)_max_task_num; ++i) {
2694     CMTask* the_task = _tasks[i];
2695     MemRegion mr = the_task->aborted_region();
2696     if (mr.start() != NULL) {
2697       assert(mr.end() != NULL, "invariant");
2698       assert(mr.word_size() > 0, "invariant");
2699       HeapRegion* hr = _g1h->heap_region_containing(mr.start());
2700       assert(hr != NULL, "invariant");
2701       if (hr->in_collection_set()) {
2702         // The region points into the collection set
2703         the_task->set_aborted_region(MemRegion());
2704         result = true;
2705       }
2706     }
2707   }
2708   return result;
2709 }
2710 
2711 bool ConcurrentMark::has_aborted_regions() {
2712   for (int i = 0; i < (int)_max_task_num; ++i) {
2713     CMTask* the_task = _tasks[i];
2714     MemRegion mr = the_task->aborted_region();
2715     if (mr.start() != NULL) {
2716       assert(mr.end() != NULL, "invariant");
2717       assert(mr.word_size() > 0, "invariant");
2718       return true;
2719     }
2720   }
2721   return false;
2722 }
2723 
2724 void ConcurrentMark::oops_do(OopClosure* cl) {
2725   if (_markStack.size() > 0 && verbose_low())
2726     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2727                            "size = %d", _markStack.size());
2728   // we first iterate over the contents of the mark stack...
2729   _markStack.oops_do(cl);
2730 
2731   for (int i = 0; i < (int)_max_task_num; ++i) {
2732     OopTaskQueue* queue = _task_queues->queue((int)i);
2733 
2734     if (queue->size() > 0 && verbose_low())
2735       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2736                              "size = %d", i, queue->size());
2737 
2738     // ...then over the contents of all the task queues.
2739     queue->oops_do(cl);
2740   }
2741 
2742   // Invalidate any entries in the region stack that point into
2743   // the collection set.
2744   if (_regionStack.invalidate_entries_into_cset()) {
2745     // otherwise, any gray objects copied during the evacuation pause
2746     // might not be visited.
2747     assert(_should_gray_objects, "invariant");
2748   }
2749 
2750   // Invalidate any aborted regions, recorded in the individual CM
2751   // tasks, that point into the collection set.
2752   if (invalidate_aborted_regions_in_cset()) {
2753     // otherwise, any gray objects copied during the evacuation pause
2754     // might not be visited.
2755     assert(_should_gray_objects, "invariant");
2756   }
2757 
2758 }
2759 
2760 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2761   _markStack.setEmpty();
2762   _markStack.clear_overflow();
2763   _regionStack.setEmpty();
2764   _regionStack.clear_overflow();
2765   if (clear_overflow) {
2766     clear_has_overflown();
2767   } else {
2768     assert(has_overflown(), "pre-condition");
2769   }
2770   _finger = _heap_start;
2771 
2772   for (int i = 0; i < (int)_max_task_num; ++i) {
2773     OopTaskQueue* queue = _task_queues->queue(i);
2774     queue->set_empty();
2775     // Clear any partial regions from the CMTasks
2776     _tasks[i]->clear_aborted_region();
2777   }
2778 }
2779 
2780 void ConcurrentMark::print_stats() {
2781   if (verbose_stats()) {
2782     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2783     for (size_t i = 0; i < _active_tasks; ++i) {
2784       _tasks[i]->print_stats();
2785       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2786     }
2787   }
2788 }
2789 
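// Marks objects reachable from the collection set using an explicit,
// fixed-size local mark stack. Unmarked collection-set objects are marked
// and pushed; objects outside the collection set are handed to
// ConcurrentMark::deal_with_reference(); object arrays are processed
// _array_increment elements at a time, with the remainder pushed back.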
2790 class CSMarkOopClosure: public OopClosure {
2791   friend class CSMarkBitMapClosure;
2792 
2793   G1CollectedHeap* _g1h;
2794   CMBitMap*        _bm;
2795   ConcurrentMark*  _cm;
2796   oop*             _ms;
2797   jint*            _array_ind_stack;
2798   int              _ms_size;
2799   int              _ms_ind;
2800   int              _array_increment;
2801 
2802   bool push(oop obj, int arr_ind = 0) {
2803     if (_ms_ind == _ms_size) {
2804       gclog_or_tty->print_cr("Mark stack is full.");
2805       return false;
2806     }
2807     _ms[_ms_ind] = obj;
2808     if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
2809     _ms_ind++;
2810     return true;
2811   }
2812 
2813   oop pop() {
2814     if (_ms_ind == 0) return NULL;
2815     else {
2816       _ms_ind--;
2817       return _ms[_ms_ind];
2818     }
2819   }
2820 
2821   template <class T> bool drain() {
2822     while (_ms_ind > 0) {
2823       oop obj = pop();
2824       assert(obj != NULL, "Since index was non-zero.");
2825       if (obj->is_objArray()) {
2826         jint arr_ind = _array_ind_stack[_ms_ind];
2827         objArrayOop aobj = objArrayOop(obj);
2828         jint len = aobj->length();
2829         jint next_arr_ind = arr_ind + _array_increment;
2830         if (next_arr_ind < len) {
2831           push(obj, next_arr_ind);
2832         }
2833         // Now process this portion of this one.
2834         int lim = MIN2(next_arr_ind, len);
2835         for (int j = arr_ind; j < lim; j++) {
2836           do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2837         }
2838 
2839       } else {
2840         obj->oop_iterate(this);
2841       }
2842       if (abort()) return false;
2843     }
2844     return true;
2845   }
2846 
2847 public:
2848   CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2849     _g1h(G1CollectedHeap::heap()),
2850     _cm(cm),
2851     _bm(cm->nextMarkBitMap()),
2852     _ms_size(ms_size), _ms_ind(0),
2853     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2854     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2855     _array_increment(MAX2(ms_size/8, 16))
2856   {}
2857 
2858   ~CSMarkOopClosure() {
2859     FREE_C_HEAP_ARRAY(oop, _ms);
2860     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2861   }
2862 
2863   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2864   virtual void do_oop(      oop* p) { do_oop_work(p); }
2865 
2866   template <class T> void do_oop_work(T* p) {
2867     T heap_oop = oopDesc::load_heap_oop(p);
2868     if (oopDesc::is_null(heap_oop)) return;
2869     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2870     if (obj->is_forwarded()) {
2871       // If the object has already been forwarded, we have to make sure
2872       // that it's marked.  So follow the forwarding pointer.  Note that
2873       // this does the right thing for self-forwarding pointers in the
2874       // evacuation failure case.
2875       obj = obj->forwardee();
2876     }
2877     HeapRegion* hr = _g1h->heap_region_containing(obj);
2878     if (hr != NULL) {
2879       if (hr->in_collection_set()) {
2880         if (_g1h->is_obj_ill(obj)) {
2881           _bm->mark((HeapWord*)obj);
2882           if (!push(obj)) {
2883             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2884             set_abort();
2885           }
2886         }
2887       } else {
2888         // Outside the collection set; we need to gray it
2889         _cm->deal_with_reference(obj);
2890       }
2891     }
2892   }
2893 };
2894 
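     // Bit map closure driven over the next mark bitmap of a collection set
     // region. For every marked object that has not been forwarded it pushes
     // the object onto the CSMarkOopClosure's mark stack and then drains that
     // stack, aborting the iteration if the mark stack overflows.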
2895 class CSMarkBitMapClosure: public BitMapClosure {
2896   G1CollectedHeap* _g1h;
2897   CMBitMap*        _bitMap;
2898   ConcurrentMark*  _cm;
2899   CSMarkOopClosure _oop_cl;
2900 public:
2901   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2902     _g1h(G1CollectedHeap::heap()),
2903     _bitMap(cm->nextMarkBitMap()),
2904     _oop_cl(cm, ms_size)
2905   {}
2906 
2907   ~CSMarkBitMapClosure() {}
2908 
2909   bool do_bit(size_t offset) {
2910     // convert offset into a HeapWord*
2911     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2912     assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
2913            "address out of range");
2914     assert(_bitMap->isMarked(addr), "tautology");
2915     oop obj = oop(addr);
2916     if (!obj->is_forwarded()) {
2917       if (!_oop_cl.push(obj)) return false;
2918       if (UseCompressedOops) {
2919         if (!_oop_cl.drain<narrowOop>()) return false;
2920       } else {
2921         if (!_oop_cl.drain<oop>()) return false;
2922       }
2923     }
2924     // Otherwise...
2925     return true;
2926   }
2927 };
2928 
2929 
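     // Heap region closure that applies the bit map closure above to every
     // collection set region that did not have an evacuation failure. If any
     // bitmap iteration has to be aborted (because the mark stack
     // overflowed), completed() will return false and the caller retries.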
2930 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2931   CMBitMap* _bm;
2932   CSMarkBitMapClosure _bit_cl;
2933   enum SomePrivateConstants {
2934     MSSize = 1000
2935   };
2936   bool _completed;
2937 public:
2938   CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2939     _bm(cm->nextMarkBitMap()),
2940     _bit_cl(cm, MSSize),
2941     _completed(true)
2942   {}
2943 
2944   ~CompleteMarkingInCSHRClosure() {}
2945 
2946   bool doHeapRegion(HeapRegion* r) {
2947     if (!r->evacuation_failed()) {
2948       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2949       if (!mr.is_empty()) {
2950         if (!_bm->iterate(&_bit_cl, mr)) {
2951           _completed = false;
2952           return true;
2953         }
2954       }
2955     }
2956     return false;
2957   }
2958 
2959   bool completed() { return _completed; }
2960 };
2961 
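     // Clears the next mark bitmap over the used part of each region that
     // did not have an evacuation failure, once the marking above is done.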
2962 class ClearMarksInHRClosure: public HeapRegionClosure {
2963   CMBitMap* _bm;
2964 public:
2965   ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2966 
2967   bool doHeapRegion(HeapRegion* r) {
2968     if (!r->used_region().is_empty() && !r->evacuation_failed()) {
2969       MemRegion usedMR = r->used_region();
2970       _bm->clearRange(usedMR);
2971     }
2972     return false;
2973   }
2974 };
2975 
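     // Completes the marking of objects in collection set regions: it scans
     // the objects already marked on the next mark bitmap in those regions,
     // marking and tracing any unmarked objects they reference within the
     // collection set and graying references outside of it. The whole pass
     // is retried if the bounded mark stack overflows, and the bitmap is
     // cleared over the collection set regions afterwards.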
2976 void ConcurrentMark::complete_marking_in_collection_set() {
2977   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
2978 
2979   if (!g1h->mark_in_progress()) {
2980     g1h->g1_policy()->record_mark_closure_time(0.0);
2981     return;
2982   }
2983 
2984   int i = 1;
2985   double start = os::elapsedTime();
2986   while (true) {
2987     i++;
2988     CompleteMarkingInCSHRClosure cmplt(this);
2989     g1h->collection_set_iterate(&cmplt);
2990     if (cmplt.completed()) break;
2991   }
2992   double end_time = os::elapsedTime();
2993   double elapsed_time_ms = (end_time - start) * 1000.0;
2994   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2995 
2996   ClearMarksInHRClosure clr(nextMarkBitMap());
2997   g1h->collection_set_iterate(&clr);
2998 }
2999 
3000 // The next two methods deal with the following optimisation. Some
3001 // objects are gray by being marked and located above the finger. If
3002 // they are copied below the finger during an evacuation pause, then
3003 // they need to be pushed on the stack. The observation is that, if
3004 // there are no regions in the collection set located above the
3005 // finger, then the above cannot happen, hence we do not need to
3006 // explicitly gray any objects when copying them to below the
3007 // finger. The global stack will be scanned to ensure that, if it
3008 // points to objects being copied, it will update their
3009 // location. There is a tricky situation with the gray objects in
3010 // the region stack that are being copied, however. See the comment
3011 // in newCSet().
3012 
3013 void ConcurrentMark::newCSet() {
3014   if (!concurrent_marking_in_progress())
3015     // nothing to do if marking is not in progress
3016     return;
3017 
3018   // find what the lowest finger is among the global and local fingers
3019   _min_finger = _finger;
3020   for (int i = 0; i < (int)_max_task_num; ++i) {
3021     CMTask* task = _tasks[i];
3022     HeapWord* task_finger = task->finger();
3023     if (task_finger != NULL && task_finger < _min_finger)
3024       _min_finger = task_finger;
3025   }
3026 
3027   _should_gray_objects = false;
3028 
3029   // This fixes a very subtle and frustrating bug. It might be the case
3030   // that, during an evacuation pause, heap regions that contain
3031   // objects that are gray (by being in regions contained in the
3032   // region stack) are included in the collection set. Since such gray
3033   // objects will be moved, and because it's not easy to redirect
3034   // region stack entries to point to a new location (because objects
3035   // in one region might be scattered to multiple regions after they
3036   // are copied), one option is to ensure that all marked objects
3037   // copied during a pause are pushed on the stack. Notice, however,
3038   // that this problem can only happen when the region stack is not
3039   // empty during an evacuation pause. So, we make the fix a bit less
3040   // conservative and ensure that objects are pushed on the stack,
3041   // irrespective of whether all collection set regions are below the
3042   // finger, if the region stack is not empty. This is expected to be
3043   // a rare case, so I don't think it's necessary to be smarter about it.
3044   if (!region_stack_empty() || has_aborted_regions())
3045     _should_gray_objects = true;
3046 }
3047 
3048 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
3049   if (!concurrent_marking_in_progress())
3050     return;
3051 
3052   HeapWord* region_end = hr->end();
3053   if (region_end > _min_finger)
3054     _should_gray_objects = true;
3055 }
3056 
3057 // abandon current marking iteration due to a Full GC
3058 void ConcurrentMark::abort() {
3059   // Clear all marks to force marking thread to do nothing
3060   _nextMarkBitMap->clearAll();
3061   // Empty mark stack
3062   clear_marking_state();
3063   for (int i = 0; i < (int)_max_task_num; ++i) {
3064     _tasks[i]->clear_region_fields();
3065   }
3066   _has_aborted = true;
3067 
3068   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3069   satb_mq_set.abandon_partial_marking();
3070   // This can be called either during or outside marking, we'll read
3071   // the expected_active value from the SATB queue set.
3072   satb_mq_set.set_active_all_threads(
3073                                  false, /* new active value */
3074                                  satb_mq_set.is_active() /* expected_active */);
3075 }
3076 
3077 static void print_ms_time_info(const char* prefix, const char* name,
3078                                NumberSeq& ns) {
3079   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3080                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3081   if (ns.num() > 0) {
3082     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3083                            prefix, ns.sd(), ns.maximum());
3084   }
3085 }
3086 
3087 void ConcurrentMark::print_summary_info() {
3088   gclog_or_tty->print_cr(" Concurrent marking:");
3089   print_ms_time_info("  ", "init marks", _init_times);
3090   print_ms_time_info("  ", "remarks", _remark_times);
3091   {
3092     print_ms_time_info("     ", "final marks", _remark_mark_times);
3093     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3094 
3095   }
3096   print_ms_time_info("  ", "cleanups", _cleanup_times);
3097   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3098                          _total_counting_time,
3099                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3100                           (double)_cleanup_times.num()
3101                          : 0.0));
3102   if (G1ScrubRemSets) {
3103     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3104                            _total_rs_scrub_time,
3105                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3106                             (double)_cleanup_times.num()
3107                            : 0.0));
3108   }
3109   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3110                          (_init_times.sum() + _remark_times.sum() +
3111                           _cleanup_times.sum())/1000.0);
3112   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3113                 "(%8.2f s marking, %8.2f s counting).",
3114                 cmThread()->vtime_accum(),
3115                 cmThread()->vtime_mark_accum(),
3116                 cmThread()->vtime_count_accum());
3117 }
3118 
3119 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3120   _parallel_workers->print_worker_threads_on(st);
3121 }
3122 
3123 // Closures
3124 // XXX: there seems to be a lot of code duplication here;
3125 // should refactor and consolidate the shared code.
3126 
3130 // We take a break if someone is trying to stop the world.
3131 bool ConcurrentMark::do_yield_check(int worker_i) {
3132   if (should_yield()) {
3133     if (worker_i == 0)
3134       _g1h->g1_policy()->record_concurrent_pause();
3135     cmThread()->yield();
3136     if (worker_i == 0)
3137       _g1h->g1_policy()->record_concurrent_pause_end();
3138     return true;
3139   } else {
3140     return false;
3141   }
3142 }
3143 
3144 bool ConcurrentMark::should_yield() {
3145   return cmThread()->should_yield();
3146 }
3147 
3148 bool ConcurrentMark::containing_card_is_marked(void* p) {
3149   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3150   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3151 }
3152 
3153 bool ConcurrentMark::containing_cards_are_marked(void* start,
3154                                                  void* last) {
3155   return
3156     containing_card_is_marked(start) &&
3157     containing_card_is_marked(last);
3158 }
3159 
3160 #ifndef PRODUCT
3161 // for debugging purposes
3162 void ConcurrentMark::print_finger() {
3163   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3164                          _heap_start, _heap_end, _finger);
3165   for (int i = 0; i < (int) _max_task_num; ++i) {
3166     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
3167   }
3168   gclog_or_tty->print_cr("");
3169 }
3170 #endif
3171 
3172 // Closure for iteration over bitmaps
3173 class CMBitMapClosure : public BitMapClosure {
3174 private:
3175   // the bitmap that is being iterated over
3176   CMBitMap*                   _nextMarkBitMap;
3177   ConcurrentMark*             _cm;
3178   CMTask*                     _task;
3179   // true if we're scanning a heap region claimed by the task (so that
3180   // we move the finger along), false if we're not, i.e. currently when
3181   // scanning a heap region popped from the region stack (so that we
3182   // do not move the task finger along; it'd be a mistake if we did so).
3183   bool                        _scanning_heap_region;
3184 
3185 public:
3186   CMBitMapClosure(CMTask *task,
3187                   ConcurrentMark* cm,
3188                   CMBitMap* nextMarkBitMap)
3189     : _nextMarkBitMap(nextMarkBitMap), _cm(cm), _task(task) { }
3190 
3191   void set_scanning_heap_region(bool scanning_heap_region) {
3192     _scanning_heap_region = scanning_heap_region;
3193   }
3194 
3195   bool do_bit(size_t offset) {
3196     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3197     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3198     assert( addr < _cm->finger(), "invariant");
3199 
3200     if (_scanning_heap_region) {
3201       statsOnly( _task->increase_objs_found_on_bitmap() );
3202       assert(addr >= _task->finger(), "invariant");
3203       // We move that task's local finger along.
3204       _task->move_finger_to(addr);
3205     } else {
3206       // We move the task's region finger along.
3207       _task->move_region_finger_to(addr);
3208     }
3209 
3210     _task->scan_object(oop(addr));
3211     // we only partially drain the local queue and global stack
3212     _task->drain_local_queue(true);
3213     _task->drain_global_stack(true);
3214 
3215     // if the has_aborted flag has been raised, we need to bail out of
3216     // the iteration
3217     return !_task->has_aborted();
3218   }
3219 };
3220 
3221 // Closure for iterating over objects, currently only used for
3222 // processing SATB buffers.
3223 class CMObjectClosure : public ObjectClosure {
3224 private:
3225   CMTask* _task;
3226 
3227 public:
3228   void do_object(oop obj) {
3229     _task->deal_with_reference(obj);
3230   }
3231 
3232   CMObjectClosure(CMTask* task) : _task(task) { }
3233 };
3234 
3235 // Closure for iterating over object fields
3236 class CMOopClosure : public OopClosure {
3237 private:
3238   G1CollectedHeap*   _g1h;
3239   ConcurrentMark*    _cm;
3240   CMTask*            _task;
3241 
3242 public:
3243   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
3244   virtual void do_oop(      oop* p) { do_oop_work(p); }
3245 
3246   template <class T> void do_oop_work(T* p) {
3247     assert( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
3248     assert(!_g1h->is_on_master_free_list(
3249                     _g1h->heap_region_containing((HeapWord*) p)), "invariant");
3250 
3251     oop obj = oopDesc::load_decode_heap_oop(p);
3252     if (_cm->verbose_high())
3253       gclog_or_tty->print_cr("[%d] we're looking at location "
3254                              "*"PTR_FORMAT" = "PTR_FORMAT,
3255                              _task->task_id(), p, (void*) obj);
3256     _task->deal_with_reference(obj);
3257   }
3258 
3259   CMOopClosure(G1CollectedHeap* g1h,
3260                ConcurrentMark* cm,
3261                CMTask* task)
3262     : _g1h(g1h), _cm(cm), _task(task)
3263   {
3264     assert(_ref_processor == NULL, "should be initialized to NULL");
3265 
3266     if (G1UseConcMarkReferenceProcessing) {
3267       _ref_processor = g1h->ref_processor();
3268       assert(_ref_processor != NULL, "should not be NULL");
3269     }
3270   }
3271 };
3272 
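     // Installs the given region as the task's current region and positions
     // the local finger at its bottom.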
3273 void CMTask::setup_for_region(HeapRegion* hr) {
3274   // Separated the asserts so that we know which one fires.
3275   assert(hr != NULL,
3276         "claim_region() should have returned a non-NULL region");
3277   assert(!hr->continuesHumongous(),
3278         "claim_region() should have filtered out continues humongous regions");
3279 
3280   if (_cm->verbose_low())
3281     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3282                            _task_id, hr);
3283 
3284   _curr_region  = hr;
3285   _finger       = hr->bottom();
3286   update_region_limit();
3287 }
3288 
3289 void CMTask::update_region_limit() {
3290   HeapRegion* hr            = _curr_region;
3291   HeapWord* bottom          = hr->bottom();
3292   HeapWord* limit           = hr->next_top_at_mark_start();
3293 
3294   if (limit == bottom) {
3295     if (_cm->verbose_low())
3296       gclog_or_tty->print_cr("[%d] found an empty region "
3297                              "["PTR_FORMAT", "PTR_FORMAT")",
3298                              _task_id, bottom, limit);
3299     // The region was collected underneath our feet.
3300     // We set the finger to bottom to ensure that the bitmap
3301     // iteration that will follow this will not do anything.
3302     // (this is not a condition that holds when we set the region up,
3303     // as the region is not supposed to be empty in the first place)
3304     _finger = bottom;
3305   } else if (limit >= _region_limit) {
3306     assert(limit >= _finger, "peace of mind");
3307   } else {
3308     assert(limit < _region_limit, "only way to get here");
3309     // This can happen under some pretty unusual circumstances.  An
3310     // evacuation pause empties the region underneath our feet (NTAMS
3311     // at bottom). We then do some allocation in the region (NTAMS
3312     // stays at bottom), followed by the region being used as a GC
3313     // alloc region (NTAMS will move to top() and the objects
3314     // originally below it will be grayed). All objects now marked in
3315     // the region are explicitly grayed, if below the global finger,
3316     // and in fact we do not need to scan anything else. So, we simply
3317     // set _finger to be limit to ensure that the bitmap iteration
3318     // doesn't do anything.
3319     _finger = limit;
3320   }
3321 
3322   _region_limit = limit;
3323 }
3324 
3325 void CMTask::giveup_current_region() {
3326   assert(_curr_region != NULL, "invariant");
3327   if (_cm->verbose_low())
3328     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3329                            _task_id, _curr_region);
3330   clear_region_fields();
3331 }
3332 
3333 void CMTask::clear_region_fields() {
3334   // Values for these three fields that indicate that we're not
3335   // holding on to a region.
3336   _curr_region   = NULL;
3337   _finger        = NULL;
3338   _region_limit  = NULL;
3339 
3340   _region_finger = NULL;
3341 }
3342 
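     // Re-initialises the task for a new marking cycle: installs the given
     // next mark bitmap, clears the region fields and the per-step timing
     // (and, when enabled, the marking statistics counters).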
3343 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3344   guarantee(nextMarkBitMap != NULL, "invariant");
3345 
3346   if (_cm->verbose_low())
3347     gclog_or_tty->print_cr("[%d] resetting", _task_id);
3348 
3349   _nextMarkBitMap                = nextMarkBitMap;
3350   clear_region_fields();
3351   assert(_aborted_region.is_empty(), "should have been cleared");
3352 
3353   _calls                         = 0;
3354   _elapsed_time_ms               = 0.0;
3355   _termination_time_ms           = 0.0;
3356   _termination_start_time_ms     = 0.0;
3357 
3358 #if _MARKING_STATS_
3359   _local_pushes                  = 0;
3360   _local_pops                    = 0;
3361   _local_max_size                = 0;
3362   _objs_scanned                  = 0;
3363   _global_pushes                 = 0;
3364   _global_pops                   = 0;
3365   _global_max_size               = 0;
3366   _global_transfers_to           = 0;
3367   _global_transfers_from         = 0;
3368   _region_stack_pops             = 0;
3369   _regions_claimed               = 0;
3370   _objs_found_on_bitmap          = 0;
3371   _satb_buffers_processed        = 0;
3372   _steal_attempts                = 0;
3373   _steals                        = 0;
3374   _aborted                       = 0;
3375   _aborted_overflow              = 0;
3376   _aborted_cm_aborted            = 0;
3377   _aborted_yield                 = 0;
3378   _aborted_timed_out             = 0;
3379   _aborted_satb                  = 0;
3380   _aborted_termination           = 0;
3381 #endif // _MARKING_STATS_
3382 }
3383 
3384 bool CMTask::should_exit_termination() {
3385   regular_clock_call();
3386   // This is called when we are in the termination protocol. We should
3387   // quit if, for some reason, this task wants to abort or the global
3388   // stack is not empty (this means that we can get work from it).
3389   return !_cm->mark_stack_empty() || has_aborted();
3390 }
3391 
3392 // This determines whether the method below will check both the local
3393 // and global fingers when determining whether to push on the stack a
3394 // gray object (value 1) or whether it will only check the global one
3395 // (value 0). The tradeoffs are that the former will be a bit more
3396 // accurate and possibly push less on the stack, but it might also be
3397 // a little bit slower.
3398 
3399 #define _CHECK_BOTH_FINGERS_      1
3400 
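     // Processes a reference found during marking: if the object is in the
     // G1 reserved heap and not yet marked, it is marked on the next mark
     // bitmap and, when it lies below the relevant finger(s), pushed onto
     // the task's local queue so that it will still be scanned.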
3401 void CMTask::deal_with_reference(oop obj) {
3402   if (_cm->verbose_high())
3403     gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
3404                            _task_id, (void*) obj);
3405 
3406   ++_refs_reached;
3407 
3408   HeapWord* objAddr = (HeapWord*) obj;
3409   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
3410   if (_g1h->is_in_g1_reserved(objAddr)) {
3411     assert(obj != NULL, "is_in_g1_reserved should ensure this");
3412     HeapRegion* hr =  _g1h->heap_region_containing(obj);
3413     if (_g1h->is_obj_ill(obj, hr)) {
3414       if (_cm->verbose_high())
3415         gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
3416                                _task_id, (void*) obj);
3417 
3418       // we need to mark it first
3419       if (_nextMarkBitMap->parMark(objAddr)) {
3420         // No OrderAccess::store_load() is needed. It is implicit in the
3421         // CAS done in parMark(objAddr) above
3422         HeapWord* global_finger = _cm->finger();
3423 
3424 #if _CHECK_BOTH_FINGERS_
3425         // we will check both the local and global fingers
3426 
3427         if (_finger != NULL && objAddr < _finger) {
3428           if (_cm->verbose_high())
3429             gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
3430                                    "pushing it", _task_id, _finger);
3431           push(obj);
3432         } else if (_curr_region != NULL && objAddr < _region_limit) {
3433           // do nothing
3434         } else if (objAddr < global_finger) {
3435           // Notice that the global finger might be moving forward
3436           // concurrently. This is not a problem. In the worst case, we
3437           // mark the object while it is above the global finger and, by
3438           // the time we read the global finger, it has moved forward
3439           // past this object. In this case, the object will probably
3440           // be visited when a task is scanning the region and will also
3441           // be pushed on the stack. So, some duplicate work, but no
3442           // correctness problems.
3443 
3444           if (_cm->verbose_high())
3445             gclog_or_tty->print_cr("[%d] below the global finger "
3446                                    "("PTR_FORMAT"), pushing it",
3447                                    _task_id, global_finger);
3448           push(obj);
3449         } else {
3450           // do nothing
3451         }
3452 #else // _CHECK_BOTH_FINGERS_
3453         // we will only check the global finger
3454 
3455         if (objAddr < global_finger) {
3456           // see long comment above
3457 
3458           if (_cm->verbose_high())
3459             gclog_or_tty->print_cr("[%d] below the global finger "
3460                                    "("PTR_FORMAT"), pushing it",
3461                                    _task_id, global_finger);
3462           push(obj);
3463         }
3464 #endif // _CHECK_BOTH_FINGERS_
3465       }
3466     }
3467   }
3468 }
3469 
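     // Pushes an object onto the task's local queue. If the local queue is
     // full, a chunk of its entries is first moved to the global mark stack
     // to make room.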
3470 void CMTask::push(oop obj) {
3471   HeapWord* objAddr = (HeapWord*) obj;
3472   assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
3473   assert(!_g1h->is_on_master_free_list(
3474               _g1h->heap_region_containing((HeapWord*) objAddr)), "invariant");
3475   assert(!_g1h->is_obj_ill(obj), "invariant");
3476   assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
3477 
3478   if (_cm->verbose_high())
3479     gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
3480 
3481   if (!_task_queue->push(obj)) {
3482     // The local task queue looks full. We need to push some entries
3483     // to the global stack.
3484 
3485     if (_cm->verbose_medium())
3486       gclog_or_tty->print_cr("[%d] task queue overflow, "
3487                              "moving entries to the global stack",
3488                              _task_id);
3489     move_entries_to_global_stack();
3490 
3491     // this should succeed since, even if we overflow the global
3492     // stack, we should have definitely removed some entries from the
3493     // local queue. So, there must be space on it.
3494     bool success = _task_queue->push(obj);
3495     assert(success, "invariant");
3496   }
3497 
3498   statsOnly( int tmp_size = _task_queue->size();
3499              if (tmp_size > _local_max_size)
3500                _local_max_size = tmp_size;
3501              ++_local_pushes );
3502 }
3503 
3504 void CMTask::reached_limit() {
3505   assert(_words_scanned >= _words_scanned_limit ||
3506          _refs_reached >= _refs_reached_limit ,
3507          "shouldn't have been called otherwise");
3508   regular_clock_call();
3509 }
3510 
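     // The work-based "clock" call: refreshes the scanning limits and checks
     // the abort conditions listed in the large comment block that precedes
     // do_marking_step() further down in this file.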
3511 void CMTask::regular_clock_call() {
3512   if (has_aborted())
3513     return;
3514 
3515   // First, we need to recalculate the words scanned and refs reached
3516   // limits for the next clock call.
3517   recalculate_limits();
3518 
3519   // During the regular clock call we do the following
3520 
3521   // (1) If an overflow has been flagged, then we abort.
3522   if (_cm->has_overflown()) {
3523     set_has_aborted();
3524     return;
3525   }
3526 
3527   // If we are not concurrent (i.e. we're doing remark) we don't need
3528   // to check anything else. The other steps are only needed during
3529   // the concurrent marking phase.
3530   if (!concurrent())
3531     return;
3532 
3533   // (2) If marking has been aborted for Full GC, then we also abort.
3534   if (_cm->has_aborted()) {
3535     set_has_aborted();
3536     statsOnly( ++_aborted_cm_aborted );
3537     return;
3538   }
3539 
3540   double curr_time_ms = os::elapsedVTime() * 1000.0;
3541 
3542   // (3) If marking stats are enabled, then we update the step history.
3543 #if _MARKING_STATS_
3544   if (_words_scanned >= _words_scanned_limit)
3545     ++_clock_due_to_scanning;
3546   if (_refs_reached >= _refs_reached_limit)
3547     ++_clock_due_to_marking;
3548 
3549   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3550   _interval_start_time_ms = curr_time_ms;
3551   _all_clock_intervals_ms.add(last_interval_ms);
3552 
3553   if (_cm->verbose_medium()) {
3554     gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3555                            "scanned = %d%s, refs reached = %d%s",
3556                            _task_id, last_interval_ms,
3557                            _words_scanned,
3558                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3559                            _refs_reached,
3560                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3561   }
3562 #endif // _MARKING_STATS_
3563 
3564   // (4) We check whether we should yield. If we have to, then we abort.
3565   if (_cm->should_yield()) {
3566     // We should yield. To do this we abort the task. The caller is
3567     // responsible for yielding.
3568     set_has_aborted();
3569     statsOnly( ++_aborted_yield );
3570     return;
3571   }
3572 
3573   // (5) We check whether we've reached our time quota. If we have,
3574   // then we abort.
3575   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3576   if (elapsed_time_ms > _time_target_ms) {
3577     set_has_aborted();
3578     _has_timed_out = true;
3579     statsOnly( ++_aborted_timed_out );
3580     return;
3581   }
3582 
3583   // (6) Finally, we check whether there are enough completed SATB
3584   // buffers available for processing. If there are, we abort.
3585   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3586   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3587     if (_cm->verbose_low())
3588       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3589                              _task_id);
3590     // we do need to process SATB buffers, we'll abort and restart
3591     // the marking task to do so
3592     set_has_aborted();
3593     statsOnly( ++_aborted_satb );
3594     return;
3595   }
3596 }
3597 
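     // Sets the next words-scanned and refs-reached limits at which the
     // work-based scheme will invoke the regular clock call.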
3598 void CMTask::recalculate_limits() {
3599   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3600   _words_scanned_limit      = _real_words_scanned_limit;
3601 
3602   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3603   _refs_reached_limit       = _real_refs_reached_limit;
3604 }
3605 
3606 void CMTask::decrease_limits() {
3607   // This is called when we believe that we're going to do an infrequent
3608   // operation which will increase the per byte scanned cost (i.e. move
3609   // entries to/from the global stack). It basically tries to decrease the
3610   // scanning limit so that the clock is called earlier.
3611 
3612   if (_cm->verbose_medium())
3613     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3614 
3615   _words_scanned_limit = _real_words_scanned_limit -
3616     3 * words_scanned_period / 4;
3617   _refs_reached_limit  = _real_refs_reached_limit -
3618     3 * refs_reached_period / 4;
3619 }
3620 
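     // Moves up to global_stack_transfer_size entries from the local queue
     // to the global mark stack; sets the abort flag if the global stack
     // overflows in the process.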
3621 void CMTask::move_entries_to_global_stack() {
3622   // local array where we'll store the entries that will be popped
3623   // from the local queue
3624   oop buffer[global_stack_transfer_size];
3625 
3626   int n = 0;
3627   oop obj;
3628   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3629     buffer[n] = obj;
3630     ++n;
3631   }
3632 
3633   if (n > 0) {
3634     // we popped at least one entry from the local queue
3635 
3636     statsOnly( ++_global_transfers_to; _local_pops += n );
3637 
3638     if (!_cm->mark_stack_push(buffer, n)) {
3639       if (_cm->verbose_low())
3640         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
3641       set_has_aborted();
3642     } else {
3643       // the transfer was successful
3644 
3645       if (_cm->verbose_medium())
3646         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3647                                _task_id, n);
3648       statsOnly( int tmp_size = _cm->mark_stack_size();
3649                  if (tmp_size > _global_max_size)
3650                    _global_max_size = tmp_size;
3651                  _global_pushes += n );
3652     }
3653   }
3654 
3655   // this operation was quite expensive, so decrease the limits
3656   decrease_limits();
3657 }
3658 
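     // Pops up to global_stack_transfer_size entries from the global mark
     // stack and pushes them onto the local queue.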
3659 void CMTask::get_entries_from_global_stack() {
3660   // local array where we'll store the entries that will be popped
3661   // from the global stack.
3662   oop buffer[global_stack_transfer_size];
3663   int n;
3664   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3665   assert(n <= global_stack_transfer_size,
3666          "we should not pop more than the given limit");
3667   if (n > 0) {
3668     // yes, we did actually pop at least one entry
3669 
3670     statsOnly( ++_global_transfers_from; _global_pops += n );
3671     if (_cm->verbose_medium())
3672       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3673                              _task_id, n);
3674     for (int i = 0; i < n; ++i) {
3675       bool success = _task_queue->push(buffer[i]);
3676       // We only call this when the local queue is empty or under a
3677       // given target limit. So, we do not expect this push to fail.
3678       assert(success, "invariant");
3679     }
3680 
3681     statsOnly( int tmp_size = _task_queue->size();
3682                if (tmp_size > _local_max_size)
3683                  _local_max_size = tmp_size;
3684                _local_pushes += n );
3685   }
3686 
3687   // this operation was quite expensive, so decrease the limits
3688   decrease_limits();
3689 }
3690 
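     // Pops and scans entries from the local queue until its size drops to
     // a target: a small bound when draining partially, zero when draining
     // completely.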
3691 void CMTask::drain_local_queue(bool partially) {
3692   if (has_aborted())
3693     return;
3694 
3695   // Decide what the target size is, depending whether we're going to
3696   // drain it partially (so that other tasks can steal if they run out
3697   // of things to do) or totally (at the very end).
3698   size_t target_size;
3699   if (partially)
3700     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3701   else
3702     target_size = 0;
3703 
3704   if (_task_queue->size() > target_size) {
3705     if (_cm->verbose_high())
3706       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3707                              _task_id, target_size);
3708 
3709     oop obj;
3710     bool ret = _task_queue->pop_local(obj);
3711     while (ret) {
3712       statsOnly( ++_local_pops );
3713 
3714       if (_cm->verbose_high())
3715         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3716                                (void*) obj);
3717 
3718       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3719       assert(!_g1h->is_on_master_free_list(
3720                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3721 
3722       scan_object(obj);
3723 
3724       if (_task_queue->size() <= target_size || has_aborted())
3725         ret = false;
3726       else
3727         ret = _task_queue->pop_local(obj);
3728     }
3729 
3730     if (_cm->verbose_high())
3731       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3732                              _task_id, _task_queue->size());
3733   }
3734 }
3735 
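     // Transfers entries from the global mark stack to the local queue and
     // scans them, until the global stack drops below a target size: a
     // partial target when draining partially, zero when draining completely.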
3736 void CMTask::drain_global_stack(bool partially) {
3737   if (has_aborted())
3738     return;
3739 
3740   // We have a policy to drain the local queue before we attempt to
3741   // drain the global stack.
3742   assert(partially || _task_queue->size() == 0, "invariant");
3743 
3744   // Decide what the target size is, depending whether we're going to
3745   // drain it partially (so that other tasks can steal if they run out
3746   // of things to do) or totally (at the very end).  Notice that,
3747   // because we move entries from the global stack in chunks or
3748   // because another task might be doing the same, we might in fact
3749   // drop below the target. But, this is not a problem.
3750   size_t target_size;
3751   if (partially)
3752     target_size = _cm->partial_mark_stack_size_target();
3753   else
3754     target_size = 0;
3755 
3756   if (_cm->mark_stack_size() > target_size) {
3757     if (_cm->verbose_low())
3758       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3759                              _task_id, target_size);
3760 
3761     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3762       get_entries_from_global_stack();
3763       drain_local_queue(partially);
3764     }
3765 
3766     if (_cm->verbose_low())
3767       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3768                              _task_id, _cm->mark_stack_size());
3769   }
3770 }
3771 
3772 // The SATB queue system makes several assumptions about whether to
3773 // call the par or non-par versions of the methods. This is why some
3774 // of the code is replicated. We should really get rid of the
3775 // single-threaded version of the code to simplify things.
3776 void CMTask::drain_satb_buffers() {
3777   if (has_aborted())
3778     return;
3779 
3780   // We set this so that the regular clock knows that we're in the
3781   // middle of draining buffers and doesn't set the abort flag when it
3782   // notices that SATB buffers are available for draining. It'd be
3783   // very counterproductive if it did that. :-)
3784   _draining_satb_buffers = true;
3785 
3786   CMObjectClosure oc(this);
3787   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3788   if (G1CollectedHeap::use_parallel_gc_threads())
3789     satb_mq_set.set_par_closure(_task_id, &oc);
3790   else
3791     satb_mq_set.set_closure(&oc);
3792 
3793   // This keeps claiming and applying the closure to completed buffers
3794   // until we run out of buffers or we need to abort.
3795   if (G1CollectedHeap::use_parallel_gc_threads()) {
3796     while (!has_aborted() &&
3797            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3798       if (_cm->verbose_medium())
3799         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3800       statsOnly( ++_satb_buffers_processed );
3801       regular_clock_call();
3802     }
3803   } else {
3804     while (!has_aborted() &&
3805            satb_mq_set.apply_closure_to_completed_buffer()) {
3806       if (_cm->verbose_medium())
3807         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3808       statsOnly( ++_satb_buffers_processed );
3809       regular_clock_call();
3810     }
3811   }
3812 
3813   if (!concurrent() && !has_aborted()) {
3814     // We should only do this during remark.
3815     if (G1CollectedHeap::use_parallel_gc_threads())
3816       satb_mq_set.par_iterate_closure_all_threads(_task_id);
3817     else
3818       satb_mq_set.iterate_closure_all_threads();
3819   }
3820 
3821   _draining_satb_buffers = false;
3822 
3823   assert(has_aborted() ||
3824          concurrent() ||
3825          satb_mq_set.completed_buffers_num() == 0, "invariant");
3826 
3827   if (G1CollectedHeap::use_parallel_gc_threads())
3828     satb_mq_set.set_par_closure(_task_id, NULL);
3829   else
3830     satb_mq_set.set_closure(NULL);
3831 
3832   // again, this was a potentially expensive operation, decrease the
3833   // limits to get the regular clock call early
3834   decrease_limits();
3835 }
3836 
3837 void CMTask::drain_region_stack(BitMapClosure* bc) {
3838   if (has_aborted())
3839     return;
3840 
3841   assert(_region_finger == NULL,
3842          "it should be NULL when we're not scanning a region");
3843 
3844   if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
3845     if (_cm->verbose_low())
3846       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3847                              _task_id, _cm->region_stack_size());
3848 
3849     MemRegion mr;
3850 
3851     if (!_aborted_region.is_empty()) {
3852       mr = _aborted_region;
3853       _aborted_region = MemRegion();
3854 
3855       if (_cm->verbose_low())
3856         gclog_or_tty->print_cr("[%d] scanning aborted region [ " PTR_FORMAT ", " PTR_FORMAT " )",
3857                              _task_id, mr.start(), mr.end());
3858     } else {
3859       mr = _cm->region_stack_pop_lock_free();
3860       // it returns MemRegion() if the pop fails
3861       statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3862     }
3863 
3864     while (mr.start() != NULL) {
3865       if (_cm->verbose_medium())
3866         gclog_or_tty->print_cr("[%d] we are scanning region "
3867                                "["PTR_FORMAT", "PTR_FORMAT")",
3868                                _task_id, mr.start(), mr.end());
3869 
3870       assert(mr.end() <= _cm->finger(),
3871              "otherwise the region shouldn't be on the stack");
3872       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3873       if (_nextMarkBitMap->iterate(bc, mr)) {
3874         assert(!has_aborted(),
3875                "cannot abort the task without aborting the bitmap iteration");
3876 
3877         // We finished iterating over the region without aborting.
3878         regular_clock_call();
3879         if (has_aborted())
3880           mr = MemRegion();
3881         else {
3882           mr = _cm->region_stack_pop_lock_free();
3883           // it returns MemRegion() if the pop fails
3884           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3885         }
3886       } else {
3887         assert(has_aborted(), "currently the only way to do so");
3888 
3889         // The only way to abort the bitmap iteration is to return
3890         // false from the do_bit() method. However, inside the
3891         // do_bit() method we move the _region_finger to point to the
3892         // object currently being looked at. So, if we bail out, we
3893         // have definitely set _region_finger to something non-null.
3894         assert(_region_finger != NULL, "invariant");
3895 
3896         // Make sure that any previously aborted region has been
3897         // cleared.
3898         assert(_aborted_region.is_empty(), "aborted region not cleared");
3899 
3900         // The iteration was actually aborted. So now _region_finger
3901         // points to the address of the object we last scanned. If we
3902         // leave it there, when we restart this task, we will rescan
3903         // the object. It is easy to avoid this. We move the finger by
3904         // enough to point to the next possible object header (the
3905         // bitmap knows by how much we need to move it as it knows its
3906         // granularity).
3907         MemRegion newRegion =
3908           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3909 
3910         if (!newRegion.is_empty()) {
3911           if (_cm->verbose_low()) {
3912             gclog_or_tty->print_cr("[%d] recording unscanned region"
3913                                    "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
3914                                    _task_id,
3915                                    newRegion.start(), newRegion.end());
3916           }
3917           // Now record the part of the region we didn't scan to
3918           // make sure this task scans it later.
3919           _aborted_region = newRegion;
3920         }
3921         // break from while
3922         mr = MemRegion();
3923       }
3924       _region_finger = NULL;
3925     }
3926 
3927     if (_cm->verbose_low())
3928       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3929                              _task_id, _cm->region_stack_size());
3930   }
3931 }
3932 
3933 void CMTask::print_stats() {
3934   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3935                          _task_id, _calls);
3936   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3937                          _elapsed_time_ms, _termination_time_ms);
3938   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3939                          _step_times_ms.num(), _step_times_ms.avg(),
3940                          _step_times_ms.sd());
3941   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3942                          _step_times_ms.maximum(), _step_times_ms.sum());
3943 
3944 #if _MARKING_STATS_
3945   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3946                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3947                          _all_clock_intervals_ms.sd());
3948   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3949                          _all_clock_intervals_ms.maximum(),
3950                          _all_clock_intervals_ms.sum());
3951   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
3952                          _clock_due_to_scanning, _clock_due_to_marking);
3953   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
3954                          _objs_scanned, _objs_found_on_bitmap);
3955   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
3956                          _local_pushes, _local_pops, _local_max_size);
3957   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
3958                          _global_pushes, _global_pops, _global_max_size);
3959   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
3960                          _global_transfers_to,_global_transfers_from);
3961   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
3962                          _regions_claimed, _region_stack_pops);
3963   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
3964   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
3965                          _steal_attempts, _steals);
3966   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
3967   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
3968                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3969   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
3970                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3971 #endif // _MARKING_STATS_
3972 }
3973 
3974 /*****************************************************************************
3975 
3976     The do_marking_step(time_target_ms) method is the building block
3977     of the parallel marking framework. It can be called in parallel
3978     with other invocations of do_marking_step() on different tasks
3979     (but only one per task, obviously) and concurrently with the
3980     mutator threads, or during remark, hence it eliminates the need
3981     for two versions of the code. When called during remark, it will
3982     pick up from where the task left off during the concurrent marking
3983     phase. Interestingly, tasks are also claimable during evacuation
3984     pauses, since do_marking_step() ensures that it aborts before
3985     it needs to yield.
3986 
3987     The data structures that it uses to do marking work are the
3988     following:
3989 
3990       (1) Marking Bitmap. If there are gray objects that appear only
3991       on the bitmap (this happens either when dealing with an overflow
3992       or when the initial marking phase has simply marked the roots
3993       and didn't push them on the stack), then tasks claim heap
3994       regions whose bitmap they then scan to find gray objects. A
3995       global finger indicates where the end of the last claimed region
3996       is. A local finger indicates how far into the region a task has
3997       scanned. The two fingers are used to determine how to gray an
3998       object (i.e. whether simply marking it is OK, as it will be
3999       visited by a task in the future, or whether it needs to be also
4000       pushed on a stack).
4001 
4002       (2) Local Queue. The local queue of the task which is accessed
4003       reasonably efficiently by the task. Other tasks can steal from
4004       it when they run out of work. Throughout the marking phase, a
4005       task attempts to keep its local queue short but not totally
4006       empty, so that entries are available for stealing by other
4007       tasks. Only when there is no more work, a task will totally
4008       drain its local queue.
4009 
4010       (3) Global Mark Stack. This handles local queue overflow. During
4011       marking only sets of entries are moved between it and the local
4012       queues, as access to it requires a mutex and more fine-grained
4013       interaction with it might cause contention. If it
4014       overflows, then the marking phase should restart and iterate
4015       over the bitmap to identify gray objects. Throughout the marking
4016       phase, tasks attempt to keep the global mark stack at a small
4017       length but not totally empty, so that entries are available for
4018       popping by other tasks. Only when there is no more work, tasks
4019       will totally drain the global mark stack.
4020 
4021       (4) Global Region Stack. Entries on it correspond to areas of
4022       the bitmap that need to be scanned since they contain gray
4023       objects. Pushes on the region stack only happen during
4024       evacuation pauses and typically correspond to areas covered by
4025       GC LABs. If it overflows, then the marking phase should restart
4026       and iterate over the bitmap to identify gray objects. Tasks will
4027       try to totally drain the region stack as soon as possible.
4028 
4029       (5) SATB Buffer Queue. This is where completed SATB buffers are
4030       made available. Buffers are regularly removed from this queue
4031       and scanned for roots, so that the queue doesn't get too
4032       long. During remark, all completed buffers are processed, as
4033       well as the filled in parts of any uncompleted buffers.
4034 
4035     The do_marking_step() method tries to abort when the time target
4036     has been reached. There are a few other cases when the
4037     do_marking_step() method also aborts:
4038 
4039       (1) When the marking phase has been aborted (after a Full GC).
4040 
4041       (2) When a global overflow (either on the global stack or the
4042       region stack) has been triggered. Before the task aborts, it
4043       will actually sync up with the other tasks to ensure that all
4044       the marking data structures (local queues, stacks, fingers etc.)
4045       are re-initialised so that when do_marking_step() completes,
4046       the marking phase can immediately restart.
4047 
4048       (3) When enough completed SATB buffers are available. The
4049       do_marking_step() method only tries to drain SATB buffers right
4050       at the beginning. So, if enough buffers are available, the
4051       marking step aborts and the SATB buffers are processed at
4052       the beginning of the next invocation.
4053 
4054       (4) To yield. When we have to yield, we abort and yield
4055       right at the end of do_marking_step(). This saves us from a lot
4056       of hassle as, by yielding we might allow a Full GC. If this
4057       happens then objects will be compacted underneath our feet, the
4058       heap might shrink, etc. We save checking for this by just
4059       aborting and doing the yield right at the end.
4060 
4061     From the above it follows that the do_marking_step() method should
4062     be called in a loop (or, otherwise, regularly) until it completes.
4063 
4064     If a marking step completes without its has_aborted() flag being
4065     true, it means it has completed the current marking phase (and
4066     also all other marking tasks have done so and have all synced up).
4067 
4068     A method called regular_clock_call() is invoked "regularly" (in
4069     sub-millisecond intervals) throughout marking. It is this clock
4070     method that checks all the abort conditions which were mentioned
4071     above and decides when the task should abort. A work-based scheme
4072     is used to trigger this clock method: when the number of object
4073     words the marking phase has scanned or the number of references
4074     the marking phase has visited reaches a given limit. Additional
4075     calls to the clock method have been planted in a few other
4076     strategic places too. The initial reason for the clock method was
4077     to avoid calling vtime too regularly, as it is quite expensive. So,
4078     once it was in place, it was natural to piggy-back all the other
4079     conditions on it too and not constantly check them throughout the code.
4080 
4081  *****************************************************************************/
4082 
4083 void CMTask::do_marking_step(double time_target_ms,
4084                              bool do_stealing,
4085                              bool do_termination) {
4086   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4087   assert(concurrent() == _cm->concurrent(), "they should be the same");
4088 
4089   assert(concurrent() || _cm->region_stack_empty(),
4090          "the region stack should have been cleared before remark");
4091   assert(concurrent() || !_cm->has_aborted_regions(),
4092          "aborted regions should have been cleared before remark");
4093   assert(_region_finger == NULL,
4094          "this should be non-null only when a region is being scanned");
4095 
4096   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4097   assert(_task_queues != NULL, "invariant");
4098   assert(_task_queue != NULL, "invariant");
4099   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
4100 
4101   assert(!_claimed,
4102          "only one thread should claim this task at any one time");
4103 
4104   // OK, this doesn't safeguard against all possible scenarios, as it is
4105   // possible for two threads to set the _claimed flag at the same
4106   // time. But it is only for debugging purposes anyway and it will
4107   // catch most problems.
4108   _claimed = true;
4109 
4110   _start_time_ms = os::elapsedVTime() * 1000.0;
4111   statsOnly( _interval_start_time_ms = _start_time_ms );
4112 
4113   double diff_prediction_ms =
4114     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4115   _time_target_ms = time_target_ms - diff_prediction_ms;
4116 
4117   // set up the variables that are used in the work-based scheme to
4118   // call the regular clock method
4119   _words_scanned = 0;
4120   _refs_reached  = 0;
4121   recalculate_limits();
4122 
4123   // clear all flags
4124   clear_has_aborted();
4125   _has_timed_out = false;
4126   _draining_satb_buffers = false;
4127 
4128   ++_calls;
4129 
4130   if (_cm->verbose_low())
4131     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
4132                            "target = %1.2lfms >>>>>>>>>>",
4133                            _task_id, _calls, _time_target_ms);
4134 
4135   // Set up the bitmap and oop closures. Anything that uses them is
4136   // eventually called from this method, so it is OK to allocate these
4137   // locally, on the stack.
4138   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4139   CMOopClosure    oop_closure(_g1h, _cm, this);
4140   set_oop_closure(&oop_closure);
4141 
4142   if (_cm->has_overflown()) {
4143     // This can happen if the region stack or the mark stack overflows
4144     // during a GC pause and this task, after a yield point,
4145     // restarts. We have to abort as we need to get into the overflow
4146     // protocol which happens right at the end of this task.
4147     set_has_aborted();
4148   }
4149 
4150   // First drain any available SATB buffers. After this, we will not
4151   // look at SATB buffers before the next invocation of this method.
4152   // If enough completed SATB buffers are queued up, the regular clock
4153   // will abort this task so that it restarts.
4154   drain_satb_buffers();
4155   // ...then partially drain the local queue and the global stack
4156   drain_local_queue(true);
4157   drain_global_stack(true);
4158 
4159   // Then totally drain the region stack.  We will not look at
4160   // it again before the next invocation of this method. Entries on
4161   // the region stack are only added during evacuation pauses, for
4162   // which we have to yield. When we do, we abort the task anyway so
4163   // it will look at the region stack again when it restarts.
4164   bitmap_closure.set_scanning_heap_region(false);
4165   drain_region_stack(&bitmap_closure);
4166   // ...then partially drain the local queue and the global stack
4167   drain_local_queue(true);
4168   drain_global_stack(true);
4169 
4170   do {
4171     if (!has_aborted() && _curr_region != NULL) {
4172       // This means that we're already holding on to a region.
4173       assert(_finger != NULL, "if region is not NULL, then the finger "
4174              "should not be NULL either");
4175 
4176       // We might have restarted this task after an evacuation pause
4177       // which might have evacuated the region we're holding on to
4178       // underneath our feet. Let's read its limit again to make sure
4179       // that we do not iterate over a region of the heap that
4180       // contains garbage (update_region_limit() will also move
4181       // _finger to the start of the region if it is found empty).
4182       update_region_limit();
4183       // We will start from _finger not from the start of the region,
4184       // as we might be restarting this task after aborting half-way
4185       // through scanning this region. In this case, _finger points to
4186       // the address where we last found a marked object. If this is a
4187       // fresh region, _finger points to start().
4188       MemRegion mr = MemRegion(_finger, _region_limit);
4189 
4190       if (_cm->verbose_low())
4191         gclog_or_tty->print_cr("[%d] we're scanning part "
4192                                "["PTR_FORMAT", "PTR_FORMAT") "
4193                                "of region "PTR_FORMAT,
4194                                _task_id, _finger, _region_limit, _curr_region);
4195 
4196       // Let's iterate over the bitmap of the part of the
4197       // region that is left.
4198       bitmap_closure.set_scanning_heap_region(true);
4199       if (mr.is_empty() ||
4200           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4201         // We successfully completed iterating over the region. Now,
4202         // let's give up the region.
4203         giveup_current_region();
4204         regular_clock_call();
4205       } else {
4206         assert(has_aborted(), "currently the only way to do so");
4207         // The only way to abort the bitmap iteration is to return
4208         // false from the do_bit() method. However, inside the
4209         // do_bit() method we move the _finger to point to the
4210         // object currently being looked at. So, if we bail out, we
4211         // have definitely set _finger to something non-null.
4212         assert(_finger != NULL, "invariant");
4213 
4214         // Region iteration was actually aborted. So now _finger
4215         // points to the address of the object we last scanned. If we
4216         // leave it there, when we restart this task, we will rescan
4217         // the object. It is easy to avoid this. We move the finger by
4218         // enough to point to the next possible object header (the
4219         // bitmap knows by how much we need to move it as it knows its
4220         // granularity).
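        // (For example, with a one-bit-per-heap-word bitmap this would
        // simply move the finger to the next heap word.)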
4221         assert(_finger < _region_limit, "invariant");
4222         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
4223         // Check if bitmap iteration was aborted while scanning the last object
4224         if (new_finger >= _region_limit) {
4225           giveup_current_region();
4226         } else {
4227           move_finger_to(new_finger);
4228         }
4229       }
4230     }
4231     // At this point we have either completed iterating over the
4232     // region we were holding on to, or we have aborted.
4233 
4234     // We then partially drain the local queue and the global stack.
4235     // (Do we really need this?)
4236     drain_local_queue(true);
4237     drain_global_stack(true);
4238 
4239     // Read the note on the claim_region() method on why it might
4240     // return NULL with potentially more regions available for
4241     // claiming and why we have to check out_of_regions() to determine
4242     // whether we're done or not.
4243     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4244       // We are going to try to claim a new region. We should have
4245       // given up on the previous one.
4246       // Separated the asserts so that we know which one fires.
4247       assert(_curr_region  == NULL, "invariant");
4248       assert(_finger       == NULL, "invariant");
4249       assert(_region_limit == NULL, "invariant");
4250       if (_cm->verbose_low())
4251         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
4252       HeapRegion* claimed_region = _cm->claim_region(_task_id);
4253       if (claimed_region != NULL) {
4254         // Yes, we managed to claim one
4255         statsOnly( ++_regions_claimed );
4256 
4257         if (_cm->verbose_low())
4258           gclog_or_tty->print_cr("[%d] we successfully claimed "
4259                                  "region "PTR_FORMAT,
4260                                  _task_id, claimed_region);
4261 
4262         setup_for_region(claimed_region);
4263         assert(_curr_region == claimed_region, "invariant");
4264       }
4265       // It is important to call the regular clock here. It might take
4266       // a while to claim a region if, for example, we hit a large
4267       // block of empty regions. So we need to call the regular clock
4268       // method once round the loop to make sure it's called
4269       // frequently enough.
4270       regular_clock_call();
4271     }
4272 
4273     if (!has_aborted() && _curr_region == NULL) {
4274       assert(_cm->out_of_regions(),
4275              "at this point we should be out of regions");
4276     }
4277   } while (_curr_region != NULL && !has_aborted());
4278 
4279   if (!has_aborted()) {
4280     // We cannot check whether the global stack is empty, since other
4281     // tasks might be pushing objects to it concurrently. We also cannot
4282     // check if the region stack is empty because if a thread is aborting
4283     // it can push a partially done region back.
4284     assert(_cm->out_of_regions(),
4285            "at this point we should be out of regions");
4286 
4287     if (_cm->verbose_low())
4288       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4289 
4290     // Try to reduce the number of available SATB buffers so that
4291     // remark has less work to do.
4292     drain_satb_buffers();
4293   }
4294 
4295   // Since we've done everything else, we can now totally drain the
4296   // local queue and global stack.
4297   drain_local_queue(false);
4298   drain_global_stack(false);
4299 
4300   // Attempt at work stealing from other tasks' queues.
4301   if (do_stealing && !has_aborted()) {
4302     // We have not aborted. This means that we have finished all that
4303     // we could. Let's try to do some stealing...
4304 
4305     // We cannot check whether the global stack is empty, since other
4306     // tasks might be pushing objects to it concurrently. We also cannot
4307     // check if the region stack is empty because if a thread is aborting
4308     // it can push a partially done region back.
4309     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4310            "only way to reach here");
4311 
4312     if (_cm->verbose_low())
4313       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4314 
4315     while (!has_aborted()) {
4316       oop obj;
4317       statsOnly( ++_steal_attempts );
4318 
4319       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4320         if (_cm->verbose_medium())
4321           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4322                                  _task_id, (void*) obj);
4323 
4324         statsOnly( ++_steals );
4325 
4326         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4327                "any stolen object should be marked");
4328         scan_object(obj);
4329 
4330         // And since we're towards the end, let's totally drain the
4331         // local queue and global stack.
4332         drain_local_queue(false);
4333         drain_global_stack(false);
4334       } else {
4335         break;
4336       }
4337     }
4338   }
4339 
4340   // If we are about to wrap up and go into termination, check if we
4341   // should raise the overflow flag.
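  // (force_overflow() is a stress-testing hook: when enabled it makes us
  // artificially raise the overflow flag here so that the overflow /
  // restart path below gets exercised.)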
4342   if (do_termination && !has_aborted()) {
4343     if (_cm->force_overflow()->should_force()) {
4344       _cm->set_has_overflown();
4345       regular_clock_call();
4346     }
4347   }
4348 
4349   // We still haven't aborted. Now, let's try to get into the
4350   // termination protocol.
4351   if (do_termination && !has_aborted()) {
4352     // We cannot check whether the global stack is empty, since other
4353     // tasks might be concurrently pushing objects on it. We also cannot
4354     // check if the region stack is empty because if a thread is aborting
4355     // it can push a partially done region back.
4356     // Separated the asserts so that we know which one fires.
4357     assert(_cm->out_of_regions(), "only way to reach here");
4358     assert(_task_queue->size() == 0, "only way to reach here");
4359 
4360     if (_cm->verbose_low())
4361       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4362 
4363     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4364     // The CMTask class also extends the TerminatorTerminator class,
4365     // hence its should_exit_termination() method will also decide
4366     // whether to exit the termination protocol or not.
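    // offer_termination() returns true only once all tasks have offered
    // termination; while this task waits in there, the terminator
    // periodically calls back into should_exit_termination(), giving the
    // task a chance to leave the protocol early (e.g. when it has been
    // asked to abort).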
4367     bool finished = _cm->terminator()->offer_termination(this);
4368     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4369     _termination_time_ms +=
4370       termination_end_time_ms - _termination_start_time_ms;
4371 
4372     if (finished) {
4373       // We're all done.
4374 
4375       if (_task_id == 0) {
4376         // let's allow task 0 to do this
4377         if (concurrent()) {
4378           assert(_cm->concurrent_marking_in_progress(), "invariant");
4379           // we need to set this to false before the next
4380           // safepoint. This way we ensure that the marking phase
4381           // doesn't observe any more heap expansions.
4382           _cm->clear_concurrent_marking_in_progress();
4383         }
4384       }
4385 
4386       // We can now guarantee that the global stack is empty, since
4387       // all other tasks have finished. We separated the guarantees so
4388       // that, if a condition is false, we can immediately find out
4389       // which one.
4390       guarantee(_cm->out_of_regions(), "only way to reach here");
4391       guarantee(_aborted_region.is_empty(), "only way to reach here");
4392       guarantee(_cm->region_stack_empty(), "only way to reach here");
4393       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4394       guarantee(_task_queue->size() == 0, "only way to reach here");
4395       guarantee(!_cm->has_overflown(), "only way to reach here");
4396       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4397       guarantee(!_cm->region_stack_overflow(), "only way to reach here");
4398 
4399       if (_cm->verbose_low())
4400         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4401     } else {
4402       // Apparently there's more work to do. Let's abort this task. It
4403       // will be restarted and we can hopefully find more things to do.
4404 
4405       if (_cm->verbose_low())
4406         gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
4407 
4408       set_has_aborted();
4409       statsOnly( ++_aborted_termination );
4410     }
4411   }
4412 
4413   // Mainly for debugging purposes, to make sure that a pointer to the
4414   // closure, which was allocated in this frame, doesn't escape it by
4415   // accident.
4416   set_oop_closure(NULL);
4417   double end_time_ms = os::elapsedVTime() * 1000.0;
4418   double elapsed_time_ms = end_time_ms - _start_time_ms;
4419   // Update the step history.
4420   _step_times_ms.add(elapsed_time_ms);
4421 
4422   if (has_aborted()) {
4423     // The task was aborted for some reason.
4424 
4425     statsOnly( ++_aborted );
4426 
4427     if (_has_timed_out) {
4428       double diff_ms = elapsed_time_ms - _time_target_ms;
4429       // Keep statistics of how well we did with respect to hitting
4430       // our target only if we actually timed out (if we aborted for
4431       // other reasons, then the results might get skewed).
4432       _marking_step_diffs_ms.add(diff_ms);
4433     }
4434 
4435     if (_cm->has_overflown()) {
4436       // This is the interesting one. We aborted because a global
4437       // overflow was raised. This means we have to restart the
4438       // marking phase and start iterating over regions. However, in
4439       // order to do this we have to make sure that all tasks stop
4440       // what they are doing and re-initialise in a safe manner. We
4441       // will achieve this with the use of two barrier sync points.
4442 
4443       if (_cm->verbose_low())
4444         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4445 
4446       _cm->enter_first_sync_barrier(_task_id);
4447       // When we exit this sync barrier we know that all tasks have
4448       // stopped doing marking work. So, it's now safe to
4449       // re-initialise our data structures. At the end of this method,
4450       // task 0 will clear the global data structures.
4451 
4452       statsOnly( ++_aborted_overflow );
4453 
4454       // We clear the local state of this task...
4455       clear_region_fields();
4456 
4457       // ...and enter the second barrier.
4458       _cm->enter_second_sync_barrier(_task_id);
4459       // At this point everything has been re-initialised and we're
4460       // ready to restart.
4461     }
4462 
4463     if (_cm->verbose_low()) {
4464       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4465                              "elapsed = %1.2lfms <<<<<<<<<<",
4466                              _task_id, _time_target_ms, elapsed_time_ms);
4467       if (_cm->has_aborted())
4468         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4469                                _task_id);
4470     }
4471   } else {
4472     if (_cm->verbose_low())
4473       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4474                              "elapsed = %1.2lfms <<<<<<<<<<",
4475                              _task_id, _time_target_ms, elapsed_time_ms);
4476   }
4477 
4478   _claimed = false;
4479 }
4480 
4481 CMTask::CMTask(int task_id,
4482                ConcurrentMark* cm,
4483                CMTaskQueue* task_queue,
4484                CMTaskQueueSet* task_queues)
4485   : _g1h(G1CollectedHeap::heap()),
4486     _task_id(task_id), _cm(cm),
4487     _claimed(false),
4488     _nextMarkBitMap(NULL), _hash_seed(17),
4489     _task_queue(task_queue),
4490     _task_queues(task_queues),
4491     _oop_closure(NULL),
4492     _aborted_region(MemRegion()) {
4493   guarantee(task_queue != NULL, "invariant");
4494   guarantee(task_queues != NULL, "invariant");
4495 
4496   statsOnly( _clock_due_to_scanning = 0;
4497              _clock_due_to_marking  = 0 );
4498 
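  // Seed the marking step diff statistics with an initial 0.5 ms entry.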
4499   _marking_step_diffs_ms.add(0.5);
4500 }
4501 
4502 // These are formatting macros that are used below to ensure
4503 // consistent formatting. The *_H_* versions are used to format the
4504 // header for a particular value and they should be kept consistent
4505 // with the corresponding macro. Also note that most of the macros add
4506 // the necessary white space (as a prefix) which makes them a bit
4507 // easier to compose.
4508 
4509 // All the output lines are prefixed with this string to be able to
4510 // identify them easily in a large log file.
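// A simple grep for "###" over the log is enough to extract them.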
4511 #define G1PPRL_LINE_PREFIX            "###"
4512 
4513 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
4514 #ifdef _LP64
4515 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4516 #else // _LP64
4517 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4518 #endif // _LP64
4519 
4520 // For per-region info
4521 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4522 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4523 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
4524 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4525 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4526 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4527 
4528 // For summary info
4529 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
4530 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
4531 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
4532 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
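
// Put together, the per-region macros above produce lines that look
// roughly like the following (the values and exact column widths are
// illustrative only):
//
// ###   OLD  0x00000000f0000000-0x00000000f0100000     524288     491520     460800            2.5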
4533 
4534 G1PrintRegionLivenessInfoClosure::
4535 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4536   : _out(out),
4537     _total_used_bytes(0), _total_capacity_bytes(0),
4538     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4539     _hum_used_bytes(0), _hum_capacity_bytes(0),
4540     _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4541   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4542   MemRegion g1_committed = g1h->g1_committed();
4543   MemRegion g1_reserved = g1h->g1_reserved();
4544   double now = os::elapsedTime();
4545 
4546   // Print the header of the output.
4547   _out->cr();
4548   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4549   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4550                  G1PPRL_SUM_ADDR_FORMAT("committed")
4551                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4552                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4553                  g1_committed.start(), g1_committed.end(),
4554                  g1_reserved.start(), g1_reserved.end(),
4555                  HeapRegion::GrainBytes);
4556   _out->print_cr(G1PPRL_LINE_PREFIX);
4557   _out->print_cr(G1PPRL_LINE_PREFIX
4558                  G1PPRL_TYPE_H_FORMAT
4559                  G1PPRL_ADDR_BASE_H_FORMAT
4560                  G1PPRL_BYTE_H_FORMAT
4561                  G1PPRL_BYTE_H_FORMAT
4562                  G1PPRL_BYTE_H_FORMAT
4563                  G1PPRL_DOUBLE_H_FORMAT,
4564                  "type", "address-range",
4565                  "used", "prev-live", "next-live", "gc-eff");
4566 }
4567 
4568 // It takes a pointer to one of the _hum_* fields, deduces the
4569 // corresponding value for a region in a humongous region series
4570 // (either the region size, or what's left if the _hum_* field is
4571 // less than the region size), and updates the _hum_* field accordingly.
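// For example (illustrative): with a 1 MB region size and a humongous
// series whose "starts humongous" region recorded 2.5 MB in one of these
// fields, three successive calls return 1 MB, 1 MB, and 0.5 MB, after
// which the field has been drained to zero.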
4572 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4573   size_t bytes = 0;
4574   // The > 0 check is to deal with the prev and next live bytes which
4575   // could be 0.
4576   if (*hum_bytes > 0) {
4577     bytes = MIN2((size_t) HeapRegion::GrainBytes, *hum_bytes);
4578     *hum_bytes -= bytes;
4579   }
4580   return bytes;
4581 }
4582 
4583 // It deduces the values for a region in a humongous region series
4584 // from the _hum_* fields and updates those accordingly. It assumes
4585 // that the _hum_* fields have already been set up from the "starts
4586 // humongous" region and we visit the regions in address order.
4587 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4588                                                      size_t* capacity_bytes,
4589                                                      size_t* prev_live_bytes,
4590                                                      size_t* next_live_bytes) {
4591   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4592   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4593   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4594   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4595   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4596 }
4597 
4598 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4599   const char* type = "";
4600   HeapWord* bottom       = r->bottom();
4601   HeapWord* end          = r->end();
4602   size_t capacity_bytes  = r->capacity();
4603   size_t used_bytes      = r->used();
4604   size_t prev_live_bytes = r->live_bytes();
4605   size_t next_live_bytes = r->next_live_bytes();
4606   double gc_eff          = r->gc_efficiency();
4607   if (r->used() == 0) {
4608     type = "FREE";
4609   } else if (r->is_survivor()) {
4610     type = "SURV";
4611   } else if (r->is_young()) {
4612     type = "EDEN";
4613   } else if (r->startsHumongous()) {
4614     type = "HUMS";
4615 
4616     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4617            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4618            "they should have been zeroed after the last time we used them");
4619     // Set up the _hum_* fields.
4620     _hum_capacity_bytes  = capacity_bytes;
4621     _hum_used_bytes      = used_bytes;
4622     _hum_prev_live_bytes = prev_live_bytes;
4623     _hum_next_live_bytes = next_live_bytes;
4624     get_hum_bytes(&used_bytes, &capacity_bytes,
4625                   &prev_live_bytes, &next_live_bytes);
4626     end = bottom + HeapRegion::GrainWords;
4627   } else if (r->continuesHumongous()) {
4628     type = "HUMC";
4629     get_hum_bytes(&used_bytes, &capacity_bytes,
4630                   &prev_live_bytes, &next_live_bytes);
4631     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4632   } else {
4633     type = "OLD";
4634   }
4635 
4636   _total_used_bytes      += used_bytes;
4637   _total_capacity_bytes  += capacity_bytes;
4638   _total_prev_live_bytes += prev_live_bytes;
4639   _total_next_live_bytes += next_live_bytes;
4640 
4641   // Print a line for this particular region.
4642   _out->print_cr(G1PPRL_LINE_PREFIX
4643                  G1PPRL_TYPE_FORMAT
4644                  G1PPRL_ADDR_BASE_FORMAT
4645                  G1PPRL_BYTE_FORMAT
4646                  G1PPRL_BYTE_FORMAT
4647                  G1PPRL_BYTE_FORMAT
4648                  G1PPRL_DOUBLE_FORMAT,
4649                  type, bottom, end,
4650                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4651 
4652   return false;
4653 }
4654 
4655 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4656   // Print the footer of the output.
4657   _out->print_cr(G1PPRL_LINE_PREFIX);
4658   _out->print_cr(G1PPRL_LINE_PREFIX
4659                  " SUMMARY"
4660                  G1PPRL_SUM_MB_FORMAT("capacity")
4661                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4662                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4663                  G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4664                  bytes_to_mb(_total_capacity_bytes),
4665                  bytes_to_mb(_total_used_bytes),
4666                  perc(_total_used_bytes, _total_capacity_bytes),
4667                  bytes_to_mb(_total_prev_live_bytes),
4668                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4669                  bytes_to_mb(_total_next_live_bytes),
4670                  perc(_total_next_live_bytes, _total_capacity_bytes));
4671   _out->cr();
4672 }