1 /*
   2  * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/symbolTable.hpp"
  27 #include "gc_implementation/g1/concurrentMark.hpp"
  28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  31 #include "gc_implementation/g1/g1RemSet.hpp"
  32 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  33 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
  34 #include "gc_implementation/shared/vmGCOperations.hpp"
  35 #include "memory/genOopClosures.inline.hpp"
  36 #include "memory/referencePolicy.hpp"
  37 #include "memory/resourceArea.hpp"
  38 #include "oops/oop.inline.hpp"
  39 #include "runtime/handles.inline.hpp"
  40 #include "runtime/java.hpp"
  41 
  42 //
  43 // CMS Bit Map Wrapper
  44 
  45 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
  46   _bm((uintptr_t*)NULL,0),
  47   _shifter(shifter) {
  48   _bmStartWord = (HeapWord*)(rs.base());
  49   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  50   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
  51                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  52 
  53   guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  54   // For now we'll just commit all of the bit map up fromt.
  55   // Later on we'll try to be more parsimonious with swap.
  56   guarantee(_virtual_space.initialize(brs, brs.size()),
  57             "couldn't reseve backing store for CMS bit map");
  58   assert(_virtual_space.committed_size() == brs.size(),
  59          "didn't reserve backing store for all of CMS bit map?");
  60   _bm.set_map((uintptr_t*)_virtual_space.low());
  61   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
  62          _bmWordSize, "inconsistency in bit map sizing");
  63   _bm.set_size(_bmWordSize >> _shifter);
  64 }
  65 
  66 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  67                                                HeapWord* limit) const {
  68   // First we must round addr *up* to a possible object boundary.
  69   addr = (HeapWord*)align_size_up((intptr_t)addr,
  70                                   HeapWordSize << _shifter);
  71   size_t addrOffset = heapWordToOffset(addr);
  72   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  73   size_t limitOffset = heapWordToOffset(limit);
  74   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  75   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  76   assert(nextAddr >= addr, "get_next_one postcondition");
  77   assert(nextAddr == limit || isMarked(nextAddr),
  78          "get_next_one postcondition");
  79   return nextAddr;
  80 }
  81 
  82 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  83                                                  HeapWord* limit) const {
  84   size_t addrOffset = heapWordToOffset(addr);
  85   if (limit == NULL) limit = _bmStartWord + _bmWordSize;
  86   size_t limitOffset = heapWordToOffset(limit);
  87   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  88   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  89   assert(nextAddr >= addr, "get_next_one postcondition");
  90   assert(nextAddr == limit || !isMarked(nextAddr),
  91          "get_next_one postcondition");
  92   return nextAddr;
  93 }
  94 
  95 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
  96   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
  97   return (int) (diff >> _shifter);
  98 }
  99 
 100 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
 101   HeapWord* left  = MAX2(_bmStartWord, mr.start());
 102   HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
 103   if (right > left) {
 104     // Right-open interval [leftOffset, rightOffset).
 105     return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
 106   } else {
 107     return true;
 108   }
 109 }
 110 
 111 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
 112                                              size_t    from_start_index,
 113                                              HeapWord* to_start_word,
 114                                              size_t    word_num) {
 115   _bm.mostly_disjoint_range_union(from_bitmap,
 116                                   from_start_index,
 117                                   heapWordToOffset(to_start_word),
 118                                   word_num);
 119 }
 120 
 121 #ifndef PRODUCT
 122 bool CMBitMapRO::covers(ReservedSpace rs) const {
 123   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 124   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
 125          "size inconsistency");
 126   return _bmStartWord == (HeapWord*)(rs.base()) &&
 127          _bmWordSize  == rs.size()>>LogHeapWordSize;
 128 }
 129 #endif
 130 
 131 void CMBitMap::clearAll() {
 132   _bm.clear();
 133   return;
 134 }
 135 
 136 void CMBitMap::markRange(MemRegion mr) {
 137   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 138   assert(!mr.is_empty(), "unexpected empty region");
 139   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 140           ((HeapWord *) mr.end())),
 141          "markRange memory region end is not card aligned");
 142   // convert address range into offset range
 143   _bm.at_put_range(heapWordToOffset(mr.start()),
 144                    heapWordToOffset(mr.end()), true);
 145 }
 146 
 147 void CMBitMap::clearRange(MemRegion mr) {
 148   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 149   assert(!mr.is_empty(), "unexpected empty region");
 150   // convert address range into offset range
 151   _bm.at_put_range(heapWordToOffset(mr.start()),
 152                    heapWordToOffset(mr.end()), false);
 153 }
 154 
 155 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 156                                             HeapWord* end_addr) {
 157   HeapWord* start = getNextMarkedWordAddress(addr);
 158   start = MIN2(start, end_addr);
 159   HeapWord* end   = getNextUnmarkedWordAddress(start);
 160   end = MIN2(end, end_addr);
 161   assert(start <= end, "Consistency check");
 162   MemRegion mr(start, end);
 163   if (!mr.is_empty()) {
 164     clearRange(mr);
 165   }
 166   return mr;
 167 }
 168 
 169 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 170   _base(NULL), _cm(cm)
 171 #ifdef ASSERT
 172   , _drain_in_progress(false)
 173   , _drain_in_progress_yields(false)
 174 #endif
 175 {}
 176 
 177 void CMMarkStack::allocate(size_t size) {
 178   _base = NEW_C_HEAP_ARRAY(oop, size);
 179   if (_base == NULL)
 180     vm_exit_during_initialization("Failed to allocate "
 181                                   "CM region mark stack");
 182   _index = 0;
 183   // QQQQ cast ...
 184   _capacity = (jint) size;
 185   _oops_do_bound = -1;
 186   NOT_PRODUCT(_max_depth = 0);
 187 }
 188 
 189 CMMarkStack::~CMMarkStack() {
 190   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
 191 }
 192 
 193 void CMMarkStack::par_push(oop ptr) {
 194   while (true) {
 195     if (isFull()) {
 196       _overflow = true;
 197       return;
 198     }
 199     // Otherwise...
 200     jint index = _index;
 201     jint next_index = index+1;
 202     jint res = Atomic::cmpxchg(next_index, &_index, index);
 203     if (res == index) {
 204       _base[index] = ptr;
 205       // Note that we don't maintain this atomically.  We could, but it
 206       // doesn't seem necessary.
 207       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 208       return;
 209     }
 210     // Otherwise, we need to try again.
 211   }
 212 }
 213 
 214 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 215   while (true) {
 216     if (isFull()) {
 217       _overflow = true;
 218       return;
 219     }
 220     // Otherwise...
 221     jint index = _index;
 222     jint next_index = index + n;
 223     if (next_index > _capacity) {
 224       _overflow = true;
 225       return;
 226     }
 227     jint res = Atomic::cmpxchg(next_index, &_index, index);
 228     if (res == index) {
 229       for (int i = 0; i < n; i++) {
 230         int ind = index + i;
 231         assert(ind < _capacity, "By overflow test above.");
 232         _base[ind] = ptr_arr[i];
 233       }
 234       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 235       return;
 236     }
 237     // Otherwise, we need to try again.
 238   }
 239 }
 240 
 241 
 242 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 243   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 244   jint start = _index;
 245   jint next_index = start + n;
 246   if (next_index > _capacity) {
 247     _overflow = true;
 248     return;
 249   }
 250   // Otherwise.
 251   _index = next_index;
 252   for (int i = 0; i < n; i++) {
 253     int ind = start + i;
 254     assert(ind < _capacity, "By overflow test above.");
 255     _base[ind] = ptr_arr[i];
 256   }
 257 }
 258 
 259 
 260 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 261   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 262   jint index = _index;
 263   if (index == 0) {
 264     *n = 0;
 265     return false;
 266   } else {
 267     int k = MIN2(max, index);
 268     jint new_ind = index - k;
 269     for (int j = 0; j < k; j++) {
 270       ptr_arr[j] = _base[new_ind + j];
 271     }
 272     _index = new_ind;
 273     *n = k;
 274     return true;
 275   }
 276 }
 277 
 278 
 279 CMRegionStack::CMRegionStack() : _base(NULL) {}
 280 
 281 void CMRegionStack::allocate(size_t size) {
 282   _base = NEW_C_HEAP_ARRAY(MemRegion, size);
 283   if (_base == NULL)
 284     vm_exit_during_initialization("Failed to allocate "
 285                                   "CM region mark stack");
 286   _index = 0;
 287   // QQQQ cast ...
 288   _capacity = (jint) size;
 289 }
 290 
 291 CMRegionStack::~CMRegionStack() {
 292   if (_base != NULL) FREE_C_HEAP_ARRAY(oop, _base);
 293 }
 294 
 295 void CMRegionStack::push_lock_free(MemRegion mr) {
 296   assert(mr.word_size() > 0, "Precondition");
 297   while (true) {
 298     jint index = _index;
 299 
 300     if (index >= _capacity) {
 301       _overflow = true;
 302       return;
 303     }
 304     // Otherwise...
 305     jint next_index = index+1;
 306     jint res = Atomic::cmpxchg(next_index, &_index, index);
 307     if (res == index) {
 308       _base[index] = mr;
 309       return;
 310     }
 311     // Otherwise, we need to try again.
 312   }
 313 }
 314 
 315 // Lock-free pop of the region stack. Called during the concurrent
 316 // marking / remark phases. Should only be called in tandem with
 317 // other lock-free pops.
 318 MemRegion CMRegionStack::pop_lock_free() {
 319   while (true) {
 320     jint index = _index;
 321 
 322     if (index == 0) {
 323       return MemRegion();
 324     }
 325     // Otherwise...
 326     jint next_index = index-1;
 327     jint res = Atomic::cmpxchg(next_index, &_index, index);
 328     if (res == index) {
 329       MemRegion mr = _base[next_index];
 330       if (mr.start() != NULL) {
 331         assert(mr.end() != NULL, "invariant");
 332         assert(mr.word_size() > 0, "invariant");
 333         return mr;
 334       } else {
 335         // that entry was invalidated... let's skip it
 336         assert(mr.end() == NULL, "invariant");
 337       }
 338     }
 339     // Otherwise, we need to try again.
 340   }
 341 }
 342 
 343 #if 0
 344 // The routines that manipulate the region stack with a lock are
 345 // not currently used. They should be retained, however, as a
 346 // diagnostic aid.
 347 
 348 void CMRegionStack::push_with_lock(MemRegion mr) {
 349   assert(mr.word_size() > 0, "Precondition");
 350   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 351 
 352   if (isFull()) {
 353     _overflow = true;
 354     return;
 355   }
 356 
 357   _base[_index] = mr;
 358   _index += 1;
 359 }
 360 
 361 MemRegion CMRegionStack::pop_with_lock() {
 362   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 363 
 364   while (true) {
 365     if (_index == 0) {
 366       return MemRegion();
 367     }
 368     _index -= 1;
 369 
 370     MemRegion mr = _base[_index];
 371     if (mr.start() != NULL) {
 372       assert(mr.end() != NULL, "invariant");
 373       assert(mr.word_size() > 0, "invariant");
 374       return mr;
 375     } else {
 376       // that entry was invalidated... let's skip it
 377       assert(mr.end() == NULL, "invariant");
 378     }
 379   }
 380 }
 381 #endif
 382 
 383 bool CMRegionStack::invalidate_entries_into_cset() {
 384   bool result = false;
 385   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 386   for (int i = 0; i < _oops_do_bound; ++i) {
 387     MemRegion mr = _base[i];
 388     if (mr.start() != NULL) {
 389       assert(mr.end() != NULL, "invariant");
 390       assert(mr.word_size() > 0, "invariant");
 391       HeapRegion* hr = g1h->heap_region_containing(mr.start());
 392       assert(hr != NULL, "invariant");
 393       if (hr->in_collection_set()) {
 394         // The region points into the collection set
 395         _base[i] = MemRegion();
 396         result = true;
 397       }
 398     } else {
 399       // that entry was invalidated... let's skip it
 400       assert(mr.end() == NULL, "invariant");
 401     }
 402   }
 403   return result;
 404 }
 405 
 406 template<class OopClosureClass>
 407 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 408   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 409          || SafepointSynchronize::is_at_safepoint(),
 410          "Drain recursion must be yield-safe.");
 411   bool res = true;
 412   debug_only(_drain_in_progress = true);
 413   debug_only(_drain_in_progress_yields = yield_after);
 414   while (!isEmpty()) {
 415     oop newOop = pop();
 416     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 417     assert(newOop->is_oop(), "Expected an oop");
 418     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 419            "only grey objects on this stack");
 420     // iterate over the oops in this oop, marking and pushing
 421     // the ones in CMS generation.
 422     newOop->oop_iterate(cl);
 423     if (yield_after && _cm->do_yield_check()) {
 424       res = false; break;
 425     }
 426   }
 427   debug_only(_drain_in_progress = false);
 428   return res;
 429 }
 430 
 431 void CMMarkStack::oops_do(OopClosure* f) {
 432   if (_index == 0) return;
 433   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
 434          "Bound must be set.");
 435   for (int i = 0; i < _oops_do_bound; i++) {
 436     f->do_oop(&_base[i]);
 437   }
 438   _oops_do_bound = -1;
 439 }
 440 
 441 bool ConcurrentMark::not_yet_marked(oop obj) const {
 442   return (_g1h->is_obj_ill(obj)
 443           || (_g1h->is_in_permanent(obj)
 444               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
 445 }
 446 
 447 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 448 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 449 #endif // _MSC_VER
 450 
 451 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
 452                                int max_regions) :
 453   _markBitMap1(rs, MinObjAlignment - 1),
 454   _markBitMap2(rs, MinObjAlignment - 1),
 455 
 456   _parallel_marking_threads(0),
 457   _sleep_factor(0.0),
 458   _marking_task_overhead(1.0),
 459   _cleanup_sleep_factor(0.0),
 460   _cleanup_task_overhead(1.0),
 461   _cleanup_list("Cleanup List"),
 462   _region_bm(max_regions, false /* in_resource_area*/),
 463   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
 464            CardTableModRefBS::card_shift,
 465            false /* in_resource_area*/),
 466   _prevMarkBitMap(&_markBitMap1),
 467   _nextMarkBitMap(&_markBitMap2),
 468   _at_least_one_mark_complete(false),
 469 
 470   _markStack(this),
 471   _regionStack(),
 472   // _finger set in set_non_marking_state
 473 
 474   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
 475   // _active_tasks set in set_non_marking_state
 476   // _tasks set inside the constructor
 477   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
 478   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
 479 
 480   _has_overflown(false),
 481   _concurrent(false),
 482   _has_aborted(false),
 483   _restart_for_overflow(false),
 484   _concurrent_marking_in_progress(false),
 485   _should_gray_objects(false),
 486 
 487   // _verbose_level set below
 488 
 489   _init_times(),
 490   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 491   _cleanup_times(),
 492   _total_counting_time(0.0),
 493   _total_rs_scrub_time(0.0),
 494 
 495   _parallel_workers(NULL)
 496 {
 497   CMVerboseLevel verbose_level =
 498     (CMVerboseLevel) G1MarkingVerboseLevel;
 499   if (verbose_level < no_verbose)
 500     verbose_level = no_verbose;
 501   if (verbose_level > high_verbose)
 502     verbose_level = high_verbose;
 503   _verbose_level = verbose_level;
 504 
 505   if (verbose_low())
 506     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 507                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
 508 
 509   _markStack.allocate(MarkStackSize);
 510   _regionStack.allocate(G1MarkRegionStackSize);
 511 
 512   // Create & start a ConcurrentMark thread.
 513   _cmThread = new ConcurrentMarkThread(this);
 514   assert(cmThread() != NULL, "CM Thread should have been created");
 515   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 516 
 517   _g1h = G1CollectedHeap::heap();
 518   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 519   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
 520   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
 521 
 522   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 523   satb_qs.set_buffer_size(G1SATBBufferSize);
 524 
 525   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
 526   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
 527 
 528   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 529   _active_tasks = _max_task_num;
 530   for (int i = 0; i < (int) _max_task_num; ++i) {
 531     CMTaskQueue* task_queue = new CMTaskQueue();
 532     task_queue->initialize();
 533     _task_queues->register_queue(i, task_queue);
 534 
 535     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
 536     _accum_task_vtime[i] = 0.0;
 537   }
 538 
 539   if (ConcGCThreads > ParallelGCThreads) {
 540     vm_exit_during_initialization("Can't have more ConcGCThreads "
 541                                   "than ParallelGCThreads.");
 542   }
 543   if (ParallelGCThreads == 0) {
 544     // if we are not running with any parallel GC threads we will not
 545     // spawn any marking threads either
 546     _parallel_marking_threads =   0;
 547     _sleep_factor             = 0.0;
 548     _marking_task_overhead    = 1.0;
 549   } else {
 550     if (ConcGCThreads > 0) {
 551       // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
 552       // if both are set
 553 
 554       _parallel_marking_threads = ConcGCThreads;
 555       _sleep_factor             = 0.0;
 556       _marking_task_overhead    = 1.0;
 557     } else if (G1MarkingOverheadPercent > 0) {
 558       // we will calculate the number of parallel marking threads
 559       // based on a target overhead with respect to the soft real-time
 560       // goal
 561 
 562       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 563       double overall_cm_overhead =
 564         (double) MaxGCPauseMillis * marking_overhead /
 565         (double) GCPauseIntervalMillis;
 566       double cpu_ratio = 1.0 / (double) os::processor_count();
 567       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 568       double marking_task_overhead =
 569         overall_cm_overhead / marking_thread_num *
 570                                                 (double) os::processor_count();
 571       double sleep_factor =
 572                          (1.0 - marking_task_overhead) / marking_task_overhead;
 573 
 574       _parallel_marking_threads = (size_t) marking_thread_num;
 575       _sleep_factor             = sleep_factor;
 576       _marking_task_overhead    = marking_task_overhead;
 577     } else {
 578       _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
 579       _sleep_factor             = 0.0;
 580       _marking_task_overhead    = 1.0;
 581     }
 582 
 583     if (parallel_marking_threads() > 1)
 584       _cleanup_task_overhead = 1.0;
 585     else
 586       _cleanup_task_overhead = marking_task_overhead();
 587     _cleanup_sleep_factor =
 588                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 589 
 590 #if 0
 591     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 592     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 593     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 594     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 595     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 596 #endif
 597 
 598     guarantee(parallel_marking_threads() > 0, "peace of mind");
 599     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
 600          (int) _parallel_marking_threads, false, true);
 601     if (_parallel_workers == NULL) {
 602       vm_exit_during_initialization("Failed necessary allocation.");
 603     } else {
 604       _parallel_workers->initialize_workers();
 605     }
 606   }
 607 
 608   // so that the call below can read a sensible value
 609   _heap_start = (HeapWord*) rs.base();
 610   set_non_marking_state();
 611 }
 612 
 613 void ConcurrentMark::update_g1_committed(bool force) {
 614   // If concurrent marking is not in progress, then we do not need to
 615   // update _heap_end. This has a subtle and important
 616   // side-effect. Imagine that two evacuation pauses happen between
 617   // marking completion and remark. The first one can grow the
 618   // heap (hence now the finger is below the heap end). Then, the
 619   // second one could unnecessarily push regions on the region
 620   // stack. This causes the invariant that the region stack is empty
 621   // at the beginning of remark to be false. By ensuring that we do
 622   // not observe heap expansions after marking is complete, then we do
 623   // not have this problem.
 624   if (!concurrent_marking_in_progress() && !force)
 625     return;
 626 
 627   MemRegion committed = _g1h->g1_committed();
 628   assert(committed.start() == _heap_start, "start shouldn't change");
 629   HeapWord* new_end = committed.end();
 630   if (new_end > _heap_end) {
 631     // The heap has been expanded.
 632 
 633     _heap_end = new_end;
 634   }
 635   // Notice that the heap can also shrink. However, this only happens
 636   // during a Full GC (at least currently) and the entire marking
 637   // phase will bail out and the task will not be restarted. So, let's
 638   // do nothing.
 639 }
 640 
 641 void ConcurrentMark::reset() {
 642   // Starting values for these two. This should be called in a STW
 643   // phase. CM will be notified of any future g1_committed expansions
 644   // will be at the end of evacuation pauses, when tasks are
 645   // inactive.
 646   MemRegion committed = _g1h->g1_committed();
 647   _heap_start = committed.start();
 648   _heap_end   = committed.end();
 649 
 650   // Separated the asserts so that we know which one fires.
 651   assert(_heap_start != NULL, "heap bounds should look ok");
 652   assert(_heap_end != NULL, "heap bounds should look ok");
 653   assert(_heap_start < _heap_end, "heap bounds should look ok");
 654 
 655   // reset all the marking data structures and any necessary flags
 656   clear_marking_state();
 657 
 658   if (verbose_low())
 659     gclog_or_tty->print_cr("[global] resetting");
 660 
 661   // We do reset all of them, since different phases will use
 662   // different number of active threads. So, it's easiest to have all
 663   // of them ready.
 664   for (int i = 0; i < (int) _max_task_num; ++i) {
 665     _tasks[i]->reset(_nextMarkBitMap);
 666   }
 667 
 668   // we need this to make sure that the flag is on during the evac
 669   // pause with initial mark piggy-backed
 670   set_concurrent_marking_in_progress();
 671 }
 672 
 673 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
 674   assert(active_tasks <= _max_task_num, "we should not have more");
 675 
 676   _active_tasks = active_tasks;
 677   // Need to update the three data structures below according to the
 678   // number of active threads for this phase.
 679   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 680   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 681   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 682 
 683   _concurrent = concurrent;
 684   // We propagate this to all tasks, not just the active ones.
 685   for (int i = 0; i < (int) _max_task_num; ++i)
 686     _tasks[i]->set_concurrent(concurrent);
 687 
 688   if (concurrent) {
 689     set_concurrent_marking_in_progress();
 690   } else {
 691     // We currently assume that the concurrent flag has been set to
 692     // false before we start remark. At this point we should also be
 693     // in a STW phase.
 694     assert(!concurrent_marking_in_progress(), "invariant");
 695     assert(_finger == _heap_end, "only way to get here");
 696     update_g1_committed(true);
 697   }
 698 }
 699 
 700 void ConcurrentMark::set_non_marking_state() {
 701   // We set the global marking state to some default values when we're
 702   // not doing marking.
 703   clear_marking_state();
 704   _active_tasks = 0;
 705   clear_concurrent_marking_in_progress();
 706 }
 707 
 708 ConcurrentMark::~ConcurrentMark() {
 709   for (int i = 0; i < (int) _max_task_num; ++i) {
 710     delete _task_queues->queue(i);
 711     delete _tasks[i];
 712   }
 713   delete _task_queues;
 714   FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
 715 }
 716 
 717 // This closure is used to mark refs into the g1 generation
 718 // from external roots in the CMS bit map.
 719 // Called at the first checkpoint.
 720 //
 721 
 722 void ConcurrentMark::clearNextBitmap() {
 723   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 724   G1CollectorPolicy* g1p = g1h->g1_policy();
 725 
 726   // Make sure that the concurrent mark thread looks to still be in
 727   // the current cycle.
 728   guarantee(cmThread()->during_cycle(), "invariant");
 729 
 730   // We are finishing up the current cycle by clearing the next
 731   // marking bitmap and getting it ready for the next cycle. During
 732   // this time no other cycle can start. So, let's make sure that this
 733   // is the case.
 734   guarantee(!g1h->mark_in_progress(), "invariant");
 735 
 736   // clear the mark bitmap (no grey objects to start with).
 737   // We need to do this in chunks and offer to yield in between
 738   // each chunk.
 739   HeapWord* start  = _nextMarkBitMap->startWord();
 740   HeapWord* end    = _nextMarkBitMap->endWord();
 741   HeapWord* cur    = start;
 742   size_t chunkSize = M;
 743   while (cur < end) {
 744     HeapWord* next = cur + chunkSize;
 745     if (next > end)
 746       next = end;
 747     MemRegion mr(cur,next);
 748     _nextMarkBitMap->clearRange(mr);
 749     cur = next;
 750     do_yield_check();
 751 
 752     // Repeat the asserts from above. We'll do them as asserts here to
 753     // minimize their overhead on the product. However, we'll have
 754     // them as guarantees at the beginning / end of the bitmap
 755     // clearing to get some checking in the product.
 756     assert(cmThread()->during_cycle(), "invariant");
 757     assert(!g1h->mark_in_progress(), "invariant");
 758   }
 759 
 760   // Repeat the asserts from above.
 761   guarantee(cmThread()->during_cycle(), "invariant");
 762   guarantee(!g1h->mark_in_progress(), "invariant");
 763 }
 764 
 765 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 766 public:
 767   bool doHeapRegion(HeapRegion* r) {
 768     if (!r->continuesHumongous()) {
 769       r->note_start_of_marking(true);
 770     }
 771     return false;
 772   }
 773 };
 774 
 775 void ConcurrentMark::checkpointRootsInitialPre() {
 776   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 777   G1CollectorPolicy* g1p = g1h->g1_policy();
 778 
 779   _has_aborted = false;
 780 
 781 #ifndef PRODUCT
 782   if (G1PrintReachableAtInitialMark) {
 783     print_reachable("at-cycle-start",
 784                     true /* use_prev_marking */, true /* all */);
 785   }
 786 #endif
 787 
 788   // Initialise marking structures. This has to be done in a STW phase.
 789   reset();
 790 }
 791 
 792 class CMMarkRootsClosure: public OopsInGenClosure {
 793 private:
 794   ConcurrentMark*  _cm;
 795   G1CollectedHeap* _g1h;
 796   bool             _do_barrier;
 797 
 798 public:
 799   CMMarkRootsClosure(ConcurrentMark* cm,
 800                      G1CollectedHeap* g1h,
 801                      bool do_barrier) : _cm(cm), _g1h(g1h),
 802                                         _do_barrier(do_barrier) { }
 803 
 804   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
 805   virtual void do_oop(      oop* p) { do_oop_work(p); }
 806 
 807   template <class T> void do_oop_work(T* p) {
 808     T heap_oop = oopDesc::load_heap_oop(p);
 809     if (!oopDesc::is_null(heap_oop)) {
 810       oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
 811       assert(obj->is_oop() || obj->mark() == NULL,
 812              "expected an oop, possibly with mark word displaced");
 813       HeapWord* addr = (HeapWord*)obj;
 814       if (_g1h->is_in_g1_reserved(addr)) {
 815         _cm->grayRoot(obj);
 816       }
 817     }
 818     if (_do_barrier) {
 819       assert(!_g1h->is_in_g1_reserved(p),
 820              "Should be called on external roots");
 821       do_barrier(p);
 822     }
 823   }
 824 };
 825 
 826 void ConcurrentMark::checkpointRootsInitialPost() {
 827   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 828 
 829   // For each region note start of marking.
 830   NoteStartOfMarkHRClosure startcl;
 831   g1h->heap_region_iterate(&startcl);
 832 
 833   // Start weak-reference discovery.
 834   ReferenceProcessor* rp = g1h->ref_processor();
 835   rp->verify_no_references_recorded();
 836   rp->enable_discovery(); // enable ("weak") refs discovery
 837   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 838 
 839   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 840   // This is the start of  the marking cycle, we're expected all
 841   // threads to have SATB queues with active set to false.
 842   satb_mq_set.set_active_all_threads(true, /* new active value */
 843                                      false /* expected_active */);
 844 
 845   // update_g1_committed() will be called at the end of an evac pause
 846   // when marking is on. So, it's also called at the end of the
 847   // initial-mark pause to update the heap end, if the heap expands
 848   // during it. No need to call it here.
 849 }
 850 
 851 // Checkpoint the roots into this generation from outside
 852 // this generation. [Note this initial checkpoint need only
 853 // be approximate -- we'll do a catch up phase subsequently.]
 854 void ConcurrentMark::checkpointRootsInitial() {
 855   assert(SafepointSynchronize::is_at_safepoint(), "world should be stopped");
 856   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 857 
 858   double start = os::elapsedTime();
 859 
 860   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
 861   g1p->record_concurrent_mark_init_start();
 862   checkpointRootsInitialPre();
 863 
 864   // YSR: when concurrent precleaning is in place, we'll
 865   // need to clear the cached card table here
 866 
 867   ResourceMark rm;
 868   HandleMark  hm;
 869 
 870   g1h->ensure_parsability(false);
 871   g1h->perm_gen()->save_marks();
 872 
 873   CMMarkRootsClosure notOlder(this, g1h, false);
 874   CMMarkRootsClosure older(this, g1h, true);
 875 
 876   g1h->set_marking_started();
 877   g1h->rem_set()->prepare_for_younger_refs_iterate(false);
 878 
 879   g1h->process_strong_roots(true,    // activate StrongRootsScope
 880                             false,   // fake perm gen collection
 881                             SharedHeap::SO_AllClasses,
 882                             &notOlder, // Regular roots
 883                             NULL,     // do not visit active blobs
 884                             &older    // Perm Gen Roots
 885                             );
 886   checkpointRootsInitialPost();
 887 
 888   // Statistics.
 889   double end = os::elapsedTime();
 890   _init_times.add((end - start) * 1000.0);
 891 
 892   g1p->record_concurrent_mark_init_end();
 893 }
 894 
 895 /*
 896    Notice that in the next two methods, we actually leave the STS
 897    during the barrier sync and join it immediately afterwards. If we
 898    do not do this, this then the following deadlock can occur: one
 899    thread could be in the barrier sync code, waiting for the other
 900    thread to also sync up, whereas another one could be trying to
 901    yield, while also waiting for the other threads to sync up too.
 902 
 903    Because the thread that does the sync barrier has left the STS, it
 904    is possible to be suspended for a Full GC or an evacuation pause
 905    could occur. This is actually safe, since the entering the sync
 906    barrier is one of the last things do_marking_step() does, and it
 907    doesn't manipulate any data structures afterwards.
 908 */
 909 
 910 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
 911   if (verbose_low())
 912     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
 913 
 914   ConcurrentGCThread::stsLeave();
 915   _first_overflow_barrier_sync.enter();
 916   ConcurrentGCThread::stsJoin();
 917   // at this point everyone should have synced up and not be doing any
 918   // more work
 919 
 920   if (verbose_low())
 921     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
 922 
 923   // let task 0 do this
 924   if (task_num == 0) {
 925     // task 0 is responsible for clearing the global data structures
 926     clear_marking_state();
 927 
 928     if (PrintGC) {
 929       gclog_or_tty->date_stamp(PrintGCDateStamps);
 930       gclog_or_tty->stamp(PrintGCTimeStamps);
 931       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 932     }
 933   }
 934 
 935   // after this, each task should reset its own data structures then
 936   // then go into the second barrier
 937 }
 938 
 939 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
 940   if (verbose_low())
 941     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
 942 
 943   ConcurrentGCThread::stsLeave();
 944   _second_overflow_barrier_sync.enter();
 945   ConcurrentGCThread::stsJoin();
 946   // at this point everything should be re-initialised and ready to go
 947 
 948   if (verbose_low())
 949     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
 950 }
 951 
 952 void ConcurrentMark::grayRoot(oop p) {
 953   HeapWord* addr = (HeapWord*) p;
 954   // We can't really check against _heap_start and _heap_end, since it
 955   // is possible during an evacuation pause with piggy-backed
 956   // initial-mark that the committed space is expanded during the
 957   // pause without CM observing this change. So the assertions below
 958   // is a bit conservative; but better than nothing.
 959   assert(_g1h->g1_committed().contains(addr),
 960          "address should be within the heap bounds");
 961 
 962   if (!_nextMarkBitMap->isMarked(addr))
 963     _nextMarkBitMap->parMark(addr);
 964 }
 965 
 966 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
 967   // The objects on the region have already been marked "in bulk" by
 968   // the caller. We only need to decide whether to push the region on
 969   // the region stack or not.
 970 
 971   if (!concurrent_marking_in_progress() || !_should_gray_objects)
 972     // We're done with marking and waiting for remark. We do not need to
 973     // push anything else on the region stack.
 974     return;
 975 
 976   HeapWord* finger = _finger;
 977 
 978   if (verbose_low())
 979     gclog_or_tty->print_cr("[global] attempting to push "
 980                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
 981                            PTR_FORMAT, mr.start(), mr.end(), finger);
 982 
 983   if (mr.start() < finger) {
 984     // The finger is always heap region aligned and it is not possible
 985     // for mr to span heap regions.
 986     assert(mr.end() <= finger, "invariant");
 987 
 988     // Separated the asserts so that we know which one fires.
 989     assert(mr.start() <= mr.end(),
 990            "region boundaries should fall within the committed space");
 991     assert(_heap_start <= mr.start(),
 992            "region boundaries should fall within the committed space");
 993     assert(mr.end() <= _heap_end,
 994            "region boundaries should fall within the committed space");
 995     if (verbose_low())
 996       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
 997                              "below the finger, pushing it",
 998                              mr.start(), mr.end());
 999 
1000     if (!region_stack_push_lock_free(mr)) {
1001       if (verbose_low())
1002         gclog_or_tty->print_cr("[global] region stack has overflown.");
1003     }
1004   }
1005 }
1006 
1007 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1008   // The object is not marked by the caller. We need to at least mark
1009   // it and maybe push in on the stack.
1010 
1011   HeapWord* addr = (HeapWord*)p;
1012   if (!_nextMarkBitMap->isMarked(addr)) {
1013     // We definitely need to mark it, irrespective whether we bail out
1014     // because we're done with marking.
1015     if (_nextMarkBitMap->parMark(addr)) {
1016       if (!concurrent_marking_in_progress() || !_should_gray_objects)
1017         // If we're done with concurrent marking and we're waiting for
1018         // remark, then we're not pushing anything on the stack.
1019         return;
1020 
1021       // No OrderAccess:store_load() is needed. It is implicit in the
1022       // CAS done in parMark(addr) above
1023       HeapWord* finger = _finger;
1024 
1025       if (addr < finger) {
1026         if (!mark_stack_push(oop(addr))) {
1027           if (verbose_low())
1028             gclog_or_tty->print_cr("[global] global stack overflow "
1029                                    "during parMark");
1030         }
1031       }
1032     }
1033   }
1034 }
1035 
1036 class CMConcurrentMarkingTask: public AbstractGangTask {
1037 private:
1038   ConcurrentMark*       _cm;
1039   ConcurrentMarkThread* _cmt;
1040 
1041 public:
1042   void work(int worker_i) {
1043     assert(Thread::current()->is_ConcurrentGC_thread(),
1044            "this should only be done by a conc GC thread");
1045     ResourceMark rm;
1046 
1047     double start_vtime = os::elapsedVTime();
1048 
1049     ConcurrentGCThread::stsJoin();
1050 
1051     assert((size_t) worker_i < _cm->active_tasks(), "invariant");
1052     CMTask* the_task = _cm->task(worker_i);
1053     the_task->record_start_time();
1054     if (!_cm->has_aborted()) {
1055       do {
1056         double start_vtime_sec = os::elapsedVTime();
1057         double start_time_sec = os::elapsedTime();
1058         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1059 
1060         the_task->do_marking_step(mark_step_duration_ms,
1061                                   true /* do_stealing    */,
1062                                   true /* do_termination */);
1063 
1064         double end_time_sec = os::elapsedTime();
1065         double end_vtime_sec = os::elapsedVTime();
1066         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1067         double elapsed_time_sec = end_time_sec - start_time_sec;
1068         _cm->clear_has_overflown();
1069 
1070         bool ret = _cm->do_yield_check(worker_i);
1071 
1072         jlong sleep_time_ms;
1073         if (!_cm->has_aborted() && the_task->has_aborted()) {
1074           sleep_time_ms =
1075             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1076           ConcurrentGCThread::stsLeave();
1077           os::sleep(Thread::current(), sleep_time_ms, false);
1078           ConcurrentGCThread::stsJoin();
1079         }
1080         double end_time2_sec = os::elapsedTime();
1081         double elapsed_time2_sec = end_time2_sec - start_time_sec;
1082 
1083 #if 0
1084           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1085                                  "overhead %1.4lf",
1086                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1087                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
1088           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1089                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1090 #endif
1091       } while (!_cm->has_aborted() && the_task->has_aborted());
1092     }
1093     the_task->record_end_time();
1094     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1095 
1096     ConcurrentGCThread::stsLeave();
1097 
1098     double end_vtime = os::elapsedVTime();
1099     _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1100   }
1101 
1102   CMConcurrentMarkingTask(ConcurrentMark* cm,
1103                           ConcurrentMarkThread* cmt) :
1104       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1105 
1106   ~CMConcurrentMarkingTask() { }
1107 };
1108 
1109 void ConcurrentMark::markFromRoots() {
1110   // we might be tempted to assert that:
1111   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1112   //        "inconsistent argument?");
1113   // However that wouldn't be right, because it's possible that
1114   // a safepoint is indeed in progress as a younger generation
1115   // stop-the-world GC happens even as we mark in this generation.
1116 
1117   _restart_for_overflow = false;
1118 
1119   size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
1120   set_phase(active_workers, true /* concurrent */);
1121 
1122   CMConcurrentMarkingTask markingTask(this, cmThread());
1123   if (parallel_marking_threads() > 0)
1124     _parallel_workers->run_task(&markingTask);
1125   else
1126     markingTask.work(0);
1127   print_stats();
1128 }
1129 
1130 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1131   // world is stopped at this checkpoint
1132   assert(SafepointSynchronize::is_at_safepoint(),
1133          "world should be stopped");
1134   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1135 
1136   // If a full collection has happened, we shouldn't do this.
1137   if (has_aborted()) {
1138     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1139     return;
1140   }
1141 
1142   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1143 
1144   if (VerifyDuringGC) {
1145     HandleMark hm;  // handle scope
1146     gclog_or_tty->print(" VerifyDuringGC:(before)");
1147     Universe::heap()->prepare_for_verify();
1148     Universe::verify(true, false, true);
1149   }
1150 
1151   G1CollectorPolicy* g1p = g1h->g1_policy();
1152   g1p->record_concurrent_mark_remark_start();
1153 
1154   double start = os::elapsedTime();
1155 
1156   checkpointRootsFinalWork();
1157 
1158   double mark_work_end = os::elapsedTime();
1159 
1160   weakRefsWork(clear_all_soft_refs);
1161 
1162   if (has_overflown()) {
1163     // Oops.  We overflowed.  Restart concurrent marking.
1164     _restart_for_overflow = true;
1165     // Clear the flag. We do not need it any more.
1166     clear_has_overflown();
1167     if (G1TraceMarkStackOverflow)
1168       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1169   } else {
1170     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1171     // We're done with marking.
1172     // This is the end of  the marking cycle, we're expected all
1173     // threads to have SATB queues with active set to true.
1174     satb_mq_set.set_active_all_threads(false, /* new active value */
1175                                        true /* expected_active */);
1176 
1177     if (VerifyDuringGC) {
1178       HandleMark hm;  // handle scope
1179       gclog_or_tty->print(" VerifyDuringGC:(after)");
1180       Universe::heap()->prepare_for_verify();
1181       Universe::heap()->verify(/* allow_dirty */      true,
1182                                /* silent */           false,
1183                                /* use_prev_marking */ false);
1184     }
1185     assert(!restart_for_overflow(), "sanity");
1186   }
1187 
1188   // Reset the marking state if marking completed
1189   if (!restart_for_overflow()) {
1190     set_non_marking_state();
1191   }
1192 
1193 #if VERIFY_OBJS_PROCESSED
1194   _scan_obj_cl.objs_processed = 0;
1195   ThreadLocalObjQueue::objs_enqueued = 0;
1196 #endif
1197 
1198   // Statistics
1199   double now = os::elapsedTime();
1200   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1201   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1202   _remark_times.add((now - start) * 1000.0);
1203 
1204   g1p->record_concurrent_mark_remark_end();
1205 }
1206 
1207 
1208 #define CARD_BM_TEST_MODE 0
1209 
1210 class CalcLiveObjectsClosure: public HeapRegionClosure {
1211 
1212   CMBitMapRO* _bm;
1213   ConcurrentMark* _cm;
1214   bool _changed;
1215   bool _yield;
1216   size_t _words_done;
1217   size_t _tot_live;
1218   size_t _tot_used;
1219   size_t _regions_done;
1220   double _start_vtime_sec;
1221 
1222   BitMap* _region_bm;
1223   BitMap* _card_bm;
1224   intptr_t _bottom_card_num;
1225   bool _final;
1226 
1227   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1228     for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1229 #if CARD_BM_TEST_MODE
1230       guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
1231 #else
1232       _card_bm->par_at_put(i - _bottom_card_num, 1);
1233 #endif
1234     }
1235   }
1236 
1237 public:
1238   CalcLiveObjectsClosure(bool final,
1239                          CMBitMapRO *bm, ConcurrentMark *cm,
1240                          BitMap* region_bm, BitMap* card_bm) :
1241     _bm(bm), _cm(cm), _changed(false), _yield(true),
1242     _words_done(0), _tot_live(0), _tot_used(0),
1243     _region_bm(region_bm), _card_bm(card_bm),_final(final),
1244     _regions_done(0), _start_vtime_sec(0.0)
1245   {
1246     _bottom_card_num =
1247       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1248                CardTableModRefBS::card_shift);
1249   }
1250 
1251   // It takes a region that's not empty (i.e., it has at least one
1252   // live object in it and sets its corresponding bit on the region
1253   // bitmap to 1. If the region is "starts humongous" it will also set
1254   // to 1 the bits on the region bitmap that correspond to its
1255   // associated "continues humongous" regions.
1256   void set_bit_for_region(HeapRegion* hr) {
1257     assert(!hr->continuesHumongous(), "should have filtered those out");
1258 
1259     size_t index = hr->hrs_index();
1260     if (!hr->startsHumongous()) {
1261       // Normal (non-humongous) case: just set the bit.
1262       _region_bm->par_at_put((BitMap::idx_t) index, true);
1263     } else {
1264       // Starts humongous case: calculate how many regions are part of
1265       // this humongous region and then set the bit range. It might
1266       // have been a bit more efficient to look at the object that
1267       // spans these humongous regions to calculate their number from
1268       // the object's size. However, it's a good idea to calculate
1269       // this based on the metadata itself, and not the region
1270       // contents, so that this code is not aware of what goes into
1271       // the humongous regions (in case this changes in the future).
1272       G1CollectedHeap* g1h = G1CollectedHeap::heap();
1273       size_t end_index = index + 1;
1274       while (end_index < g1h->n_regions()) {
1275         HeapRegion* chr = g1h->region_at(end_index);
1276         if (!chr->continuesHumongous()) {
1277           break;
1278         }
1279         end_index += 1;
1280       }
1281       _region_bm->par_at_put_range((BitMap::idx_t) index,
1282                                    (BitMap::idx_t) end_index, true);
1283     }
1284   }
1285 
1286   bool doHeapRegion(HeapRegion* hr) {
1287     if (!_final && _regions_done == 0)
1288       _start_vtime_sec = os::elapsedVTime();
1289 
1290     if (hr->continuesHumongous()) {
1291       // We will ignore these here and process them when their
1292       // associated "starts humongous" region is processed (see
1293       // set_bit_for_heap_region()). Note that we cannot rely on their
1294       // associated "starts humongous" region to have their bit set to
1295       // 1 since, due to the region chunking in the parallel region
1296       // iteration, a "continues humongous" region might be visited
1297       // before its associated "starts humongous".
1298       return false;
1299     }
1300 
1301     HeapWord* nextTop = hr->next_top_at_mark_start();
1302     HeapWord* start   = hr->top_at_conc_mark_count();
1303     assert(hr->bottom() <= start && start <= hr->end() &&
1304            hr->bottom() <= nextTop && nextTop <= hr->end() &&
1305            start <= nextTop,
1306            "Preconditions.");
1307     // Otherwise, record the number of word's we'll examine.
1308     size_t words_done = (nextTop - start);
1309     // Find the first marked object at or after "start".
1310     start = _bm->getNextMarkedWordAddress(start, nextTop);
1311     size_t marked_bytes = 0;
1312 
1313     // Below, the term "card num" means the result of shifting an address
1314     // by the card shift -- address 0 corresponds to card number 0.  One
1315     // must subtract the card num of the bottom of the heap to obtain a
1316     // card table index.
1317     // The first card num of the sequence of live cards currently being
1318     // constructed.  -1 ==> no sequence.
1319     intptr_t start_card_num = -1;
1320     // The last card num of the sequence of live cards currently being
1321     // constructed.  -1 ==> no sequence.
1322     intptr_t last_card_num = -1;
1323 
1324     while (start < nextTop) {
1325       if (_yield && _cm->do_yield_check()) {
1326         // We yielded.  It might be for a full collection, in which case
1327         // all bets are off; terminate the traversal.
1328         if (_cm->has_aborted()) {
1329           _changed = false;
1330           return true;
1331         } else {
1332           // Otherwise, it might be a collection pause, and the region
1333           // we're looking at might be in the collection set.  We'll
1334           // abandon this region.
1335           return false;
1336         }
1337       }
1338       oop obj = oop(start);
1339       int obj_sz = obj->size();
1340       // The card num of the start of the current object.
1341       intptr_t obj_card_num =
1342         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1343 
1344       HeapWord* obj_last = start + obj_sz - 1;
1345       intptr_t obj_last_card_num =
1346         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1347 
1348       if (obj_card_num != last_card_num) {
1349         if (start_card_num == -1) {
1350           assert(last_card_num == -1, "Both or neither.");
1351           start_card_num = obj_card_num;
1352         } else {
1353           assert(last_card_num != -1, "Both or neither.");
1354           assert(obj_card_num >= last_card_num, "Inv");
1355           if ((obj_card_num - last_card_num) > 1) {
1356             // Mark the last run, and start a new one.
1357             mark_card_num_range(start_card_num, last_card_num);
1358             start_card_num = obj_card_num;
1359           }
1360         }
1361 #if CARD_BM_TEST_MODE
1362         /*
1363         gclog_or_tty->print_cr("Setting bits from %d/%d.",
1364                                obj_card_num - _bottom_card_num,
1365                                obj_last_card_num - _bottom_card_num);
1366         */
1367         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1368           _card_bm->par_at_put(j - _bottom_card_num, 1);
1369         }
1370 #endif
1371       }
1372       // In any case, we set the last card num.
1373       last_card_num = obj_last_card_num;
1374 
1375       marked_bytes += (size_t)obj_sz * HeapWordSize;
1376       // Find the next marked object after this one.
1377       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1378       _changed = true;
1379     }
1380     // Handle the last range, if any.
1381     if (start_card_num != -1)
1382       mark_card_num_range(start_card_num, last_card_num);
1383     if (_final) {
1384       // Mark the allocated-since-marking portion...
1385       HeapWord* tp = hr->top();
1386       if (nextTop < tp) {
1387         start_card_num =
1388           intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1389         last_card_num =
1390           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1391         mark_card_num_range(start_card_num, last_card_num);
1392         // This definitely means the region has live objects.
1393         set_bit_for_region(hr);
1394       }
1395     }
1396 
1397     hr->add_to_marked_bytes(marked_bytes);
1398     // Update the live region bitmap.
1399     if (marked_bytes > 0) {
1400       set_bit_for_region(hr);
1401     }
1402     hr->set_top_at_conc_mark_count(nextTop);
1403     _tot_live += hr->next_live_bytes();
1404     _tot_used += hr->used();
1405     _words_done = words_done;
1406 
1407     if (!_final) {
1408       ++_regions_done;
1409       if (_regions_done % 10 == 0) {
1410         double end_vtime_sec = os::elapsedVTime();
1411         double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1412         if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1413           jlong sleep_time_ms =
1414             (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1415           os::sleep(Thread::current(), sleep_time_ms, false);
1416           _start_vtime_sec = end_vtime_sec;
1417         }
1418       }
1419     }
1420 
1421     return false;
1422   }
1423 
1424   bool changed() { return _changed;  }
1425   void reset()   { _changed = false; _words_done = 0; }
1426   void no_yield() { _yield = false; }
1427   size_t words_done() { return _words_done; }
1428   size_t tot_live() { return _tot_live; }
1429   size_t tot_used() { return _tot_used; }
1430 };
1431 
1432 
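     // Performs the concurrent (non-final) liveness count: clear the region
     // and card bitmaps, run CalcLiveObjectsClosure over every region, and
     // repeat until a complete pass observes no further changes.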
1433 void ConcurrentMark::calcDesiredRegions() {
1434   _region_bm.clear();
1435   _card_bm.clear();
1436   CalcLiveObjectsClosure calccl(false /*final*/,
1437                                 nextMarkBitMap(), this,
1438                                 &_region_bm, &_card_bm);
1439   G1CollectedHeap *g1h = G1CollectedHeap::heap();
1440   g1h->heap_region_iterate(&calccl);
1441 
1442   do {
1443     calccl.reset();
1444     g1h->heap_region_iterate(&calccl);
1445   } while (calccl.changed());
1446 }
1447 
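     // Parallel task used at the cleanup safepoint to repeat the liveness
     // count with the world stopped: each worker applies CalcLiveObjectsClosure
     // (in final mode, without yielding) to the regions it claims and records
     // its own live/used byte totals, which are summed up afterwards.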
1448 class G1ParFinalCountTask: public AbstractGangTask {
1449 protected:
1450   G1CollectedHeap* _g1h;
1451   CMBitMap* _bm;
1452   size_t _n_workers;
1453   size_t *_live_bytes;
1454   size_t *_used_bytes;
1455   BitMap* _region_bm;
1456   BitMap* _card_bm;
1457 public:
1458   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1459                       BitMap* region_bm, BitMap* card_bm) :
1460     AbstractGangTask("G1 final counting"), _g1h(g1h),
1461     _bm(bm), _region_bm(region_bm), _card_bm(card_bm)
1462   {
1463     if (ParallelGCThreads > 0)
1464       _n_workers = _g1h->workers()->total_workers();
1465     else
1466       _n_workers = 1;
1467     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1468     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1469   }
1470 
1471   ~G1ParFinalCountTask() {
1472     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1473     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1474   }
1475 
1476   void work(int i) {
1477     CalcLiveObjectsClosure calccl(true /*final*/,
1478                                   _bm, _g1h->concurrent_mark(),
1479                                   _region_bm, _card_bm);
1480     calccl.no_yield();
1481     if (G1CollectedHeap::use_parallel_gc_threads()) {
1482       _g1h->heap_region_par_iterate_chunked(&calccl, i,
1483                                             HeapRegion::FinalCountClaimValue);
1484     } else {
1485       _g1h->heap_region_iterate(&calccl);
1486     }
1487     assert(calccl.complete(), "Shouldn't have yielded!");
1488 
1489     assert((size_t) i < _n_workers, "invariant");
1490     _live_bytes[i] = calccl.tot_live();
1491     _used_bytes[i] = calccl.tot_used();
1492   }
1493   size_t live_bytes()  {
1494     size_t live_bytes = 0;
1495     for (size_t i = 0; i < _n_workers; ++i)
1496       live_bytes += _live_bytes[i];
1497     return live_bytes;
1498   }
1499   size_t used_bytes()  {
1500     size_t used_bytes = 0;
1501     for (size_t i = 0; i < _n_workers; ++i)
1502       used_bytes += _used_bytes[i];
1503     return used_bytes;
1504   }
1505 };
1506 
1507 class G1ParNoteEndTask;
1508 
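     // Per-worker closure applied to every region at the end of marking: it
     // notes the end of marking for the region, frees the region into the
     // worker's local cleanup list if it turned out to be completely empty,
     // and gathers per-worker timing and live-byte statistics.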
1509 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1510   G1CollectedHeap* _g1;
1511   int _worker_num;
1512   size_t _max_live_bytes;
1513   size_t _regions_claimed;
1514   size_t _freed_bytes;
1515   FreeRegionList* _local_cleanup_list;
1516   HumongousRegionSet* _humongous_proxy_set;
1517   HRRSCleanupTask* _hrrs_cleanup_task;
1518   double _claimed_region_time;
1519   double _max_region_time;
1520 
1521 public:
1522   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1523                              int worker_num,
1524                              FreeRegionList* local_cleanup_list,
1525                              HumongousRegionSet* humongous_proxy_set,
1526                              HRRSCleanupTask* hrrs_cleanup_task);
1527   size_t freed_bytes() { return _freed_bytes; }
1528 
1529   bool doHeapRegion(HeapRegion *r);
1530 
1531   size_t max_live_bytes() { return _max_live_bytes; }
1532   size_t regions_claimed() { return _regions_claimed; }
1533   double claimed_region_time_sec() { return _claimed_region_time; }
1534   double max_region_time_sec() { return _max_region_time; }
1535 };
1536 
1537 class G1ParNoteEndTask: public AbstractGangTask {
1538   friend class G1NoteEndOfConcMarkClosure;
1539 
1540 protected:
1541   G1CollectedHeap* _g1h;
1542   size_t _max_live_bytes;
1543   size_t _freed_bytes;
1544   FreeRegionList* _cleanup_list;
1545 
1546 public:
1547   G1ParNoteEndTask(G1CollectedHeap* g1h,
1548                    FreeRegionList* cleanup_list) :
1549     AbstractGangTask("G1 note end"), _g1h(g1h),
1550     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1551 
1552   void work(int i) {
1553     double start = os::elapsedTime();
1554     FreeRegionList local_cleanup_list("Local Cleanup List");
1555     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1556     HRRSCleanupTask hrrs_cleanup_task;
1557     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
1558                                            &humongous_proxy_set,
1559                                            &hrrs_cleanup_task);
1560     if (G1CollectedHeap::use_parallel_gc_threads()) {
1561       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1562                                             HeapRegion::NoteEndClaimValue);
1563     } else {
1564       _g1h->heap_region_iterate(&g1_note_end);
1565     }
1566     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1567 
1568     // Now update the lists
1569     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1570                                             NULL /* free_list */,
1571                                             &humongous_proxy_set,
1572                                             true /* par */);
1573     {
1574       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1575       _max_live_bytes += g1_note_end.max_live_bytes();
1576       _freed_bytes += g1_note_end.freed_bytes();
1577 
1578       _cleanup_list->add_as_tail(&local_cleanup_list);
1579       assert(local_cleanup_list.is_empty(), "post-condition");
1580 
1581       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1582     }
1583     double end = os::elapsedTime();
1584     if (G1PrintParCleanupStats) {
1585       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1586                           "claimed "SIZE_FORMAT" regions (tot = %8.3f ms, max = %8.3f ms).\n",
1587                           i, start, end, (end-start)*1000.0,
1588                           g1_note_end.regions_claimed(),
1589                           g1_note_end.claimed_region_time_sec()*1000.0,
1590                           g1_note_end.max_region_time_sec()*1000.0);
1591     }
1592   }
1593   size_t max_live_bytes() { return _max_live_bytes; }
1594   size_t freed_bytes() { return _freed_bytes; }
1595 };
1596 
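     // Parallel task that scrubs the remembered sets after counting:
     // essentially, entries whose source cards/regions were found to contain
     // no live data (according to the region and card bitmaps built above)
     // are pruned from the remembered sets.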
1597 class G1ParScrubRemSetTask: public AbstractGangTask {
1598 protected:
1599   G1RemSet* _g1rs;
1600   BitMap* _region_bm;
1601   BitMap* _card_bm;
1602 public:
1603   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1604                        BitMap* region_bm, BitMap* card_bm) :
1605     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1606     _region_bm(region_bm), _card_bm(card_bm)
1607   {}
1608 
1609   void work(int i) {
1610     if (G1CollectedHeap::use_parallel_gc_threads()) {
1611       _g1rs->scrub_par(_region_bm, _card_bm, i,
1612                        HeapRegion::ScrubRemSetClaimValue);
1613     } else {
1614       _g1rs->scrub(_region_bm, _card_bm);
1615     }
1616   }
1617 
1618 };
1619 
1620 G1NoteEndOfConcMarkClosure::
1621 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1622                            int worker_num,
1623                            FreeRegionList* local_cleanup_list,
1624                            HumongousRegionSet* humongous_proxy_set,
1625                            HRRSCleanupTask* hrrs_cleanup_task)
1626   : _g1(g1), _worker_num(worker_num),
1627     _max_live_bytes(0), _regions_claimed(0),
1628     _freed_bytes(0),
1629     _claimed_region_time(0.0), _max_region_time(0.0),
1630     _local_cleanup_list(local_cleanup_list),
1631     _humongous_proxy_set(humongous_proxy_set),
1632     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1633 
1634 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) {
1635   // We use a claim value of zero here because all regions
1636   // were claimed with value 1 in the FinalCount task.
1637   hr->reset_gc_time_stamp();
1638   if (!hr->continuesHumongous()) {
1639     double start = os::elapsedTime();
1640     _regions_claimed++;
1641     hr->note_end_of_marking();
1642     _max_live_bytes += hr->max_live_bytes();
1643     _g1->free_region_if_empty(hr,
1644                               &_freed_bytes,
1645                               _local_cleanup_list,
1646                               _humongous_proxy_set,
1647                               _hrrs_cleanup_task,
1648                               true /* par */);
1649     double region_time = (os::elapsedTime() - start);
1650     _claimed_region_time += region_time;
1651     if (region_time > _max_region_time) _max_region_time = region_time;
1652   }
1653   return false;
1654 }
1655 
1656 void ConcurrentMark::cleanup() {
1657   // world is stopped at this checkpoint
1658   assert(SafepointSynchronize::is_at_safepoint(),
1659          "world should be stopped");
1660   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1661 
1662   // If a full collection has happened, we shouldn't do this.
1663   if (has_aborted()) {
1664     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1665     return;
1666   }
1667 
1668   g1h->verify_region_sets_optional();
1669 
1670   if (VerifyDuringGC) {
1671     HandleMark hm;  // handle scope
1672     gclog_or_tty->print(" VerifyDuringGC:(before)");
1673     Universe::heap()->prepare_for_verify();
1674     Universe::verify(/* allow dirty  */ true,
1675                      /* silent       */ false,
1676                      /* prev marking */ true);
1677   }
1678 
1679   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1680   g1p->record_concurrent_mark_cleanup_start();
1681 
1682   double start = os::elapsedTime();
1683 
1684   HeapRegionRemSet::reset_for_cleanup_tasks();
1685 
1686   // Do counting once more with the world stopped for good measure.
1687   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1688                                         &_region_bm, &_card_bm);
1689   if (G1CollectedHeap::use_parallel_gc_threads()) {
1690     assert(g1h->check_heap_region_claim_values(
1691                                                HeapRegion::InitialClaimValue),
1692            "sanity check");
1693 
1694     int n_workers = g1h->workers()->total_workers();
1695     g1h->set_par_threads(n_workers);
1696     g1h->workers()->run_task(&g1_par_count_task);
1697     g1h->set_par_threads(0);
1698 
1699     assert(g1h->check_heap_region_claim_values(
1700                                              HeapRegion::FinalCountClaimValue),
1701            "sanity check");
1702   } else {
1703     g1_par_count_task.work(0);
1704   }
1705 
1706   size_t known_garbage_bytes =
1707     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1708 #if 0
1709   gclog_or_tty->print_cr("used %1.2lf, live %1.2lf, garbage %1.2lf",
1710                          (double) g1_par_count_task.used_bytes() / (double) (1024 * 1024),
1711                          (double) g1_par_count_task.live_bytes() / (double) (1024 * 1024),
1712                          (double) known_garbage_bytes / (double) (1024 * 1024));
1713 #endif // 0
1714   g1p->set_known_garbage_bytes(known_garbage_bytes);
1715 
1716   size_t start_used_bytes = g1h->used();
1717   _at_least_one_mark_complete = true;
1718   g1h->set_marking_complete();
1719 
1720   double count_end = os::elapsedTime();
1721   double this_final_counting_time = (count_end - start);
1722   if (G1PrintParCleanupStats) {
1723     gclog_or_tty->print_cr("Cleanup:");
1724     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
1725                            this_final_counting_time*1000.0);
1726   }
1727   _total_counting_time += this_final_counting_time;
1728 
1729   // Install newly created mark bitMap as "prev".
1730   swapMarkBitMaps();
1731 
1732   g1h->reset_gc_time_stamp();
1733 
1734   // Note end of marking in all heap regions.
1735   double note_end_start = os::elapsedTime();
1736   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1737   if (G1CollectedHeap::use_parallel_gc_threads()) {
1738     int n_workers = g1h->workers()->total_workers();
1739     g1h->set_par_threads(n_workers);
1740     g1h->workers()->run_task(&g1_par_note_end_task);
1741     g1h->set_par_threads(0);
1742 
1743     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1744            "sanity check");
1745   } else {
1746     g1_par_note_end_task.work(0);
1747   }
1748 
1749   if (!cleanup_list_is_empty()) {
1750     // The cleanup list is not empty, so we'll have to process it
1751     // concurrently. Notify anyone else that might be wanting free
1752     // regions that there will be more free regions coming soon.
1753     g1h->set_free_regions_coming();
1754   }
1755   double note_end_end = os::elapsedTime();
1756   if (G1PrintParCleanupStats) {
1757     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
1758                            (note_end_end - note_end_start)*1000.0);
1759   }
1760 
1761 
1762   // Do the rem set scrubbing before the record_concurrent_mark_cleanup_end()
1763   // call below, since it affects the metric by which we sort the heap regions.
1764   if (G1ScrubRemSets) {
1765     double rs_scrub_start = os::elapsedTime();
1766     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1767     if (G1CollectedHeap::use_parallel_gc_threads()) {
1768       int n_workers = g1h->workers()->total_workers();
1769       g1h->set_par_threads(n_workers);
1770       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1771       g1h->set_par_threads(0);
1772 
1773       assert(g1h->check_heap_region_claim_values(
1774                                             HeapRegion::ScrubRemSetClaimValue),
1775              "sanity check");
1776     } else {
1777       g1_par_scrub_rs_task.work(0);
1778     }
1779 
1780     double rs_scrub_end = os::elapsedTime();
1781     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1782     _total_rs_scrub_time += this_rs_scrub_time;
1783   }
1784 
1785   // this will also free any regions totally full of garbage objects,
1786   // and sort the regions.
1787   g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1788                         g1_par_note_end_task.freed_bytes(),
1789                         g1_par_note_end_task.max_live_bytes());
1790 
1791   // Statistics.
1792   double end = os::elapsedTime();
1793   _cleanup_times.add((end - start) * 1000.0);
1794 
1795   // G1CollectedHeap::heap()->print();
1796   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1797   // G1CollectedHeap::heap()->get_gc_time_stamp());
1798 
1799   if (PrintGC || PrintGCDetails) {
1800     g1h->print_size_transition(gclog_or_tty,
1801                                start_used_bytes,
1802                                g1h->used(),
1803                                g1h->capacity());
1804   }
1805 
1806   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1807   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1808 
1809   // We need to make this be a "collection" so any collection pause that
1810   // races with it goes around and waits for completeCleanup to finish.
1811   g1h->increment_total_collections();
1812 
1813   if (VerifyDuringGC) {
1814     HandleMark hm;  // handle scope
1815     gclog_or_tty->print(" VerifyDuringGC:(after)");
1816     Universe::heap()->prepare_for_verify();
1817     Universe::verify(/* allow dirty  */ true,
1818                      /* silent       */ false,
1819                      /* prev marking */ true);
1820   }
1821 
1822   g1h->verify_region_sets_optional();
1823 }
1824 
1825 void ConcurrentMark::completeCleanup() {
1826   if (has_aborted()) return;
1827 
1828   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1829 
1830   _cleanup_list.verify_optional();
1831   FreeRegionList tmp_free_list("Tmp Free List");
1832 
1833   if (G1ConcRegionFreeingVerbose) {
1834     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1835                            "cleanup list has "SIZE_FORMAT" entries",
1836                            _cleanup_list.length());
1837   }
1838 
1839   // No one else should be accessing the _cleanup_list at this point,
1840   // so it is not necessary to take any locks.
1841   while (!_cleanup_list.is_empty()) {
1842     HeapRegion* hr = _cleanup_list.remove_head();
1843     assert(hr != NULL, "the list was not empty");
1844     hr->rem_set()->clear();
1845     tmp_free_list.add_as_tail(hr);
1846 
1847     // Instead of adding one region at a time to the secondary_free_list,
1848     // we accumulate them in the local list and move them a few at a
1849     // time. This also cuts down on the number of notify_all() calls
1850     // we do during this process. We'll also append the local list when
1851     // _cleanup_list is empty (which means we just removed the last
1852     // region from the _cleanup_list).
1853     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1854         _cleanup_list.is_empty()) {
1855       if (G1ConcRegionFreeingVerbose) {
1856         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1857                                "appending "SIZE_FORMAT" entries to the "
1858                                "secondary_free_list, clean list still has "
1859                                SIZE_FORMAT" entries",
1860                                tmp_free_list.length(),
1861                                _cleanup_list.length());
1862       }
1863 
1864       {
1865         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1866         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
1867         SecondaryFreeList_lock->notify_all();
1868       }
1869 
1870       if (G1StressConcRegionFreeing) {
1871         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1872           os::sleep(Thread::current(), (jlong) 1, false);
1873         }
1874       }
1875     }
1876   }
1877   assert(tmp_free_list.is_empty(), "post-condition");
1878 }
1879 
1880 // Support closures for reference processing in G1
1881 
1882 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1883   HeapWord* addr = (HeapWord*)obj;
1884   return addr != NULL &&
1885          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1886 }
1887 
1888 class G1CMKeepAliveClosure: public OopClosure {
1889   G1CollectedHeap* _g1;
1890   ConcurrentMark*  _cm;
1891   CMBitMap*        _bitMap;
1892  public:
1893   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1894                        CMBitMap* bitMap) :
1895     _g1(g1), _cm(cm),
1896     _bitMap(bitMap) {}
1897 
1898   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1899   virtual void do_oop(      oop* p) { do_oop_work(p); }
1900 
1901   template <class T> void do_oop_work(T* p) {
1902     oop obj = oopDesc::load_decode_heap_oop(p);
1903     HeapWord* addr = (HeapWord*)obj;
1904 
1905     if (_cm->verbose_high())
1906       gclog_or_tty->print_cr("\t[0] we're looking at location "
1907                                "*"PTR_FORMAT" = "PTR_FORMAT,
1908                                p, (void*) obj);
1909 
1910     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
1911       _bitMap->mark(addr);
1912       _cm->mark_stack_push(obj);
1913     }
1914   }
1915 };
1916 
1917 class G1CMDrainMarkingStackClosure: public VoidClosure {
1918   CMMarkStack*                  _markStack;
1919   CMBitMap*                     _bitMap;
1920   G1CMKeepAliveClosure*         _oopClosure;
1921  public:
1922   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1923                                G1CMKeepAliveClosure* oopClosure) :
1924     _markStack(markStack),
1925     _bitMap(bitMap),
1926     _oopClosure(oopClosure)
1927   {}
1928 
1929   void do_void() {
1930     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1931   }
1932 };
1933 
1934 // 'Keep Alive' closure used by parallel reference processing.
1935 // An instance of this closure is used in the parallel reference processing
1936 // code rather than an instance of G1CMKeepAliveClosure. We could have used
1937 // the G1CMKeepAliveClosure as it is MT-safe. Also, reference objects are
1938 // placed onto the discovered ref lists only once, so we can mark and push
1939 // without needing to check whether the object has already been marked.
1940 // Using the G1CMKeepAliveClosure would mean, however, having all the worker
1941 // threads operating on the global mark stack. This means that an individual
1942 // worker would be doing lock-free pushes while it processes its own
1943 // discovered ref list followed by a drain call. If the discovered ref lists
1944 // are unbalanced then this could cause interference with the other
1945 // workers. Using a CMTask (and its embedded local data structures)
1946 // avoids that potential interference.
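     // Concretely, the closure below counts the references it has handled in
     // _ref_counter and, after every G1RefProcDrainInterval references, drains
     // the task's local queues via CMTask::do_marking_step() before moving on
     // to the next batch of discovered references.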
1947 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
1948   ConcurrentMark*  _cm;
1949   CMTask*          _task;
1950   CMBitMap*        _bitMap;
1951   int              _ref_counter_limit;
1952   int              _ref_counter;
1953  public:
1954   G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm,
1955                                   CMTask* task,
1956                                   CMBitMap* bitMap) :
1957     _cm(cm), _task(task), _bitMap(bitMap),
1958     _ref_counter_limit(G1RefProcDrainInterval)
1959   {
1960     assert(_ref_counter_limit > 0, "sanity");
1961     _ref_counter = _ref_counter_limit;
1962   }
1963 
1964   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1965   virtual void do_oop(      oop* p) { do_oop_work(p); }
1966 
1967   template <class T> void do_oop_work(T* p) {
1968     if (!_cm->has_overflown()) {
1969       oop obj = oopDesc::load_decode_heap_oop(p);
1970       if (_cm->verbose_high())
1971         gclog_or_tty->print_cr("\t[%d] we're looking at location "
1972                                "*"PTR_FORMAT" = "PTR_FORMAT,
1973                                _task->task_id(), p, (void*) obj);
1974 
1975       _task->deal_with_reference(obj);
1976       _ref_counter--;
1977 
1978       if (_ref_counter == 0) {
1979         // We have dealt with _ref_counter_limit references, pushing them and objects
1980         // reachable from them on to the local stack (and possibly the global stack).
1981         // Call do_marking_step() to process these entries. We call the routine in a
1982         // loop, which we'll exit if there's nothing more to do (i.e. we're done
1983         // with the entries that we've pushed as a result of the deal_with_reference
1984         // calls above) or we overflow.
1985         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
1986         // while there may still be some work to do. (See the comment at the
1987         // beginning of CMTask::do_marking_step() for those conditions - one of which
1988         // is reaching the specified time target.) It is only when
1989         // CMTask::do_marking_step() returns without setting the has_aborted() flag
1990         // that the marking has completed.
1991         do {
1992           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1993           _task->do_marking_step(mark_step_duration_ms,
1994                                  false /* do_stealing    */,
1995                                  false /* do_termination */);
1996         } while (_task->has_aborted() && !_cm->has_overflown());
1997         _ref_counter = _ref_counter_limit;
1998       }
1999     } else {
2000        if (_cm->verbose_high())
2001          gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2002     }
2003   }
2004 };
2005 
2006 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2007   ConcurrentMark* _cm;
2008   CMTask* _task;
2009  public:
2010   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2011     _cm(cm), _task(task)
2012   {}
2013 
2014   void do_void() {
2015     do {
2016       if (_cm->verbose_high())
2017         gclog_or_tty->print_cr("\t[%d] Drain: Calling do_marking_step", _task->task_id());
2018 
2019       // We call CMTask::do_marking_step() to completely drain the local and
2020       // global marking stacks. The routine is called in a loop, which we'll
2021       // exit if there's nothing more to do (i.e. we've completely drained the
2022       // entries that were pushed as a result of applying the
2023       // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2024       // lists above) or we overflow the global marking stack.
2025       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2026       // while there may still be some work to do. (See the comment at the
2027       // beginning of CMTask::do_marking_step() for those conditions - one of which
2028       // is reaching the specified time target.) It is only when
2029       // CMTask::do_marking_step() returns without setting the has_aborted() flag
2030       // that the marking has completed.
2031 
2032       _task->do_marking_step(1000000000.0 /* something very large */,
2033                              true /* do_stealing    */,
2034                              true /* do_termination */);
2035     } while (_task->has_aborted() && !_cm->has_overflown());
2036   }
2037 };
2038 
2039 // Implementation of AbstractRefProcTaskExecutor for G1
2040 class G1RefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2041 private:
2042   G1CollectedHeap* _g1h;
2043   ConcurrentMark*  _cm;
2044   CMBitMap*        _bitmap;
2045   WorkGang*        _workers;
2046   int              _active_workers;
2047 
2048 public:
2049   G1RefProcTaskExecutor(G1CollectedHeap* g1h,
2050                         ConcurrentMark* cm,
2051                         CMBitMap* bitmap,
2052                         WorkGang* workers,
2053                         int n_workers) :
2054     _g1h(g1h), _cm(cm), _bitmap(bitmap),
2055     _workers(workers), _active_workers(n_workers)
2056   { }
2057 
2058   // Executes the given task using concurrent marking worker threads.
2059   virtual void execute(ProcessTask& task);
2060   virtual void execute(EnqueueTask& task);
2061 };
2062 
2063 class G1RefProcTaskProxy: public AbstractGangTask {
2064   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2065   ProcessTask&     _proc_task;
2066   G1CollectedHeap* _g1h;
2067   ConcurrentMark*  _cm;
2068   CMBitMap*        _bitmap;
2069 
2070 public:
2071   G1RefProcTaskProxy(ProcessTask& proc_task,
2072                      G1CollectedHeap* g1h,
2073                      ConcurrentMark* cm,
2074                      CMBitMap* bitmap) :
2075     AbstractGangTask("Process reference objects in parallel"),
2076     _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap)
2077   {}
2078 
2079   virtual void work(int i) {
2080     CMTask* marking_task = _cm->task(i);
2081     G1CMIsAliveClosure g1_is_alive(_g1h);
2082     G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap);
2083     G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2084 
2085     _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2086   }
2087 };
2088 
2089 void G1RefProcTaskExecutor::execute(ProcessTask& proc_task) {
2090   assert(_workers != NULL, "Need parallel worker threads.");
2091 
2092   G1RefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap);
2093 
2094   // We need to reset the phase for each task execution so that
2095   // the termination protocol of CMTask::do_marking_step works.
2096   _cm->set_phase(_active_workers, false /* concurrent */);
2097   _g1h->set_par_threads(_active_workers);
2098   _workers->run_task(&proc_task_proxy);
2099   _g1h->set_par_threads(0);
2100 }
2101 
2102 class G1RefEnqueueTaskProxy: public AbstractGangTask {
2103   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2104   EnqueueTask& _enq_task;
2105 
2106 public:
2107   G1RefEnqueueTaskProxy(EnqueueTask& enq_task) :
2108     AbstractGangTask("Enqueue reference objects in parallel"),
2109     _enq_task(enq_task)
2110   { }
2111 
2112   virtual void work(int i) {
2113     _enq_task.work(i);
2114   }
2115 };
2116 
2117 void G1RefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2118   assert(_workers != NULL, "Need parallel worker threads.");
2119 
2120   G1RefEnqueueTaskProxy enq_task_proxy(enq_task);
2121 
2122   _g1h->set_par_threads(_active_workers);
2123   _workers->run_task(&enq_task_proxy);
2124   _g1h->set_par_threads(0);
2125 }
2126 
2127 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2128   ResourceMark rm;
2129   HandleMark   hm;
2130   G1CollectedHeap* g1h   = G1CollectedHeap::heap();
2131   ReferenceProcessor* rp = g1h->ref_processor();
2132 
2133   // See the comment in G1CollectedHeap::ref_processing_init()
2134   // about how reference processing currently works in G1.
2135 
2136   // Process weak references.
2137   rp->setup_policy(clear_all_soft_refs);
2138   assert(_markStack.isEmpty(), "mark stack should be empty");
2139 
2140   G1CMIsAliveClosure   g1_is_alive(g1h);
2141   G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
2142   G1CMDrainMarkingStackClosure
2143     g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
2144   // We use the work gang from the G1CollectedHeap and we utilize all
2145   // the worker threads.
2146   int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1;
2147   active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
2148 
2149   G1RefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(),
2150                                           g1h->workers(), active_workers);
2151 
2152 
2153   if (rp->processing_is_mt()) {
2154     // Set the degree of MT here.  If the discovery is done MT, there
2155     // may have been a different number of threads doing the discovery
2156     // and a different number of discovered lists may have Ref objects.
2157     // That is OK as long as the Reference lists are balanced (see
2158     // balance_all_queues() and balance_queues()).
2159     rp->set_active_mt_degree(active_workers);
2160 
2161     rp->process_discovered_references(&g1_is_alive,
2162                                       &g1_keep_alive,
2163                                       &g1_drain_mark_stack,
2164                                       &par_task_executor);
2165 
2166     // The work routines of the parallel keep_alive and drain_marking_stack
2167     // will set the has_overflown flag if we overflow the global marking
2168     // stack.
2169   } else {
2170     rp->process_discovered_references(&g1_is_alive,
2171                                       &g1_keep_alive,
2172                                       &g1_drain_mark_stack,
2173                                       NULL);
2174 
2175   }
2176 
2177   assert(_markStack.overflow() || _markStack.isEmpty(),
2178       "mark stack should be empty (unless it overflowed)");
2179   if (_markStack.overflow()) {
2180     // Should have been done already when we tried to push an
2181     // entry on to the global mark stack. But let's do it again.
2182     set_has_overflown();
2183   }
2184 
2185   if (rp->processing_is_mt()) {
2186     assert(rp->num_q() == active_workers, "why not");
2187     rp->enqueue_discovered_references(&par_task_executor);
2188   } else {
2189     rp->enqueue_discovered_references();
2190   }
2191 
2192   rp->verify_no_references_recorded();
2193   assert(!rp->discovery_enabled(), "should have been disabled");
2194 
2195   // Now clean up stale oops in StringTable
2196   StringTable::unlink(&g1_is_alive);
2197   // Clean up unreferenced symbols in symbol table.
2198   SymbolTable::unlink();
2199 }
2200 
2201 void ConcurrentMark::swapMarkBitMaps() {
2202   CMBitMapRO* temp = _prevMarkBitMap;
2203   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2204   _nextMarkBitMap  = (CMBitMap*)  temp;
2205 }
2206 
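     // Remark task: each worker drives its CMTask with do_marking_step()
     // (stealing and termination enabled) in a loop until the step finishes
     // without aborting or the global mark stack overflows; on overflow,
     // remark is abandoned and concurrent marking is restarted.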
2207 class CMRemarkTask: public AbstractGangTask {
2208 private:
2209   ConcurrentMark *_cm;
2210 
2211 public:
2212   void work(int worker_i) {
2213     // Since all available tasks are actually started, we should
2214     // only proceed if we're supposed to be active.
2215     if ((size_t)worker_i < _cm->active_tasks()) {
2216       CMTask* task = _cm->task(worker_i);
2217       task->record_start_time();
2218       do {
2219         task->do_marking_step(1000000000.0 /* something very large */,
2220                               true /* do_stealing    */,
2221                               true /* do_termination */);
2222       } while (task->has_aborted() && !_cm->has_overflown());
2223       // If we overflow, then we do not want to restart. We instead
2224       // want to abort remark and do concurrent marking again.
2225       task->record_end_time();
2226     }
2227   }
2228 
2229   CMRemarkTask(ConcurrentMark* cm) :
2230     AbstractGangTask("Par Remark"), _cm(cm) { }
2231 };
2232 
2233 void ConcurrentMark::checkpointRootsFinalWork() {
2234   ResourceMark rm;
2235   HandleMark   hm;
2236   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2237 
2238   g1h->ensure_parsability(false);
2239 
2240   if (G1CollectedHeap::use_parallel_gc_threads()) {
2241     G1CollectedHeap::StrongRootsScope srs(g1h);
2242     // this is remark, so we'll use up all available threads
2243     int active_workers = ParallelGCThreads;
2244     set_phase(active_workers, false /* concurrent */);
2245 
2246     CMRemarkTask remarkTask(this);
2247     // We will start all available threads, even if we decide that the
2248     // active_workers will be fewer. The extra ones will just bail out
2249     // immediately.
2250     int n_workers = g1h->workers()->total_workers();
2251     g1h->set_par_threads(n_workers);
2252     g1h->workers()->run_task(&remarkTask);
2253     g1h->set_par_threads(0);
2254   } else {
2255     G1CollectedHeap::StrongRootsScope srs(g1h);
2256     // this is remark, so we'll use up all available threads
2257     int active_workers = 1;
2258     set_phase(active_workers, false /* concurrent */);
2259 
2260     CMRemarkTask remarkTask(this);
2261     // Calling the work method directly (rather than through a work
2262     // gang) since this is the serial case; no extra worker threads
2263     // are started here.
2264     remarkTask.work(0);
2265   }
2266   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2267   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2268 
2269   print_stats();
2270 
2271 #if VERIFY_OBJS_PROCESSED
2272   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2273     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2274                            _scan_obj_cl.objs_processed,
2275                            ThreadLocalObjQueue::objs_enqueued);
2276     guarantee(_scan_obj_cl.objs_processed ==
2277               ThreadLocalObjQueue::objs_enqueued,
2278               "Different number of objs processed and enqueued.");
2279   }
2280 #endif
2281 }
2282 
2283 #ifndef PRODUCT
2284 
2285 class PrintReachableOopClosure: public OopClosure {
2286 private:
2287   G1CollectedHeap* _g1h;
2288   CMBitMapRO*      _bitmap;
2289   outputStream*    _out;
2290   bool             _use_prev_marking;
2291   bool             _all;
2292 
2293 public:
2294   PrintReachableOopClosure(CMBitMapRO*   bitmap,
2295                            outputStream* out,
2296                            bool          use_prev_marking,
2297                            bool          all) :
2298     _g1h(G1CollectedHeap::heap()),
2299     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
2300 
2301   void do_oop(narrowOop* p) { do_oop_work(p); }
2302   void do_oop(      oop* p) { do_oop_work(p); }
2303 
2304   template <class T> void do_oop_work(T* p) {
2305     oop         obj = oopDesc::load_decode_heap_oop(p);
2306     const char* str = NULL;
2307     const char* str2 = "";
2308 
2309     if (obj == NULL) {
2310       str = "";
2311     } else if (!_g1h->is_in_g1_reserved(obj)) {
2312       str = " O";
2313     } else {
2314       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2315       guarantee(hr != NULL, "invariant");
2316       bool over_tams = false;
2317       if (_use_prev_marking) {
2318         over_tams = hr->obj_allocated_since_prev_marking(obj);
2319       } else {
2320         over_tams = hr->obj_allocated_since_next_marking(obj);
2321       }
2322       bool marked = _bitmap->isMarked((HeapWord*) obj);
2323 
2324       if (over_tams) {
2325         str = " >";
2326         if (marked) {
2327           str2 = " AND MARKED";
2328         }
2329       } else if (marked) {
2330         str = " M";
2331       } else {
2332         str = " NOT";
2333       }
2334     }
2335 
2336     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2337                    p, (void*) obj, str, str2);
2338   }
2339 };
2340 
2341 class PrintReachableObjectClosure : public ObjectClosure {
2342 private:
2343   CMBitMapRO*   _bitmap;
2344   outputStream* _out;
2345   bool          _use_prev_marking;
2346   bool          _all;
2347   HeapRegion*   _hr;
2348 
2349 public:
2350   PrintReachableObjectClosure(CMBitMapRO*   bitmap,
2351                               outputStream* out,
2352                               bool          use_prev_marking,
2353                               bool          all,
2354                               HeapRegion*   hr) :
2355     _bitmap(bitmap), _out(out),
2356     _use_prev_marking(use_prev_marking), _all(all), _hr(hr) { }
2357 
2358   void do_object(oop o) {
2359     bool over_tams;
2360     if (_use_prev_marking) {
2361       over_tams = _hr->obj_allocated_since_prev_marking(o);
2362     } else {
2363       over_tams = _hr->obj_allocated_since_next_marking(o);
2364     }
2365     bool marked = _bitmap->isMarked((HeapWord*) o);
2366     bool print_it = _all || over_tams || marked;
2367 
2368     if (print_it) {
2369       _out->print_cr(" "PTR_FORMAT"%s",
2370                      o, (over_tams) ? " >" : (marked) ? " M" : "");
2371       PrintReachableOopClosure oopCl(_bitmap, _out, _use_prev_marking, _all);
2372       o->oop_iterate(&oopCl);
2373     }
2374   }
2375 };
2376 
2377 class PrintReachableRegionClosure : public HeapRegionClosure {
2378 private:
2379   CMBitMapRO*   _bitmap;
2380   outputStream* _out;
2381   bool          _use_prev_marking;
2382   bool          _all;
2383 
2384 public:
2385   bool doHeapRegion(HeapRegion* hr) {
2386     HeapWord* b = hr->bottom();
2387     HeapWord* e = hr->end();
2388     HeapWord* t = hr->top();
2389     HeapWord* p = NULL;
2390     if (_use_prev_marking) {
2391       p = hr->prev_top_at_mark_start();
2392     } else {
2393       p = hr->next_top_at_mark_start();
2394     }
2395     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2396                    "TAMS: "PTR_FORMAT, b, e, t, p);
2397     _out->cr();
2398 
2399     HeapWord* from = b;
2400     HeapWord* to   = t;
2401 
2402     if (to > from) {
2403       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2404       _out->cr();
2405       PrintReachableObjectClosure ocl(_bitmap, _out,
2406                                       _use_prev_marking, _all, hr);
2407       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2408       _out->cr();
2409     }
2410 
2411     return false;
2412   }
2413 
2414   PrintReachableRegionClosure(CMBitMapRO*   bitmap,
2415                               outputStream* out,
2416                               bool          use_prev_marking,
2417                               bool          all) :
2418     _bitmap(bitmap), _out(out), _use_prev_marking(use_prev_marking), _all(all) { }
2419 };
2420 
2421 void ConcurrentMark::print_reachable(const char* str,
2422                                      bool use_prev_marking,
2423                                      bool all) {
2424   gclog_or_tty->cr();
2425   gclog_or_tty->print_cr("== Doing heap dump... ");
2426 
2427   if (G1PrintReachableBaseFile == NULL) {
2428     gclog_or_tty->print_cr("  #### error: no base file defined");
2429     return;
2430   }
2431 
2432   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2433       (JVM_MAXPATHLEN - 1)) {
2434     gclog_or_tty->print_cr("  #### error: file name too long");
2435     return;
2436   }
2437 
2438   char file_name[JVM_MAXPATHLEN];
2439   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2440   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2441 
2442   fileStream fout(file_name);
2443   if (!fout.is_open()) {
2444     gclog_or_tty->print_cr("  #### error: could not open file");
2445     return;
2446   }
2447 
2448   outputStream* out = &fout;
2449 
2450   CMBitMapRO* bitmap = NULL;
2451   if (use_prev_marking) {
2452     bitmap = _prevMarkBitMap;
2453   } else {
2454     bitmap = _nextMarkBitMap;
2455   }
2456 
2457   out->print_cr("-- USING %s", (use_prev_marking) ? "PTAMS" : "NTAMS");
2458   out->cr();
2459 
2460   out->print_cr("--- ITERATING OVER REGIONS");
2461   out->cr();
2462   PrintReachableRegionClosure rcl(bitmap, out, use_prev_marking, all);
2463   _g1h->heap_region_iterate(&rcl);
2464   out->cr();
2465 
2466   gclog_or_tty->print_cr("  done");
2467   gclog_or_tty->flush();
2468 }
2469 
2470 #endif // PRODUCT
2471 
2472 // This note is for drainAllSATBBuffers and the code in between.
2473 // In the future we could reuse a task to do this work during an
2474 // evacuation pause (since now tasks are not active and can be claimed
2475 // during an evacuation pause). This was a late change to the code and
2476 // is currently not being taken advantage of.
2477 
2478 class CMGlobalObjectClosure : public ObjectClosure {
2479 private:
2480   ConcurrentMark* _cm;
2481 
2482 public:
2483   void do_object(oop obj) {
2484     _cm->deal_with_reference(obj);
2485   }
2486 
2487   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2488 };
2489 
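     // Grays an object discovered outside the normal region scan (e.g. from
     // SATB buffer processing or collection-set marking). If the object lies
     // below the global finger, the region-claiming scan has already passed
     // it, so after marking it we must also push it on the global mark stack;
     // objects at or above the finger are visited when their region is claimed.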
2490 void ConcurrentMark::deal_with_reference(oop obj) {
2491   if (verbose_high())
2492     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2493                            (void*) obj);
2494 
2495 
2496   HeapWord* objAddr = (HeapWord*) obj;
2497   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2498   if (_g1h->is_in_g1_reserved(objAddr)) {
2499     assert(obj != NULL, "is_in_g1_reserved should ensure this");
2500     HeapRegion* hr = _g1h->heap_region_containing(obj);
2501     if (_g1h->is_obj_ill(obj, hr)) {
2502       if (verbose_high())
2503         gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2504                                "marked", (void*) obj);
2505 
2506       // we need to mark it first
2507       if (_nextMarkBitMap->parMark(objAddr)) {
2508         // No OrderAccess::store_load() is needed. It is implicit in the
2509         // CAS done in parMark(objAddr) above
2510         HeapWord* finger = _finger;
2511         if (objAddr < finger) {
2512           if (verbose_high())
2513             gclog_or_tty->print_cr("[global] below the global finger "
2514                                    "("PTR_FORMAT"), pushing it", finger);
2515           if (!mark_stack_push(obj)) {
2516             if (verbose_low())
2517               gclog_or_tty->print_cr("[global] global stack overflow during "
2518                                      "deal_with_reference");
2519           }
2520         }
2521       }
2522     }
2523   }
2524 }
2525 
2526 void ConcurrentMark::drainAllSATBBuffers() {
2527   CMGlobalObjectClosure oc(this);
2528   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2529   satb_mq_set.set_closure(&oc);
2530 
2531   while (satb_mq_set.apply_closure_to_completed_buffer()) {
2532     if (verbose_medium())
2533       gclog_or_tty->print_cr("[global] processed an SATB buffer");
2534   }
2535 
2536   // no need to check whether we should do this, as this is only
2537   // called during an evacuation pause
2538   satb_mq_set.iterate_closure_all_threads();
2539 
2540   satb_mq_set.set_closure(NULL);
2541   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2542 }
2543 
2544 void ConcurrentMark::markPrev(oop p) {
2545   // Note we are overriding the read-only view of the prev map here, via
2546   // the cast.
2547   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2548 }
2549 
2550 void ConcurrentMark::clear(oop p) {
2551   assert(p != NULL && p->is_oop(), "expected an oop");
2552   HeapWord* addr = (HeapWord*)p;
2553   assert(addr >= _nextMarkBitMap->startWord() &&
2554          addr < _nextMarkBitMap->endWord(), "in a region");
2555 
2556   _nextMarkBitMap->clear(addr);
2557 }
2558 
2559 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2560   // Note we are overriding the read-only view of the prev map here, via
2561   // the cast.
2562   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2563   _nextMarkBitMap->clearRange(mr);
2564 }
2565 
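     // Lock-free region claiming: read the global finger, look up the region
     // containing it, and try to CAS the finger from the value just read to
     // that region's end. On success the caller owns the region (NULL is
     // returned if it happens to be empty); on failure another task moved the
     // finger first, so re-read it and try again.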
2566 HeapRegion*
2567 ConcurrentMark::claim_region(int task_num) {
2568   // "checkpoint" the finger
2569   HeapWord* finger = _finger;
2570 
2571   // _heap_end will not change underneath our feet; it only changes at
2572   // yield points.
2573   while (finger < _heap_end) {
2574     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2575 
2576     // is the gap between reading the finger and doing the CAS too long?
2577 
2578     HeapRegion* curr_region   = _g1h->heap_region_containing(finger);
2579     HeapWord*   bottom        = curr_region->bottom();
2580     HeapWord*   end           = curr_region->end();
2581     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2582 
2583     if (verbose_low())
2584       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2585                              "["PTR_FORMAT", "PTR_FORMAT"), "
2586                              "limit = "PTR_FORMAT,
2587                              task_num, curr_region, bottom, end, limit);
2588 
2589     HeapWord* res =
2590       (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2591     if (res == finger) {
2592       // we succeeded
2593 
2594       // notice that _finger == end cannot be guaranteed here since
2595       // someone else might have moved the finger even further
2596       assert(_finger >= end, "the finger should have moved forward");
2597 
2598       if (verbose_low())
2599         gclog_or_tty->print_cr("[%d] we were successful with region = "
2600                                PTR_FORMAT, task_num, curr_region);
2601 
2602       if (limit > bottom) {
2603         if (verbose_low())
2604           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2605                                  "returning it ", task_num, curr_region);
2606         return curr_region;
2607       } else {
2608         assert(limit == bottom,
2609                "the region limit should be at bottom");
2610         if (verbose_low())
2611           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2612                                  "returning NULL", task_num, curr_region);
2613         // we return NULL and the caller should try calling
2614         // claim_region() again.
2615         return NULL;
2616       }
2617     } else {
2618       assert(_finger > finger, "the finger should have moved forward");
2619       if (verbose_low())
2620         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2621                                "global finger = "PTR_FORMAT", "
2622                                "our finger = "PTR_FORMAT,
2623                                task_num, _finger, finger);
2624 
2625       // read it again
2626       finger = _finger;
2627     }
2628   }
2629 
2630   return NULL;
2631 }
2632 
2633 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
2634   bool result = false;
2635   for (int i = 0; i < (int)_max_task_num; ++i) {
2636     CMTask* the_task = _tasks[i];
2637     MemRegion mr = the_task->aborted_region();
2638     if (mr.start() != NULL) {
2639       assert(mr.end() != NULL, "invariant");
2640       assert(mr.word_size() > 0, "invariant");
2641       HeapRegion* hr = _g1h->heap_region_containing(mr.start());
2642       assert(hr != NULL, "invariant");
2643       if (hr->in_collection_set()) {
2644         // The region points into the collection set
2645         the_task->set_aborted_region(MemRegion());
2646         result = true;
2647       }
2648     }
2649   }
2650   return result;
2651 }
2652 
2653 bool ConcurrentMark::has_aborted_regions() {
2654   for (int i = 0; i < (int)_max_task_num; ++i) {
2655     CMTask* the_task = _tasks[i];
2656     MemRegion mr = the_task->aborted_region();
2657     if (mr.start() != NULL) {
2658       assert(mr.end() != NULL, "invariant");
2659       assert(mr.word_size() > 0, "invariant");
2660       return true;
2661     }
2662   }
2663   return false;
2664 }
2665 
2666 void ConcurrentMark::oops_do(OopClosure* cl) {
2667   if (_markStack.size() > 0 && verbose_low())
2668     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2669                            "size = %d", _markStack.size());
2670   // we first iterate over the contents of the mark stack...
2671   _markStack.oops_do(cl);
2672 
2673   for (int i = 0; i < (int)_max_task_num; ++i) {
2674     OopTaskQueue* queue = _task_queues->queue((int)i);
2675 
2676     if (queue->size() > 0 && verbose_low())
2677       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2678                              "size = %d", i, queue->size());
2679 
2680     // ...then over the contents of all the task queues.
2681     queue->oops_do(cl);
2682   }
2683 
2684   // Invalidate any entries in the region stack that point into the
2685   // collection set.
2686   if (_regionStack.invalidate_entries_into_cset()) {
2687     // otherwise, any gray objects copied during the evacuation pause
2688     // might not be visited.
2689     assert(_should_gray_objects, "invariant");
2690   }
2691 
2692   // Invalidate any aborted regions, recorded in the individual CM
2693   // tasks, that point into the collection set.
2694   if (invalidate_aborted_regions_in_cset()) {
2695     // otherwise, any gray objects copied during the evacuation pause
2696     // might not be visited.
2697     assert(_should_gray_objects, "invariant");
2698   }
2699 
2700 }
2701 
2702 void ConcurrentMark::clear_marking_state() {
2703   _markStack.setEmpty();
2704   _markStack.clear_overflow();
2705   _regionStack.setEmpty();
2706   _regionStack.clear_overflow();
2707   clear_has_overflown();
2708   _finger = _heap_start;
2709 
2710   for (int i = 0; i < (int)_max_task_num; ++i) {
2711     OopTaskQueue* queue = _task_queues->queue(i);
2712     queue->set_empty();
2713     // Clear any partial regions from the CMTasks
2714     _tasks[i]->clear_aborted_region();
2715   }
2716 }
2717 
2718 void ConcurrentMark::print_stats() {
2719   if (verbose_stats()) {
2720     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2721     for (size_t i = 0; i < _active_tasks; ++i) {
2722       _tasks[i]->print_stats();
2723       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2724     }
2725   }
2726 }
2727 
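     // Marks objects reachable from the collection set using a small explicit
     // mark stack (_ms). Object arrays are re-pushed and processed in chunks
     // of _array_increment elements so that a single large array cannot
     // exhaust the stack, and references leading outside the collection set
     // are handed to ConcurrentMark::deal_with_reference() to be grayed there.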
2728 class CSMarkOopClosure: public OopClosure {
2729   friend class CSMarkBitMapClosure;
2730 
2731   G1CollectedHeap* _g1h;
2732   CMBitMap*        _bm;
2733   ConcurrentMark*  _cm;
2734   oop*             _ms;
2735   jint*            _array_ind_stack;
2736   int              _ms_size;
2737   int              _ms_ind;
2738   int              _array_increment;
2739 
2740   bool push(oop obj, int arr_ind = 0) {
2741     if (_ms_ind == _ms_size) {
2742       gclog_or_tty->print_cr("Mark stack is full.");
2743       return false;
2744     }
2745     _ms[_ms_ind] = obj;
2746     if (obj->is_objArray()) _array_ind_stack[_ms_ind] = arr_ind;
2747     _ms_ind++;
2748     return true;
2749   }
2750 
2751   oop pop() {
2752     if (_ms_ind == 0) return NULL;
2753     else {
2754       _ms_ind--;
2755       return _ms[_ms_ind];
2756     }
2757   }
2758 
2759   template <class T> bool drain() {
2760     while (_ms_ind > 0) {
2761       oop obj = pop();
2762       assert(obj != NULL, "Since index was non-zero.");
2763       if (obj->is_objArray()) {
2764         jint arr_ind = _array_ind_stack[_ms_ind];
2765         objArrayOop aobj = objArrayOop(obj);
2766         jint len = aobj->length();
2767         jint next_arr_ind = arr_ind + _array_increment;
2768         if (next_arr_ind < len) {
2769           push(obj, next_arr_ind);
2770         }
2771         // Now process this portion of this one.
2772         int lim = MIN2(next_arr_ind, len);
2773         for (int j = arr_ind; j < lim; j++) {
2774           do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2775         }
2776 
2777       } else {
2778         obj->oop_iterate(this);
2779       }
2780       if (abort()) return false;
2781     }
2782     return true;
2783   }
2784 
2785 public:
2786   CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2787     _g1h(G1CollectedHeap::heap()),
2788     _cm(cm),
2789     _bm(cm->nextMarkBitMap()),
2790     _ms_size(ms_size), _ms_ind(0),
2791     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2792     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2793     _array_increment(MAX2(ms_size/8, 16))
2794   {}
2795 
2796   ~CSMarkOopClosure() {
2797     FREE_C_HEAP_ARRAY(oop, _ms);
2798     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2799   }
2800 
2801   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2802   virtual void do_oop(      oop* p) { do_oop_work(p); }
2803 
2804   template <class T> void do_oop_work(T* p) {
2805     T heap_oop = oopDesc::load_heap_oop(p);
2806     if (oopDesc::is_null(heap_oop)) return;
2807     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2808     if (obj->is_forwarded()) {
2809       // If the object has already been forwarded, we have to make sure
2810       // that it's marked.  So follow the forwarding pointer.  Note that
2811       // this does the right thing for self-forwarding pointers in the
2812       // evacuation failure case.
2813       obj = obj->forwardee();
2814     }
2815     HeapRegion* hr = _g1h->heap_region_containing(obj);
2816     if (hr != NULL) {
2817       if (hr->in_collection_set()) {
2818         if (_g1h->is_obj_ill(obj)) {
2819           _bm->mark((HeapWord*)obj);
2820           if (!push(obj)) {
2821             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2822             set_abort();
2823           }
2824         }
2825       } else {
2826         // Outside the collection set; we need to gray it
2827         _cm->deal_with_reference(obj);
2828       }
2829     }
2830   }
2831 };
2832 
2833 class CSMarkBitMapClosure: public BitMapClosure {
2834   G1CollectedHeap* _g1h;
2835   CMBitMap*        _bitMap;
2836   ConcurrentMark*  _cm;
2837   CSMarkOopClosure _oop_cl;
2838 public:
2839   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2840     _g1h(G1CollectedHeap::heap()),
2841     _bitMap(cm->nextMarkBitMap()),
2842     _oop_cl(cm, ms_size)
2843   {}
2844 
2845   ~CSMarkBitMapClosure() {}
2846 
2847   bool do_bit(size_t offset) {
2848     // convert offset into a HeapWord*
2849     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2850     assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
2851            "address out of range");
2852     assert(_bitMap->isMarked(addr), "tautology");
2853     oop obj = oop(addr);
2854     if (!obj->is_forwarded()) {
2855       if (!_oop_cl.push(obj)) return false;
2856       if (UseCompressedOops) {
2857         if (!_oop_cl.drain<narrowOop>()) return false;
2858       } else {
2859         if (!_oop_cl.drain<oop>()) return false;
2860       }
2861     }
2862     // Otherwise...
2863     return true;
2864   }
2865 };
2866 
2867 
2868 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2869   CMBitMap* _bm;
2870   CSMarkBitMapClosure _bit_cl;
2871   enum SomePrivateConstants {
2872     MSSize = 1000
2873   };
2874   bool _completed;
2875 public:
2876   CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
2877     _bm(cm->nextMarkBitMap()),
2878     _bit_cl(cm, MSSize),
2879     _completed(true)
2880   {}
2881 
2882   ~CompleteMarkingInCSHRClosure() {}
2883 
2884   bool doHeapRegion(HeapRegion* r) {
2885     if (!r->evacuation_failed()) {
2886       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
2887       if (!mr.is_empty()) {
2888         if (!_bm->iterate(&_bit_cl, mr)) {
2889           _completed = false;
2890           return true;
2891         }
2892       }
2893     }
2894     return false;
2895   }
2896 
2897   bool completed() { return _completed; }
2898 };
2899 
2900 class ClearMarksInHRClosure: public HeapRegionClosure {
2901   CMBitMap* _bm;
2902 public:
2903   ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
2904 
2905   bool doHeapRegion(HeapRegion* r) {
2906     if (!r->used_region().is_empty() && !r->evacuation_failed()) {
2907       MemRegion usedMR = r->used_region();
2908       _bm->clearRange(usedMR);
2909     }
2910     return false;
2911   }
2912 };
2913 
2914 void ConcurrentMark::complete_marking_in_collection_set() {
2915   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
2916 
2917   if (!g1h->mark_in_progress()) {
2918     g1h->g1_policy()->record_mark_closure_time(0.0);
2919     return;
2920   }
2921 
2922   int i = 1;
2923   double start = os::elapsedTime();
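       // The closure below uses a fixed-size mark stack internally and
       // aborts the collection set iteration if that stack overflows. In
       // that case we simply construct a fresh closure and start over,
       // repeating until a pass completes without aborting.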
2924   while (true) {
2925     i++;
2926     CompleteMarkingInCSHRClosure cmplt(this);
2927     g1h->collection_set_iterate(&cmplt);
2928     if (cmplt.completed()) break;
2929   }
2930   double end_time = os::elapsedTime();
2931   double elapsed_time_ms = (end_time - start) * 1000.0;
2932   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
2933 
2934   ClearMarksInHRClosure clr(nextMarkBitMap());
2935   g1h->collection_set_iterate(&clr);
2936 }
2937 
2938 // The next two methods deal with the following optimisation. Some
2939 // objects are gray by being marked and located above the finger. If
2940 // they are copied, during an evacuation pause, below the finger then
2941 // they need to be pushed on the stack. The observation is that, if
2942 // there are no regions in the collection set located above the
2943 // finger, then the above cannot happen, hence we do not need to
2944 // explicitly gray any objects when copying them to below the
2945 // finger. The global stack will be scanned to ensure that, if it
2946 // points to objects being copied, it will update their
2947 // location. There is a tricky situation with the gray objects in
2948 // the region stack that are being copied, however. See the comment in
2949 // newCSet().
2950 
2951 void ConcurrentMark::newCSet() {
2952   if (!concurrent_marking_in_progress())
2953     // nothing to do if marking is not in progress
2954     return;
2955 
2956   // find what the lowest finger is among the global and local fingers
2957   _min_finger = _finger;
2958   for (int i = 0; i < (int)_max_task_num; ++i) {
2959     CMTask* task = _tasks[i];
2960     HeapWord* task_finger = task->finger();
2961     if (task_finger != NULL && task_finger < _min_finger)
2962       _min_finger = task_finger;
2963   }
2964 
2965   _should_gray_objects = false;
2966 
2967   // This fixes a very subtle and frustrating bug. It might be the case
2968   // that, during an evacuation pause, heap regions that contain
2969   // objects that are gray (by being in regions contained in the
2970   // region stack) are included in the collection set. Since such gray
2971   // objects will be moved, and because it's not easy to redirect
2972   // region stack entries to point to a new location (because objects
2973   // in one region might be scattered to multiple regions after they
2974   // are copied), one option is to ensure that all marked objects
2975   // copied during a pause are pushed on the stack. Notice, however,
2976   // that this problem can only happen when the region stack is not
2977   // empty during an evacuation pause. So, we make the fix a bit less
2978   // conservative and ensure that regions are pushed on the stack,
2979   // irrespective of whether all collection set regions are below the
2980   // finger, if the region stack is not empty. This is expected to be
2981   // a rare case, so I don't think it's necessary to be smarter about it.
2982   if (!region_stack_empty() || has_aborted_regions())
2983     _should_gray_objects = true;
2984 }
2985 
2986 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
2987   if (!concurrent_marking_in_progress())
2988     return;
2989 
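       // If any part of this region lies above the lowest finger, it may
       // contain objects that are marked but not yet scanned. Copying such
       // objects during the pause would hide them from the bitmap walk, so
       // we have to gray them explicitly (see the comment above newCSet()).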
2990   HeapWord* region_end = hr->end();
2991   if (region_end > _min_finger)
2992     _should_gray_objects = true;
2993 }
2994 
2995 // abandon current marking iteration due to a Full GC
2996 void ConcurrentMark::abort() {
2997   // Clear all marks to force marking thread to do nothing
2998   _nextMarkBitMap->clearAll();
2999   // Empty mark stack
3000   clear_marking_state();
3001   for (int i = 0; i < (int)_max_task_num; ++i) {
3002     _tasks[i]->clear_region_fields();
3003   }
3004   _has_aborted = true;
3005 
3006   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3007   satb_mq_set.abandon_partial_marking();
3008   // This can be called either during or outside marking, we'll read
3009   // the expected_active value from the SATB queue set.
3010   satb_mq_set.set_active_all_threads(
3011                                  false, /* new active value */
3012                                  satb_mq_set.is_active() /* expected_active */);
3013 }
3014 
3015 static void print_ms_time_info(const char* prefix, const char* name,
3016                                NumberSeq& ns) {
3017   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3018                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3019   if (ns.num() > 0) {
3020     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3021                            prefix, ns.sd(), ns.maximum());
3022   }
3023 }
3024 
3025 void ConcurrentMark::print_summary_info() {
3026   gclog_or_tty->print_cr(" Concurrent marking:");
3027   print_ms_time_info("  ", "init marks", _init_times);
3028   print_ms_time_info("  ", "remarks", _remark_times);
3029   {
3030     print_ms_time_info("     ", "final marks", _remark_mark_times);
3031     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3032 
3033   }
3034   print_ms_time_info("  ", "cleanups", _cleanup_times);
3035   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3036                          _total_counting_time,
3037                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3038                           (double)_cleanup_times.num()
3039                          : 0.0));
3040   if (G1ScrubRemSets) {
3041     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3042                            _total_rs_scrub_time,
3043                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3044                             (double)_cleanup_times.num()
3045                            : 0.0));
3046   }
3047   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3048                          (_init_times.sum() + _remark_times.sum() +
3049                           _cleanup_times.sum())/1000.0);
3050   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3051                 "(%8.2f s marking, %8.2f s counting).",
3052                 cmThread()->vtime_accum(),
3053                 cmThread()->vtime_mark_accum(),
3054                 cmThread()->vtime_count_accum());
3055 }
3056 
3057 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3058   _parallel_workers->print_worker_threads_on(st);
3059 }
3060 
3061 // Closures
3062 // XXX: there seems to be a lot of code duplication here;
3063 // should refactor and consolidate the shared code.
3064 
3068 // We take a break if someone is trying to stop the world.
3069 bool ConcurrentMark::do_yield_check(int worker_i) {
3070   if (should_yield()) {
3071     if (worker_i == 0)
3072       _g1h->g1_policy()->record_concurrent_pause();
3073     cmThread()->yield();
3074     if (worker_i == 0)
3075       _g1h->g1_policy()->record_concurrent_pause_end();
3076     return true;
3077   } else {
3078     return false;
3079   }
3080 }
3081 
3082 bool ConcurrentMark::should_yield() {
3083   return cmThread()->should_yield();
3084 }
3085 
3086 bool ConcurrentMark::containing_card_is_marked(void* p) {
3087   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3088   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3089 }
3090 
3091 bool ConcurrentMark::containing_cards_are_marked(void* start,
3092                                                  void* last) {
3093   return
3094     containing_card_is_marked(start) &&
3095     containing_card_is_marked(last);
3096 }
3097 
3098 #ifndef PRODUCT
3099 // for debugging purposes
3100 void ConcurrentMark::print_finger() {
3101   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3102                          _heap_start, _heap_end, _finger);
3103   for (int i = 0; i < (int) _max_task_num; ++i) {
3104     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
3105   }
3106   gclog_or_tty->print_cr("");
3107 }
3108 #endif
3109 
3110 // Closure for iteration over bitmaps
3111 class CMBitMapClosure : public BitMapClosure {
3112 private:
3113   // the bitmap that is being iterated over
3114   CMBitMap*                   _nextMarkBitMap;
3115   ConcurrentMark*             _cm;
3116   CMTask*                     _task;
3117   // true if we're scanning a heap region claimed by the task (so that
3118   // we move the finger along), false if we're not, i.e. currently when
3119   // scanning a heap region popped from the region stack (so that we
3120   // do not move the task finger along; it'd be a mistake if we did so).
3121   bool                        _scanning_heap_region;
3122 
3123 public:
3124   CMBitMapClosure(CMTask *task,
3125                   ConcurrentMark* cm,
3126                   CMBitMap* nextMarkBitMap)
3127     :  _nextMarkBitMap(nextMarkBitMap), _cm(cm), _task(task) { }
3128 
3129   void set_scanning_heap_region(bool scanning_heap_region) {
3130     _scanning_heap_region = scanning_heap_region;
3131   }
3132 
3133   bool do_bit(size_t offset) {
3134     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3135     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3136     assert( addr < _cm->finger(), "invariant");
3137 
3138     if (_scanning_heap_region) {
3139       statsOnly( _task->increase_objs_found_on_bitmap() );
3140       assert(addr >= _task->finger(), "invariant");
3141       // We move that task's local finger along.
3142       _task->move_finger_to(addr);
3143     } else {
3144       // We move the task's region finger along.
3145       _task->move_region_finger_to(addr);
3146     }
3147 
3148     _task->scan_object(oop(addr));
3149     // we only partially drain the local queue and global stack
3150     _task->drain_local_queue(true);
3151     _task->drain_global_stack(true);
3152 
3153     // if the has_aborted flag has been raised, we need to bail out of
3154     // the iteration
3155     return !_task->has_aborted();
3156   }
3157 };
3158 
3159 // Closure for iterating over objects, currently only used for
3160 // processing SATB buffers.
3161 class CMObjectClosure : public ObjectClosure {
3162 private:
3163   CMTask* _task;
3164 
3165 public:
3166   void do_object(oop obj) {
3167     _task->deal_with_reference(obj);
3168   }
3169 
3170   CMObjectClosure(CMTask* task) : _task(task) { }
3171 };
3172 
3173 // Closure for iterating over object fields
3174 class CMOopClosure : public OopClosure {
3175 private:
3176   G1CollectedHeap*   _g1h;
3177   ConcurrentMark*    _cm;
3178   CMTask*            _task;
3179 
3180 public:
3181   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
3182   virtual void do_oop(      oop* p) { do_oop_work(p); }
3183 
3184   template <class T> void do_oop_work(T* p) {
3185     assert( _g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
3186     assert(!_g1h->is_on_master_free_list(
3187                     _g1h->heap_region_containing((HeapWord*) p)), "invariant");
3188 
3189     oop obj = oopDesc::load_decode_heap_oop(p);
3190     if (_cm->verbose_high())
3191       gclog_or_tty->print_cr("[%d] we're looking at location "
3192                              "*"PTR_FORMAT" = "PTR_FORMAT,
3193                              _task->task_id(), p, (void*) obj);
3194     _task->deal_with_reference(obj);
3195   }
3196 
3197   CMOopClosure(G1CollectedHeap* g1h,
3198                ConcurrentMark* cm,
3199                CMTask* task)
3200     : _g1h(g1h), _cm(cm), _task(task)
3201   {
3202     _ref_processor = g1h->ref_processor();
3203     assert(_ref_processor != NULL, "should not be NULL");
3204   }
3205 };
3206 
3207 void CMTask::setup_for_region(HeapRegion* hr) {
3208   // Separated the asserts so that we know which one fires.
3209   assert(hr != NULL,
3210         "claim_region() should have filtered out continues humongous regions");
3211   assert(!hr->continuesHumongous(),
3212         "claim_region() should have filtered out continues humongous regions");
3213 
3214   if (_cm->verbose_low())
3215     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3216                            _task_id, hr);
3217 
3218   _curr_region  = hr;
3219   _finger       = hr->bottom();
3220   update_region_limit();
3221 }
3222 
3223 void CMTask::update_region_limit() {
3224   HeapRegion* hr            = _curr_region;
3225   HeapWord* bottom          = hr->bottom();
3226   HeapWord* limit           = hr->next_top_at_mark_start();
3227 
3228   if (limit == bottom) {
3229     if (_cm->verbose_low())
3230       gclog_or_tty->print_cr("[%d] found an empty region "
3231                              "["PTR_FORMAT", "PTR_FORMAT")",
3232                              _task_id, bottom, limit);
3233     // The region was collected underneath our feet.
3234     // We set the finger to bottom to ensure that the bitmap
3235     // iteration that will follow this will not do anything.
3236     // (this is not a condition that holds when we set the region up,
3237     // as the region is not supposed to be empty in the first place)
3238     _finger = bottom;
3239   } else if (limit >= _region_limit) {
3240     assert(limit >= _finger, "peace of mind");
3241   } else {
3242     assert(limit < _region_limit, "only way to get here");
3243     // This can happen under some pretty unusual circumstances.  An
3244     // evacuation pause empties the region underneath our feet (NTAMS
3245     // at bottom). We then do some allocation in the region (NTAMS
3246     // stays at bottom), followed by the region being used as a GC
3247     // alloc region (NTAMS will move to top() and the objects
3248     // originally below it will be grayed). All objects now marked in
3249     // the region are explicitly grayed, if below the global finger,
3250     // and in fact we do not need to scan anything else. So, we simply
3251     // set _finger to be limit to ensure that the bitmap iteration
3252     // doesn't do anything.
3253     _finger = limit;
3254   }
3255 
3256   _region_limit = limit;
3257 }
3258 
3259 void CMTask::giveup_current_region() {
3260   assert(_curr_region != NULL, "invariant");
3261   if (_cm->verbose_low())
3262     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3263                            _task_id, _curr_region);
3264   clear_region_fields();
3265 }
3266 
3267 void CMTask::clear_region_fields() {
3268   // Values for these three fields that indicate that we're not
3269   // holding on to a region.
3270   _curr_region   = NULL;
3271   _finger        = NULL;
3272   _region_limit  = NULL;
3273 
3274   _region_finger = NULL;
3275 }
3276 
3277 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3278   guarantee(nextMarkBitMap != NULL, "invariant");
3279 
3280   if (_cm->verbose_low())
3281     gclog_or_tty->print_cr("[%d] resetting", _task_id);
3282 
3283   _nextMarkBitMap                = nextMarkBitMap;
3284   clear_region_fields();
3285   assert(_aborted_region.is_empty(), "should have been cleared");
3286 
3287   _calls                         = 0;
3288   _elapsed_time_ms               = 0.0;
3289   _termination_time_ms           = 0.0;
3290   _termination_start_time_ms     = 0.0;
3291 
3292 #if _MARKING_STATS_
3293   _local_pushes                  = 0;
3294   _local_pops                    = 0;
3295   _local_max_size                = 0;
3296   _objs_scanned                  = 0;
3297   _global_pushes                 = 0;
3298   _global_pops                   = 0;
3299   _global_max_size               = 0;
3300   _global_transfers_to           = 0;
3301   _global_transfers_from         = 0;
3302   _region_stack_pops             = 0;
3303   _regions_claimed               = 0;
3304   _objs_found_on_bitmap          = 0;
3305   _satb_buffers_processed        = 0;
3306   _steal_attempts                = 0;
3307   _steals                        = 0;
3308   _aborted                       = 0;
3309   _aborted_overflow              = 0;
3310   _aborted_cm_aborted            = 0;
3311   _aborted_yield                 = 0;
3312   _aborted_timed_out             = 0;
3313   _aborted_satb                  = 0;
3314   _aborted_termination           = 0;
3315 #endif // _MARKING_STATS_
3316 }
3317 
3318 bool CMTask::should_exit_termination() {
3319   regular_clock_call();
3320   // This is called when we are in the termination protocol. We should
3321   // quit if, for some reason, this task wants to abort or the global
3322   // stack is not empty (this means that we can get work from it).
3323   return !_cm->mark_stack_empty() || has_aborted();
3324 }
3325 
3326 // This determines whether the method below will check both the local
3327 // and global fingers when determining whether to push on the stack a
3328 // gray object (value 1) or whether it will only check the global one
3329 // (value 0). The tradeoffs are that the former will be a bit more
3330 // accurate and possibly push fewer entries on the stack, but it might
3331 // also be a little bit slower.
3332 
3333 #define _CHECK_BOTH_FINGERS_      1
3334 
3335 void CMTask::deal_with_reference(oop obj) {
3336   if (_cm->verbose_high())
3337     gclog_or_tty->print_cr("[%d] we're dealing with reference = "PTR_FORMAT,
3338                            _task_id, (void*) obj);
3339 
3340   ++_refs_reached;
3341 
3342   HeapWord* objAddr = (HeapWord*) obj;
3343   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
3344   if (_g1h->is_in_g1_reserved(objAddr)) {
3345     assert(obj != NULL, "is_in_g1_reserved should ensure this");
3346     HeapRegion* hr =  _g1h->heap_region_containing(obj);
3347     if (_g1h->is_obj_ill(obj, hr)) {
3348       if (_cm->verbose_high())
3349         gclog_or_tty->print_cr("[%d] "PTR_FORMAT" is not considered marked",
3350                                _task_id, (void*) obj);
3351 
3352       // we need to mark it first
3353       if (_nextMarkBitMap->parMark(objAddr)) {
3354         // No OrderAccess::store_load() is needed. It is implicit in the
3355         // CAS done in parMark(objAddr) above
3356         HeapWord* global_finger = _cm->finger();
3357 
3358 #if _CHECK_BOTH_FINGERS_
3359         // we will check both the local and global fingers
3360 
3361         if (_finger != NULL && objAddr < _finger) {
3362           if (_cm->verbose_high())
3363             gclog_or_tty->print_cr("[%d] below the local finger ("PTR_FORMAT"), "
3364                                    "pushing it", _task_id, _finger);
3365           push(obj);
3366         } else if (_curr_region != NULL && objAddr < _region_limit) {
3367           // do nothing
3368         } else if (objAddr < global_finger) {
3369           // Notice that the global finger might be moving forward
3370           // concurrently. This is not a problem. In the worst case, we
3371           // mark the object while it is above the global finger and, by
3372           // the time we read the global finger, it has moved forward
3373           // past this object. In this case, the object will probably
3374           // be visited when a task is scanning the region and will also
3375           // be pushed on the stack. So, some duplicate work, but no
3376           // correctness problems.
3377 
3378           if (_cm->verbose_high())
3379             gclog_or_tty->print_cr("[%d] below the global finger "
3380                                    "("PTR_FORMAT"), pushing it",
3381                                    _task_id, global_finger);
3382           push(obj);
3383         } else {
3384           // do nothing
3385         }
3386 #else // _CHECK_BOTH_FINGERS_
3387         // we will only check the global finger
3388 
3389         if (objAddr < global_finger) {
3390           // see long comment above
3391 
3392           if (_cm->verbose_high())
3393             gclog_or_tty->print_cr("[%d] below the global finger "
3394                                    "("PTR_FORMAT"), pushing it",
3395                                    _task_id, global_finger);
3396           push(obj);
3397         }
3398 #endif // _CHECK_BOTH_FINGERS_
3399       }
3400     }
3401   }
3402 }
3403 
3404 void CMTask::push(oop obj) {
3405   HeapWord* objAddr = (HeapWord*) obj;
3406   assert(_g1h->is_in_g1_reserved(objAddr), "invariant");
3407   assert(!_g1h->is_on_master_free_list(
3408               _g1h->heap_region_containing((HeapWord*) objAddr)), "invariant");
3409   assert(!_g1h->is_obj_ill(obj), "invariant");
3410   assert(_nextMarkBitMap->isMarked(objAddr), "invariant");
3411 
3412   if (_cm->verbose_high())
3413     gclog_or_tty->print_cr("[%d] pushing "PTR_FORMAT, _task_id, (void*) obj);
3414 
3415   if (!_task_queue->push(obj)) {
3416     // The local task queue looks full. We need to push some entries
3417     // to the global stack.
3418 
3419     if (_cm->verbose_medium())
3420       gclog_or_tty->print_cr("[%d] task queue overflow, "
3421                              "moving entries to the global stack",
3422                              _task_id);
3423     move_entries_to_global_stack();
3424 
3425     // this should succeed since, even if we overflow the global
3426     // stack, we should have definitely removed some entries from the
3427     // local queue. So, there must be space on it.
3428     bool success = _task_queue->push(obj);
3429     assert(success, "invariant");
3430   }
3431 
3432   statsOnly( int tmp_size = _task_queue->size();
3433              if (tmp_size > _local_max_size)
3434                _local_max_size = tmp_size;
3435              ++_local_pushes );
3436 }
3437 
3438 void CMTask::reached_limit() {
3439   assert(_words_scanned >= _words_scanned_limit ||
3440          _refs_reached >= _refs_reached_limit,
3441          "shouldn't have been called otherwise");
3442   regular_clock_call();
3443 }
3444 
3445 void CMTask::regular_clock_call() {
3446   if (has_aborted())
3447     return;
3448 
3449   // First, we need to recalculate the words scanned and refs reached
3450   // limits for the next clock call.
3451   recalculate_limits();
3452 
3453   // During the regular clock call we do the following
3454 
3455   // (1) If an overflow has been flagged, then we abort.
3456   if (_cm->has_overflown()) {
3457     set_has_aborted();
3458     return;
3459   }
3460 
3461   // If we are not concurrent (i.e. we're doing remark) we don't need
3462   // to check anything else. The other steps are only needed during
3463   // the concurrent marking phase.
3464   if (!concurrent())
3465     return;
3466 
3467   // (2) If marking has been aborted for Full GC, then we also abort.
3468   if (_cm->has_aborted()) {
3469     set_has_aborted();
3470     statsOnly( ++_aborted_cm_aborted );
3471     return;
3472   }
3473 
3474   double curr_time_ms = os::elapsedVTime() * 1000.0;
3475 
3476   // (3) If marking stats are enabled, then we update the step history.
3477 #if _MARKING_STATS_
3478   if (_words_scanned >= _words_scanned_limit)
3479     ++_clock_due_to_scanning;
3480   if (_refs_reached >= _refs_reached_limit)
3481     ++_clock_due_to_marking;
3482 
3483   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3484   _interval_start_time_ms = curr_time_ms;
3485   _all_clock_intervals_ms.add(last_interval_ms);
3486 
3487   if (_cm->verbose_medium()) {
3488     gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3489                            "scanned = %d%s, refs reached = %d%s",
3490                            _task_id, last_interval_ms,
3491                            _words_scanned,
3492                            (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3493                            _refs_reached,
3494                            (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3495   }
3496 #endif // _MARKING_STATS_
3497 
3498   // (4) We check whether we should yield. If we have to, then we abort.
3499   if (_cm->should_yield()) {
3500     // We should yield. To do this we abort the task. The caller is
3501     // responsible for yielding.
3502     set_has_aborted();
3503     statsOnly( ++_aborted_yield );
3504     return;
3505   }
3506 
3507   // (5) We check whether we've reached our time quota. If we have,
3508   // then we abort.
3509   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3510   if (elapsed_time_ms > _time_target_ms) {
3511     set_has_aborted();
3512     _has_timed_out = true;
3513     statsOnly( ++_aborted_timed_out );
3514     return;
3515   }
3516 
3517   // (6) Finally, we check whether there are enough completed SATB
3518   // buffers available for processing. If there are, we abort.
3519   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3520   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3521     if (_cm->verbose_low())
3522       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3523                              _task_id);
3524     // we do need to process SATB buffers, we'll abort and restart
3525     // the marking task to do so
3526     set_has_aborted();
3527     statsOnly( ++_aborted_satb );
3528     return;
3529   }
3530 }
3531 
3532 void CMTask::recalculate_limits() {
3533   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3534   _words_scanned_limit      = _real_words_scanned_limit;
3535 
3536   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3537   _refs_reached_limit       = _real_refs_reached_limit;
3538 }
3539 
3540 void CMTask::decrease_limits() {
3541   // This is called when we believe that we're going to do an infrequent
3542   // operation which will increase the per byte scanned cost (i.e. move
3543   // entries to/from the global stack). It basically tries to decrease the
3544   // scanning limit so that the clock is called earlier.
3545 
3546   if (_cm->verbose_medium())
3547     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3548 
3549   _words_scanned_limit = _real_words_scanned_limit -
3550     3 * words_scanned_period / 4;
3551   _refs_reached_limit  = _real_refs_reached_limit -
3552     3 * refs_reached_period / 4;
3553 }
3554 
3555 void CMTask::move_entries_to_global_stack() {
3556   // local array where we'll store the entries that will be popped
3557   // from the local queue
3558   oop buffer[global_stack_transfer_size];
3559 
3560   int n = 0;
3561   oop obj;
3562   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3563     buffer[n] = obj;
3564     ++n;
3565   }
3566 
3567   if (n > 0) {
3568     // we popped at least one entry from the local queue
3569 
3570     statsOnly( ++_global_transfers_to; _local_pops += n );
3571 
3572     if (!_cm->mark_stack_push(buffer, n)) {
3573       if (_cm->verbose_low())
3574         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow", _task_id);
3575       set_has_aborted();
3576     } else {
3577       // the transfer was successful
3578 
3579       if (_cm->verbose_medium())
3580         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3581                                _task_id, n);
3582       statsOnly( int tmp_size = _cm->mark_stack_size();
3583                  if (tmp_size > _global_max_size)
3584                    _global_max_size = tmp_size;
3585                  _global_pushes += n );
3586     }
3587   }
3588 
3589   // this operation was quite expensive, so decrease the limits
3590   decrease_limits();
3591 }
3592 
3593 void CMTask::get_entries_from_global_stack() {
3594   // local array where we'll store the entries that will be popped
3595   // from the global stack.
3596   oop buffer[global_stack_transfer_size];
3597   int n;
3598   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3599   assert(n <= global_stack_transfer_size,
3600          "we should not pop more than the given limit");
3601   if (n > 0) {
3602     // yes, we did actually pop at least one entry
3603 
3604     statsOnly( ++_global_transfers_from; _global_pops += n );
3605     if (_cm->verbose_medium())
3606       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3607                              _task_id, n);
3608     for (int i = 0; i < n; ++i) {
3609       bool success = _task_queue->push(buffer[i]);
3610       // We only call this when the local queue is empty or under a
3611       // given target limit. So, we do not expect this push to fail.
3612       assert(success, "invariant");
3613     }
3614 
3615     statsOnly( int tmp_size = _task_queue->size();
3616                if (tmp_size > _local_max_size)
3617                  _local_max_size = tmp_size;
3618                _local_pushes += n );
3619   }
3620 
3621   // this operation was quite expensive, so decrease the limits
3622   decrease_limits();
3623 }
3624 
3625 void CMTask::drain_local_queue(bool partially) {
3626   if (has_aborted())
3627     return;
3628 
3629   // Decide what the target size is, depending whether we're going to
3630   // drain it partially (so that other tasks can steal if they run out
3631   // of things to do) or totally (at the very end).
3632   size_t target_size;
3633   if (partially)
3634     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3635   else
3636     target_size = 0;
3637 
3638   if (_task_queue->size() > target_size) {
3639     if (_cm->verbose_high())
3640       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3641                              _task_id, target_size);
3642 
3643     oop obj;
3644     bool ret = _task_queue->pop_local(obj);
3645     while (ret) {
3646       statsOnly( ++_local_pops );
3647 
3648       if (_cm->verbose_high())
3649         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3650                                (void*) obj);
3651 
3652       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3653       assert(!_g1h->is_on_master_free_list(
3654                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3655 
3656       scan_object(obj);
3657 
3658       if (_task_queue->size() <= target_size || has_aborted())
3659         ret = false;
3660       else
3661         ret = _task_queue->pop_local(obj);
3662     }
3663 
3664     if (_cm->verbose_high())
3665       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3666                              _task_id, _task_queue->size());
3667   }
3668 }
3669 
3670 void CMTask::drain_global_stack(bool partially) {
3671   if (has_aborted())
3672     return;
3673 
3674   // We have a policy to drain the local queue before we attempt to
3675   // drain the global stack.
3676   assert(partially || _task_queue->size() == 0, "invariant");
3677 
3678   // Decide what the target size is, depending whether we're going to
3679   // drain it partially (so that other tasks can steal if they run out
3680   // of things to do) or totally (at the very end).  Notice that,
3681   // because we move entries from the global stack in chunks or
3682   // because another task might be doing the same, we might in fact
3683   // drop below the target. But, this is not a problem.
3684   size_t target_size;
3685   if (partially)
3686     target_size = _cm->partial_mark_stack_size_target();
3687   else
3688     target_size = 0;
3689 
3690   if (_cm->mark_stack_size() > target_size) {
3691     if (_cm->verbose_low())
3692       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3693                              _task_id, target_size);
3694 
3695     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3696       get_entries_from_global_stack();
3697       drain_local_queue(partially);
3698     }
3699 
3700     if (_cm->verbose_low())
3701       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3702                              _task_id, _cm->mark_stack_size());
3703   }
3704 }
3705 
3706 // SATB Queue has several assumptions on whether to call the par or
3707 // non-par versions of the methods. This is why some of the code is
3708 // replicated. We should really get rid of the single-threaded version
3709 // of the code to simplify things.
3710 void CMTask::drain_satb_buffers() {
3711   if (has_aborted())
3712     return;
3713 
3714   // We set this so that the regular clock knows that we're in the
3715   // middle of draining buffers and doesn't set the abort flag when it
3716   // notices that SATB buffers are available for draining. It'd be
3717   // very counterproductive if it did that. :-)
3718   _draining_satb_buffers = true;
3719 
3720   CMObjectClosure oc(this);
3721   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3722   if (G1CollectedHeap::use_parallel_gc_threads())
3723     satb_mq_set.set_par_closure(_task_id, &oc);
3724   else
3725     satb_mq_set.set_closure(&oc);
3726 
3727   // This keeps claiming and applying the closure to completed buffers
3728   // until we run out of buffers or we need to abort.
3729   if (G1CollectedHeap::use_parallel_gc_threads()) {
3730     while (!has_aborted() &&
3731            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3732       if (_cm->verbose_medium())
3733         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3734       statsOnly( ++_satb_buffers_processed );
3735       regular_clock_call();
3736     }
3737   } else {
3738     while (!has_aborted() &&
3739            satb_mq_set.apply_closure_to_completed_buffer()) {
3740       if (_cm->verbose_medium())
3741         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3742       statsOnly( ++_satb_buffers_processed );
3743       regular_clock_call();
3744     }
3745   }
3746 
3747   if (!concurrent() && !has_aborted()) {
3748     // We should only do this during remark.
3749     if (G1CollectedHeap::use_parallel_gc_threads())
3750       satb_mq_set.par_iterate_closure_all_threads(_task_id);
3751     else
3752       satb_mq_set.iterate_closure_all_threads();
3753   }
3754 
3755   _draining_satb_buffers = false;
3756 
3757   assert(has_aborted() ||
3758          concurrent() ||
3759          satb_mq_set.completed_buffers_num() == 0, "invariant");
3760 
3761   if (G1CollectedHeap::use_parallel_gc_threads())
3762     satb_mq_set.set_par_closure(_task_id, NULL);
3763   else
3764     satb_mq_set.set_closure(NULL);
3765 
3766   // again, this was a potentially expensive operation, decrease the
3767   // limits to get the regular clock call early
3768   decrease_limits();
3769 }
3770 
3771 void CMTask::drain_region_stack(BitMapClosure* bc) {
3772   if (has_aborted())
3773     return;
3774 
3775   assert(_region_finger == NULL,
3776          "it should be NULL when we're not scanning a region");
3777 
3778   if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
3779     if (_cm->verbose_low())
3780       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3781                              _task_id, _cm->region_stack_size());
3782 
3783     MemRegion mr;
3784 
3785     if (!_aborted_region.is_empty()) {
3786       mr = _aborted_region;
3787       _aborted_region = MemRegion();
3788 
3789       if (_cm->verbose_low())
3790         gclog_or_tty->print_cr("[%d] scanning aborted region [ " PTR_FORMAT ", " PTR_FORMAT " )",
3791                              _task_id, mr.start(), mr.end());
3792     } else {
3793       mr = _cm->region_stack_pop_lock_free();
3794       // it returns MemRegion() if the pop fails
3795       statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3796     }
3797 
3798     while (mr.start() != NULL) {
3799       if (_cm->verbose_medium())
3800         gclog_or_tty->print_cr("[%d] we are scanning region "
3801                                "["PTR_FORMAT", "PTR_FORMAT")",
3802                                _task_id, mr.start(), mr.end());
3803 
3804       assert(mr.end() <= _cm->finger(),
3805              "otherwise the region shouldn't be on the stack");
3806       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3807       if (_nextMarkBitMap->iterate(bc, mr)) {
3808         assert(!has_aborted(),
3809                "cannot abort the task without aborting the bitmap iteration");
3810 
3811         // We finished iterating over the region without aborting.
3812         regular_clock_call();
3813         if (has_aborted())
3814           mr = MemRegion();
3815         else {
3816           mr = _cm->region_stack_pop_lock_free();
3817           // it returns MemRegion() if the pop fails
3818           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3819         }
3820       } else {
3821         assert(has_aborted(), "currently the only way to do so");
3822 
3823         // The only way to abort the bitmap iteration is to return
3824         // false from the do_bit() method. However, inside the
3825         // do_bit() method we move the _region_finger to point to the
3826         // object currently being looked at. So, if we bail out, we
3827         // have definitely set _region_finger to something non-null.
3828         assert(_region_finger != NULL, "invariant");
3829 
3830         // Make sure that any previously aborted region has been
3831         // cleared.
3832         assert(_aborted_region.is_empty(), "aborted region not cleared");
3833 
3834         // The iteration was actually aborted. So now _region_finger
3835         // points to the address of the object we last scanned. If we
3836         // leave it there, when we restart this task, we will rescan
3837         // the object. It is easy to avoid this. We move the finger by
3838         // enough to point to the next possible object header (the
3839         // bitmap knows by how much we need to move it as it knows its
3840         // granularity).
3841         MemRegion newRegion =
3842           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3843 
3844         if (!newRegion.is_empty()) {
3845           if (_cm->verbose_low()) {
3846             gclog_or_tty->print_cr("[%d] recording unscanned region"
3847                                    "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
3848                                    _task_id,
3849                                    newRegion.start(), newRegion.end());
3850           }
3851           // Now record the part of the region we didn't scan to
3852           // make sure this task scans it later.
3853           _aborted_region = newRegion;
3854         }
3855         // break from while
3856         mr = MemRegion();
3857       }
3858       _region_finger = NULL;
3859     }
3860 
3861     if (_cm->verbose_low())
3862       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3863                              _task_id, _cm->region_stack_size());
3864   }
3865 }
3866 
3867 void CMTask::print_stats() {
3868   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3869                          _task_id, _calls);
3870   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3871                          _elapsed_time_ms, _termination_time_ms);
3872   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3873                          _step_times_ms.num(), _step_times_ms.avg(),
3874                          _step_times_ms.sd());
3875   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3876                          _step_times_ms.maximum(), _step_times_ms.sum());
3877 
3878 #if _MARKING_STATS_
3879   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3880                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3881                          _all_clock_intervals_ms.sd());
3882   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3883                          _all_clock_intervals_ms.maximum(),
3884                          _all_clock_intervals_ms.sum());
3885   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
3886                          _clock_due_to_scanning, _clock_due_to_marking);
3887   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
3888                          _objs_scanned, _objs_found_on_bitmap);
3889   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
3890                          _local_pushes, _local_pops, _local_max_size);
3891   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
3892                          _global_pushes, _global_pops, _global_max_size);
3893   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
3894                          _global_transfers_to,_global_transfers_from);
3895   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
3896                          _regions_claimed, _region_stack_pops);
3897   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
3898   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
3899                          _steal_attempts, _steals);
3900   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
3901   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
3902                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3903   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
3904                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3905 #endif // _MARKING_STATS_
3906 }
3907 
3908 /*****************************************************************************
3909 
3910     The do_marking_step(time_target_ms) method is the building block
3911     of the parallel marking framework. It can be called in parallel
3912     with other invocations of do_marking_step() on different tasks
3913     (but only one per task, obviously) and concurrently with the
3914     mutator threads, or during remark, hence it eliminates the need
3915     for two versions of the code. When called during remark, it will
3916     pick up from where the task left off during the concurrent marking
3917     phase. Interestingly, tasks are also claimable during evacuation
3918     pauses, since do_marking_step() ensures that it aborts before
3919     it needs to yield.
3920 
3921     The data structures that it uses to do marking work are the
3922     following:
3923 
3924       (1) Marking Bitmap. If there are gray objects that appear only
3925       on the bitmap (this happens either when dealing with an overflow
3926       or when the initial marking phase has simply marked the roots
3927       and didn't push them on the stack), then tasks claim heap
3928       regions whose bitmap they then scan to find gray objects. A
3929       global finger indicates where the end of the last claimed region
3930       is. A local finger indicates how far into the region a task has
3931       scanned. The two fingers are used to determine how to gray an
3932       object (i.e. whether simply marking it is OK, as it will be
3933       visited by a task in the future, or whether it needs to be also
3934       pushed on a stack).
3935 
3936       (2) Local Queue. The local queue of the task which is accessed
3937       reasonably efficiently by the task. Other tasks can steal from
3938       it when they run out of work. Throughout the marking phase, a
3939       task attempts to keep its local queue short but not totally
3940       empty, so that entries are available for stealing by other
3941       tasks. Only when there is no more work will a task totally
3942       drain its local queue.
3943 
3944       (3) Global Mark Stack. This handles local queue overflow. During
3945       marking only sets of entries are moved between it and the local
3946       queues, as access to it requires a mutex and more fine-grained
3947       interaction with it, which might cause contention. If it
3948       overflows, then the marking phase should restart and iterate
3949       over the bitmap to identify gray objects. Throughout the marking
3950       phase, tasks attempt to keep the global mark stack at a small
3951       length but not totally empty, so that entries are available for
3952       popping by other tasks. Only when there is no more work will
3953       tasks totally drain the global mark stack.
3954 
3955       (4) Global Region Stack. Entries on it correspond to areas of
3956       the bitmap that need to be scanned since they contain gray
3957       objects. Pushes on the region stack only happen during
3958       evacuation pauses and typically correspond to areas covered by
3959       GCLABs. If it overflows, then the marking phase should restart
3960       and iterate over the bitmap to identify gray objects. Tasks will
3961       try to totally drain the region stack as soon as possible.
3962 
3963       (5) SATB Buffer Queue. This is where completed SATB buffers are
3964       made available. Buffers are regularly removed from this queue
3965       and scanned for roots, so that the queue doesn't get too
3966       long. During remark, all completed buffers are processed, as
3967       well as the filled in parts of any uncompleted buffers.
3968 
3969     The do_marking_step() method tries to abort when the time target
3970     has been reached. There are a few other cases when the
3971     do_marking_step() method also aborts:
3972 
3973       (1) When the marking phase has been aborted (after a Full GC).
3974 
3975       (2) When a global overflow (either on the global stack or the
3976       region stack) has been triggered. Before the task aborts, it
3977       will actually sync up with the other tasks to ensure that all
3978       the marking data structures (local queues, stacks, fingers etc.)
3979       are re-initialised so that when do_marking_step() completes,
3980       the marking phase can immediately restart.
3981 
3982       (3) When enough completed SATB buffers are available. The
3983       do_marking_step() method only tries to drain SATB buffers right
3984       at the beginning. So, if enough buffers are available, the
3985       marking step aborts and the SATB buffers are processed at
3986       the beginning of the next invocation.
3987 
3988       (4) To yield. When we have to yield we abort and do the yield
3989       right at the end of do_marking_step(). This saves us from a lot
3990       of hassle as, by yielding, we might allow a Full GC. If this
3991       happens then objects will be compacted underneath our feet, the
3992       heap might shrink, etc. We save checking for this by just
3993       aborting and doing the yield right at the end.
3994 
3995     From the above it follows that the do_marking_step() method should
3996     be called in a loop (or, otherwise, regularly) until it completes.
3997 
3998     If a marking step completes without its has_aborted() flag being
3999     true, it means it has completed the current marking phase (and
4000     also all other marking tasks have done so and have all synced up).
4001 
4002     A method called regular_clock_call() is invoked "regularly" (in
4003     sub ms intervals) throughout marking. It is this clock method that
4004     checks all the abort conditions which were mentioned above and
4005     decides when the task should abort. A work-based scheme is used to
4006     trigger this clock method: when the number of object words the
4007     marking phase has scanned or the number of references the marking
4008     phase has visited reach a given limit. Additional invocations of
4009     the clock method have been planted in a few other strategic places
4010     too. The initial reason for the clock method was to avoid calling
4011     vtime too regularly, as it is quite expensive. So, once it was in
4012     place, it was natural to piggy-back all the other conditions on it
4013     too and not constantly check them throughout the code.
4014 
4015  *****************************************************************************/
4016 
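     // A rough, illustrative sketch (not the actual code; the real driving
     // loops live in the concurrent marking and remark worker tasks) of how
     // a caller is expected to invoke do_marking_step():
     //
     //   do {
     //     task->do_marking_step(G1ConcMarkStepDurationMillis,
     //                           true /* do_stealing    */,
     //                           true /* do_termination */);
     //     // ...honour any pending yield request here...
     //   } while (!cm->has_aborted() && task->has_aborted());
     //
     // i.e. a step that aborts (typically because its time slice expired,
     // a yield was requested, or an overflow occurred) is simply retried
     // until it completes.
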
4017 void CMTask::do_marking_step(double time_target_ms,
4018                              bool do_stealing,
4019                              bool do_termination) {
4020   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4021   assert(concurrent() == _cm->concurrent(), "they should be the same");
4022 
4023   assert(concurrent() || _cm->region_stack_empty(),
4024          "the region stack should have been cleared before remark");
4025   assert(concurrent() || !_cm->has_aborted_regions(),
4026          "aborted regions should have been cleared before remark");
4027   assert(_region_finger == NULL,
4028          "this should be non-null only when a region is being scanned");
4029 
4030   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4031   assert(_task_queues != NULL, "invariant");
4032   assert(_task_queue != NULL, "invariant");
4033   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
4034 
4035   assert(!_claimed,
4036          "only one thread should claim this task at any one time");
4037 
4038   // OK, this doesn't safeguard against all possible scenarios, as it is
4039   // possible for two threads to set the _claimed flag at the same
4040   // time. But it is only for debugging purposes anyway and it will
4041   // catch most problems.
4042   _claimed = true;
4043 
4044   _start_time_ms = os::elapsedVTime() * 1000.0;
4045   statsOnly( _interval_start_time_ms = _start_time_ms );
4046 
4047   double diff_prediction_ms =
4048     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4049   _time_target_ms = time_target_ms - diff_prediction_ms;
4050 
4051   // set up the variables that are used in the work-based scheme to
4052   // call the regular clock method
4053   _words_scanned = 0;
4054   _refs_reached  = 0;
4055   recalculate_limits();
4056 
4057   // clear all flags
4058   clear_has_aborted();
4059   _has_timed_out = false;
4060   _draining_satb_buffers = false;
4061 
4062   ++_calls;
4063 
4064   if (_cm->verbose_low())
4065     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
4066                            "target = %1.2lfms >>>>>>>>>>",
4067                            _task_id, _calls, _time_target_ms);
4068 
4069   // Set up the bitmap and oop closures. Anything that uses them is
4070   // eventually called from this method, so it is OK to allocate these
4071   // statically.
4072   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4073   CMOopClosure    oop_closure(_g1h, _cm, this);
4074   set_oop_closure(&oop_closure);
4075 
4076   if (_cm->has_overflown()) {
4077     // This can happen if the region stack or the mark stack overflows
4078     // during a GC pause and this task, after a yield point,
4079     // restarts. We have to abort as we need to get into the overflow
4080     // protocol which happens right at the end of this task.
4081     set_has_aborted();
4082   }
4083 
4084   // First drain any available SATB buffers. After this, we will not
4085   // look at SATB buffers before the next invocation of this method.
4086   // If enough completed SATB buffers are queued up, the regular clock
4087   // will abort this task so that it restarts.
4088   drain_satb_buffers();
4089   // ...then partially drain the local queue and the global stack
4090   drain_local_queue(true);
4091   drain_global_stack(true);
4092 
4093   // Then totally drain the region stack.  We will not look at
4094   // it again before the next invocation of this method. Entries on
4095   // the region stack are only added during evacuation pauses, for
4096   // which we have to yield. When we do, we abort the task anyway so
4097   // it will look at the region stack again when it restarts.
4098   bitmap_closure.set_scanning_heap_region(false);
4099   drain_region_stack(&bitmap_closure);
4100   // ...then partially drain the local queue and the global stack
4101   drain_local_queue(true);
4102   drain_global_stack(true);
4103 
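       // Main loop: finish scanning the region we are currently holding on
       // to (if any), then repeatedly try to claim and scan new regions,
       // partially draining the local queue and the global stack in
       // between, until we either run out of regions or abort.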
4104   do {
4105     if (!has_aborted() && _curr_region != NULL) {
4106       // This means that we're already holding on to a region.
4107       assert(_finger != NULL, "if region is not NULL, then the finger "
4108              "should not be NULL either");
4109 
4110       // We might have restarted this task after an evacuation pause
4111       // which might have evacuated the region we're holding on to
4112       // underneath our feet. Let's read its limit again to make sure
4113       // that we do not iterate over a region of the heap that
4114       // contains garbage (update_region_limit() will also move
4115       // _finger to the start of the region if it is found empty).
4116       update_region_limit();
4117       // We will start from _finger not from the start of the region,
4118       // as we might be restarting this task after aborting half-way
4119       // through scanning this region. In this case, _finger points to
4120       // the address where we last found a marked object. If this is a
4121       // fresh region, _finger points to start().
4122       MemRegion mr = MemRegion(_finger, _region_limit);
4123 
4124       if (_cm->verbose_low())
4125         gclog_or_tty->print_cr("[%d] we're scanning part "
4126                                "["PTR_FORMAT", "PTR_FORMAT") "
4127                                "of region "PTR_FORMAT,
4128                                _task_id, _finger, _region_limit, _curr_region);
4129 
4130       // Let's iterate over the bitmap of the part of the
4131       // region that is left.
4132       bitmap_closure.set_scanning_heap_region(true);
4133       if (mr.is_empty() ||
4134           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4135         // We successfully completed iterating over the region. Now,
4136         // let's give up the region.
4137         giveup_current_region();
4138         regular_clock_call();
4139       } else {
4140         assert(has_aborted(), "currently the only way to do so");
4141         // The only way to abort the bitmap iteration is to return
4142         // false from the do_bit() method. However, inside the
4143         // do_bit() method we move the _finger to point to the
4144         // object currently being looked at. So, if we bail out, we
4145         // have definitely set _finger to something non-null.
4146         assert(_finger != NULL, "invariant");
4147 
4148         // Region iteration was actually aborted. So now _finger
4149         // points to the address of the object we last scanned. If we
4150         // leave it there, when we restart this task, we will rescan
4151         // the object. It is easy to avoid this. We move the finger by
4152         // enough to point to the next possible object header (the
4153         // bitmap knows by how much we need to move it as it knows its
4154         // granularity).
4155         assert(_finger < _region_limit, "invariant");
4156         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
4157         // Check if bitmap iteration was aborted while scanning the last object
4158         if (new_finger >= _region_limit) {
4159             giveup_current_region();
4160         } else {
4161             move_finger_to(new_finger);
4162         }
4163       }
4164     }
4165     // At this point we have either completed iterating over the
4166     // region we were holding on to, or we have aborted.
4167 
4168     // We then partially drain the local queue and the global stack.
4169     // (Do we really need this?)
4170     drain_local_queue(true);
4171     drain_global_stack(true);
4172 
4173     // Read the note on the claim_region() method on why it might
4174     // return NULL with potentially more regions available for
4175     // claiming and why we have to check out_of_regions() to determine
4176     // whether we're done or not.
4177     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4178       // We are going to try to claim a new region. We should have
4179       // given up on the previous one.
4180       // Separated the asserts so that we know which one fires.
4181       assert(_curr_region  == NULL, "invariant");
4182       assert(_finger       == NULL, "invariant");
4183       assert(_region_limit == NULL, "invariant");
4184       if (_cm->verbose_low())
4185         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
4186       HeapRegion* claimed_region = _cm->claim_region(_task_id);
4187       if (claimed_region != NULL) {
4188         // Yes, we managed to claim one
4189         statsOnly( ++_regions_claimed );
4190 
4191         if (_cm->verbose_low())
4192           gclog_or_tty->print_cr("[%d] we successfully claimed "
4193                                  "region "PTR_FORMAT,
4194                                  _task_id, claimed_region);
4195 
4196         setup_for_region(claimed_region);
4197         assert(_curr_region == claimed_region, "invariant");
4198       }
4199       // It is important to call the regular clock here. It might take
4200       // a while to claim a region if, for example, we hit a large
4201       // block of empty regions. So we need to call the regular clock
4202       // method once round the loop to make sure it's called
4203       // frequently enough.
4204       regular_clock_call();
4205     }
4206 
4207     if (!has_aborted() && _curr_region == NULL) {
4208       assert(_cm->out_of_regions(),
4209              "at this point we should be out of regions");
4210     }
4211   } while (_curr_region != NULL && !has_aborted());
4212 
4213   if (!has_aborted()) {
4214     // We cannot check whether the global stack is empty, since other
4215     // tasks might be pushing objects to it concurrently. We also cannot
4216     // check if the region stack is empty because if a thread is aborting
4217     // it can push a partially done region back.
4218     assert(_cm->out_of_regions(),
4219            "at this point we should be out of regions");
4220 
4221     if (_cm->verbose_low())
4222       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4223 
4224     // Try to reduce the number of available SATB buffers so that
4225     // remark has less work to do.
4226     drain_satb_buffers();
4227   }
4228 
4229   // Since we've done everything else, we can now totally drain the
4230   // local queue and global stack.
4231   drain_local_queue(false);
4232   drain_global_stack(false);
4233 
4234   // Attempt work stealing from other tasks' queues.
4235   if (do_stealing && !has_aborted()) {
4236     // We have not aborted. This means that we have finished all that
4237     // we could. Let's try to do some stealing...
4238 
4239     // We cannot check whether the global stack is empty, since other
4240     // tasks might be pushing objects to it concurrently. We also cannot
4241     // check if the region stack is empty because if a thread is aborting
4242     // it can push a partially done region back.
4243     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4244            "only way to reach here");
4245 
4246     if (_cm->verbose_low())
4247       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4248 
4249     while (!has_aborted()) {
4250       oop obj;
4251       statsOnly( ++_steal_attempts );
4252 
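           // try_stealing() asks the task queue set for an entry popped
           // off another task's queue; _hash_seed is used (and updated)
           // to pick the victim queue at random.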
4253       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4254         if (_cm->verbose_medium())
4255           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4256                                  _task_id, (void*) obj);
4257 
4258         statsOnly( ++_steals );
4259 
4260         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4261                "any stolen object should be marked");
4262         scan_object(obj);
4263 
4264         // And since we're towards the end, let's totally drain the
4265         // local queue and global stack.
4266         drain_local_queue(false);
4267         drain_global_stack(false);
4268       } else {
4269         break;
4270       }
4271     }
4272   }
4273 
4274   // We still haven't aborted. Now, let's try to get into the
4275   // termination protocol.
4276   if (do_termination && !has_aborted()) {
4277     // We cannot check whether the global stack is empty, since other
4278     // tasks might be concurrently pushing objects on it. We also cannot
4279     // check if the region stack is empty because if a thread is aborting
4280     // it can push a partially done region back.
4281     // Separated the asserts so that we know which one fires.
4282     assert(_cm->out_of_regions(), "only way to reach here");
4283     assert(_task_queue->size() == 0, "only way to reach here");
4284 
4285     if (_cm->verbose_low())
4286       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4287 
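         // Time how long this task spends in the termination protocol.
         // os::elapsedVTime() returns the thread's virtual (CPU) time in
         // seconds, hence the conversion to ms.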
4288     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4289     // The CMTask class also extends the TerminatorTerminator class,
4290     // hence its should_exit_termination() method will also decide
4291     // whether to exit the termination protocol or not.
4292     bool finished = _cm->terminator()->offer_termination(this);
4293     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4294     _termination_time_ms +=
4295       termination_end_time_ms - _termination_start_time_ms;
4296 
4297     if (finished) {
4298       // We're all done.
4299 
4300       if (_task_id == 0) {
4301         // let's allow task 0 to do this
4302         if (concurrent()) {
4303           assert(_cm->concurrent_marking_in_progress(), "invariant");
4304           // we need to set this to false before the next
4305           // safepoint. This way we ensure that the marking phase
4306           // doesn't observe any more heap expansions.
4307           _cm->clear_concurrent_marking_in_progress();
4308         }
4309       }
4310 
4311       // We can now guarantee that the global stack is empty, since
4312       // all other tasks have finished. We separated the guarantees so
4313       // that, if a condition is false, we can immediately find out
4314       // which one.
4315       guarantee(_cm->out_of_regions(), "only way to reach here");
4316       guarantee(_aborted_region.is_empty(), "only way to reach here");
4317       guarantee(_cm->region_stack_empty(), "only way to reach here");
4318       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4319       guarantee(_task_queue->size() == 0, "only way to reach here");
4320       guarantee(!_cm->has_overflown(), "only way to reach here");
4321       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4322       guarantee(!_cm->region_stack_overflow(), "only way to reach here");
4323 
4324       if (_cm->verbose_low())
4325         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4326     } else {
4327       // Apparently there's more work to do. Let's abort this task;
4328       // the caller will restart it and we can hopefully find more things to do.
4329 
4330       if (_cm->verbose_low())
4331         gclog_or_tty->print_cr("[%d] apparently there is more work to do", _task_id);
4332 
4333       set_has_aborted();
4334       statsOnly( ++_aborted_termination );
4335     }
4336   }
4337 
4338   // Mainly for debugging purposes: make sure that a pointer to the
4339   // closure, which was allocated on this frame's stack, doesn't
4340   // escape the frame by accident.
4341   set_oop_closure(NULL);
4342   double end_time_ms = os::elapsedVTime() * 1000.0;
4343   double elapsed_time_ms = end_time_ms - _start_time_ms;
4344   // Update the step history.
4345   _step_times_ms.add(elapsed_time_ms);
4346 
4347   if (has_aborted()) {
4348     // The task was aborted for some reason.
4349 
4350     statsOnly( ++_aborted );
4351 
4352     if (_has_timed_out) {
4353       double diff_ms = elapsed_time_ms - _time_target_ms;
4354       // Keep statistics of how well we did with respect to hitting
4355       // our target only if we actually timed out (if we aborted for
4356       // other reasons, then the results might get skewed).
4357       _marking_step_diffs_ms.add(diff_ms);
4358     }
4359 
4360     if (_cm->has_overflown()) {
4361       // This is the interesting one. We aborted because a global
4362       // overflow was raised. This means we have to restart the
4363       // marking phase and start iterating over regions. However, in
4364       // order to do this we have to make sure that all tasks stop
4365       // what they are doing and re-initialise in a safe manner. We
4366       // will achieve this with the use of two barrier sync points.
4367 
4368       if (_cm->verbose_low())
4369         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4370 
4371       _cm->enter_first_sync_barrier(_task_id);
4372       // When we exit this sync barrier we know that all tasks have
4373       // stopped doing marking work. So, it's now safe to
4374       // re-initialise our data structures. At the end of this method,
4375       // task 0 will clear the global data structures.
4376 
4377       statsOnly( ++_aborted_overflow );
4378 
4379       // We clear the local state of this task...
4380       clear_region_fields();
4381 
4382       // ...and enter the second barrier.
4383       _cm->enter_second_sync_barrier(_task_id);
4384       // At this point everything has been re-initialised and we're
4385       // ready to restart.
4386     }
4387 
4388     if (_cm->verbose_low()) {
4389       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4390                              "elapsed = %1.2lfms <<<<<<<<<<",
4391                              _task_id, _time_target_ms, elapsed_time_ms);
4392       if (_cm->has_aborted())
4393         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4394                                _task_id);
4395     }
4396   } else {
4397     if (_cm->verbose_low())
4398       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4399                              "elapsed = %1.2lfms <<<<<<<<<<",
4400                              _task_id, _time_target_ms, elapsed_time_ms);
4401   }
4402 
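       // This invocation of do_marking_step() is done with the task;
       // release the claim so that the task can be claimed and run
       // again in a subsequent marking step.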
4403   _claimed = false;
4404 }
4405 
4406 CMTask::CMTask(int task_id,
4407                ConcurrentMark* cm,
4408                CMTaskQueue* task_queue,
4409                CMTaskQueueSet* task_queues)
4410   : _g1h(G1CollectedHeap::heap()),
4411     _task_id(task_id), _cm(cm),
4412     _claimed(false),
4413     _nextMarkBitMap(NULL), _hash_seed(17),
4414     _task_queue(task_queue),
4415     _task_queues(task_queues),
4416     _oop_closure(NULL),
4417     _aborted_region(MemRegion()) {
4418   guarantee(task_queue != NULL, "invariant");
4419   guarantee(task_queues != NULL, "invariant");
4420 
4421   statsOnly( _clock_due_to_scanning = 0;
4422              _clock_due_to_marking  = 0 );
4423 
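       // Seed the (elapsed - target) statistics with a small positive
       // value; presumably this biases the early time-target predictions
       // towards finishing a step slightly early rather than overshooting.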
4424   _marking_step_diffs_ms.add(0.5);
4425 }