1 /*
   2  * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
   3  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
   4  *
   5  * This code is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License version 2 only, as
   7  * published by the Free Software Foundation.
   8  *
   9  * This code is distributed in the hope that it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  12  * version 2 for more details (a copy is included in the LICENSE file that
  13  * accompanied this code).
  14  *
  15  * You should have received a copy of the GNU General Public License version
  16  * 2 along with this work; if not, write to the Free Software Foundation,
  17  * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  18  *
  19  * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
  20  * or visit www.oracle.com if you need additional information or have any
  21  * questions.
  22  *
  23  */
  24 
  25 #include "precompiled.hpp"
  26 #include "classfile/symbolTable.hpp"
  27 #include "gc_implementation/g1/concurrentMark.inline.hpp"
  28 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
  29 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
  30 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
  31 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
  32 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
  33 #include "gc_implementation/g1/g1RemSet.hpp"
  34 #include "gc_implementation/g1/heapRegionRemSet.hpp"
  35 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
  36 #include "gc_implementation/shared/vmGCOperations.hpp"
  37 #include "memory/genOopClosures.inline.hpp"
  38 #include "memory/referencePolicy.hpp"
  39 #include "memory/resourceArea.hpp"
  40 #include "oops/oop.inline.hpp"
  41 #include "runtime/handles.inline.hpp"
  42 #include "runtime/java.hpp"
  43 
  44 //
  45 // CMS Bit Map Wrapper
  46 
  47 CMBitMapRO::CMBitMapRO(ReservedSpace rs, int shifter):
  48   _bm((uintptr_t*)NULL,0),
  49   _shifter(shifter) {
  50   _bmStartWord = (HeapWord*)(rs.base());
  51   _bmWordSize  = rs.size()/HeapWordSize;    // rs.size() is in bytes
  52   ReservedSpace brs(ReservedSpace::allocation_align_size_up(
  53                      (_bmWordSize >> (_shifter + LogBitsPerByte)) + 1));
  54 
  55   guarantee(brs.is_reserved(), "couldn't allocate CMS bit map");
  56   // For now we'll just commit all of the bit map up fromt.
  57   // Later on we'll try to be more parsimonious with swap.
  58   guarantee(_virtual_space.initialize(brs, brs.size()),
  59             "couldn't reseve backing store for CMS bit map");
  60   assert(_virtual_space.committed_size() == brs.size(),
  61          "didn't reserve backing store for all of CMS bit map?");
  62   _bm.set_map((uintptr_t*)_virtual_space.low());
  63   assert(_virtual_space.committed_size() << (_shifter + LogBitsPerByte) >=
  64          _bmWordSize, "inconsistency in bit map sizing");
  65   _bm.set_size(_bmWordSize >> _shifter);
  66 }
  67 
  68 HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
  69                                                HeapWord* limit) const {
  70   // First we must round addr *up* to a possible object boundary.
  71   addr = (HeapWord*)align_size_up((intptr_t)addr,
  72                                   HeapWordSize << _shifter);
  73   size_t addrOffset = heapWordToOffset(addr);
  74   if (limit == NULL) {
  75     limit = _bmStartWord + _bmWordSize;
  76   }
  77   size_t limitOffset = heapWordToOffset(limit);
  78   size_t nextOffset = _bm.get_next_one_offset(addrOffset, limitOffset);
  79   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  80   assert(nextAddr >= addr, "get_next_one postcondition");
  81   assert(nextAddr == limit || isMarked(nextAddr),
  82          "get_next_one postcondition");
  83   return nextAddr;
  84 }
  85 
  86 HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
  87                                                  HeapWord* limit) const {
  88   size_t addrOffset = heapWordToOffset(addr);
  89   if (limit == NULL) {
  90     limit = _bmStartWord + _bmWordSize;
  91   }
  92   size_t limitOffset = heapWordToOffset(limit);
  93   size_t nextOffset = _bm.get_next_zero_offset(addrOffset, limitOffset);
  94   HeapWord* nextAddr = offsetToHeapWord(nextOffset);
  95   assert(nextAddr >= addr, "get_next_one postcondition");
  96   assert(nextAddr == limit || !isMarked(nextAddr),
  97          "get_next_one postcondition");
  98   return nextAddr;
  99 }
 100 
 101 int CMBitMapRO::heapWordDiffToOffsetDiff(size_t diff) const {
 102   assert((diff & ((1 << _shifter) - 1)) == 0, "argument check");
 103   return (int) (diff >> _shifter);
 104 }
 105 
 106 bool CMBitMapRO::iterate(BitMapClosure* cl, MemRegion mr) {
 107   HeapWord* left  = MAX2(_bmStartWord, mr.start());
 108   HeapWord* right = MIN2(_bmStartWord + _bmWordSize, mr.end());
 109   if (right > left) {
 110     // Right-open interval [leftOffset, rightOffset).
 111     return _bm.iterate(cl, heapWordToOffset(left), heapWordToOffset(right));
 112   } else {
 113     return true;
 114   }
 115 }
 116 
 117 void CMBitMapRO::mostly_disjoint_range_union(BitMap*   from_bitmap,
 118                                              size_t    from_start_index,
 119                                              HeapWord* to_start_word,
 120                                              size_t    word_num) {
 121   _bm.mostly_disjoint_range_union(from_bitmap,
 122                                   from_start_index,
 123                                   heapWordToOffset(to_start_word),
 124                                   word_num);
 125 }
 126 
 127 #ifndef PRODUCT
 128 bool CMBitMapRO::covers(ReservedSpace rs) const {
 129   // assert(_bm.map() == _virtual_space.low(), "map inconsistency");
 130   assert(((size_t)_bm.size() * (size_t)(1 << _shifter)) == _bmWordSize,
 131          "size inconsistency");
 132   return _bmStartWord == (HeapWord*)(rs.base()) &&
 133          _bmWordSize  == rs.size()>>LogHeapWordSize;
 134 }
 135 #endif
 136 
 137 void CMBitMap::clearAll() {
 138   _bm.clear();
 139   return;
 140 }
 141 
 142 void CMBitMap::markRange(MemRegion mr) {
 143   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 144   assert(!mr.is_empty(), "unexpected empty region");
 145   assert((offsetToHeapWord(heapWordToOffset(mr.end())) ==
 146           ((HeapWord *) mr.end())),
 147          "markRange memory region end is not card aligned");
 148   // convert address range into offset range
 149   _bm.at_put_range(heapWordToOffset(mr.start()),
 150                    heapWordToOffset(mr.end()), true);
 151 }
 152 
 153 void CMBitMap::clearRange(MemRegion mr) {
 154   mr.intersection(MemRegion(_bmStartWord, _bmWordSize));
 155   assert(!mr.is_empty(), "unexpected empty region");
 156   // convert address range into offset range
 157   _bm.at_put_range(heapWordToOffset(mr.start()),
 158                    heapWordToOffset(mr.end()), false);
 159 }
 160 
 161 MemRegion CMBitMap::getAndClearMarkedRegion(HeapWord* addr,
 162                                             HeapWord* end_addr) {
 163   HeapWord* start = getNextMarkedWordAddress(addr);
 164   start = MIN2(start, end_addr);
 165   HeapWord* end   = getNextUnmarkedWordAddress(start);
 166   end = MIN2(end, end_addr);
 167   assert(start <= end, "Consistency check");
 168   MemRegion mr(start, end);
 169   if (!mr.is_empty()) {
 170     clearRange(mr);
 171   }
 172   return mr;
 173 }
 174 
 175 CMMarkStack::CMMarkStack(ConcurrentMark* cm) :
 176   _base(NULL), _cm(cm)
 177 #ifdef ASSERT
 178   , _drain_in_progress(false)
 179   , _drain_in_progress_yields(false)
 180 #endif
 181 {}
 182 
 183 void CMMarkStack::allocate(size_t size) {
 184   _base = NEW_C_HEAP_ARRAY(oop, size);
 185   if (_base == NULL) {
 186     vm_exit_during_initialization("Failed to allocate "
 187                                   "CM region mark stack");
 188   }
 189   _index = 0;
 190   _capacity = (jint) size;
 191   _oops_do_bound = -1;
 192   NOT_PRODUCT(_max_depth = 0);
 193 }
 194 
 195 CMMarkStack::~CMMarkStack() {
 196   if (_base != NULL) {
 197     FREE_C_HEAP_ARRAY(oop, _base);
 198   }
 199 }
 200 
 201 void CMMarkStack::par_push(oop ptr) {
 202   while (true) {
 203     if (isFull()) {
 204       _overflow = true;
 205       return;
 206     }
 207     // Otherwise...
 208     jint index = _index;
 209     jint next_index = index+1;
 210     jint res = Atomic::cmpxchg(next_index, &_index, index);
 211     if (res == index) {
 212       _base[index] = ptr;
 213       // Note that we don't maintain this atomically.  We could, but it
 214       // doesn't seem necessary.
 215       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 216       return;
 217     }
 218     // Otherwise, we need to try again.
 219   }
 220 }
 221 
 222 void CMMarkStack::par_adjoin_arr(oop* ptr_arr, int n) {
 223   while (true) {
 224     if (isFull()) {
 225       _overflow = true;
 226       return;
 227     }
 228     // Otherwise...
 229     jint index = _index;
 230     jint next_index = index + n;
 231     if (next_index > _capacity) {
 232       _overflow = true;
 233       return;
 234     }
 235     jint res = Atomic::cmpxchg(next_index, &_index, index);
 236     if (res == index) {
 237       for (int i = 0; i < n; i++) {
 238         int ind = index + i;
 239         assert(ind < _capacity, "By overflow test above.");
 240         _base[ind] = ptr_arr[i];
 241       }
 242       NOT_PRODUCT(_max_depth = MAX2(_max_depth, next_index));
 243       return;
 244     }
 245     // Otherwise, we need to try again.
 246   }
 247 }
 248 
 249 
 250 void CMMarkStack::par_push_arr(oop* ptr_arr, int n) {
 251   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 252   jint start = _index;
 253   jint next_index = start + n;
 254   if (next_index > _capacity) {
 255     _overflow = true;
 256     return;
 257   }
 258   // Otherwise.
 259   _index = next_index;
 260   for (int i = 0; i < n; i++) {
 261     int ind = start + i;
 262     assert(ind < _capacity, "By overflow test above.");
 263     _base[ind] = ptr_arr[i];
 264   }
 265 }
 266 
 267 
 268 bool CMMarkStack::par_pop_arr(oop* ptr_arr, int max, int* n) {
 269   MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
 270   jint index = _index;
 271   if (index == 0) {
 272     *n = 0;
 273     return false;
 274   } else {
 275     int k = MIN2(max, index);
 276     jint new_ind = index - k;
 277     for (int j = 0; j < k; j++) {
 278       ptr_arr[j] = _base[new_ind + j];
 279     }
 280     _index = new_ind;
 281     *n = k;
 282     return true;
 283   }
 284 }
 285 
 286 
 287 CMRegionStack::CMRegionStack() : _base(NULL) {}
 288 
 289 void CMRegionStack::allocate(size_t size) {
 290   _base = NEW_C_HEAP_ARRAY(MemRegion, size);
 291   if (_base == NULL) {
 292     vm_exit_during_initialization("Failed to allocate CM region mark stack");
 293   }
 294   _index = 0;
 295   _capacity = (jint) size;
 296 }
 297 
 298 CMRegionStack::~CMRegionStack() {
 299   if (_base != NULL) {
 300     FREE_C_HEAP_ARRAY(oop, _base);
 301   }
 302 }
 303 
 304 void CMRegionStack::push_lock_free(MemRegion mr) {
 305   assert(mr.word_size() > 0, "Precondition");
 306   while (true) {
 307     jint index = _index;
 308 
 309     if (index >= _capacity) {
 310       _overflow = true;
 311       return;
 312     }
 313     // Otherwise...
 314     jint next_index = index+1;
 315     jint res = Atomic::cmpxchg(next_index, &_index, index);
 316     if (res == index) {
 317       _base[index] = mr;
 318       return;
 319     }
 320     // Otherwise, we need to try again.
 321   }
 322 }
 323 
 324 // Lock-free pop of the region stack. Called during the concurrent
 325 // marking / remark phases. Should only be called in tandem with
 326 // other lock-free pops.
 327 MemRegion CMRegionStack::pop_lock_free() {
 328   while (true) {
 329     jint index = _index;
 330 
 331     if (index == 0) {
 332       return MemRegion();
 333     }
 334     // Otherwise...
 335     jint next_index = index-1;
 336     jint res = Atomic::cmpxchg(next_index, &_index, index);
 337     if (res == index) {
 338       MemRegion mr = _base[next_index];
 339       if (mr.start() != NULL) {
 340         assert(mr.end() != NULL, "invariant");
 341         assert(mr.word_size() > 0, "invariant");
 342         return mr;
 343       } else {
 344         // that entry was invalidated... let's skip it
 345         assert(mr.end() == NULL, "invariant");
 346       }
 347     }
 348     // Otherwise, we need to try again.
 349   }
 350 }
 351 
 352 #if 0
 353 // The routines that manipulate the region stack with a lock are
 354 // not currently used. They should be retained, however, as a
 355 // diagnostic aid.
 356 
 357 void CMRegionStack::push_with_lock(MemRegion mr) {
 358   assert(mr.word_size() > 0, "Precondition");
 359   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 360 
 361   if (isFull()) {
 362     _overflow = true;
 363     return;
 364   }
 365 
 366   _base[_index] = mr;
 367   _index += 1;
 368 }
 369 
 370 MemRegion CMRegionStack::pop_with_lock() {
 371   MutexLockerEx x(CMRegionStack_lock, Mutex::_no_safepoint_check_flag);
 372 
 373   while (true) {
 374     if (_index == 0) {
 375       return MemRegion();
 376     }
 377     _index -= 1;
 378 
 379     MemRegion mr = _base[_index];
 380     if (mr.start() != NULL) {
 381       assert(mr.end() != NULL, "invariant");
 382       assert(mr.word_size() > 0, "invariant");
 383       return mr;
 384     } else {
 385       // that entry was invalidated... let's skip it
 386       assert(mr.end() == NULL, "invariant");
 387     }
 388   }
 389 }
 390 #endif
 391 
 392 bool CMRegionStack::invalidate_entries_into_cset() {
 393   bool result = false;
 394   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 395   for (int i = 0; i < _oops_do_bound; ++i) {
 396     MemRegion mr = _base[i];
 397     if (mr.start() != NULL) {
 398       assert(mr.end() != NULL, "invariant");
 399       assert(mr.word_size() > 0, "invariant");
 400       HeapRegion* hr = g1h->heap_region_containing(mr.start());
 401       assert(hr != NULL, "invariant");
 402       if (hr->in_collection_set()) {
 403         // The region points into the collection set
 404         _base[i] = MemRegion();
 405         result = true;
 406       }
 407     } else {
 408       // that entry was invalidated... let's skip it
 409       assert(mr.end() == NULL, "invariant");
 410     }
 411   }
 412   return result;
 413 }
 414 
 415 template<class OopClosureClass>
 416 bool CMMarkStack::drain(OopClosureClass* cl, CMBitMap* bm, bool yield_after) {
 417   assert(!_drain_in_progress || !_drain_in_progress_yields || yield_after
 418          || SafepointSynchronize::is_at_safepoint(),
 419          "Drain recursion must be yield-safe.");
 420   bool res = true;
 421   debug_only(_drain_in_progress = true);
 422   debug_only(_drain_in_progress_yields = yield_after);
 423   while (!isEmpty()) {
 424     oop newOop = pop();
 425     assert(G1CollectedHeap::heap()->is_in_reserved(newOop), "Bad pop");
 426     assert(newOop->is_oop(), "Expected an oop");
 427     assert(bm == NULL || bm->isMarked((HeapWord*)newOop),
 428            "only grey objects on this stack");
 429     // iterate over the oops in this oop, marking and pushing
 430     // the ones in CMS generation.
 431     newOop->oop_iterate(cl);
 432     if (yield_after && _cm->do_yield_check()) {
 433       res = false;
 434       break;
 435     }
 436   }
 437   debug_only(_drain_in_progress = false);
 438   return res;
 439 }
 440 
 441 void CMMarkStack::oops_do(OopClosure* f) {
 442   if (_index == 0) return;
 443   assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
 444          "Bound must be set.");
 445   for (int i = 0; i < _oops_do_bound; i++) {
 446     f->do_oop(&_base[i]);
 447   }
 448   _oops_do_bound = -1;
 449 }
 450 
 451 bool ConcurrentMark::not_yet_marked(oop obj) const {
 452   return (_g1h->is_obj_ill(obj)
 453           || (_g1h->is_in_permanent(obj)
 454               && !nextMarkBitMap()->isMarked((HeapWord*)obj)));
 455 }
 456 
 457 #ifdef _MSC_VER // the use of 'this' below gets a warning, make it go away
 458 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 459 #endif // _MSC_VER
 460 
 461 ConcurrentMark::ConcurrentMark(ReservedSpace rs,
 462                                int max_regions) :
 463   _markBitMap1(rs, MinObjAlignment - 1),
 464   _markBitMap2(rs, MinObjAlignment - 1),
 465 
 466   _parallel_marking_threads(0),
 467   _sleep_factor(0.0),
 468   _marking_task_overhead(1.0),
 469   _cleanup_sleep_factor(0.0),
 470   _cleanup_task_overhead(1.0),
 471   _cleanup_list("Cleanup List"),
 472   _region_bm(max_regions, false /* in_resource_area*/),
 473   _card_bm((rs.size() + CardTableModRefBS::card_size - 1) >>
 474            CardTableModRefBS::card_shift,
 475            false /* in_resource_area*/),
 476   _prevMarkBitMap(&_markBitMap1),
 477   _nextMarkBitMap(&_markBitMap2),
 478   _at_least_one_mark_complete(false),
 479 
 480   _markStack(this),
 481   _regionStack(),
 482   // _finger set in set_non_marking_state
 483 
 484   _max_task_num(MAX2(ParallelGCThreads, (size_t)1)),
 485   // _active_tasks set in set_non_marking_state
 486   // _tasks set inside the constructor
 487   _task_queues(new CMTaskQueueSet((int) _max_task_num)),
 488   _terminator(ParallelTaskTerminator((int) _max_task_num, _task_queues)),
 489 
 490   _has_overflown(false),
 491   _concurrent(false),
 492   _has_aborted(false),
 493   _restart_for_overflow(false),
 494   _concurrent_marking_in_progress(false),
 495   _should_gray_objects(false),
 496 
 497   // _verbose_level set below
 498 
 499   _init_times(),
 500   _remark_times(), _remark_mark_times(), _remark_weak_ref_times(),
 501   _cleanup_times(),
 502   _total_counting_time(0.0),
 503   _total_rs_scrub_time(0.0),
 504 
 505   _parallel_workers(NULL) {
 506   CMVerboseLevel verbose_level = (CMVerboseLevel) G1MarkingVerboseLevel;
 507   if (verbose_level < no_verbose) {
 508     verbose_level = no_verbose;
 509   }
 510   if (verbose_level > high_verbose) {
 511     verbose_level = high_verbose;
 512   }
 513   _verbose_level = verbose_level;
 514 
 515   if (verbose_low()) {
 516     gclog_or_tty->print_cr("[global] init, heap start = "PTR_FORMAT", "
 517                            "heap end = "PTR_FORMAT, _heap_start, _heap_end);
 518   }
 519 
 520   _markStack.allocate(MarkStackSize);
 521   _regionStack.allocate(G1MarkRegionStackSize);
 522 
 523   // Create & start a ConcurrentMark thread.
 524   _cmThread = new ConcurrentMarkThread(this);
 525   assert(cmThread() != NULL, "CM Thread should have been created");
 526   assert(cmThread()->cm() != NULL, "CM Thread should refer to this cm");
 527 
 528   _g1h = G1CollectedHeap::heap();
 529   assert(CGC_lock != NULL, "Where's the CGC_lock?");
 530   assert(_markBitMap1.covers(rs), "_markBitMap1 inconsistency");
 531   assert(_markBitMap2.covers(rs), "_markBitMap2 inconsistency");
 532 
 533   SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
 534   satb_qs.set_buffer_size(G1SATBBufferSize);
 535 
 536   _tasks = NEW_C_HEAP_ARRAY(CMTask*, _max_task_num);
 537   _accum_task_vtime = NEW_C_HEAP_ARRAY(double, _max_task_num);
 538 
 539   // so that the assertion in MarkingTaskQueue::task_queue doesn't fail
 540   _active_tasks = _max_task_num;
 541   for (int i = 0; i < (int) _max_task_num; ++i) {
 542     CMTaskQueue* task_queue = new CMTaskQueue();
 543     task_queue->initialize();
 544     _task_queues->register_queue(i, task_queue);
 545 
 546     _tasks[i] = new CMTask(i, this, task_queue, _task_queues);
 547     _accum_task_vtime[i] = 0.0;
 548   }
 549 
 550   if (ConcGCThreads > ParallelGCThreads) {
 551     vm_exit_during_initialization("Can't have more ConcGCThreads "
 552                                   "than ParallelGCThreads.");
 553   }
 554   if (ParallelGCThreads == 0) {
 555     // if we are not running with any parallel GC threads we will not
 556     // spawn any marking threads either
 557     _parallel_marking_threads =   0;
 558     _sleep_factor             = 0.0;
 559     _marking_task_overhead    = 1.0;
 560   } else {
 561     if (ConcGCThreads > 0) {
 562       // notice that ConcGCThreads overwrites G1MarkingOverheadPercent
 563       // if both are set
 564 
 565       _parallel_marking_threads = ConcGCThreads;
 566       _sleep_factor             = 0.0;
 567       _marking_task_overhead    = 1.0;
 568     } else if (G1MarkingOverheadPercent > 0) {
 569       // we will calculate the number of parallel marking threads
 570       // based on a target overhead with respect to the soft real-time
 571       // goal
 572 
 573       double marking_overhead = (double) G1MarkingOverheadPercent / 100.0;
 574       double overall_cm_overhead =
 575         (double) MaxGCPauseMillis * marking_overhead /
 576         (double) GCPauseIntervalMillis;
 577       double cpu_ratio = 1.0 / (double) os::processor_count();
 578       double marking_thread_num = ceil(overall_cm_overhead / cpu_ratio);
 579       double marking_task_overhead =
 580         overall_cm_overhead / marking_thread_num *
 581                                                 (double) os::processor_count();
 582       double sleep_factor =
 583                          (1.0 - marking_task_overhead) / marking_task_overhead;
 584 
 585       _parallel_marking_threads = (size_t) marking_thread_num;
 586       _sleep_factor             = sleep_factor;
 587       _marking_task_overhead    = marking_task_overhead;
 588     } else {
 589       _parallel_marking_threads = MAX2((ParallelGCThreads + 2) / 4, (size_t)1);
 590       _sleep_factor             = 0.0;
 591       _marking_task_overhead    = 1.0;
 592     }
 593 
 594     if (parallel_marking_threads() > 1) {
 595       _cleanup_task_overhead = 1.0;
 596     } else {
 597       _cleanup_task_overhead = marking_task_overhead();
 598     }
 599     _cleanup_sleep_factor =
 600                      (1.0 - cleanup_task_overhead()) / cleanup_task_overhead();
 601 
 602 #if 0
 603     gclog_or_tty->print_cr("Marking Threads          %d", parallel_marking_threads());
 604     gclog_or_tty->print_cr("CM Marking Task Overhead %1.4lf", marking_task_overhead());
 605     gclog_or_tty->print_cr("CM Sleep Factor          %1.4lf", sleep_factor());
 606     gclog_or_tty->print_cr("CL Marking Task Overhead %1.4lf", cleanup_task_overhead());
 607     gclog_or_tty->print_cr("CL Sleep Factor          %1.4lf", cleanup_sleep_factor());
 608 #endif
 609 
 610     guarantee(parallel_marking_threads() > 0, "peace of mind");
 611     _parallel_workers = new FlexibleWorkGang("G1 Parallel Marking Threads",
 612          (int) _parallel_marking_threads, false, true);
 613     if (_parallel_workers == NULL) {
 614       vm_exit_during_initialization("Failed necessary allocation.");
 615     } else {
 616       _parallel_workers->initialize_workers();
 617     }
 618   }
 619 
 620   // so that the call below can read a sensible value
 621   _heap_start = (HeapWord*) rs.base();
 622   set_non_marking_state();
 623 }
 624 
 625 void ConcurrentMark::update_g1_committed(bool force) {
 626   // If concurrent marking is not in progress, then we do not need to
 627   // update _heap_end. This has a subtle and important
 628   // side-effect. Imagine that two evacuation pauses happen between
 629   // marking completion and remark. The first one can grow the
 630   // heap (hence now the finger is below the heap end). Then, the
 631   // second one could unnecessarily push regions on the region
 632   // stack. This causes the invariant that the region stack is empty
 633   // at the beginning of remark to be false. By ensuring that we do
 634   // not observe heap expansions after marking is complete, then we do
 635   // not have this problem.
 636   if (!concurrent_marking_in_progress() && !force) return;
 637 
 638   MemRegion committed = _g1h->g1_committed();
 639   assert(committed.start() == _heap_start, "start shouldn't change");
 640   HeapWord* new_end = committed.end();
 641   if (new_end > _heap_end) {
 642     // The heap has been expanded.
 643 
 644     _heap_end = new_end;
 645   }
 646   // Notice that the heap can also shrink. However, this only happens
 647   // during a Full GC (at least currently) and the entire marking
 648   // phase will bail out and the task will not be restarted. So, let's
 649   // do nothing.
 650 }
 651 
 652 void ConcurrentMark::reset() {
 653   // Starting values for these two. This should be called in a STW
 654   // phase. CM will be notified of any future g1_committed expansions
 655   // will be at the end of evacuation pauses, when tasks are
 656   // inactive.
 657   MemRegion committed = _g1h->g1_committed();
 658   _heap_start = committed.start();
 659   _heap_end   = committed.end();
 660 
 661   // Separated the asserts so that we know which one fires.
 662   assert(_heap_start != NULL, "heap bounds should look ok");
 663   assert(_heap_end != NULL, "heap bounds should look ok");
 664   assert(_heap_start < _heap_end, "heap bounds should look ok");
 665 
 666   // reset all the marking data structures and any necessary flags
 667   clear_marking_state();
 668 
 669   if (verbose_low()) {
 670     gclog_or_tty->print_cr("[global] resetting");
 671   }
 672 
 673   // We do reset all of them, since different phases will use
 674   // different number of active threads. So, it's easiest to have all
 675   // of them ready.
 676   for (int i = 0; i < (int) _max_task_num; ++i) {
 677     _tasks[i]->reset(_nextMarkBitMap);
 678   }
 679 
 680   // we need this to make sure that the flag is on during the evac
 681   // pause with initial mark piggy-backed
 682   set_concurrent_marking_in_progress();
 683 }
 684 
 685 void ConcurrentMark::set_phase(size_t active_tasks, bool concurrent) {
 686   assert(active_tasks <= _max_task_num, "we should not have more");
 687 
 688   _active_tasks = active_tasks;
 689   // Need to update the three data structures below according to the
 690   // number of active threads for this phase.
 691   _terminator   = ParallelTaskTerminator((int) active_tasks, _task_queues);
 692   _first_overflow_barrier_sync.set_n_workers((int) active_tasks);
 693   _second_overflow_barrier_sync.set_n_workers((int) active_tasks);
 694 
 695   _concurrent = concurrent;
 696   // We propagate this to all tasks, not just the active ones.
 697   for (int i = 0; i < (int) _max_task_num; ++i)
 698     _tasks[i]->set_concurrent(concurrent);
 699 
 700   if (concurrent) {
 701     set_concurrent_marking_in_progress();
 702   } else {
 703     // We currently assume that the concurrent flag has been set to
 704     // false before we start remark. At this point we should also be
 705     // in a STW phase.
 706     assert(!concurrent_marking_in_progress(), "invariant");
 707     assert(_finger == _heap_end, "only way to get here");
 708     update_g1_committed(true);
 709   }
 710 }
 711 
 712 void ConcurrentMark::set_non_marking_state() {
 713   // We set the global marking state to some default values when we're
 714   // not doing marking.
 715   clear_marking_state();
 716   _active_tasks = 0;
 717   clear_concurrent_marking_in_progress();
 718 }
 719 
 720 ConcurrentMark::~ConcurrentMark() {
 721   for (int i = 0; i < (int) _max_task_num; ++i) {
 722     delete _task_queues->queue(i);
 723     delete _tasks[i];
 724   }
 725   delete _task_queues;
 726   FREE_C_HEAP_ARRAY(CMTask*, _max_task_num);
 727 }
 728 
 729 // This closure is used to mark refs into the g1 generation
 730 // from external roots in the CMS bit map.
 731 // Called at the first checkpoint.
 732 //
 733 
 734 void ConcurrentMark::clearNextBitmap() {
 735   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 736   G1CollectorPolicy* g1p = g1h->g1_policy();
 737 
 738   // Make sure that the concurrent mark thread looks to still be in
 739   // the current cycle.
 740   guarantee(cmThread()->during_cycle(), "invariant");
 741 
 742   // We are finishing up the current cycle by clearing the next
 743   // marking bitmap and getting it ready for the next cycle. During
 744   // this time no other cycle can start. So, let's make sure that this
 745   // is the case.
 746   guarantee(!g1h->mark_in_progress(), "invariant");
 747 
 748   // clear the mark bitmap (no grey objects to start with).
 749   // We need to do this in chunks and offer to yield in between
 750   // each chunk.
 751   HeapWord* start  = _nextMarkBitMap->startWord();
 752   HeapWord* end    = _nextMarkBitMap->endWord();
 753   HeapWord* cur    = start;
 754   size_t chunkSize = M;
 755   while (cur < end) {
 756     HeapWord* next = cur + chunkSize;
 757     if (next > end) {
 758       next = end;
 759     }
 760     MemRegion mr(cur,next);
 761     _nextMarkBitMap->clearRange(mr);
 762     cur = next;
 763     do_yield_check();
 764 
 765     // Repeat the asserts from above. We'll do them as asserts here to
 766     // minimize their overhead on the product. However, we'll have
 767     // them as guarantees at the beginning / end of the bitmap
 768     // clearing to get some checking in the product.
 769     assert(cmThread()->during_cycle(), "invariant");
 770     assert(!g1h->mark_in_progress(), "invariant");
 771   }
 772 
 773   // Repeat the asserts from above.
 774   guarantee(cmThread()->during_cycle(), "invariant");
 775   guarantee(!g1h->mark_in_progress(), "invariant");
 776 }
 777 
 778 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 779 public:
 780   bool doHeapRegion(HeapRegion* r) {
 781     if (!r->continuesHumongous()) {
 782       r->note_start_of_marking(true);
 783     }
 784     return false;
 785   }
 786 };
 787 
 788 void ConcurrentMark::checkpointRootsInitialPre() {
 789   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 790   G1CollectorPolicy* g1p = g1h->g1_policy();
 791 
 792   _has_aborted = false;
 793 
 794 #ifndef PRODUCT
 795   if (G1PrintReachableAtInitialMark) {
 796     print_reachable("at-cycle-start",
 797                     VerifyOption_G1UsePrevMarking, true /* all */);
 798   }
 799 #endif
 800 
 801   // Initialise marking structures. This has to be done in a STW phase.
 802   reset();
 803 }
 804 
 805 
 806 void ConcurrentMark::checkpointRootsInitialPost() {
 807   G1CollectedHeap*   g1h = G1CollectedHeap::heap();
 808 
 809   // If we force an overflow during remark, the remark operation will
 810   // actually abort and we'll restart concurrent marking. If we always
 811   // force an oveflow during remark we'll never actually complete the
 812   // marking phase. So, we initilize this here, at the start of the
 813   // cycle, so that at the remaining overflow number will decrease at
 814   // every remark and we'll eventually not need to cause one.
 815   force_overflow_stw()->init();
 816 
 817   // For each region note start of marking.
 818   NoteStartOfMarkHRClosure startcl;
 819   g1h->heap_region_iterate(&startcl);
 820 
 821   // Start Concurrent Marking weak-reference discovery.
 822   ReferenceProcessor* rp = g1h->ref_processor_cm();
 823   // enable ("weak") refs discovery
 824   rp->enable_discovery(true /*verify_disabled*/, true /*verify_no_refs*/);
 825   rp->setup_policy(false); // snapshot the soft ref policy to be used in this cycle
 826 
 827   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
 828   // This is the start of  the marking cycle, we're expected all
 829   // threads to have SATB queues with active set to false.
 830   satb_mq_set.set_active_all_threads(true, /* new active value */
 831                                      false /* expected_active */);
 832 
 833   // update_g1_committed() will be called at the end of an evac pause
 834   // when marking is on. So, it's also called at the end of the
 835   // initial-mark pause to update the heap end, if the heap expands
 836   // during it. No need to call it here.
 837 }
 838 
 839 /*
 840  * Notice that in the next two methods, we actually leave the STS
 841  * during the barrier sync and join it immediately afterwards. If we
 842  * do not do this, the following deadlock can occur: one thread could
 843  * be in the barrier sync code, waiting for the other thread to also
 844  * sync up, whereas another one could be trying to yield, while also
 845  * waiting for the other threads to sync up too.
 846  *
 847  * Note, however, that this code is also used during remark and in
 848  * this case we should not attempt to leave / enter the STS, otherwise
 849  * we'll either hit an asseert (debug / fastdebug) or deadlock
 850  * (product). So we should only leave / enter the STS if we are
 851  * operating concurrently.
 852  *
 853  * Because the thread that does the sync barrier has left the STS, it
 854  * is possible to be suspended for a Full GC or an evacuation pause
 855  * could occur. This is actually safe, since the entering the sync
 856  * barrier is one of the last things do_marking_step() does, and it
 857  * doesn't manipulate any data structures afterwards.
 858  */
 859 
 860 void ConcurrentMark::enter_first_sync_barrier(int task_num) {
 861   if (verbose_low()) {
 862     gclog_or_tty->print_cr("[%d] entering first barrier", task_num);
 863   }
 864 
 865   if (concurrent()) {
 866     ConcurrentGCThread::stsLeave();
 867   }
 868   _first_overflow_barrier_sync.enter();
 869   if (concurrent()) {
 870     ConcurrentGCThread::stsJoin();
 871   }
 872   // at this point everyone should have synced up and not be doing any
 873   // more work
 874 
 875   if (verbose_low()) {
 876     gclog_or_tty->print_cr("[%d] leaving first barrier", task_num);
 877   }
 878 
 879   // let task 0 do this
 880   if (task_num == 0) {
 881     // task 0 is responsible for clearing the global data structures
 882     // We should be here because of an overflow. During STW we should
 883     // not clear the overflow flag since we rely on it being true when
 884     // we exit this method to abort the pause and restart concurent
 885     // marking.
 886     clear_marking_state(concurrent() /* clear_overflow */);
 887     force_overflow()->update();
 888 
 889     if (PrintGC) {
 890       gclog_or_tty->date_stamp(PrintGCDateStamps);
 891       gclog_or_tty->stamp(PrintGCTimeStamps);
 892       gclog_or_tty->print_cr("[GC concurrent-mark-reset-for-overflow]");
 893     }
 894   }
 895 
 896   // after this, each task should reset its own data structures then
 897   // then go into the second barrier
 898 }
 899 
 900 void ConcurrentMark::enter_second_sync_barrier(int task_num) {
 901   if (verbose_low()) {
 902     gclog_or_tty->print_cr("[%d] entering second barrier", task_num);
 903   }
 904 
 905   if (concurrent()) {
 906     ConcurrentGCThread::stsLeave();
 907   }
 908   _second_overflow_barrier_sync.enter();
 909   if (concurrent()) {
 910     ConcurrentGCThread::stsJoin();
 911   }
 912   // at this point everything should be re-initialised and ready to go
 913 
 914   if (verbose_low()) {
 915     gclog_or_tty->print_cr("[%d] leaving second barrier", task_num);
 916   }
 917 }
 918 
 919 #ifndef PRODUCT
 920 void ForceOverflowSettings::init() {
 921   _num_remaining = G1ConcMarkForceOverflow;
 922   _force = false;
 923   update();
 924 }
 925 
 926 void ForceOverflowSettings::update() {
 927   if (_num_remaining > 0) {
 928     _num_remaining -= 1;
 929     _force = true;
 930   } else {
 931     _force = false;
 932   }
 933 }
 934 
 935 bool ForceOverflowSettings::should_force() {
 936   if (_force) {
 937     _force = false;
 938     return true;
 939   } else {
 940     return false;
 941   }
 942 }
 943 #endif // !PRODUCT
 944 
 945 void ConcurrentMark::grayRoot(oop p) {
 946   HeapWord* addr = (HeapWord*) p;
 947   // We can't really check against _heap_start and _heap_end, since it
 948   // is possible during an evacuation pause with piggy-backed
 949   // initial-mark that the committed space is expanded during the
 950   // pause without CM observing this change. So the assertions below
 951   // is a bit conservative; but better than nothing.
 952   assert(_g1h->g1_committed().contains(addr),
 953          "address should be within the heap bounds");
 954 
 955   if (!_nextMarkBitMap->isMarked(addr)) {
 956     _nextMarkBitMap->parMark(addr);
 957   }
 958 }
 959 
 960 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
 961   // The objects on the region have already been marked "in bulk" by
 962   // the caller. We only need to decide whether to push the region on
 963   // the region stack or not.
 964 
 965   if (!concurrent_marking_in_progress() || !_should_gray_objects) {
 966     // We're done with marking and waiting for remark. We do not need to
 967     // push anything else on the region stack.
 968     return;
 969   }
 970 
 971   HeapWord* finger = _finger;
 972 
 973   if (verbose_low()) {
 974     gclog_or_tty->print_cr("[global] attempting to push "
 975                            "region ["PTR_FORMAT", "PTR_FORMAT"), finger is at "
 976                            PTR_FORMAT, mr.start(), mr.end(), finger);
 977   }
 978 
 979   if (mr.start() < finger) {
 980     // The finger is always heap region aligned and it is not possible
 981     // for mr to span heap regions.
 982     assert(mr.end() <= finger, "invariant");
 983 
 984     // Separated the asserts so that we know which one fires.
 985     assert(mr.start() <= mr.end(),
 986            "region boundaries should fall within the committed space");
 987     assert(_heap_start <= mr.start(),
 988            "region boundaries should fall within the committed space");
 989     assert(mr.end() <= _heap_end,
 990            "region boundaries should fall within the committed space");
 991     if (verbose_low()) {
 992       gclog_or_tty->print_cr("[global] region ["PTR_FORMAT", "PTR_FORMAT") "
 993                              "below the finger, pushing it",
 994                              mr.start(), mr.end());
 995     }
 996 
 997     if (!region_stack_push_lock_free(mr)) {
 998       if (verbose_low()) {
 999         gclog_or_tty->print_cr("[global] region stack has overflown.");
1000       }
1001     }
1002   }
1003 }
1004 
1005 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
1006   // The object is not marked by the caller. We need to at least mark
1007   // it and maybe push in on the stack.
1008 
1009   HeapWord* addr = (HeapWord*)p;
1010   if (!_nextMarkBitMap->isMarked(addr)) {
1011     // We definitely need to mark it, irrespective whether we bail out
1012     // because we're done with marking.
1013     if (_nextMarkBitMap->parMark(addr)) {
1014       if (!concurrent_marking_in_progress() || !_should_gray_objects) {
1015         // If we're done with concurrent marking and we're waiting for
1016         // remark, then we're not pushing anything on the stack.
1017         return;
1018       }
1019 
1020       // No OrderAccess:store_load() is needed. It is implicit in the
1021       // CAS done in parMark(addr) above
1022       HeapWord* finger = _finger;
1023 
1024       if (addr < finger) {
1025         if (!mark_stack_push(oop(addr))) {
1026           if (verbose_low()) {
1027             gclog_or_tty->print_cr("[global] global stack overflow "
1028                                    "during parMark");
1029           }
1030         }
1031       }
1032     }
1033   }
1034 }
1035 
1036 class CMConcurrentMarkingTask: public AbstractGangTask {
1037 private:
1038   ConcurrentMark*       _cm;
1039   ConcurrentMarkThread* _cmt;
1040 
1041 public:
1042   void work(int worker_i) {
1043     assert(Thread::current()->is_ConcurrentGC_thread(),
1044            "this should only be done by a conc GC thread");
1045     ResourceMark rm;
1046 
1047     double start_vtime = os::elapsedVTime();
1048 
1049     ConcurrentGCThread::stsJoin();
1050 
1051     assert((size_t) worker_i < _cm->active_tasks(), "invariant");
1052     CMTask* the_task = _cm->task(worker_i);
1053     the_task->record_start_time();
1054     if (!_cm->has_aborted()) {
1055       do {
1056         double start_vtime_sec = os::elapsedVTime();
1057         double start_time_sec = os::elapsedTime();
1058         double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
1059 
1060         the_task->do_marking_step(mark_step_duration_ms,
1061                                   true /* do_stealing    */,
1062                                   true /* do_termination */);
1063 
1064         double end_time_sec = os::elapsedTime();
1065         double end_vtime_sec = os::elapsedVTime();
1066         double elapsed_vtime_sec = end_vtime_sec - start_vtime_sec;
1067         double elapsed_time_sec = end_time_sec - start_time_sec;
1068         _cm->clear_has_overflown();
1069 
1070         bool ret = _cm->do_yield_check(worker_i);
1071 
1072         jlong sleep_time_ms;
1073         if (!_cm->has_aborted() && the_task->has_aborted()) {
1074           sleep_time_ms =
1075             (jlong) (elapsed_vtime_sec * _cm->sleep_factor() * 1000.0);
1076           ConcurrentGCThread::stsLeave();
1077           os::sleep(Thread::current(), sleep_time_ms, false);
1078           ConcurrentGCThread::stsJoin();
1079         }
1080         double end_time2_sec = os::elapsedTime();
1081         double elapsed_time2_sec = end_time2_sec - start_time_sec;
1082 
1083 #if 0
1084           gclog_or_tty->print_cr("CM: elapsed %1.4lf ms, sleep %1.4lf ms, "
1085                                  "overhead %1.4lf",
1086                                  elapsed_vtime_sec * 1000.0, (double) sleep_time_ms,
1087                                  the_task->conc_overhead(os::elapsedTime()) * 8.0);
1088           gclog_or_tty->print_cr("elapsed time %1.4lf ms, time 2: %1.4lf ms",
1089                                  elapsed_time_sec * 1000.0, elapsed_time2_sec * 1000.0);
1090 #endif
1091       } while (!_cm->has_aborted() && the_task->has_aborted());
1092     }
1093     the_task->record_end_time();
1094     guarantee(!the_task->has_aborted() || _cm->has_aborted(), "invariant");
1095 
1096     ConcurrentGCThread::stsLeave();
1097 
1098     double end_vtime = os::elapsedVTime();
1099     _cm->update_accum_task_vtime(worker_i, end_vtime - start_vtime);
1100   }
1101 
1102   CMConcurrentMarkingTask(ConcurrentMark* cm,
1103                           ConcurrentMarkThread* cmt) :
1104       AbstractGangTask("Concurrent Mark"), _cm(cm), _cmt(cmt) { }
1105 
1106   ~CMConcurrentMarkingTask() { }
1107 };
1108 
1109 void ConcurrentMark::markFromRoots() {
1110   // we might be tempted to assert that:
1111   // assert(asynch == !SafepointSynchronize::is_at_safepoint(),
1112   //        "inconsistent argument?");
1113   // However that wouldn't be right, because it's possible that
1114   // a safepoint is indeed in progress as a younger generation
1115   // stop-the-world GC happens even as we mark in this generation.
1116 
1117   _restart_for_overflow = false;
1118 
1119   size_t active_workers = MAX2((size_t) 1, parallel_marking_threads());
1120   force_overflow_conc()->init();
1121   set_phase(active_workers, true /* concurrent */);
1122 
1123   CMConcurrentMarkingTask markingTask(this, cmThread());
1124   if (parallel_marking_threads() > 0) {
1125     _parallel_workers->run_task(&markingTask);
1126   } else {
1127     markingTask.work(0);
1128   }
1129   print_stats();
1130 }
1131 
1132 void ConcurrentMark::checkpointRootsFinal(bool clear_all_soft_refs) {
1133   // world is stopped at this checkpoint
1134   assert(SafepointSynchronize::is_at_safepoint(),
1135          "world should be stopped");
1136 
1137   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1138 
1139   // If a full collection has happened, we shouldn't do this.
1140   if (has_aborted()) {
1141     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1142     return;
1143   }
1144 
1145   SvcGCMarker sgcm(SvcGCMarker::OTHER);
1146 
1147   if (VerifyDuringGC) {
1148     HandleMark hm;  // handle scope
1149     gclog_or_tty->print(" VerifyDuringGC:(before)");
1150     Universe::heap()->prepare_for_verify();
1151     Universe::verify(/* allow dirty */ true,
1152                      /* silent      */ false,
1153                      /* option      */ VerifyOption_G1UsePrevMarking);
1154   }
1155 
1156   G1CollectorPolicy* g1p = g1h->g1_policy();
1157   g1p->record_concurrent_mark_remark_start();
1158 
1159   double start = os::elapsedTime();
1160 
1161   checkpointRootsFinalWork();
1162 
1163   double mark_work_end = os::elapsedTime();
1164 
1165   weakRefsWork(clear_all_soft_refs);
1166 
1167   if (has_overflown()) {
1168     // Oops.  We overflowed.  Restart concurrent marking.
1169     _restart_for_overflow = true;
1170     // Clear the flag. We do not need it any more.
1171     clear_has_overflown();
1172     if (G1TraceMarkStackOverflow) {
1173       gclog_or_tty->print_cr("\nRemark led to restart for overflow.");
1174     }
1175   } else {
1176     SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
1177     // We're done with marking.
1178     // This is the end of  the marking cycle, we're expected all
1179     // threads to have SATB queues with active set to true.
1180     satb_mq_set.set_active_all_threads(false, /* new active value */
1181                                        true /* expected_active */);
1182 
1183     if (VerifyDuringGC) {
1184       HandleMark hm;  // handle scope
1185       gclog_or_tty->print(" VerifyDuringGC:(after)");
1186       Universe::heap()->prepare_for_verify();
1187       Universe::verify(/* allow dirty */ true,
1188                        /* silent      */ false,
1189                        /* option      */ VerifyOption_G1UseNextMarking);
1190     }
1191     assert(!restart_for_overflow(), "sanity");
1192   }
1193 
1194   // Reset the marking state if marking completed
1195   if (!restart_for_overflow()) {
1196     set_non_marking_state();
1197   }
1198 
1199 #if VERIFY_OBJS_PROCESSED
1200   _scan_obj_cl.objs_processed = 0;
1201   ThreadLocalObjQueue::objs_enqueued = 0;
1202 #endif
1203 
1204   // Statistics
1205   double now = os::elapsedTime();
1206   _remark_mark_times.add((mark_work_end - start) * 1000.0);
1207   _remark_weak_ref_times.add((now - mark_work_end) * 1000.0);
1208   _remark_times.add((now - start) * 1000.0);
1209 
1210   g1p->record_concurrent_mark_remark_end();
1211 }
1212 
1213 #define CARD_BM_TEST_MODE 0
1214 
1215 class CalcLiveObjectsClosure: public HeapRegionClosure {
1216 
1217   CMBitMapRO* _bm;
1218   ConcurrentMark* _cm;
1219   bool _changed;
1220   bool _yield;
1221   size_t _words_done;
1222   size_t _tot_live;
1223   size_t _tot_used;
1224   size_t _regions_done;
1225   double _start_vtime_sec;
1226 
1227   BitMap* _region_bm;
1228   BitMap* _card_bm;
1229   intptr_t _bottom_card_num;
1230   bool _final;
1231 
1232   void mark_card_num_range(intptr_t start_card_num, intptr_t last_card_num) {
1233     for (intptr_t i = start_card_num; i <= last_card_num; i++) {
1234 #if CARD_BM_TEST_MODE
1235       guarantee(_card_bm->at(i - _bottom_card_num), "Should already be set.");
1236 #else
1237       _card_bm->par_at_put(i - _bottom_card_num, 1);
1238 #endif
1239     }
1240   }
1241 
1242 public:
1243   CalcLiveObjectsClosure(bool final,
1244                          CMBitMapRO *bm, ConcurrentMark *cm,
1245                          BitMap* region_bm, BitMap* card_bm) :
1246     _bm(bm), _cm(cm), _changed(false), _yield(true),
1247     _words_done(0), _tot_live(0), _tot_used(0),
1248     _region_bm(region_bm), _card_bm(card_bm),_final(final),
1249     _regions_done(0), _start_vtime_sec(0.0)
1250   {
1251     _bottom_card_num =
1252       intptr_t(uintptr_t(G1CollectedHeap::heap()->reserved_region().start()) >>
1253                CardTableModRefBS::card_shift);
1254   }
1255 
1256   // It takes a region that's not empty (i.e., it has at least one
1257   // live object in it and sets its corresponding bit on the region
1258   // bitmap to 1. If the region is "starts humongous" it will also set
1259   // to 1 the bits on the region bitmap that correspond to its
1260   // associated "continues humongous" regions.
1261   void set_bit_for_region(HeapRegion* hr) {
1262     assert(!hr->continuesHumongous(), "should have filtered those out");
1263 
1264     size_t index = hr->hrs_index();
1265     if (!hr->startsHumongous()) {
1266       // Normal (non-humongous) case: just set the bit.
1267       _region_bm->par_at_put((BitMap::idx_t) index, true);
1268     } else {
1269       // Starts humongous case: calculate how many regions are part of
1270       // this humongous region and then set the bit range. It might
1271       // have been a bit more efficient to look at the object that
1272       // spans these humongous regions to calculate their number from
1273       // the object's size. However, it's a good idea to calculate
1274       // this based on the metadata itself, and not the region
1275       // contents, so that this code is not aware of what goes into
1276       // the humongous regions (in case this changes in the future).
1277       G1CollectedHeap* g1h = G1CollectedHeap::heap();
1278       size_t end_index = index + 1;
1279       while (end_index < g1h->n_regions()) {
1280         HeapRegion* chr = g1h->region_at(end_index);
1281         if (!chr->continuesHumongous()) break;
1282         end_index += 1;
1283       }
1284       _region_bm->par_at_put_range((BitMap::idx_t) index,
1285                                    (BitMap::idx_t) end_index, true);
1286     }
1287   }
1288 
1289   bool doHeapRegion(HeapRegion* hr) {
1290     if (!_final && _regions_done == 0) {
1291       _start_vtime_sec = os::elapsedVTime();
1292     }
1293 
1294     if (hr->continuesHumongous()) {
1295       // We will ignore these here and process them when their
1296       // associated "starts humongous" region is processed (see
1297       // set_bit_for_heap_region()). Note that we cannot rely on their
1298       // associated "starts humongous" region to have their bit set to
1299       // 1 since, due to the region chunking in the parallel region
1300       // iteration, a "continues humongous" region might be visited
1301       // before its associated "starts humongous".
1302       return false;
1303     }
1304 
1305     HeapWord* nextTop = hr->next_top_at_mark_start();
1306     HeapWord* start   = hr->top_at_conc_mark_count();
1307     assert(hr->bottom() <= start && start <= hr->end() &&
1308            hr->bottom() <= nextTop && nextTop <= hr->end() &&
1309            start <= nextTop,
1310            "Preconditions.");
1311     // Otherwise, record the number of word's we'll examine.
1312     size_t words_done = (nextTop - start);
1313     // Find the first marked object at or after "start".
1314     start = _bm->getNextMarkedWordAddress(start, nextTop);
1315     size_t marked_bytes = 0;
1316 
1317     // Below, the term "card num" means the result of shifting an address
1318     // by the card shift -- address 0 corresponds to card number 0.  One
1319     // must subtract the card num of the bottom of the heap to obtain a
1320     // card table index.
1321     // The first card num of the sequence of live cards currently being
1322     // constructed.  -1 ==> no sequence.
1323     intptr_t start_card_num = -1;
1324     // The last card num of the sequence of live cards currently being
1325     // constructed.  -1 ==> no sequence.
1326     intptr_t last_card_num = -1;
1327 
1328     while (start < nextTop) {
1329       if (_yield && _cm->do_yield_check()) {
1330         // We yielded.  It might be for a full collection, in which case
1331         // all bets are off; terminate the traversal.
1332         if (_cm->has_aborted()) {
1333           _changed = false;
1334           return true;
1335         } else {
1336           // Otherwise, it might be a collection pause, and the region
1337           // we're looking at might be in the collection set.  We'll
1338           // abandon this region.
1339           return false;
1340         }
1341       }
1342       oop obj = oop(start);
1343       int obj_sz = obj->size();
1344       // The card num of the start of the current object.
1345       intptr_t obj_card_num =
1346         intptr_t(uintptr_t(start) >> CardTableModRefBS::card_shift);
1347 
1348       HeapWord* obj_last = start + obj_sz - 1;
1349       intptr_t obj_last_card_num =
1350         intptr_t(uintptr_t(obj_last) >> CardTableModRefBS::card_shift);
1351 
1352       if (obj_card_num != last_card_num) {
1353         if (start_card_num == -1) {
1354           assert(last_card_num == -1, "Both or neither.");
1355           start_card_num = obj_card_num;
1356         } else {
1357           assert(last_card_num != -1, "Both or neither.");
1358           assert(obj_card_num >= last_card_num, "Inv");
1359           if ((obj_card_num - last_card_num) > 1) {
1360             // Mark the last run, and start a new one.
1361             mark_card_num_range(start_card_num, last_card_num);
1362             start_card_num = obj_card_num;
1363           }
1364         }
1365 #if CARD_BM_TEST_MODE
1366         /*
1367         gclog_or_tty->print_cr("Setting bits from %d/%d.",
1368                                obj_card_num - _bottom_card_num,
1369                                obj_last_card_num - _bottom_card_num);
1370         */
1371         for (intptr_t j = obj_card_num; j <= obj_last_card_num; j++) {
1372           _card_bm->par_at_put(j - _bottom_card_num, 1);
1373         }
1374 #endif
1375       }
1376       // In any case, we set the last card num.
1377       last_card_num = obj_last_card_num;
1378 
1379       marked_bytes += (size_t)obj_sz * HeapWordSize;
1380       // Find the next marked object after this one.
1381       start = _bm->getNextMarkedWordAddress(start + 1, nextTop);
1382       _changed = true;
1383     }
1384     // Handle the last range, if any.
1385     if (start_card_num != -1) {
1386       mark_card_num_range(start_card_num, last_card_num);
1387     }
1388     if (_final) {
1389       // Mark the allocated-since-marking portion...
1390       HeapWord* tp = hr->top();
1391       if (nextTop < tp) {
1392         start_card_num =
1393           intptr_t(uintptr_t(nextTop) >> CardTableModRefBS::card_shift);
1394         last_card_num =
1395           intptr_t(uintptr_t(tp) >> CardTableModRefBS::card_shift);
1396         mark_card_num_range(start_card_num, last_card_num);
1397         // This definitely means the region has live objects.
1398         set_bit_for_region(hr);
1399       }
1400     }
1401 
1402     hr->add_to_marked_bytes(marked_bytes);
1403     // Update the live region bitmap.
1404     if (marked_bytes > 0) {
1405       set_bit_for_region(hr);
1406     }
1407     hr->set_top_at_conc_mark_count(nextTop);
1408     _tot_live += hr->next_live_bytes();
1409     _tot_used += hr->used();
1410     _words_done = words_done;
1411 
1412     if (!_final) {
1413       ++_regions_done;
1414       if (_regions_done % 10 == 0) {
1415         double end_vtime_sec = os::elapsedVTime();
1416         double elapsed_vtime_sec = end_vtime_sec - _start_vtime_sec;
1417         if (elapsed_vtime_sec > (10.0 / 1000.0)) {
1418           jlong sleep_time_ms =
1419             (jlong) (elapsed_vtime_sec * _cm->cleanup_sleep_factor() * 1000.0);
1420           os::sleep(Thread::current(), sleep_time_ms, false);
1421           _start_vtime_sec = end_vtime_sec;
1422         }
1423       }
1424     }
1425 
1426     return false;
1427   }
1428 
1429   bool changed() { return _changed;  }
1430   void reset()   { _changed = false; _words_done = 0; }
1431   void no_yield() { _yield = false; }
1432   size_t words_done() { return _words_done; }
1433   size_t tot_live() { return _tot_live; }
1434   size_t tot_used() { return _tot_used; }
1435 };
1436 
1437 
1438 void ConcurrentMark::calcDesiredRegions() {
1439   _region_bm.clear();
1440   _card_bm.clear();
1441   CalcLiveObjectsClosure calccl(false /*final*/,
1442                                 nextMarkBitMap(), this,
1443                                 &_region_bm, &_card_bm);
1444   G1CollectedHeap *g1h = G1CollectedHeap::heap();
1445   g1h->heap_region_iterate(&calccl);
1446 
1447   do {
1448     calccl.reset();
1449     g1h->heap_region_iterate(&calccl);
1450   } while (calccl.changed());
1451 }
1452 
1453 class G1ParFinalCountTask: public AbstractGangTask {
1454 protected:
1455   G1CollectedHeap* _g1h;
1456   CMBitMap* _bm;
1457   size_t _n_workers;
1458   size_t *_live_bytes;
1459   size_t *_used_bytes;
1460   BitMap* _region_bm;
1461   BitMap* _card_bm;
1462 public:
1463   G1ParFinalCountTask(G1CollectedHeap* g1h, CMBitMap* bm,
1464                       BitMap* region_bm, BitMap* card_bm)
1465     : AbstractGangTask("G1 final counting"), _g1h(g1h),
1466       _bm(bm), _region_bm(region_bm), _card_bm(card_bm) {
1467     if (ParallelGCThreads > 0) {
1468       _n_workers = _g1h->workers()->total_workers();
1469     } else {
1470       _n_workers = 1;
1471     }
1472     _live_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1473     _used_bytes = NEW_C_HEAP_ARRAY(size_t, _n_workers);
1474   }
1475 
1476   ~G1ParFinalCountTask() {
1477     FREE_C_HEAP_ARRAY(size_t, _live_bytes);
1478     FREE_C_HEAP_ARRAY(size_t, _used_bytes);
1479   }
1480 
1481   void work(int i) {
1482     CalcLiveObjectsClosure calccl(true /*final*/,
1483                                   _bm, _g1h->concurrent_mark(),
1484                                   _region_bm, _card_bm);
1485     calccl.no_yield();
1486     if (G1CollectedHeap::use_parallel_gc_threads()) {
1487       _g1h->heap_region_par_iterate_chunked(&calccl, i,
1488                                             HeapRegion::FinalCountClaimValue);
1489     } else {
1490       _g1h->heap_region_iterate(&calccl);
1491     }
1492     assert(calccl.complete(), "Shouldn't have yielded!");
1493 
1494     assert((size_t) i < _n_workers, "invariant");
1495     _live_bytes[i] = calccl.tot_live();
1496     _used_bytes[i] = calccl.tot_used();
1497   }
1498   size_t live_bytes()  {
1499     size_t live_bytes = 0;
1500     for (size_t i = 0; i < _n_workers; ++i)
1501       live_bytes += _live_bytes[i];
1502     return live_bytes;
1503   }
1504   size_t used_bytes()  {
1505     size_t used_bytes = 0;
1506     for (size_t i = 0; i < _n_workers; ++i)
1507       used_bytes += _used_bytes[i];
1508     return used_bytes;
1509   }
1510 };
1511 
1512 class G1ParNoteEndTask;
1513 
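     // Closure applied to every heap region at the end of marking (see
     // doHeapRegion() below): it notes the end of marking in the region,
     // accumulates the maximum live bytes, and moves regions found to be
     // completely empty onto a local cleanup list. "Continues humongous"
     // regions are skipped.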
1514 class G1NoteEndOfConcMarkClosure : public HeapRegionClosure {
1515   G1CollectedHeap* _g1;
1516   int _worker_num;
1517   size_t _max_live_bytes;
1518   size_t _regions_claimed;
1519   size_t _freed_bytes;
1520   FreeRegionList* _local_cleanup_list;
1521   HumongousRegionSet* _humongous_proxy_set;
1522   HRRSCleanupTask* _hrrs_cleanup_task;
1523   double _claimed_region_time;
1524   double _max_region_time;
1525 
1526 public:
1527   G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1528                              int worker_num,
1529                              FreeRegionList* local_cleanup_list,
1530                              HumongousRegionSet* humongous_proxy_set,
1531                              HRRSCleanupTask* hrrs_cleanup_task);
1532   size_t freed_bytes() { return _freed_bytes; }
1533 
1534   bool doHeapRegion(HeapRegion *r);
1535 
1536   size_t max_live_bytes() { return _max_live_bytes; }
1537   size_t regions_claimed() { return _regions_claimed; }
1538   double claimed_region_time_sec() { return _claimed_region_time; }
1539   double max_region_time_sec() { return _max_region_time; }
1540 };
1541 
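     // Parallel wrapper around G1NoteEndOfConcMarkClosure: each worker iterates
     // over the regions it claims and then, under ParGCRareEvent_lock, folds
     // its local results into the task-wide totals and appends its local
     // cleanup list to the global one.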
1542 class G1ParNoteEndTask: public AbstractGangTask {
1543   friend class G1NoteEndOfConcMarkClosure;
1544 
1545 protected:
1546   G1CollectedHeap* _g1h;
1547   size_t _max_live_bytes;
1548   size_t _freed_bytes;
1549   FreeRegionList* _cleanup_list;
1550 
1551 public:
1552   G1ParNoteEndTask(G1CollectedHeap* g1h,
1553                    FreeRegionList* cleanup_list) :
1554     AbstractGangTask("G1 note end"), _g1h(g1h),
1555     _max_live_bytes(0), _freed_bytes(0), _cleanup_list(cleanup_list) { }
1556 
1557   void work(int i) {
1558     double start = os::elapsedTime();
1559     FreeRegionList local_cleanup_list("Local Cleanup List");
1560     HumongousRegionSet humongous_proxy_set("Local Cleanup Humongous Proxy Set");
1561     HRRSCleanupTask hrrs_cleanup_task;
1562     G1NoteEndOfConcMarkClosure g1_note_end(_g1h, i, &local_cleanup_list,
1563                                            &humongous_proxy_set,
1564                                            &hrrs_cleanup_task);
1565     if (G1CollectedHeap::use_parallel_gc_threads()) {
1566       _g1h->heap_region_par_iterate_chunked(&g1_note_end, i,
1567                                             HeapRegion::NoteEndClaimValue);
1568     } else {
1569       _g1h->heap_region_iterate(&g1_note_end);
1570     }
1571     assert(g1_note_end.complete(), "Shouldn't have yielded!");
1572 
1573     // Now update the lists
1574     _g1h->update_sets_after_freeing_regions(g1_note_end.freed_bytes(),
1575                                             NULL /* free_list */,
1576                                             &humongous_proxy_set,
1577                                             true /* par */);
1578     {
1579       MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
1580       _max_live_bytes += g1_note_end.max_live_bytes();
1581       _freed_bytes += g1_note_end.freed_bytes();
1582 
1583       // If we iterate over the global cleanup list at the end of
1584       // cleanup to do this printing we will not guarantee to only
1585       // generate output for the newly-reclaimed regions (the list
1586       // might not be empty at the beginning of cleanup; we might
1587       // still be working on its previous contents). So we do the
1588       // printing here, before we append the new regions to the global
1589       // cleanup list.
1590 
1591       G1HRPrinter* hr_printer = _g1h->hr_printer();
1592       if (hr_printer->is_active()) {
1593         HeapRegionLinkedListIterator iter(&local_cleanup_list);
1594         while (iter.more_available()) {
1595           HeapRegion* hr = iter.get_next();
1596           hr_printer->cleanup(hr);
1597         }
1598       }
1599 
1600       _cleanup_list->add_as_tail(&local_cleanup_list);
1601       assert(local_cleanup_list.is_empty(), "post-condition");
1602 
1603       HeapRegionRemSet::finish_cleanup_task(&hrrs_cleanup_task);
1604     }
1605     double end = os::elapsedTime();
1606     if (G1PrintParCleanupStats) {
1607       gclog_or_tty->print("     Worker thread %d [%8.3f..%8.3f = %8.3f ms] "
1608                           "claimed %d regions (tot = %8.3f ms, max = %8.3f ms).\n",
1609                           i, start, end, (end-start)*1000.0,
1610                           g1_note_end.regions_claimed(),
1611                           g1_note_end.claimed_region_time_sec()*1000.0,
1612                           g1_note_end.max_region_time_sec()*1000.0);
1613     }
1614   }
1615   size_t max_live_bytes() { return _max_live_bytes; }
1616   size_t freed_bytes() { return _freed_bytes; }
1617 };
1618 
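     // Parallel task that scrubs the remembered sets, using the liveness
     // information in the region and card bitmaps to drop entries that cover
     // areas with no live data.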
1619 class G1ParScrubRemSetTask: public AbstractGangTask {
1620 protected:
1621   G1RemSet* _g1rs;
1622   BitMap* _region_bm;
1623   BitMap* _card_bm;
1624 public:
1625   G1ParScrubRemSetTask(G1CollectedHeap* g1h,
1626                        BitMap* region_bm, BitMap* card_bm) :
1627     AbstractGangTask("G1 ScrubRS"), _g1rs(g1h->g1_rem_set()),
1628     _region_bm(region_bm), _card_bm(card_bm)
1629   {}
1630 
1631   void work(int i) {
1632     if (G1CollectedHeap::use_parallel_gc_threads()) {
1633       _g1rs->scrub_par(_region_bm, _card_bm, i,
1634                        HeapRegion::ScrubRemSetClaimValue);
1635     } else {
1636       _g1rs->scrub(_region_bm, _card_bm);
1637     }
1638   }
1639 
1640 };
1641 
1642 G1NoteEndOfConcMarkClosure::
1643 G1NoteEndOfConcMarkClosure(G1CollectedHeap* g1,
1644                            int worker_num,
1645                            FreeRegionList* local_cleanup_list,
1646                            HumongousRegionSet* humongous_proxy_set,
1647                            HRRSCleanupTask* hrrs_cleanup_task)
1648   : _g1(g1), _worker_num(worker_num),
1649     _max_live_bytes(0), _regions_claimed(0),
1650     _freed_bytes(0),
1651     _claimed_region_time(0.0), _max_region_time(0.0),
1652     _local_cleanup_list(local_cleanup_list),
1653     _humongous_proxy_set(humongous_proxy_set),
1654     _hrrs_cleanup_task(hrrs_cleanup_task) { }
1655 
1656 bool G1NoteEndOfConcMarkClosure::doHeapRegion(HeapRegion *hr) {
1657   // We use a claim value of zero here because all regions
1658   // were claimed with value 1 in the FinalCount task.
1659   hr->reset_gc_time_stamp();
1660   if (!hr->continuesHumongous()) {
1661     double start = os::elapsedTime();
1662     _regions_claimed++;
1663     hr->note_end_of_marking();
1664     _max_live_bytes += hr->max_live_bytes();
1665     _g1->free_region_if_empty(hr,
1666                               &_freed_bytes,
1667                               _local_cleanup_list,
1668                               _humongous_proxy_set,
1669                               _hrrs_cleanup_task,
1670                               true /* par */);
1671     double region_time = (os::elapsedTime() - start);
1672     _claimed_region_time += region_time;
1673     if (region_time > _max_region_time) {
1674       _max_region_time = region_time;
1675     }
1676   }
1677   return false;
1678 }
1679 
1680 void ConcurrentMark::cleanup() {
1681   // world is stopped at this checkpoint
1682   assert(SafepointSynchronize::is_at_safepoint(),
1683          "world should be stopped");
1684   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1685 
1686   // If a full collection has happened, we shouldn't do this.
1687   if (has_aborted()) {
1688     g1h->set_marking_complete(); // So bitmap clearing isn't confused
1689     return;
1690   }
1691 
1692   g1h->verify_region_sets_optional();
1693 
1694   if (VerifyDuringGC) {
1695     HandleMark hm;  // handle scope
1696     gclog_or_tty->print(" VerifyDuringGC:(before)");
1697     Universe::heap()->prepare_for_verify();
1698     Universe::verify(/* allow dirty */ true,
1699                      /* silent      */ false,
1700                      /* option      */ VerifyOption_G1UsePrevMarking);
1701   }
1702 
1703   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
1704   g1p->record_concurrent_mark_cleanup_start();
1705 
1706   double start = os::elapsedTime();
1707 
1708   HeapRegionRemSet::reset_for_cleanup_tasks();
1709 
1710   // Do counting once more with the world stopped for good measure.
1711   G1ParFinalCountTask g1_par_count_task(g1h, nextMarkBitMap(),
1712                                         &_region_bm, &_card_bm);
1713   if (G1CollectedHeap::use_parallel_gc_threads()) {
1714     assert(g1h->check_heap_region_claim_values(
1715                                                HeapRegion::InitialClaimValue),
1716            "sanity check");
1717 
1718     int n_workers = g1h->workers()->total_workers();
1719     g1h->set_par_threads(n_workers);
1720     g1h->workers()->run_task(&g1_par_count_task);
1721     g1h->set_par_threads(0);
1722 
1723     assert(g1h->check_heap_region_claim_values(
1724                                              HeapRegion::FinalCountClaimValue),
1725            "sanity check");
1726   } else {
1727     g1_par_count_task.work(0);
1728   }
1729 
1730   size_t known_garbage_bytes =
1731     g1_par_count_task.used_bytes() - g1_par_count_task.live_bytes();
1732   g1p->set_known_garbage_bytes(known_garbage_bytes);
1733 
1734   size_t start_used_bytes = g1h->used();
1735   _at_least_one_mark_complete = true;
1736   g1h->set_marking_complete();
1737 
1738   ergo_verbose4(ErgoConcCycles,
1739            "finish cleanup",
1740            ergo_format_byte("occupancy")
1741            ergo_format_byte("capacity")
1742            ergo_format_byte_perc("known garbage"),
1743            start_used_bytes, g1h->capacity(),
1744            known_garbage_bytes,
1745            ((double) known_garbage_bytes / (double) g1h->capacity()) * 100.0);
1746 
1747   double count_end = os::elapsedTime();
1748   double this_final_counting_time = (count_end - start);
1749   if (G1PrintParCleanupStats) {
1750     gclog_or_tty->print_cr("Cleanup:");
1751     gclog_or_tty->print_cr("  Finalize counting: %8.3f ms",
1752                            this_final_counting_time*1000.0);
1753   }
1754   _total_counting_time += this_final_counting_time;
1755 
1756   if (G1PrintRegionLivenessInfo) {
1757     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Marking");
1758     _g1h->heap_region_iterate(&cl);
1759   }
1760 
1761   // Install newly created mark bitMap as "prev".
1762   swapMarkBitMaps();
1763 
1764   g1h->reset_gc_time_stamp();
1765 
1766   // Note end of marking in all heap regions.
1767   double note_end_start = os::elapsedTime();
1768   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list);
1769   if (G1CollectedHeap::use_parallel_gc_threads()) {
1770     int n_workers = g1h->workers()->total_workers();
1771     g1h->set_par_threads(n_workers);
1772     g1h->workers()->run_task(&g1_par_note_end_task);
1773     g1h->set_par_threads(0);
1774 
1775     assert(g1h->check_heap_region_claim_values(HeapRegion::NoteEndClaimValue),
1776            "sanity check");
1777   } else {
1778     g1_par_note_end_task.work(0);
1779   }
1780 
1781   if (!cleanup_list_is_empty()) {
1782     // The cleanup list is not empty, so we'll have to process it
1783     // concurrently. Notify anyone else that might be wanting free
1784     // regions that there will be more free regions coming soon.
1785     g1h->set_free_regions_coming();
1786   }
1787   double note_end_end = os::elapsedTime();
1788   if (G1PrintParCleanupStats) {
1789     gclog_or_tty->print_cr("  note end of marking: %8.3f ms.",
1790                            (note_end_end - note_end_start)*1000.0);
1791   }
1792 
1793 
1794   // Scrub the remembered sets (if requested) before the cleanup-end call
1795   // below, since scrubbing affects the metric by which we sort the heap regions.
1796   if (G1ScrubRemSets) {
1797     double rs_scrub_start = os::elapsedTime();
1798     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm);
1799     if (G1CollectedHeap::use_parallel_gc_threads()) {
1800       int n_workers = g1h->workers()->total_workers();
1801       g1h->set_par_threads(n_workers);
1802       g1h->workers()->run_task(&g1_par_scrub_rs_task);
1803       g1h->set_par_threads(0);
1804 
1805       assert(g1h->check_heap_region_claim_values(
1806                                             HeapRegion::ScrubRemSetClaimValue),
1807              "sanity check");
1808     } else {
1809       g1_par_scrub_rs_task.work(0);
1810     }
1811 
1812     double rs_scrub_end = os::elapsedTime();
1813     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
1814     _total_rs_scrub_time += this_rs_scrub_time;
1815   }
1816 
1817   // this will also free any regions totally full of garbage objects,
1818   // and sort the regions.
1819   g1h->g1_policy()->record_concurrent_mark_cleanup_end(
1820                         g1_par_note_end_task.freed_bytes(),
1821                         g1_par_note_end_task.max_live_bytes());
1822 
1823   // Statistics.
1824   double end = os::elapsedTime();
1825   _cleanup_times.add((end - start) * 1000.0);
1826 
1827   // G1CollectedHeap::heap()->print();
1828   // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
1829   // G1CollectedHeap::heap()->get_gc_time_stamp());
1830 
1831   if (PrintGC || PrintGCDetails) {
1832     g1h->print_size_transition(gclog_or_tty,
1833                                start_used_bytes,
1834                                g1h->used(),
1835                                g1h->capacity());
1836   }
1837 
1838   size_t cleaned_up_bytes = start_used_bytes - g1h->used();
1839   g1p->decrease_known_garbage_bytes(cleaned_up_bytes);
1840 
1841   // Clean up will have freed any regions completely full of garbage.
1842   // Update the soft reference policy with the new heap occupancy.
1843   Universe::update_heap_info_at_gc();
1844 
1845   // We need to make this be a "collection" so any collection pause that
1846   // races with it goes around and waits for completeCleanup to finish.
1847   g1h->increment_total_collections();
1848 
1849   if (VerifyDuringGC) {
1850     HandleMark hm;  // handle scope
1851     gclog_or_tty->print(" VerifyDuringGC:(after)");
1852     Universe::heap()->prepare_for_verify();
1853     Universe::verify(/* allow dirty */ true,
1854                      /* silent      */ false,
1855                      /* option      */ VerifyOption_G1UsePrevMarking);
1856   }
1857 
1858   g1h->verify_region_sets_optional();
1859 }
1860 
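     // Concurrently frees the regions that cleanup() accumulated on the
     // _cleanup_list: each region is cleared and moved to the secondary free
     // list in batches, to limit locking and notify_all() traffic.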
1861 void ConcurrentMark::completeCleanup() {
1862   if (has_aborted()) return;
1863 
1864   G1CollectedHeap* g1h = G1CollectedHeap::heap();
1865 
1866   _cleanup_list.verify_optional();
1867   FreeRegionList tmp_free_list("Tmp Free List");
1868 
1869   if (G1ConcRegionFreeingVerbose) {
1870     gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1871                            "cleanup list has "SIZE_FORMAT" entries",
1872                            _cleanup_list.length());
1873   }
1874 
1875   // No one else should be accessing the _cleanup_list at this point,
1876   // so it's not necessary to take any locks.
1877   while (!_cleanup_list.is_empty()) {
1878     HeapRegion* hr = _cleanup_list.remove_head();
1879     assert(hr != NULL, "the list was not empty");
1880     hr->par_clear();
1881     tmp_free_list.add_as_tail(hr);
1882 
1883     // Instead of adding one region at a time to the secondary_free_list,
1884     // we accumulate them in the local list and move them a few at a
1885     // time. This also cuts down on the number of notify_all() calls
1886     // we do during this process. We'll also append the local list when
1887     // _cleanup_list is empty (which means we just removed the last
1888     // region from the _cleanup_list).
1889     if ((tmp_free_list.length() % G1SecondaryFreeListAppendLength == 0) ||
1890         _cleanup_list.is_empty()) {
1891       if (G1ConcRegionFreeingVerbose) {
1892         gclog_or_tty->print_cr("G1ConcRegionFreeing [complete cleanup] : "
1893                                "appending "SIZE_FORMAT" entries to the "
1894                                "secondary_free_list, clean list still has "
1895                                SIZE_FORMAT" entries",
1896                                tmp_free_list.length(),
1897                                _cleanup_list.length());
1898       }
1899 
1900       {
1901         MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag);
1902         g1h->secondary_free_list_add_as_tail(&tmp_free_list);
1903         SecondaryFreeList_lock->notify_all();
1904       }
1905 
1906       if (G1StressConcRegionFreeing) {
1907         for (uintx i = 0; i < G1StressConcRegionFreeingDelayMillis; ++i) {
1908           os::sleep(Thread::current(), (jlong) 1, false);
1909         }
1910       }
1911     }
1912   }
1913   assert(tmp_free_list.is_empty(), "post-condition");
1914 }
1915 
1916 // Support closures for reference processing in G1
1917 
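     // An object is considered alive if it lies outside the G1 reserved heap
     // or has not been determined dead by the marking currently in progress.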
1918 bool G1CMIsAliveClosure::do_object_b(oop obj) {
1919   HeapWord* addr = (HeapWord*)obj;
1920   return addr != NULL &&
1921          (!_g1->is_in_g1_reserved(addr) || !_g1->is_obj_ill(obj));
1922 }
1923 
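     // 'Keep Alive' closure used by serial reference processing: if the
     // referenced object is in the G1 heap and not yet known to be live, it is
     // marked on the given bitmap and pushed on the global mark stack so that
     // it (and everything reachable from it) will be scanned.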
1924 class G1CMKeepAliveClosure: public OopClosure {
1925   G1CollectedHeap* _g1;
1926   ConcurrentMark*  _cm;
1927   CMBitMap*        _bitMap;
1928  public:
1929   G1CMKeepAliveClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
1930                        CMBitMap* bitMap) :
1931     _g1(g1), _cm(cm),
1932     _bitMap(bitMap) {}
1933 
1934   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
1935   virtual void do_oop(      oop* p) { do_oop_work(p); }
1936 
1937   template <class T> void do_oop_work(T* p) {
1938     oop obj = oopDesc::load_decode_heap_oop(p);
1939     HeapWord* addr = (HeapWord*)obj;
1940 
1941     if (_cm->verbose_high()) {
1942       gclog_or_tty->print_cr("\t[0] we're looking at location "
1943                              "*"PTR_FORMAT" = "PTR_FORMAT,
1944                              p, (void*) obj);
1945     }
1946 
1947     if (_g1->is_in_g1_reserved(addr) && _g1->is_obj_ill(obj)) {
1948       _bitMap->mark(addr);
1949       _cm->mark_stack_push(obj);
1950     }
1951   }
1952 };
1953 
1954 class G1CMDrainMarkingStackClosure: public VoidClosure {
1955   CMMarkStack*                  _markStack;
1956   CMBitMap*                     _bitMap;
1957   G1CMKeepAliveClosure*         _oopClosure;
1958  public:
1959   G1CMDrainMarkingStackClosure(CMBitMap* bitMap, CMMarkStack* markStack,
1960                                G1CMKeepAliveClosure* oopClosure) :
1961     _bitMap(bitMap),
1962     _markStack(markStack),
1963     _oopClosure(oopClosure)
1964   {}
1965 
1966   void do_void() {
1967     _markStack->drain((OopClosure*)_oopClosure, _bitMap, false);
1968   }
1969 };
1970 
1971 // 'Keep Alive' closure used by parallel reference processing.
1972 // An instance of this closure is used in the parallel reference processing
1973 // code rather than an instance of G1CMKeepAliveClosure. We could have used
1974 // the G1CMKeepAliveClosure as it is MT-safe. Also reference objects are
1975 // placed on to discovered ref lists once so we can mark and push with no
1976 // need to check whether the object has already been marked. Using the
1977 // G1CMKeepAliveClosure would mean, however, having all the worker threads
1978 // operating on the global mark stack. This means that an individual
1979 // worker would be doing lock-free pushes while it processes its own
1980 // discovered ref list followed by drain call. If the discovered ref lists
1981 // are unbalanced then this could cause interference with the other
1982 // workers. Using a CMTask (and its embedded local data structures)
1983 // avoids that potential interference.
1984 class G1CMParKeepAliveAndDrainClosure: public OopClosure {
1985   ConcurrentMark*  _cm;
1986   CMTask*          _task;
1987   CMBitMap*        _bitMap;
1988   int              _ref_counter_limit;
1989   int              _ref_counter;
1990  public:
1991   G1CMParKeepAliveAndDrainClosure(ConcurrentMark* cm,
1992                                   CMTask* task,
1993                                   CMBitMap* bitMap) :
1994     _cm(cm), _task(task), _bitMap(bitMap),
1995     _ref_counter_limit(G1RefProcDrainInterval)
1996   {
1997     assert(_ref_counter_limit > 0, "sanity");
1998     _ref_counter = _ref_counter_limit;
1999   }
2000 
2001   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2002   virtual void do_oop(      oop* p) { do_oop_work(p); }
2003 
2004   template <class T> void do_oop_work(T* p) {
2005     if (!_cm->has_overflown()) {
2006       oop obj = oopDesc::load_decode_heap_oop(p);
2007       if (_cm->verbose_high()) {
2008         gclog_or_tty->print_cr("\t[%d] we're looking at location "
2009                                "*"PTR_FORMAT" = "PTR_FORMAT,
2010                                _task->task_id(), p, (void*) obj);
2011       }
2012 
2013       _task->deal_with_reference(obj);
2014       _ref_counter--;
2015 
2016       if (_ref_counter == 0) {
2017         // We have dealt with _ref_counter_limit references, pushing them and objects
2018         // reachable from them on to the local stack (and possibly the global stack).
2019         // Call do_marking_step() to process these entries. We call the routine in a
2020         // loop, which we'll exit if there's nothing more to do (i.e. we're done
2021         // with the entries that we've pushed as a result of the deal_with_reference
2022         // calls above) or we overflow.
2023         // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2024         // while there may still be some work to do. (See the comment at the
2025         // beginning of CMTask::do_marking_step() for those conditions - one of which
2026         // is reaching the specified time target.) It is only when
2027         // CMTask::do_marking_step() returns without setting the has_aborted() flag
2028         // that the marking has completed.
2029         do {
2030           double mark_step_duration_ms = G1ConcMarkStepDurationMillis;
2031           _task->do_marking_step(mark_step_duration_ms,
2032                                  false /* do_stealing    */,
2033                                  false /* do_termination */);
2034         } while (_task->has_aborted() && !_cm->has_overflown());
2035         _ref_counter = _ref_counter_limit;
2036       }
2037     } else {
2038       if (_cm->verbose_high()) {
2039          gclog_or_tty->print_cr("\t[%d] CM Overflow", _task->task_id());
2040       }
2041     }
2042   }
2043 };
2044 
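     // 'Drain' closure used by parallel reference processing: repeatedly calls
     // CMTask::do_marking_step() (with stealing and termination enabled) until
     // the task's local structures and the global mark stack have been
     // completely drained, or until marking overflows.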
2045 class G1CMParDrainMarkingStackClosure: public VoidClosure {
2046   ConcurrentMark* _cm;
2047   CMTask* _task;
2048  public:
2049   G1CMParDrainMarkingStackClosure(ConcurrentMark* cm, CMTask* task) :
2050     _cm(cm), _task(task)
2051   {}
2052 
2053   void do_void() {
2054     do {
2055       if (_cm->verbose_high()) {
2056         gclog_or_tty->print_cr("\t[%d] Drain: Calling do marking_step",
2057                                _task->task_id());
2058       }
2059 
2060       // We call CMTask::do_marking_step() to completely drain the local and
2061       // global marking stacks. The routine is called in a loop, which we'll
2062       // exit if there's nothing more to do (i.e. we've completely drained the
2063       // entries that were pushed as a result of applying the
2064       // G1CMParKeepAliveAndDrainClosure to the entries on the discovered ref
2065       // lists above) or we overflow the global marking stack.
2066       // Note: CMTask::do_marking_step() can set the CMTask::has_aborted() flag
2067       // while there may still be some work to do. (See the comment at the
2068       // beginning of CMTask::do_marking_step() for those conditions - one of which
2069       // is reaching the specified time target.) It is only when
2070       // CMTask::do_marking_step() returns without setting the has_aborted() flag
2071       // that the marking has completed.
2072 
2073       _task->do_marking_step(1000000000.0 /* something very large */,
2074                              true /* do_stealing    */,
2075                              true /* do_termination */);
2076     } while (_task->has_aborted() && !_cm->has_overflown());
2077   }
2078 };
2079 
2080 // Implementation of AbstractRefProcTaskExecutor for parallel
2081 // reference processing at the end of G1 concurrent marking
2082 
2083 class G1CMRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
2084 private:
2085   G1CollectedHeap* _g1h;
2086   ConcurrentMark*  _cm;
2087   CMBitMap*        _bitmap;
2088   WorkGang*        _workers;
2089   int              _active_workers;
2090 
2091 public:
2092   G1CMRefProcTaskExecutor(G1CollectedHeap* g1h,
2093                           ConcurrentMark* cm,
2094                           CMBitMap* bitmap,
2095                           WorkGang* workers,
2096                           int n_workers) :
2097     _g1h(g1h), _cm(cm), _bitmap(bitmap),
2098     _workers(workers), _active_workers(n_workers)
2099   { }
2100 
2101   // Executes the given task using concurrent marking worker threads.
2102   virtual void execute(ProcessTask& task);
2103   virtual void execute(EnqueueTask& task);
2104 };
2105 
2106 class G1CMRefProcTaskProxy: public AbstractGangTask {
2107   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
2108   ProcessTask&     _proc_task;
2109   G1CollectedHeap* _g1h;
2110   ConcurrentMark*  _cm;
2111   CMBitMap*        _bitmap;
2112 
2113 public:
2114   G1CMRefProcTaskProxy(ProcessTask& proc_task,
2115                        G1CollectedHeap* g1h,
2116                        ConcurrentMark* cm,
2117                        CMBitMap* bitmap) :
2118     AbstractGangTask("Process reference objects in parallel"),
2119     _proc_task(proc_task), _g1h(g1h), _cm(cm), _bitmap(bitmap)
2120   {}
2121 
2122   virtual void work(int i) {
2123     CMTask* marking_task = _cm->task(i);
2124     G1CMIsAliveClosure g1_is_alive(_g1h);
2125     G1CMParKeepAliveAndDrainClosure g1_par_keep_alive(_cm, marking_task, _bitmap);
2126     G1CMParDrainMarkingStackClosure g1_par_drain(_cm, marking_task);
2127 
2128     _proc_task.work(i, g1_is_alive, g1_par_keep_alive, g1_par_drain);
2129   }
2130 };
2131 
2132 void G1CMRefProcTaskExecutor::execute(ProcessTask& proc_task) {
2133   assert(_workers != NULL, "Need parallel worker threads.");
2134 
2135   G1CMRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _cm, _bitmap);
2136 
2137   // We need to reset the phase for each task execution so that
2138   // the termination protocol of CMTask::do_marking_step works.
2139   _cm->set_phase(_active_workers, false /* concurrent */);
2140   _g1h->set_par_threads(_active_workers);
2141   _workers->run_task(&proc_task_proxy);
2142   _g1h->set_par_threads(0);
2143 }
2144 
2145 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
2146   typedef AbstractRefProcTaskExecutor::EnqueueTask EnqueueTask;
2147   EnqueueTask& _enq_task;
2148 
2149 public:
2150   G1CMRefEnqueueTaskProxy(EnqueueTask& enq_task) :
2151     AbstractGangTask("Enqueue reference objects in parallel"),
2152     _enq_task(enq_task)
2153   { }
2154 
2155   virtual void work(int i) {
2156     _enq_task.work(i);
2157   }
2158 };
2159 
2160 void G1CMRefProcTaskExecutor::execute(EnqueueTask& enq_task) {
2161   assert(_workers != NULL, "Need parallel worker threads.");
2162 
2163   G1CMRefEnqueueTaskProxy enq_task_proxy(enq_task);
2164 
2165   _g1h->set_par_threads(_active_workers);
2166   _workers->run_task(&enq_task_proxy);
2167   _g1h->set_par_threads(0);
2168 }
2169 
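     // Reference processing at the end of marking: process the references
     // discovered during marking using the is-alive, keep-alive, and drain
     // closures above (in parallel if reference processing is MT), then unlink
     // dead entries from the string table and unreferenced symbols from the
     // symbol table.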
2170 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
2171   ResourceMark rm;
2172   HandleMark   hm;
2173 
2174   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2175 
2176   // Is alive closure.
2177   G1CMIsAliveClosure g1_is_alive(g1h);
2178 
2179   // Inner scope to exclude the cleaning of the string and symbol
2180   // tables from the displayed time.
2181   {
2182     bool verbose = PrintGC && PrintGCDetails;
2183     if (verbose) {
2184       gclog_or_tty->put(' ');
2185     }
2186     TraceTime t("GC ref-proc", verbose, false, gclog_or_tty);
2187 
2188     ReferenceProcessor* rp = g1h->ref_processor_cm();
2189 
2190     // See the comment in G1CollectedHeap::ref_processing_init()
2191     // about how reference processing currently works in G1.
2192 
2193     // Process weak references.
2194     rp->setup_policy(clear_all_soft_refs);
2195     assert(_markStack.isEmpty(), "mark stack should be empty");
2196 
2197     G1CMKeepAliveClosure g1_keep_alive(g1h, this, nextMarkBitMap());
2198     G1CMDrainMarkingStackClosure
2199       g1_drain_mark_stack(nextMarkBitMap(), &_markStack, &g1_keep_alive);
2200 
2201     // We use the work gang from the G1CollectedHeap and we utilize all
2202     // the worker threads.
2203     int active_workers = g1h->workers() ? g1h->workers()->total_workers() : 1;
2204     active_workers = MAX2(MIN2(active_workers, (int)_max_task_num), 1);
2205 
2206     G1CMRefProcTaskExecutor par_task_executor(g1h, this, nextMarkBitMap(),
2207                                               g1h->workers(), active_workers);
2208 
2209     if (rp->processing_is_mt()) {
2210       // Set the degree of MT here.  If the discovery is done MT, there
2211       // may have been a different number of threads doing the discovery
2212       // and a different number of discovered lists may have Ref objects.
2213       // That is OK as long as the Reference lists are balanced (see
2214       // balance_all_queues() and balance_queues()).
2215       rp->set_active_mt_degree(active_workers);
2216 
2217       rp->process_discovered_references(&g1_is_alive,
2218                                         &g1_keep_alive,
2219                                         &g1_drain_mark_stack,
2220                                         &par_task_executor);
2221 
2222       // The work routines of the parallel keep_alive and drain_marking_stack
2223       // will set the has_overflown flag if we overflow the global marking
2224       // stack.
2225     } else {
2226       rp->process_discovered_references(&g1_is_alive,
2227                                         &g1_keep_alive,
2228                                         &g1_drain_mark_stack,
2229                                         NULL);
2230     }
2231 
2232     assert(_markStack.overflow() || _markStack.isEmpty(),
2233             "mark stack should be empty (unless it overflowed)");
2234     if (_markStack.overflow()) {
2235       // Should have been done already when we tried to push an
2236       // entry on to the global mark stack. But let's do it again.
2237       set_has_overflown();
2238     }
2239 
2240     if (rp->processing_is_mt()) {
2241       assert(rp->num_q() == active_workers, "why not");
2242       rp->enqueue_discovered_references(&par_task_executor);
2243     } else {
2244       rp->enqueue_discovered_references();
2245     }
2246 
2247     rp->verify_no_references_recorded();
2248     assert(!rp->discovery_enabled(), "Post condition");
2249   }
2250 
2251   // Now clean up stale oops in StringTable
2252   StringTable::unlink(&g1_is_alive);
2253   // Clean up unreferenced symbols in symbol table.
2254   SymbolTable::unlink();
2255 }
2256 
2257 void ConcurrentMark::swapMarkBitMaps() {
2258   CMBitMapRO* temp = _prevMarkBitMap;
2259   _prevMarkBitMap  = (CMBitMapRO*)_nextMarkBitMap;
2260   _nextMarkBitMap  = (CMBitMap*)  temp;
2261 }
2262 
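     // Parallel remark task: each worker whose id corresponds to an active
     // CMTask repeatedly calls do_marking_step() (with stealing and
     // termination enabled) until the remaining marking work is finished or
     // the global mark stack overflows.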
2263 class CMRemarkTask: public AbstractGangTask {
2264 private:
2265   ConcurrentMark *_cm;
2266 
2267 public:
2268   void work(int worker_i) {
2269     // Since all available tasks are actually started, we should
2270     // only proceed if we're supposed to be active.
2271     if ((size_t)worker_i < _cm->active_tasks()) {
2272       CMTask* task = _cm->task(worker_i);
2273       task->record_start_time();
2274       do {
2275         task->do_marking_step(1000000000.0 /* something very large */,
2276                               true /* do_stealing    */,
2277                               true /* do_termination */);
2278       } while (task->has_aborted() && !_cm->has_overflown());
2279       // If we overflow, then we do not want to restart. We instead
2280       // want to abort remark and do concurrent marking again.
2281       task->record_end_time();
2282     }
2283   }
2284 
2285   CMRemarkTask(ConcurrentMark* cm) :
2286     AbstractGangTask("Par Remark"), _cm(cm) { }
2287 };
2288 
2289 void ConcurrentMark::checkpointRootsFinalWork() {
2290   ResourceMark rm;
2291   HandleMark   hm;
2292   G1CollectedHeap* g1h = G1CollectedHeap::heap();
2293 
2294   g1h->ensure_parsability(false);
2295 
2296   if (G1CollectedHeap::use_parallel_gc_threads()) {
2297     G1CollectedHeap::StrongRootsScope srs(g1h);
2298     // this is remark, so we'll use up all available threads
2299     int active_workers = ParallelGCThreads;
2300     set_phase(active_workers, false /* concurrent */);
2301 
2302     CMRemarkTask remarkTask(this);
2303     // We will start all available threads, even if we decide that the
2304     // active_workers will be fewer. The extra ones will just bail out
2305     // immediately.
2306     int n_workers = g1h->workers()->total_workers();
2307     g1h->set_par_threads(n_workers);
2308     g1h->workers()->run_task(&remarkTask);
2309     g1h->set_par_threads(0);
2310   } else {
2311     G1CollectedHeap::StrongRootsScope srs(g1h);
2312     // this is remark, so we'll use up all available threads
2313     int active_workers = 1;
2314     set_phase(active_workers, false /* concurrent */);
2315 
2316     CMRemarkTask remarkTask(this);
2317     // We will start all available threads, even if we decide that the
2318     // active_workers will be fewer. The extra ones will just bail out
2319     // immediately.
2320     remarkTask.work(0);
2321   }
2322   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2323   guarantee(satb_mq_set.completed_buffers_num() == 0, "invariant");
2324 
2325   print_stats();
2326 
2327 #if VERIFY_OBJS_PROCESSED
2328   if (_scan_obj_cl.objs_processed != ThreadLocalObjQueue::objs_enqueued) {
2329     gclog_or_tty->print_cr("Processed = %d, enqueued = %d.",
2330                            _scan_obj_cl.objs_processed,
2331                            ThreadLocalObjQueue::objs_enqueued);
2332     guarantee(_scan_obj_cl.objs_processed ==
2333               ThreadLocalObjQueue::objs_enqueued,
2334               "Different number of objs processed and enqueued.");
2335   }
2336 #endif
2337 }
2338 
2339 #ifndef PRODUCT
2340 
2341 class PrintReachableOopClosure: public OopClosure {
2342 private:
2343   G1CollectedHeap* _g1h;
2344   outputStream*    _out;
2345   VerifyOption     _vo;
2346   bool             _all;
2347 
2348 public:
2349   PrintReachableOopClosure(outputStream* out,
2350                            VerifyOption  vo,
2351                            bool          all) :
2352     _g1h(G1CollectedHeap::heap()),
2353     _out(out), _vo(vo), _all(all) { }
2354 
2355   void do_oop(narrowOop* p) { do_oop_work(p); }
2356   void do_oop(      oop* p) { do_oop_work(p); }
2357 
2358   template <class T> void do_oop_work(T* p) {
2359     oop         obj = oopDesc::load_decode_heap_oop(p);
2360     const char* str = NULL;
2361     const char* str2 = "";
2362 
2363     if (obj == NULL) {
2364       str = "";
2365     } else if (!_g1h->is_in_g1_reserved(obj)) {
2366       str = " O";
2367     } else {
2368       HeapRegion* hr  = _g1h->heap_region_containing(obj);
2369       guarantee(hr != NULL, "invariant");
2370       bool over_tams = false;
2371       bool marked = false;
2372 
2373       switch (_vo) {
2374         case VerifyOption_G1UsePrevMarking:
2375           over_tams = hr->obj_allocated_since_prev_marking(obj);
2376           marked = _g1h->isMarkedPrev(obj);
2377           break;
2378         case VerifyOption_G1UseNextMarking:
2379           over_tams = hr->obj_allocated_since_next_marking(obj);
2380           marked = _g1h->isMarkedNext(obj);
2381           break;
2382         case VerifyOption_G1UseMarkWord:
2383           marked = obj->is_gc_marked();
2384           break;
2385         default:
2386           ShouldNotReachHere();
2387       }
2388 
2389       if (over_tams) {
2390         str = " >";
2391         if (marked) {
2392           str2 = " AND MARKED";
2393         }
2394       } else if (marked) {
2395         str = " M";
2396       } else {
2397         str = " NOT";
2398       }
2399     }
2400 
2401     _out->print_cr("  "PTR_FORMAT": "PTR_FORMAT"%s%s",
2402                    p, (void*) obj, str, str2);
2403   }
2404 };
2405 
2406 class PrintReachableObjectClosure : public ObjectClosure {
2407 private:
2408   G1CollectedHeap* _g1h;
2409   outputStream*    _out;
2410   VerifyOption     _vo;
2411   bool             _all;
2412   HeapRegion*      _hr;
2413 
2414 public:
2415   PrintReachableObjectClosure(outputStream* out,
2416                               VerifyOption  vo,
2417                               bool          all,
2418                               HeapRegion*   hr) :
2419     _g1h(G1CollectedHeap::heap()),
2420     _out(out), _vo(vo), _all(all), _hr(hr) { }
2421 
2422   void do_object(oop o) {
2423     bool over_tams = false;
2424     bool marked = false;
2425 
2426     switch (_vo) {
2427       case VerifyOption_G1UsePrevMarking:
2428         over_tams = _hr->obj_allocated_since_prev_marking(o);
2429         marked = _g1h->isMarkedPrev(o);
2430         break;
2431       case VerifyOption_G1UseNextMarking:
2432         over_tams = _hr->obj_allocated_since_next_marking(o);
2433         marked = _g1h->isMarkedNext(o);
2434         break;
2435       case VerifyOption_G1UseMarkWord:
2436         marked = o->is_gc_marked();
2437         break;
2438       default:
2439         ShouldNotReachHere();
2440     }
2441     bool print_it = _all || over_tams || marked;
2442 
2443     if (print_it) {
2444       _out->print_cr(" "PTR_FORMAT"%s",
2445                      o, (over_tams) ? " >" : (marked) ? " M" : "");
2446       PrintReachableOopClosure oopCl(_out, _vo, _all);
2447       o->oop_iterate(&oopCl);
2448     }
2449   }
2450 };
2451 
2452 class PrintReachableRegionClosure : public HeapRegionClosure {
2453 private:
2454   outputStream* _out;
2455   VerifyOption  _vo;
2456   bool          _all;
2457 
2458 public:
2459   bool doHeapRegion(HeapRegion* hr) {
2460     HeapWord* b = hr->bottom();
2461     HeapWord* e = hr->end();
2462     HeapWord* t = hr->top();
2463     HeapWord* p = NULL;
2464 
2465     switch (_vo) {
2466       case VerifyOption_G1UsePrevMarking:
2467         p = hr->prev_top_at_mark_start();
2468         break;
2469       case VerifyOption_G1UseNextMarking:
2470         p = hr->next_top_at_mark_start();
2471         break;
2472       case VerifyOption_G1UseMarkWord:
2473         // When we are verifying marking using the mark word
2474         // TAMS has no relevance.
2475         assert(p == NULL, "post-condition");
2476         break;
2477       default:
2478         ShouldNotReachHere();
2479     }
2480     _out->print_cr("** ["PTR_FORMAT", "PTR_FORMAT"] top: "PTR_FORMAT" "
2481                    "TAMS: "PTR_FORMAT, b, e, t, p);
2482     _out->cr();
2483 
2484     HeapWord* from = b;
2485     HeapWord* to   = t;
2486 
2487     if (to > from) {
2488       _out->print_cr("Objects in ["PTR_FORMAT", "PTR_FORMAT"]", from, to);
2489       _out->cr();
2490       PrintReachableObjectClosure ocl(_out, _vo, _all, hr);
2491       hr->object_iterate_mem_careful(MemRegion(from, to), &ocl);
2492       _out->cr();
2493     }
2494 
2495     return false;
2496   }
2497 
2498   PrintReachableRegionClosure(outputStream* out,
2499                               VerifyOption  vo,
2500                               bool          all) :
2501     _out(out), _vo(vo), _all(all) { }
2502 };
2503 
2504 static const char* verify_option_to_tams(VerifyOption vo) {
2505   switch (vo) {
2506     case VerifyOption_G1UsePrevMarking:
2507       return "PTAMS";
2508     case VerifyOption_G1UseNextMarking:
2509       return "NTAMS";
2510     default:
2511       return "NONE";
2512   }
2513 }
2514 
2515 void ConcurrentMark::print_reachable(const char* str,
2516                                      VerifyOption vo,
2517                                      bool all) {
2518   gclog_or_tty->cr();
2519   gclog_or_tty->print_cr("== Doing heap dump... ");
2520 
2521   if (G1PrintReachableBaseFile == NULL) {
2522     gclog_or_tty->print_cr("  #### error: no base file defined");
2523     return;
2524   }
2525 
2526   if (strlen(G1PrintReachableBaseFile) + 1 + strlen(str) >
2527       (JVM_MAXPATHLEN - 1)) {
2528     gclog_or_tty->print_cr("  #### error: file name too long");
2529     return;
2530   }
2531 
2532   char file_name[JVM_MAXPATHLEN];
2533   sprintf(file_name, "%s.%s", G1PrintReachableBaseFile, str);
2534   gclog_or_tty->print_cr("  dumping to file %s", file_name);
2535 
2536   fileStream fout(file_name);
2537   if (!fout.is_open()) {
2538     gclog_or_tty->print_cr("  #### error: could not open file");
2539     return;
2540   }
2541 
2542   outputStream* out = &fout;
2543   out->print_cr("-- USING %s", verify_option_to_tams(vo));
2544   out->cr();
2545 
2546   out->print_cr("--- ITERATING OVER REGIONS");
2547   out->cr();
2548   PrintReachableRegionClosure rcl(out, vo, all);
2549   _g1h->heap_region_iterate(&rcl);
2550   out->cr();
2551 
2552   gclog_or_tty->print_cr("  done");
2553   gclog_or_tty->flush();
2554 }
2555 
2556 #endif // PRODUCT
2557 
2558 // This note is for drainAllSATBBuffers and the code in between.
2559 // In the future we could reuse a task to do this work during an
2560 // evacuation pause (since now tasks are not active and can be claimed
2561 // during an evacuation pause). This was a late change to the code and
2562 // is currently not being taken advantage of.
2563 
2564 class CMGlobalObjectClosure : public ObjectClosure {
2565 private:
2566   ConcurrentMark* _cm;
2567 
2568 public:
2569   void do_object(oop obj) {
2570     _cm->deal_with_reference(obj);
2571   }
2572 
2573   CMGlobalObjectClosure(ConcurrentMark* cm) : _cm(cm) { }
2574 };
2575 
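     // Grays an object found outside normal marking (e.g. via SATB buffers
     // during an evacuation pause): if the object is in the G1 heap, was
     // allocated before the next TAMS, and is not yet marked, mark it on the
     // next bitmap and, if it lies below the global finger, push it on the
     // global mark stack so it will still be scanned.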
2576 void ConcurrentMark::deal_with_reference(oop obj) {
2577   if (verbose_high()) {
2578     gclog_or_tty->print_cr("[global] we're dealing with reference "PTR_FORMAT,
2579                            (void*) obj);
2580   }
2581 
2582   HeapWord* objAddr = (HeapWord*) obj;
2583   assert(obj->is_oop_or_null(true /* ignore mark word */), "Error");
2584   if (_g1h->is_in_g1_reserved(objAddr)) {
2585     assert(obj != NULL, "null check is implicit");
2586     if (!_nextMarkBitMap->isMarked(objAddr)) {
2587       // Only get the containing region if the object is not marked on the
2588       // bitmap (otherwise, it's a waste of time since we won't do
2589       // anything with it).
2590       HeapRegion* hr = _g1h->heap_region_containing_raw(obj);
2591       if (!hr->obj_allocated_since_next_marking(obj)) {
2592         if (verbose_high()) {
2593           gclog_or_tty->print_cr("[global] "PTR_FORMAT" is not considered "
2594                                  "marked", (void*) obj);
2595         }
2596 
2597         // we need to mark it first
2598         if (_nextMarkBitMap->parMark(objAddr)) {
2599           // No OrderAccess:store_load() is needed. It is implicit in the
2600           // CAS done in parMark(objAddr) above
2601           HeapWord* finger = _finger;
2602           if (objAddr < finger) {
2603             if (verbose_high()) {
2604               gclog_or_tty->print_cr("[global] below the global finger "
2605                                      "("PTR_FORMAT"), pushing it", finger);
2606             }
2607             if (!mark_stack_push(obj)) {
2608               if (verbose_low()) {
2609                 gclog_or_tty->print_cr("[global] global stack overflow during "
2610                                        "deal_with_reference");
2611               }
2612             }
2613           }
2614         }
2615       }
2616     }
2617   }
2618 }
2619 
2620 void ConcurrentMark::drainAllSATBBuffers() {
2621   CMGlobalObjectClosure oc(this);
2622   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
2623   satb_mq_set.set_closure(&oc);
2624 
2625   while (satb_mq_set.apply_closure_to_completed_buffer()) {
2626     if (verbose_medium()) {
2627       gclog_or_tty->print_cr("[global] processed an SATB buffer");
2628     }
2629   }
2630 
2631   // no need to check whether we should do this, as this is only
2632   // called during an evacuation pause
2633   satb_mq_set.iterate_closure_all_threads();
2634 
2635   satb_mq_set.set_closure(NULL);
2636   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
2637 }
2638 
2639 void ConcurrentMark::markPrev(oop p) {
2640   // Note we are overriding the read-only view of the prev map here, via
2641   // the cast.
2642   ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
2643 }
2644 
2645 void ConcurrentMark::clear(oop p) {
2646   assert(p != NULL && p->is_oop(), "expected an oop");
2647   HeapWord* addr = (HeapWord*)p;
2648   assert(addr >= _nextMarkBitMap->startWord() &&
2649          addr < _nextMarkBitMap->endWord(), "in a region");
2650 
2651   _nextMarkBitMap->clear(addr);
2652 }
2653 
2654 void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
2655   // Note we are overriding the read-only view of the prev map here, via
2656   // the cast.
2657   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
2658   _nextMarkBitMap->clearRange(mr);
2659 }
2660 
2661 HeapRegion*
2662 ConcurrentMark::claim_region(int task_num) {
2663   // "checkpoint" the finger
2664   HeapWord* finger = _finger;
2665 
2666   // _heap_end will not change underneath our feet; it only changes at
2667   // yield points.
2668   while (finger < _heap_end) {
2669     assert(_g1h->is_in_g1_reserved(finger), "invariant");
2670 
2671     // Note on how this code handles humongous regions. In the
2672     // normal case the finger will reach the start of a "starts
2673     // humongous" (SH) region. Its end will either be the end of the
2674     // last "continues humongous" (CH) region in the sequence, or the
2675     // standard end of the SH region (if the SH is the only region in
2676     // the sequence). That way claim_region() will skip over the CH
2677     // regions. However, there is a subtle race between a CM thread
2678     // executing this method and a mutator thread doing a humongous
2679     // object allocation. The two are not mutually exclusive as the CM
2680     // thread does not need to hold the Heap_lock when it gets
2681     // here. So there is a chance that claim_region() will come across
2682     // a free region that's in the process of becoming a SH or a CH
2683     // region. In the former case, it will either
2684     //   a) Miss the update to the region's end, in which case it will
2685     //      visit every subsequent CH region, will find their bitmaps
2686     //      empty, and do nothing, or
2687     //   b) Will observe the update of the region's end (in which case
2688     //      it will skip the subsequent CH regions).
2689     // If it comes across a region that suddenly becomes CH, the
2690     // scenario will be similar to b). So, the race between
2691     // claim_region() and a humongous object allocation might force us
2692     // to do a bit of unnecessary work (due to some unnecessary bitmap
2693     // iterations) but it should not introduce any correctness issues.
2694     HeapRegion* curr_region   = _g1h->heap_region_containing_raw(finger);
2695     HeapWord*   bottom        = curr_region->bottom();
2696     HeapWord*   end           = curr_region->end();
2697     HeapWord*   limit         = curr_region->next_top_at_mark_start();
2698 
2699     if (verbose_low()) {
2700       gclog_or_tty->print_cr("[%d] curr_region = "PTR_FORMAT" "
2701                              "["PTR_FORMAT", "PTR_FORMAT"), "
2702                              "limit = "PTR_FORMAT,
2703                              task_num, curr_region, bottom, end, limit);
2704     }
2705 
2706     // Is the gap between reading the finger and doing the CAS too long?
2707     HeapWord* res = (HeapWord*) Atomic::cmpxchg_ptr(end, &_finger, finger);
2708     if (res == finger) {
2709       // we succeeded
2710 
2711       // notice that _finger == end cannot be guaranteed here since
2712       // someone else might have moved the finger even further
2713       assert(_finger >= end, "the finger should have moved forward");
2714 
2715       if (verbose_low()) {
2716         gclog_or_tty->print_cr("[%d] we were successful with region = "
2717                                PTR_FORMAT, task_num, curr_region);
2718       }
2719 
2720       if (limit > bottom) {
2721         if (verbose_low()) {
2722           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is not empty, "
2723                                  "returning it ", task_num, curr_region);
2724         }
2725         return curr_region;
2726       } else {
2727         assert(limit == bottom,
2728                "the region limit should be at bottom");
2729         if (verbose_low()) {
2730           gclog_or_tty->print_cr("[%d] region "PTR_FORMAT" is empty, "
2731                                  "returning NULL", task_num, curr_region);
2732         }
2733         // we return NULL and the caller should try calling
2734         // claim_region() again.
2735         return NULL;
2736       }
2737     } else {
2738       assert(_finger > finger, "the finger should have moved forward");
2739       if (verbose_low()) {
2740         gclog_or_tty->print_cr("[%d] somebody else moved the finger, "
2741                                "global finger = "PTR_FORMAT", "
2742                                "our finger = "PTR_FORMAT,
2743                                task_num, _finger, finger);
2744       }
2745 
2746       // read it again
2747       finger = _finger;
2748     }
2749   }
2750 
2751   return NULL;
2752 }
2753 
2754 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
2755   bool result = false;
2756   for (int i = 0; i < (int)_max_task_num; ++i) {
2757     CMTask* the_task = _tasks[i];
2758     MemRegion mr = the_task->aborted_region();
2759     if (mr.start() != NULL) {
2760       assert(mr.end() != NULL, "invariant");
2761       assert(mr.word_size() > 0, "invariant");
2762       HeapRegion* hr = _g1h->heap_region_containing(mr.start());
2763       assert(hr != NULL, "invariant");
2764       if (hr->in_collection_set()) {
2765         // The region points into the collection set
2766         the_task->set_aborted_region(MemRegion());
2767         result = true;
2768       }
2769     }
2770   }
2771   return result;
2772 }
2773 
2774 bool ConcurrentMark::has_aborted_regions() {
2775   for (int i = 0; i < (int)_max_task_num; ++i) {
2776     CMTask* the_task = _tasks[i];
2777     MemRegion mr = the_task->aborted_region();
2778     if (mr.start() != NULL) {
2779       assert(mr.end() != NULL, "invariant");
2780       assert(mr.word_size() > 0, "invariant");
2781       return true;
2782     }
2783   }
2784   return false;
2785 }
2786 
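     // Applies cl to every oop recorded in the marking data structures: the
     // global mark stack and each task's local queue. Region-stack entries and
     // per-task aborted regions that point into the collection set are
     // invalidated here (see the _should_gray_objects asserts below).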
2787 void ConcurrentMark::oops_do(OopClosure* cl) {
2788   if (_markStack.size() > 0 && verbose_low()) {
2789     gclog_or_tty->print_cr("[global] scanning the global marking stack, "
2790                            "size = %d", _markStack.size());
2791   }
2792   // we first iterate over the contents of the mark stack...
2793   _markStack.oops_do(cl);
2794 
2795   for (int i = 0; i < (int)_max_task_num; ++i) {
2796     OopTaskQueue* queue = _task_queues->queue((int)i);
2797 
2798     if (queue->size() > 0 && verbose_low()) {
2799       gclog_or_tty->print_cr("[global] scanning task queue of task %d, "
2800                              "size = %d", i, queue->size());
2801     }
2802 
2803     // ...then over the contents of all the task queues.
2804     queue->oops_do(cl);
2805   }
2806 
2807   // Invalidate any entries in the region stack that point into
2808   // the collection set.
2809   if (_regionStack.invalidate_entries_into_cset()) {
2810     // otherwise, any gray objects copied during the evacuation pause
2811     // might not be visited.
2812     assert(_should_gray_objects, "invariant");
2813   }
2814 
2815   // Invalidate any aborted regions, recorded in the individual CM
2816   // tasks, that point into the collection set.
2817   if (invalidate_aborted_regions_in_cset()) {
2818     // otherwise, any gray objects copied during the evacuation pause
2819     // might not be visited.
2820     assert(_should_gray_objects, "invariant");
2821   }
2822 
2823 }
2824 
2825 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
2826   _markStack.setEmpty();
2827   _markStack.clear_overflow();
2828   _regionStack.setEmpty();
2829   _regionStack.clear_overflow();
2830   if (clear_overflow) {
2831     clear_has_overflown();
2832   } else {
2833     assert(has_overflown(), "pre-condition");
2834   }
2835   _finger = _heap_start;
2836 
2837   for (int i = 0; i < (int)_max_task_num; ++i) {
2838     OopTaskQueue* queue = _task_queues->queue(i);
2839     queue->set_empty();
2840     // Clear any partial regions from the CMTasks
2841     _tasks[i]->clear_aborted_region();
2842   }
2843 }
2844 
2845 void ConcurrentMark::print_stats() {
2846   if (verbose_stats()) {
2847     gclog_or_tty->print_cr("---------------------------------------------------------------------");
2848     for (size_t i = 0; i < _active_tasks; ++i) {
2849       _tasks[i]->print_stats();
2850       gclog_or_tty->print_cr("---------------------------------------------------------------------");
2851     }
2852   }
2853 }
2854 
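     // Closure used when completing marking over objects in the collection
     // set: collection-set objects that are not yet known to be live are
     // marked on the next bitmap and pushed on a fixed-size local stack for
     // scanning. Object arrays are scanned _array_increment elements at a
     // time to bound stack growth; references outside the collection set are
     // handed to ConcurrentMark::deal_with_reference(). If the local stack
     // fills up, the closure aborts.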
2855 class CSMarkOopClosure: public OopClosure {
2856   friend class CSMarkBitMapClosure;
2857 
2858   G1CollectedHeap* _g1h;
2859   CMBitMap*        _bm;
2860   ConcurrentMark*  _cm;
2861   oop*             _ms;
2862   jint*            _array_ind_stack;
2863   int              _ms_size;
2864   int              _ms_ind;
2865   int              _array_increment;
2866 
2867   bool push(oop obj, int arr_ind = 0) {
2868     if (_ms_ind == _ms_size) {
2869       gclog_or_tty->print_cr("Mark stack is full.");
2870       return false;
2871     }
2872     _ms[_ms_ind] = obj;
2873     if (obj->is_objArray()) {
2874       _array_ind_stack[_ms_ind] = arr_ind;
2875     }
2876     _ms_ind++;
2877     return true;
2878   }
2879 
2880   oop pop() {
2881     if (_ms_ind == 0) {
2882       return NULL;
2883     } else {
2884       _ms_ind--;
2885       return _ms[_ms_ind];
2886     }
2887   }
2888 
2889   template <class T> bool drain() {
2890     while (_ms_ind > 0) {
2891       oop obj = pop();
2892       assert(obj != NULL, "Since index was non-zero.");
2893       if (obj->is_objArray()) {
2894         jint arr_ind = _array_ind_stack[_ms_ind];
2895         objArrayOop aobj = objArrayOop(obj);
2896         jint len = aobj->length();
2897         jint next_arr_ind = arr_ind + _array_increment;
2898         if (next_arr_ind < len) {
2899           push(obj, next_arr_ind);
2900         }
2901         // Now process this portion of this one.
2902         int lim = MIN2(next_arr_ind, len);
2903         for (int j = arr_ind; j < lim; j++) {
2904           do_oop(aobj->objArrayOopDesc::obj_at_addr<T>(j));
2905         }
2906 
2907       } else {
2908         obj->oop_iterate(this);
2909       }
2910       if (abort()) return false;
2911     }
2912     return true;
2913   }
2914 
2915 public:
2916   CSMarkOopClosure(ConcurrentMark* cm, int ms_size) :
2917     _g1h(G1CollectedHeap::heap()),
2918     _bm(cm->nextMarkBitMap()),
2919     _cm(cm),
2920     _ms(NEW_C_HEAP_ARRAY(oop, ms_size)),
2921     _array_ind_stack(NEW_C_HEAP_ARRAY(jint, ms_size)),
2922     _ms_size(ms_size), _ms_ind(0),
2923     _array_increment(MAX2(ms_size/8, 16))
2924   {}
2925 
2926   ~CSMarkOopClosure() {
2927     FREE_C_HEAP_ARRAY(oop, _ms);
2928     FREE_C_HEAP_ARRAY(jint, _array_ind_stack);
2929   }
2930 
2931   virtual void do_oop(narrowOop* p) { do_oop_work(p); }
2932   virtual void do_oop(      oop* p) { do_oop_work(p); }
2933 
2934   template <class T> void do_oop_work(T* p) {
2935     T heap_oop = oopDesc::load_heap_oop(p);
2936     if (oopDesc::is_null(heap_oop)) return;
2937     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
2938     if (obj->is_forwarded()) {
2939       // If the object has already been forwarded, we have to make sure
2940       // that it's marked.  So follow the forwarding pointer.  Note that
2941       // this does the right thing for self-forwarding pointers in the
2942       // evacuation failure case.
2943       obj = obj->forwardee();
2944     }
2945     HeapRegion* hr = _g1h->heap_region_containing(obj);
2946     if (hr != NULL) {
2947       if (hr->in_collection_set()) {
2948         if (_g1h->is_obj_ill(obj)) {
2949           _bm->mark((HeapWord*)obj);
2950           if (!push(obj)) {
2951             gclog_or_tty->print_cr("Setting abort in CSMarkOopClosure because push failed.");
2952             set_abort();
2953           }
2954         }
2955       } else {
2956         // Outside the collection set; we need to gray it
2957         _cm->deal_with_reference(obj);
2958       }
2959     }
2960   }
2961 };
2962 
2963 class CSMarkBitMapClosure: public BitMapClosure {
2964   G1CollectedHeap* _g1h;
2965   CMBitMap*        _bitMap;
2966   ConcurrentMark*  _cm;
2967   CSMarkOopClosure _oop_cl;
2968 public:
2969   CSMarkBitMapClosure(ConcurrentMark* cm, int ms_size) :
2970     _g1h(G1CollectedHeap::heap()),
2971     _bitMap(cm->nextMarkBitMap()),
         _cm(cm),
2972     _oop_cl(cm, ms_size)
2973   {}
2974 
2975   ~CSMarkBitMapClosure() {}
2976 
2977   bool do_bit(size_t offset) {
2978     // convert offset into a HeapWord*
2979     HeapWord* addr = _bitMap->offsetToHeapWord(offset);
2980     assert(addr >= _bitMap->startWord() && addr < _bitMap->endWord(),
2981            "address out of range");
2982     assert(_bitMap->isMarked(addr), "tautology");
2983     oop obj = oop(addr);
2984     if (!obj->is_forwarded()) {
2985       if (!_oop_cl.push(obj)) return false;
2986       if (UseCompressedOops) {
2987         if (!_oop_cl.drain<narrowOop>()) return false;
2988       } else {
2989         if (!_oop_cl.drain<oop>()) return false;
2990       }
2991     }
2992     // Otherwise...
2993     return true;
2994   }
2995 };
2996 
2997 
2998 class CompleteMarkingInCSHRClosure: public HeapRegionClosure {
2999   CMBitMap* _bm;
3000   CSMarkBitMapClosure _bit_cl;
3001   enum SomePrivateConstants {
3002     MSSize = 1000
3003   };
3004   bool _completed;
3005 public:
3006   CompleteMarkingInCSHRClosure(ConcurrentMark* cm) :
3007     _bm(cm->nextMarkBitMap()),
3008     _bit_cl(cm, MSSize),
3009     _completed(true)
3010   {}
3011 
3012   ~CompleteMarkingInCSHRClosure() {}
3013 
3014   bool doHeapRegion(HeapRegion* r) {
3015     if (!r->evacuation_failed()) {
3016       MemRegion mr = MemRegion(r->bottom(), r->next_top_at_mark_start());
3017       if (!mr.is_empty()) {
3018         if (!_bm->iterate(&_bit_cl, mr)) {
3019           _completed = false;
3020           return true;
3021         }
3022       }
3023     }
3024     return false;
3025   }
3026 
3027   bool completed() { return _completed; }
3028 };
3029 
3030 class ClearMarksInHRClosure: public HeapRegionClosure {
3031   CMBitMap* _bm;
3032 public:
3033   ClearMarksInHRClosure(CMBitMap* bm): _bm(bm) { }
3034 
3035   bool doHeapRegion(HeapRegion* r) {
3036     if (!r->used_region().is_empty() && !r->evacuation_failed()) {
3037       MemRegion usedMR = r->used_region();
3038       _bm->clearRange(usedMR);
3039     }
3040     return false;
3041   }
3042 };
3043 
3044 void ConcurrentMark::complete_marking_in_collection_set() {
3045   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
3046 
3047   if (!g1h->mark_in_progress()) {
3048     g1h->g1_policy()->record_mark_closure_time(0.0);
3049     return;
3050   }
3051 
3052   int i = 1;
3053   double start = os::elapsedTime();
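       // CompleteMarkingInCSHRClosure uses a fixed-size private mark stack
       // (MSSize entries). If that stack fills up, the bitmap iteration
       // aborts and completed() returns false, so we simply retry until a
       // pass over the collection set finishes cleanly.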
3054   while (true) {
3055     i++;
3056     CompleteMarkingInCSHRClosure cmplt(this);
3057     g1h->collection_set_iterate(&cmplt);
3058     if (cmplt.completed()) break;
3059   }
3060   double end_time = os::elapsedTime();
3061   double elapsed_time_ms = (end_time - start) * 1000.0;
3062   g1h->g1_policy()->record_mark_closure_time(elapsed_time_ms);
3063 
3064   ClearMarksInHRClosure clr(nextMarkBitMap());
3065   g1h->collection_set_iterate(&clr);
3066 }
3067 
3068 // The next two methods deal with the following optimisation. Some
3069 // objects are gray by being marked and located above the finger. If
3070 // they are copied, during an evacuation pause, below the finger then
3071 // they need to be pushed on the stack. The observation is that, if
3072 // there are no regions in the collection set located above the
3073 // finger, then the above cannot happen, hence we do not need to
3074 // explicitly gray any objects when copying them to below the
3075 // finger. The global stack will be scanned to ensure that, if it
3076 // points to objects being copied, it will update their
3077 // location. There is a tricky situation with the gray objects in
3078 // the region stack that are being copied, however. See the comment in
3079 // newCSet().
3080 
3081 void ConcurrentMark::newCSet() {
3082   if (!concurrent_marking_in_progress()) {
3083     // nothing to do if marking is not in progress
3084     return;
3085   }
3086 
3087   // find what the lowest finger is among the global and local fingers
3088   _min_finger = _finger;
3089   for (int i = 0; i < (int)_max_task_num; ++i) {
3090     CMTask* task = _tasks[i];
3091     HeapWord* task_finger = task->finger();
3092     if (task_finger != NULL && task_finger < _min_finger) {
3093       _min_finger = task_finger;
3094     }
3095   }
3096 
3097   _should_gray_objects = false;
3098 
3099   // This fixes a very subtle and frustrating bug. It might be the case
3100   // that, during an evacuation pause, heap regions that contain
3101   // objects that are gray (by being in regions contained in the
3102   // region stack) are included in the collection set. Since such gray
3103   // objects will be moved, and because it's not easy to redirect
3104   // region stack entries to point to a new location (because objects
3105   // in one region might be scattered to multiple regions after they
3106   // are copied), one option is to ensure that all marked objects
3107   // copied during a pause are pushed on the stack. Notice, however,
3108   // that this problem can only happen when the region stack is not
3109   // empty during an evacuation pause. So, we make the fix a bit less
3110   // conservative and ensure that regions are pushed on the stack,
3111   // irrespective of whether all collection set regions are below the
3112   // finger, if the region stack is not empty. This is expected to be
3113   // a rare case, so I don't think it's necessary to be smarter about it.
3114   if (!region_stack_empty() || has_aborted_regions()) {
3115     _should_gray_objects = true;
3116   }
3117 }
3118 
3119 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
3120   if (!concurrent_marking_in_progress()) return;
3121 
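       // If any part of this region lies above the lowest finger, it may
       // contain objects that are gray only by virtue of their location;
       // such objects, when copied during the pause, must be grayed
       // explicitly.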
3122   HeapWord* region_end = hr->end();
3123   if (region_end > _min_finger) {
3124     _should_gray_objects = true;
3125   }
3126 }
3127 
3128 // Resets the region fields of active CMTasks whose values point
3129 // into the collection set.
3130 void ConcurrentMark::reset_active_task_region_fields_in_cset() {
3131   assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
3132   assert(parallel_marking_threads() <= _max_task_num, "sanity");
3133 
3134   for (int i = 0; i < (int)parallel_marking_threads(); i += 1) {
3135     CMTask* task = _tasks[i];
3136     HeapWord* task_finger = task->finger();
3137     if (task_finger != NULL) {
3138       assert(_g1h->is_in_g1_reserved(task_finger), "not in heap");
3139       HeapRegion* finger_region = _g1h->heap_region_containing(task_finger);
3140       if (finger_region->in_collection_set()) {
3141         // The task's current region is in the collection set.
3142         // This region will be evacuated in the current GC and
3143         // the region fields in the task will be stale.
3144         task->giveup_current_region();
3145       }
3146     }
3147   }
3148 }
3149 
3150 // abandon current marking iteration due to a Full GC
3151 void ConcurrentMark::abort() {
3152   // Clear all marks to force marking thread to do nothing
3153   _nextMarkBitMap->clearAll();
3154   // Empty mark stack
3155   clear_marking_state();
3156   for (int i = 0; i < (int)_max_task_num; ++i) {
3157     _tasks[i]->clear_region_fields();
3158   }
3159   _has_aborted = true;
3160 
3161   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3162   satb_mq_set.abandon_partial_marking();
3163   // This can be called either during or outside marking; we'll read
3164   // the expected_active value from the SATB queue set.
3165   satb_mq_set.set_active_all_threads(
3166                                  false, /* new active value */
3167                                  satb_mq_set.is_active() /* expected_active */);
3168 }
3169 
3170 static void print_ms_time_info(const char* prefix, const char* name,
3171                                NumberSeq& ns) {
3172   gclog_or_tty->print_cr("%s%5d %12s: total time = %8.2f s (avg = %8.2f ms).",
3173                          prefix, ns.num(), name, ns.sum()/1000.0, ns.avg());
3174   if (ns.num() > 0) {
3175     gclog_or_tty->print_cr("%s         [std. dev = %8.2f ms, max = %8.2f ms]",
3176                            prefix, ns.sd(), ns.maximum());
3177   }
3178 }
3179 
3180 void ConcurrentMark::print_summary_info() {
3181   gclog_or_tty->print_cr(" Concurrent marking:");
3182   print_ms_time_info("  ", "init marks", _init_times);
3183   print_ms_time_info("  ", "remarks", _remark_times);
3184   {
3185     print_ms_time_info("     ", "final marks", _remark_mark_times);
3186     print_ms_time_info("     ", "weak refs", _remark_weak_ref_times);
3187 
3188   }
3189   print_ms_time_info("  ", "cleanups", _cleanup_times);
3190   gclog_or_tty->print_cr("    Final counting total time = %8.2f s (avg = %8.2f ms).",
3191                          _total_counting_time,
3192                          (_cleanup_times.num() > 0 ? _total_counting_time * 1000.0 /
3193                           (double)_cleanup_times.num()
3194                          : 0.0));
3195   if (G1ScrubRemSets) {
3196     gclog_or_tty->print_cr("    RS scrub total time = %8.2f s (avg = %8.2f ms).",
3197                            _total_rs_scrub_time,
3198                            (_cleanup_times.num() > 0 ? _total_rs_scrub_time * 1000.0 /
3199                             (double)_cleanup_times.num()
3200                            : 0.0));
3201   }
3202   gclog_or_tty->print_cr("  Total stop_world time = %8.2f s.",
3203                          (_init_times.sum() + _remark_times.sum() +
3204                           _cleanup_times.sum())/1000.0);
3205   gclog_or_tty->print_cr("  Total concurrent time = %8.2f s "
3206                 "(%8.2f s marking, %8.2f s counting).",
3207                 cmThread()->vtime_accum(),
3208                 cmThread()->vtime_mark_accum(),
3209                 cmThread()->vtime_count_accum());
3210 }
3211 
3212 void ConcurrentMark::print_worker_threads_on(outputStream* st) const {
3213   _parallel_workers->print_worker_threads_on(st);
3214 }
3215 
3216 // Closures
3217 // XXX: there seems to be a lot of code duplication here;
3218 // should refactor and consolidate the shared code.
3219 
3220 // This closure is used to mark refs into the CMS generation in
3221 // the CMS bit map. Called at the first checkpoint.
3222 
3223 // We take a break if someone is trying to stop the world.
3224 bool ConcurrentMark::do_yield_check(int worker_i) {
3225   if (should_yield()) {
3226     if (worker_i == 0) {
3227       _g1h->g1_policy()->record_concurrent_pause();
3228     }
3229     cmThread()->yield();
3230     if (worker_i == 0) {
3231       _g1h->g1_policy()->record_concurrent_pause_end();
3232     }
3233     return true;
3234   } else {
3235     return false;
3236   }
3237 }
3238 
3239 bool ConcurrentMark::should_yield() {
3240   return cmThread()->should_yield();
3241 }
3242 
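     // Returns whether the card spanning address p has been marked in the
     // card bitmap: p's byte offset from the bottom of the reserved heap is
     // shifted down to a card index.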
3243 bool ConcurrentMark::containing_card_is_marked(void* p) {
3244   size_t offset = pointer_delta(p, _g1h->reserved_region().start(), 1);
3245   return _card_bm.at(offset >> CardTableModRefBS::card_shift);
3246 }
3247 
3248 bool ConcurrentMark::containing_cards_are_marked(void* start,
3249                                                  void* last) {
3250   return containing_card_is_marked(start) &&
3251          containing_card_is_marked(last);
3252 }
3253 
3254 #ifndef PRODUCT
3255 // for debugging purposes
3256 void ConcurrentMark::print_finger() {
3257   gclog_or_tty->print_cr("heap ["PTR_FORMAT", "PTR_FORMAT"), global finger = "PTR_FORMAT,
3258                          _heap_start, _heap_end, _finger);
3259   for (int i = 0; i < (int) _max_task_num; ++i) {
3260     gclog_or_tty->print("   %d: "PTR_FORMAT, i, _tasks[i]->finger());
3261   }
3262   gclog_or_tty->print_cr("");
3263 }
3264 #endif
3265 
3266 void CMTask::scan_object(oop obj) {
3267   assert(_nextMarkBitMap->isMarked((HeapWord*) obj), "invariant");
3268 
3269   if (_cm->verbose_high()) {
3270     gclog_or_tty->print_cr("[%d] we're scanning object "PTR_FORMAT,
3271                            _task_id, (void*) obj);
3272   }
3273 
3274   size_t obj_size = obj->size();
3275   _words_scanned += obj_size;
3276 
3277   obj->oop_iterate(_cm_oop_closure);
3278   statsOnly( ++_objs_scanned );
3279   check_limits();
3280 }
3281 
3282 // Closure for iteration over bitmaps
3283 class CMBitMapClosure : public BitMapClosure {
3284 private:
3285   // the bitmap that is being iterated over
3286   CMBitMap*                   _nextMarkBitMap;
3287   ConcurrentMark*             _cm;
3288   CMTask*                     _task;
3289   // true if we're scanning a heap region claimed by the task (so that
3290   // we move the finger along), false if we're not, i.e. when scanning
3291   // a memory region popped from the region stack (so that we
3292   // do not move the task finger along; it'd be a mistake if we did so).
3293   bool                        _scanning_heap_region;
3294 
3295 public:
3296   CMBitMapClosure(CMTask *task,
3297                   ConcurrentMark* cm,
3298                   CMBitMap* nextMarkBitMap)
3299     : _nextMarkBitMap(nextMarkBitMap), _cm(cm), _task(task) { }
3300 
3301   void set_scanning_heap_region(bool scanning_heap_region) {
3302     _scanning_heap_region = scanning_heap_region;
3303   }
3304 
3305   bool do_bit(size_t offset) {
3306     HeapWord* addr = _nextMarkBitMap->offsetToHeapWord(offset);
3307     assert(_nextMarkBitMap->isMarked(addr), "invariant");
3308     assert( addr < _cm->finger(), "invariant");
3309 
3310     if (_scanning_heap_region) {
3311       statsOnly( _task->increase_objs_found_on_bitmap() );
3312       assert(addr >= _task->finger(), "invariant");
3313       // We move that task's local finger along.
3314       _task->move_finger_to(addr);
3315     } else {
3316       // We move the task's region finger along.
3317       _task->move_region_finger_to(addr);
3318     }
3319 
3320     _task->scan_object(oop(addr));
3321     // we only partially drain the local queue and global stack
3322     _task->drain_local_queue(true);
3323     _task->drain_global_stack(true);
3324 
3325     // if the has_aborted flag has been raised, we need to bail out of
3326     // the iteration
3327     return !_task->has_aborted();
3328   }
3329 };
3330 
3331 // Closure for iterating over objects, currently only used for
3332 // processing SATB buffers.
3333 class CMObjectClosure : public ObjectClosure {
3334 private:
3335   CMTask* _task;
3336 
3337 public:
3338   void do_object(oop obj) {
3339     _task->deal_with_reference(obj);
3340   }
3341 
3342   CMObjectClosure(CMTask* task) : _task(task) { }
3343 };
3344 
3345 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
3346                                ConcurrentMark* cm,
3347                                CMTask* task)
3348   : _g1h(g1h), _cm(cm), _task(task) {
3349   assert(_ref_processor == NULL, "should be initialized to NULL");
3350 
3351   if (G1UseConcMarkReferenceProcessing) {
3352     _ref_processor = g1h->ref_processor_cm();
3353     assert(_ref_processor != NULL, "should not be NULL");
3354   }
3355 }
3356 
3357 void CMTask::setup_for_region(HeapRegion* hr) {
3358   // Separated the asserts so that we know which one fires.
3359   assert(hr != NULL,
3360         "claim_region() should have filtered out NULL regions");
3361   assert(!hr->continuesHumongous(),
3362         "claim_region() should have filtered out continues humongous regions");
3363 
3364   if (_cm->verbose_low()) {
3365     gclog_or_tty->print_cr("[%d] setting up for region "PTR_FORMAT,
3366                            _task_id, hr);
3367   }
3368 
3369   _curr_region  = hr;
3370   _finger       = hr->bottom();
3371   update_region_limit();
3372 }
3373 
3374 void CMTask::update_region_limit() {
3375   HeapRegion* hr            = _curr_region;
3376   HeapWord* bottom          = hr->bottom();
3377   HeapWord* limit           = hr->next_top_at_mark_start();
3378 
3379   if (limit == bottom) {
3380     if (_cm->verbose_low()) {
3381       gclog_or_tty->print_cr("[%d] found an empty region "
3382                              "["PTR_FORMAT", "PTR_FORMAT")",
3383                              _task_id, bottom, limit);
3384     }
3385     // The region was collected underneath our feet.
3386     // We set the finger to bottom to ensure that the bitmap
3387     // iteration that will follow this will not do anything.
3388     // (this is not a condition that holds when we set the region up,
3389     // as the region is not supposed to be empty in the first place)
3390     _finger = bottom;
3391   } else if (limit >= _region_limit) {
3392     assert(limit >= _finger, "peace of mind");
3393   } else {
3394     assert(limit < _region_limit, "only way to get here");
3395     // This can happen under some pretty unusual circumstances.  An
3396     // evacuation pause empties the region underneath our feet (NTAMS
3397     // at bottom). We then do some allocation in the region (NTAMS
3398     // stays at bottom), followed by the region being used as a GC
3399     // alloc region (NTAMS will move to top() and the objects
3400     // originally below it will be grayed). All objects now marked in
3401     // the region are explicitly grayed, if below the global finger,
3402     // and in fact we do not need to scan anything else. So, we simply
3403     // set _finger to be limit to ensure that the bitmap iteration
3404     // doesn't do anything.
3405     _finger = limit;
3406   }
3407 
3408   _region_limit = limit;
3409 }
3410 
3411 void CMTask::giveup_current_region() {
3412   assert(_curr_region != NULL, "invariant");
3413   if (_cm->verbose_low()) {
3414     gclog_or_tty->print_cr("[%d] giving up region "PTR_FORMAT,
3415                            _task_id, _curr_region);
3416   }
3417   clear_region_fields();
3418 }
3419 
3420 void CMTask::clear_region_fields() {
3421   // Values for these three fields that indicate that we're not
3422   // holding on to a region.
3423   _curr_region   = NULL;
3424   _finger        = NULL;
3425   _region_limit  = NULL;
3426 
3427   _region_finger = NULL;
3428 }
3429 
3430 void CMTask::set_cm_oop_closure(G1CMOopClosure* cm_oop_closure) {
3431   if (cm_oop_closure == NULL) {
3432     assert(_cm_oop_closure != NULL, "invariant");
3433   } else {
3434     assert(_cm_oop_closure == NULL, "invariant");
3435   }
3436   _cm_oop_closure = cm_oop_closure;
3437 }
3438 
3439 void CMTask::reset(CMBitMap* nextMarkBitMap) {
3440   guarantee(nextMarkBitMap != NULL, "invariant");
3441 
3442   if (_cm->verbose_low()) {
3443     gclog_or_tty->print_cr("[%d] resetting", _task_id);
3444   }
3445 
3446   _nextMarkBitMap                = nextMarkBitMap;
3447   clear_region_fields();
3448   assert(_aborted_region.is_empty(), "should have been cleared");
3449 
3450   _calls                         = 0;
3451   _elapsed_time_ms               = 0.0;
3452   _termination_time_ms           = 0.0;
3453   _termination_start_time_ms     = 0.0;
3454 
3455 #if _MARKING_STATS_
3456   _local_pushes                  = 0;
3457   _local_pops                    = 0;
3458   _local_max_size                = 0;
3459   _objs_scanned                  = 0;
3460   _global_pushes                 = 0;
3461   _global_pops                   = 0;
3462   _global_max_size               = 0;
3463   _global_transfers_to           = 0;
3464   _global_transfers_from         = 0;
3465   _region_stack_pops             = 0;
3466   _regions_claimed               = 0;
3467   _objs_found_on_bitmap          = 0;
3468   _satb_buffers_processed        = 0;
3469   _steal_attempts                = 0;
3470   _steals                        = 0;
3471   _aborted                       = 0;
3472   _aborted_overflow              = 0;
3473   _aborted_cm_aborted            = 0;
3474   _aborted_yield                 = 0;
3475   _aborted_timed_out             = 0;
3476   _aborted_satb                  = 0;
3477   _aborted_termination           = 0;
3478 #endif // _MARKING_STATS_
3479 }
3480 
3481 bool CMTask::should_exit_termination() {
3482   regular_clock_call();
3483   // This is called when we are in the termination protocol. We should
3484   // quit if, for some reason, this task wants to abort or the global
3485   // stack is not empty (this means that we can get work from it).
3486   return !_cm->mark_stack_empty() || has_aborted();
3487 }
3488 
3489 void CMTask::reached_limit() {
3490   assert(_words_scanned >= _words_scanned_limit ||
3491          _refs_reached >= _refs_reached_limit ,
3492          "shouldn't have been called otherwise");
3493   regular_clock_call();
3494 }
3495 
3496 void CMTask::regular_clock_call() {
3497   if (has_aborted()) return;
3498 
3499   // First, we need to recalculate the words scanned and refs reached
3500   // limits for the next clock call.
3501   recalculate_limits();
3502 
3503   // During the regular clock call we do the following
3504 
3505   // (1) If an overflow has been flagged, then we abort.
3506   if (_cm->has_overflown()) {
3507     set_has_aborted();
3508     return;
3509   }
3510 
3511   // If we are not concurrent (i.e. we're doing remark) we don't need
3512   // to check anything else. The other steps are only needed during
3513   // the concurrent marking phase.
3514   if (!concurrent()) return;
3515 
3516   // (2) If marking has been aborted for Full GC, then we also abort.
3517   if (_cm->has_aborted()) {
3518     set_has_aborted();
3519     statsOnly( ++_aborted_cm_aborted );
3520     return;
3521   }
3522 
3523   double curr_time_ms = os::elapsedVTime() * 1000.0;
3524 
3525   // (3) If marking stats are enabled, then we update the step history.
3526 #if _MARKING_STATS_
3527   if (_words_scanned >= _words_scanned_limit) {
3528     ++_clock_due_to_scanning;
3529   }
3530   if (_refs_reached >= _refs_reached_limit) {
3531     ++_clock_due_to_marking;
3532   }
3533 
3534   double last_interval_ms = curr_time_ms - _interval_start_time_ms;
3535   _interval_start_time_ms = curr_time_ms;
3536   _all_clock_intervals_ms.add(last_interval_ms);
3537 
3538   if (_cm->verbose_medium()) {
3539       gclog_or_tty->print_cr("[%d] regular clock, interval = %1.2lfms, "
3540                         "scanned = %d%s, refs reached = %d%s",
3541                         _task_id, last_interval_ms,
3542                         _words_scanned,
3543                         (_words_scanned >= _words_scanned_limit) ? " (*)" : "",
3544                         _refs_reached,
3545                         (_refs_reached >= _refs_reached_limit) ? " (*)" : "");
3546   }
3547 #endif // _MARKING_STATS_
3548 
3549   // (4) We check whether we should yield. If we have to, then we abort.
3550   if (_cm->should_yield()) {
3551     // We should yield. To do this we abort the task. The caller is
3552     // responsible for yielding.
3553     set_has_aborted();
3554     statsOnly( ++_aborted_yield );
3555     return;
3556   }
3557 
3558   // (5) We check whether we've reached our time quota. If we have,
3559   // then we abort.
3560   double elapsed_time_ms = curr_time_ms - _start_time_ms;
3561   if (elapsed_time_ms > _time_target_ms) {
3562     set_has_aborted();
3563     _has_timed_out = true;
3564     statsOnly( ++_aborted_timed_out );
3565     return;
3566   }
3567 
3568   // (6) Finally, we check whether there are enough completed SATB
3569   // buffers available for processing. If there are, we abort.
3570   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3571   if (!_draining_satb_buffers && satb_mq_set.process_completed_buffers()) {
3572     if (_cm->verbose_low()) {
3573       gclog_or_tty->print_cr("[%d] aborting to deal with pending SATB buffers",
3574                              _task_id);
3575     }
3576     // we do need to process SATB buffers, so we'll abort and restart
3577     // the marking task to do so
3578     set_has_aborted();
3579     statsOnly( ++_aborted_satb );
3580     return;
3581   }
3582 }
3583 
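     // Re-arms the work-based clock: the next regular_clock_call() becomes
     // due once another words_scanned_period words have been scanned or
     // another refs_reached_period references have been visited.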
3584 void CMTask::recalculate_limits() {
3585   _real_words_scanned_limit = _words_scanned + words_scanned_period;
3586   _words_scanned_limit      = _real_words_scanned_limit;
3587 
3588   _real_refs_reached_limit  = _refs_reached  + refs_reached_period;
3589   _refs_reached_limit       = _real_refs_reached_limit;
3590 }
3591 
3592 void CMTask::decrease_limits() {
3593   // This is called when we believe that we're going to do an infrequent
3594   // operation which will increase the cost per byte scanned (i.e. move
3595   // entries to/from the global stack). It basically tries to decrease the
3596   // scanning limit so that the clock is called earlier.
3597 
3598   if (_cm->verbose_medium()) {
3599     gclog_or_tty->print_cr("[%d] decreasing limits", _task_id);
3600   }
3601 
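       // The new limits sit only a quarter of a period beyond the point at
       // which they were last recalculated, so the clock fires sooner.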
3602   _words_scanned_limit = _real_words_scanned_limit -
3603     3 * words_scanned_period / 4;
3604   _refs_reached_limit  = _real_refs_reached_limit -
3605     3 * refs_reached_period / 4;
3606 }
3607 
3608 void CMTask::move_entries_to_global_stack() {
3609   // local array where we'll store the entries that will be popped
3610   // from the local queue
3611   oop buffer[global_stack_transfer_size];
3612 
3613   int n = 0;
3614   oop obj;
3615   while (n < global_stack_transfer_size && _task_queue->pop_local(obj)) {
3616     buffer[n] = obj;
3617     ++n;
3618   }
3619 
3620   if (n > 0) {
3621     // we popped at least one entry from the local queue
3622 
3623     statsOnly( ++_global_transfers_to; _local_pops += n );
3624 
3625     if (!_cm->mark_stack_push(buffer, n)) {
3626       if (_cm->verbose_low()) {
3627         gclog_or_tty->print_cr("[%d] aborting due to global stack overflow",
3628                                _task_id);
3629       }
3630       set_has_aborted();
3631     } else {
3632       // the transfer was successful
3633 
3634       if (_cm->verbose_medium()) {
3635         gclog_or_tty->print_cr("[%d] pushed %d entries to the global stack",
3636                                _task_id, n);
3637       }
3638       statsOnly( int tmp_size = _cm->mark_stack_size();
3639                  if (tmp_size > _global_max_size) {
3640                    _global_max_size = tmp_size;
3641                  }
3642                  _global_pushes += n );
3643     }
3644   }
3645 
3646   // this operation was quite expensive, so decrease the limits
3647   decrease_limits();
3648 }
3649 
3650 void CMTask::get_entries_from_global_stack() {
3651   // local array where we'll store the entries that will be popped
3652   // from the global stack.
3653   oop buffer[global_stack_transfer_size];
3654   int n;
3655   _cm->mark_stack_pop(buffer, global_stack_transfer_size, &n);
3656   assert(n <= global_stack_transfer_size,
3657          "we should not pop more than the given limit");
3658   if (n > 0) {
3659     // yes, we did actually pop at least one entry
3660 
3661     statsOnly( ++_global_transfers_from; _global_pops += n );
3662     if (_cm->verbose_medium()) {
3663       gclog_or_tty->print_cr("[%d] popped %d entries from the global stack",
3664                              _task_id, n);
3665     }
3666     for (int i = 0; i < n; ++i) {
3667       bool success = _task_queue->push(buffer[i]);
3668       // We only call this when the local queue is empty or under a
3669       // given target limit. So, we do not expect this push to fail.
3670       assert(success, "invariant");
3671     }
3672 
3673     statsOnly( int tmp_size = _task_queue->size();
3674                if (tmp_size > _local_max_size) {
3675                  _local_max_size = tmp_size;
3676                }
3677                _local_pushes += n );
3678   }
3679 
3680   // this operation was quite expensive, so decrease the limits
3681   decrease_limits();
3682 }
3683 
3684 void CMTask::drain_local_queue(bool partially) {
3685   if (has_aborted()) return;
3686 
3687   // Decide what the target size is, depending on whether we're going to
3688   // drain it partially (so that other tasks can steal if they run out
3689   // of things to do) or totally (at the very end).
3690   size_t target_size;
3691   if (partially) {
3692     target_size = MIN2((size_t)_task_queue->max_elems()/3, GCDrainStackTargetSize);
3693   } else {
3694     target_size = 0;
3695   }
3696 
3697   if (_task_queue->size() > target_size) {
3698     if (_cm->verbose_high()) {
3699       gclog_or_tty->print_cr("[%d] draining local queue, target size = %d",
3700                              _task_id, target_size);
3701     }
3702 
3703     oop obj;
3704     bool ret = _task_queue->pop_local(obj);
3705     while (ret) {
3706       statsOnly( ++_local_pops );
3707 
3708       if (_cm->verbose_high()) {
3709         gclog_or_tty->print_cr("[%d] popped "PTR_FORMAT, _task_id,
3710                                (void*) obj);
3711       }
3712 
3713       assert(_g1h->is_in_g1_reserved((HeapWord*) obj), "invariant" );
3714       assert(!_g1h->is_on_master_free_list(
3715                   _g1h->heap_region_containing((HeapWord*) obj)), "invariant");
3716 
3717       scan_object(obj);
3718 
3719       if (_task_queue->size() <= target_size || has_aborted()) {
3720         ret = false;
3721       } else {
3722         ret = _task_queue->pop_local(obj);
3723       }
3724     }
3725 
3726     if (_cm->verbose_high()) {
3727       gclog_or_tty->print_cr("[%d] drained local queue, size = %d",
3728                              _task_id, _task_queue->size());
3729     }
3730   }
3731 }
3732 
3733 void CMTask::drain_global_stack(bool partially) {
3734   if (has_aborted()) return;
3735 
3736   // We have a policy to drain the local queue before we attempt to
3737   // drain the global stack.
3738   assert(partially || _task_queue->size() == 0, "invariant");
3739 
3740   // Decide what the target size is, depending on whether we're going to
3741   // drain it partially (so that other tasks can steal if they run out
3742   // of things to do) or totally (at the very end).  Notice that,
3743   // because we move entries from the global stack in chunks or
3744   // because another task might be doing the same, we might in fact
3745   // drop below the target. But, this is not a problem.
3746   size_t target_size;
3747   if (partially) {
3748     target_size = _cm->partial_mark_stack_size_target();
3749   } else {
3750     target_size = 0;
3751   }
3752 
3753   if (_cm->mark_stack_size() > target_size) {
3754     if (_cm->verbose_low()) {
3755       gclog_or_tty->print_cr("[%d] draining global_stack, target size %d",
3756                              _task_id, target_size);
3757     }
3758 
3759     while (!has_aborted() && _cm->mark_stack_size() > target_size) {
3760       get_entries_from_global_stack();
3761       drain_local_queue(partially);
3762     }
3763 
3764     if (_cm->verbose_low()) {
3765       gclog_or_tty->print_cr("[%d] drained global stack, size = %d",
3766                              _task_id, _cm->mark_stack_size());
3767     }
3768   }
3769 }
3770 
3771 // SATB Queue has several assumptions on whether to call the par or
3772 // non-par versions of the methods. This is why some of the code is
3773 // replicated. We should really get rid of the single-threaded version
3774 // of the code to simplify things.
3775 void CMTask::drain_satb_buffers() {
3776   if (has_aborted()) return;
3777 
3778   // We set this so that the regular clock knows that we're in the
3779   // middle of draining buffers and doesn't set the abort flag when it
3780   // notices that SATB buffers are available for draining. It'd be
3781   // very counterproductive if it did that. :-)
3782   _draining_satb_buffers = true;
3783 
3784   CMObjectClosure oc(this);
3785   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
3786   if (G1CollectedHeap::use_parallel_gc_threads()) {
3787     satb_mq_set.set_par_closure(_task_id, &oc);
3788   } else {
3789     satb_mq_set.set_closure(&oc);
3790   }
3791 
3792   // This keeps claiming and applying the closure to completed buffers
3793   // until we run out of buffers or we need to abort.
3794   if (G1CollectedHeap::use_parallel_gc_threads()) {
3795     while (!has_aborted() &&
3796            satb_mq_set.par_apply_closure_to_completed_buffer(_task_id)) {
3797       if (_cm->verbose_medium()) {
3798         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3799       }
3800       statsOnly( ++_satb_buffers_processed );
3801       regular_clock_call();
3802     }
3803   } else {
3804     while (!has_aborted() &&
3805            satb_mq_set.apply_closure_to_completed_buffer()) {
3806       if (_cm->verbose_medium()) {
3807         gclog_or_tty->print_cr("[%d] processed an SATB buffer", _task_id);
3808       }
3809       statsOnly( ++_satb_buffers_processed );
3810       regular_clock_call();
3811     }
3812   }
3813 
3814   if (!concurrent() && !has_aborted()) {
3815     // We should only do this during remark.
3816     if (G1CollectedHeap::use_parallel_gc_threads()) {
3817       satb_mq_set.par_iterate_closure_all_threads(_task_id);
3818     } else {
3819       satb_mq_set.iterate_closure_all_threads();
3820     }
3821   }
3822 
3823   _draining_satb_buffers = false;
3824 
3825   assert(has_aborted() ||
3826          concurrent() ||
3827          satb_mq_set.completed_buffers_num() == 0, "invariant");
3828 
3829   if (G1CollectedHeap::use_parallel_gc_threads()) {
3830     satb_mq_set.set_par_closure(_task_id, NULL);
3831   } else {
3832     satb_mq_set.set_closure(NULL);
3833   }
3834 
3835   // again, this was a potentially expensive operation, so decrease the
3836   // limits to get the regular clock call early
3837   decrease_limits();
3838 }
3839 
3840 void CMTask::drain_region_stack(BitMapClosure* bc) {
3841   if (has_aborted()) return;
3842 
3843   assert(_region_finger == NULL,
3844          "it should be NULL when we're not scanning a region");
3845 
3846   if (!_cm->region_stack_empty() || !_aborted_region.is_empty()) {
3847     if (_cm->verbose_low()) {
3848       gclog_or_tty->print_cr("[%d] draining region stack, size = %d",
3849                              _task_id, _cm->region_stack_size());
3850     }
3851 
3852     MemRegion mr;
3853 
3854     if (!_aborted_region.is_empty()) {
3855       mr = _aborted_region;
3856       _aborted_region = MemRegion();
3857 
3858       if (_cm->verbose_low()) {
3859         gclog_or_tty->print_cr("[%d] scanning aborted region "
3860                                "[ " PTR_FORMAT ", " PTR_FORMAT " )",
3861                                _task_id, mr.start(), mr.end());
3862       }
3863     } else {
3864       mr = _cm->region_stack_pop_lock_free();
3865       // it returns MemRegion() if the pop fails
3866       statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3867     }
3868 
3869     while (mr.start() != NULL) {
3870       if (_cm->verbose_medium()) {
3871         gclog_or_tty->print_cr("[%d] we are scanning region "
3872                                "["PTR_FORMAT", "PTR_FORMAT")",
3873                                _task_id, mr.start(), mr.end());
3874       }
3875 
3876       assert(mr.end() <= _cm->finger(),
3877              "otherwise the region shouldn't be on the stack");
3878       assert(!mr.is_empty(), "Only non-empty regions live on the region stack");
3879       if (_nextMarkBitMap->iterate(bc, mr)) {
3880         assert(!has_aborted(),
3881                "cannot abort the task without aborting the bitmap iteration");
3882 
3883         // We finished iterating over the region without aborting.
3884         regular_clock_call();
3885         if (has_aborted()) {
3886           mr = MemRegion();
3887         } else {
3888           mr = _cm->region_stack_pop_lock_free();
3889           // it returns MemRegion() if the pop fails
3890           statsOnly(if (mr.start() != NULL) ++_region_stack_pops );
3891         }
3892       } else {
3893         assert(has_aborted(), "currently the only way to do so");
3894 
3895         // The only way to abort the bitmap iteration is to return
3896         // false from the do_bit() method. However, inside the
3897         // do_bit() method we move the _region_finger to point to the
3898         // object currently being looked at. So, if we bail out, we
3899         // have definitely set _region_finger to something non-null.
3900         assert(_region_finger != NULL, "invariant");
3901 
3902         // Make sure that any previously aborted region has been
3903         // cleared.
3904         assert(_aborted_region.is_empty(), "aborted region not cleared");
3905 
3906         // The iteration was actually aborted. So now _region_finger
3907         // points to the address of the object we last scanned. If we
3908         // leave it there, when we restart this task, we will rescan
3909         // the object. It is easy to avoid this. We move the finger by
3910         // enough to point to the next possible object header (the
3911         // bitmap knows by how much we need to move it as it knows its
3912         // granularity).
3913         MemRegion newRegion =
3914           MemRegion(_nextMarkBitMap->nextWord(_region_finger), mr.end());
3915 
3916         if (!newRegion.is_empty()) {
3917           if (_cm->verbose_low()) {
3918             gclog_or_tty->print_cr("[%d] recording unscanned region "
3919                                    "[" PTR_FORMAT "," PTR_FORMAT ") in CMTask",
3920                                    _task_id,
3921                                    newRegion.start(), newRegion.end());
3922           }
3923           // Now record the part of the region we didn't scan to
3924           // make sure this task scans it later.
3925           _aborted_region = newRegion;
3926         }
3927         // break from while
3928         mr = MemRegion();
3929       }
3930       _region_finger = NULL;
3931     }
3932 
3933     if (_cm->verbose_low()) {
3934       gclog_or_tty->print_cr("[%d] drained region stack, size = %d",
3935                              _task_id, _cm->region_stack_size());
3936     }
3937   }
3938 }
3939 
3940 void CMTask::print_stats() {
3941   gclog_or_tty->print_cr("Marking Stats, task = %d, calls = %d",
3942                          _task_id, _calls);
3943   gclog_or_tty->print_cr("  Elapsed time = %1.2lfms, Termination time = %1.2lfms",
3944                          _elapsed_time_ms, _termination_time_ms);
3945   gclog_or_tty->print_cr("  Step Times (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3946                          _step_times_ms.num(), _step_times_ms.avg(),
3947                          _step_times_ms.sd());
3948   gclog_or_tty->print_cr("                    max = %1.2lfms, total = %1.2lfms",
3949                          _step_times_ms.maximum(), _step_times_ms.sum());
3950 
3951 #if _MARKING_STATS_
3952   gclog_or_tty->print_cr("  Clock Intervals (cum): num = %d, avg = %1.2lfms, sd = %1.2lfms",
3953                          _all_clock_intervals_ms.num(), _all_clock_intervals_ms.avg(),
3954                          _all_clock_intervals_ms.sd());
3955   gclog_or_tty->print_cr("                         max = %1.2lfms, total = %1.2lfms",
3956                          _all_clock_intervals_ms.maximum(),
3957                          _all_clock_intervals_ms.sum());
3958   gclog_or_tty->print_cr("  Clock Causes (cum): scanning = %d, marking = %d",
3959                          _clock_due_to_scanning, _clock_due_to_marking);
3960   gclog_or_tty->print_cr("  Objects: scanned = %d, found on the bitmap = %d",
3961                          _objs_scanned, _objs_found_on_bitmap);
3962   gclog_or_tty->print_cr("  Local Queue:  pushes = %d, pops = %d, max size = %d",
3963                          _local_pushes, _local_pops, _local_max_size);
3964   gclog_or_tty->print_cr("  Global Stack: pushes = %d, pops = %d, max size = %d",
3965                          _global_pushes, _global_pops, _global_max_size);
3966   gclog_or_tty->print_cr("                transfers to = %d, transfers from = %d",
3967                          _global_transfers_to,_global_transfers_from);
3968   gclog_or_tty->print_cr("  Regions: claimed = %d, Region Stack: pops = %d",
3969                          _regions_claimed, _region_stack_pops);
3970   gclog_or_tty->print_cr("  SATB buffers: processed = %d", _satb_buffers_processed);
3971   gclog_or_tty->print_cr("  Steals: attempts = %d, successes = %d",
3972                          _steal_attempts, _steals);
3973   gclog_or_tty->print_cr("  Aborted: %d, due to", _aborted);
3974   gclog_or_tty->print_cr("    overflow: %d, global abort: %d, yield: %d",
3975                          _aborted_overflow, _aborted_cm_aborted, _aborted_yield);
3976   gclog_or_tty->print_cr("    time out: %d, SATB: %d, termination: %d",
3977                          _aborted_timed_out, _aborted_satb, _aborted_termination);
3978 #endif // _MARKING_STATS_
3979 }
3980 
3981 /*****************************************************************************
3982 
3983     The do_marking_step(time_target_ms) method is the building block
3984     of the parallel marking framework. It can be called in parallel
3985     with other invocations of do_marking_step() on different tasks
3986     (but only one per task, obviously) and concurrently with the
3987     mutator threads, or during remark, hence it eliminates the need
3988     for two versions of the code. When called during remark, it will
3989     pick up from where the task left off during the concurrent marking
3990     phase. Interestingly, tasks are also claimable during evacuation
3991     pauses, since do_marking_step() ensures that it aborts before
3992     it needs to yield.
3993 
3994     The data structures that it uses to do marking work are the
3995     following:
3996 
3997       (1) Marking Bitmap. If there are gray objects that appear only
3998       on the bitmap (this happens either when dealing with an overflow
3999       or when the initial marking phase has simply marked the roots
4000       and didn't push them on the stack), then tasks claim heap
4001       regions whose bitmap they then scan to find gray objects. A
4002       global finger indicates where the end of the last claimed region
4003       is. A local finger indicates how far into the region a task has
4004       scanned. The two fingers are used to determine how to gray an
4005       object (i.e. whether simply marking it is OK, as it will be
4006       visited by a task in the future, or whether it also needs to be
4007       pushed on a stack).
4008 
4009       (2) Local Queue. The local queue of the task which is accessed
4010       reasonably efficiently by the task. Other tasks can steal from
4011       it when they run out of work. Throughout the marking phase, a
4012       task attempts to keep its local queue short but not totally
4013       empty, so that entries are available for stealing by other
4014       tasks. Only when there is no more work will a task totally
4015       drain its local queue.
4016 
4017       (3) Global Mark Stack. This handles local queue overflow. During
4018       marking only sets of entries are moved between it and the local
4019       queues, as access to it requires a mutex and more fine-grained
4020       interaction with it, which might cause contention. If it
4021       overflows, then the marking phase should restart and iterate
4022       over the bitmap to identify gray objects. Throughout the marking
4023       phase, tasks attempt to keep the global mark stack at a small
4024       length but not totally empty, so that entries are available for
4025       popping by other tasks. Only when there is no more work will
4026       tasks totally drain the global mark stack.
4027 
4028       (4) Global Region Stack. Entries on it correspond to areas of
4029       the bitmap that need to be scanned since they contain gray
4030       objects. Pushes on the region stack only happen during
4031       evacuation pauses and typically correspond to areas covered by
4032       GC LABs. If it overflows, then the marking phase should restart
4033       and iterate over the bitmap to identify gray objects. Tasks will
4034       try to totally drain the region stack as soon as possible.
4035 
4036       (5) SATB Buffer Queue. This is where completed SATB buffers are
4037       made available. Buffers are regularly removed from this queue
4038       and scanned for roots, so that the queue doesn't get too
4039       long. During remark, all completed buffers are processed, as
4040       well as the filled in parts of any uncompleted buffers.
4041 
4042     The do_marking_step() method tries to abort when the time target
4043     has been reached. There are a few other cases when the
4044     do_marking_step() method also aborts:
4045 
4046       (1) When the marking phase has been aborted (after a Full GC).
4047 
4048       (2) When a global overflow (either on the global stack or the
4049       region stack) has been triggered. Before the task aborts, it
4050       will actually sync up with the other tasks to ensure that all
4051       the marking data structures (local queues, stacks, fingers etc.)
4052       are re-initialised so that when do_marking_step() completes,
4053       the marking phase can immediately restart.
4054 
4055       (3) When enough completed SATB buffers are available. The
4056       do_marking_step() method only tries to drain SATB buffers right
4057       at the beginning. So, if enough buffers are available, the
4058       marking step aborts and the SATB buffers are processed at
4059       the beginning of the next invocation.
4060 
4061       (4) To yield. When we have to yield, we abort and yield
4062       right at the end of do_marking_step(). This saves us from a lot
4063       of hassle as, by yielding, we might allow a Full GC. If this
4064       happens then objects will be compacted underneath our feet, the
4065       heap might shrink, etc. We save checking for this by just
4066       aborting and doing the yield right at the end.
4067 
4068     From the above it follows that the do_marking_step() method should
4069     be called in a loop (or, otherwise, regularly) until it completes.
4070 
4071     If a marking step completes without its has_aborted() flag being
4072     true, it means it has completed the current marking phase (and
4073     also all other marking tasks have done so and have all synced up).
4074 
4075     A method called regular_clock_call() is invoked "regularly" (in
4076     sub ms intervals) throughout marking. It is this clock method that
4077     checks all the abort conditions which were mentioned above and
4078     decides when the task should abort. A work-based scheme is used to
4079     trigger this clock method: when the number of object words the
4080     marking phase has scanned or the number of references the marking
4081     phase has visited reach a given limit. Additional invocations to
4082     the clock method have been planted in a few other strategic places
4083     too. The initial reason for the clock method was to avoid calling
4084     vtime too regularly, as it is quite expensive. So, once it was in
4085     place, it was natural to piggy-back all the other conditions on it
4086     too and not constantly check them throughout the code.
4087 
4088  *****************************************************************************/
4089 
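     // A rough sketch of how a driver is expected to invoke this method
     // (the real drivers are the concurrent marking and remark worker
     // tasks; the variable names below are illustrative only):
     //
     //   do {
     //     task->do_marking_step(target_ms,
     //                           true /* do_stealing */,
     //                           true /* do_termination */);
     //     // yield, and/or sync up on a global overflow, as appropriate
     //   } while (task->has_aborted() && !cm->has_aborted());
     //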
4090 void CMTask::do_marking_step(double time_target_ms,
4091                              bool do_stealing,
4092                              bool do_termination) {
4093   assert(time_target_ms >= 1.0, "minimum granularity is 1ms");
4094   assert(concurrent() == _cm->concurrent(), "they should be the same");
4095 
4096   assert(concurrent() || _cm->region_stack_empty(),
4097          "the region stack should have been cleared before remark");
4098   assert(concurrent() || !_cm->has_aborted_regions(),
4099          "aborted regions should have been cleared before remark");
4100   assert(_region_finger == NULL,
4101          "this should be non-null only when a region is being scanned");
4102 
4103   G1CollectorPolicy* g1_policy = _g1h->g1_policy();
4104   assert(_task_queues != NULL, "invariant");
4105   assert(_task_queue != NULL, "invariant");
4106   assert(_task_queues->queue(_task_id) == _task_queue, "invariant");
4107 
4108   assert(!_claimed,
4109          "only one thread should claim this task at any one time");
4110 
4111   // OK, this doesn't safeguard against all possible scenarios, as it is
4112   // possible for two threads to set the _claimed flag at the same
4113   // time. But it is only for debugging purposes anyway and it will
4114   // catch most problems.
4115   _claimed = true;
4116 
4117   _start_time_ms = os::elapsedVTime() * 1000.0;
4118   statsOnly( _interval_start_time_ms = _start_time_ms );
4119 
4120   double diff_prediction_ms =
4121     g1_policy->get_new_prediction(&_marking_step_diffs_ms);
4122   _time_target_ms = time_target_ms - diff_prediction_ms;
4123 
4124   // set up the variables that are used in the work-based scheme to
4125   // call the regular clock method
4126   _words_scanned = 0;
4127   _refs_reached  = 0;
4128   recalculate_limits();
4129 
4130   // clear all flags
4131   clear_has_aborted();
4132   _has_timed_out = false;
4133   _draining_satb_buffers = false;
4134 
4135   ++_calls;
4136 
4137   if (_cm->verbose_low()) {
4138     gclog_or_tty->print_cr("[%d] >>>>>>>>>> START, call = %d, "
4139                            "target = %1.2lfms >>>>>>>>>>",
4140                            _task_id, _calls, _time_target_ms);
4141   }
4142 
4143   // Set up the bitmap and oop closures. Anything that uses them is
4144   // eventually called from this method, so it is OK to allocate them
4145   // locally, on this method's stack.
4146   CMBitMapClosure bitmap_closure(this, _cm, _nextMarkBitMap);
4147   G1CMOopClosure  cm_oop_closure(_g1h, _cm, this);
4148   set_cm_oop_closure(&cm_oop_closure);
4149 
4150   if (_cm->has_overflown()) {
4151     // This can happen if the region stack or the mark stack overflows
4152     // during a GC pause and this task, after a yield point,
4153     // restarts. We have to abort as we need to get into the overflow
4154     // protocol which happens right at the end of this task.
4155     set_has_aborted();
4156   }
4157 
4158   // First drain any available SATB buffers. After this, we will not
4159   // look at SATB buffers before the next invocation of this method.
4160   // If enough completed SATB buffers are queued up, the regular clock
4161   // will abort this task so that it restarts.
4162   drain_satb_buffers();
4163   // ...then partially drain the local queue and the global stack
4164   drain_local_queue(true);
4165   drain_global_stack(true);
4166 
4167   // Then totally drain the region stack.  We will not look at
4168   // it again before the next invocation of this method. Entries on
4169   // the region stack are only added during evacuation pauses, for
4170   // which we have to yield. When we do, we abort the task anyway so
4171   // it will look at the region stack again when it restarts.
4172   bitmap_closure.set_scanning_heap_region(false);
4173   drain_region_stack(&bitmap_closure);
4174   // ...then partially drain the local queue and the global stack
4175   drain_local_queue(true);
4176   drain_global_stack(true);
4177 
4178   do {
4179     if (!has_aborted() && _curr_region != NULL) {
4180       // This means that we're already holding on to a region.
4181       assert(_finger != NULL, "if region is not NULL, then the finger "
4182              "should not be NULL either");
4183 
4184       // We might have restarted this task after an evacuation pause
4185       // which might have evacuated the region we're holding on to
4186       // underneath our feet. Let's read its limit again to make sure
4187       // that we do not iterate over a region of the heap that
4188       // contains garbage (update_region_limit() will also move
4189       // _finger to the start of the region if it is found empty).
4190       update_region_limit();
4191       // We will start from _finger not from the start of the region,
4192       // as we might be restarting this task after aborting half-way
4193       // through scanning this region. In this case, _finger points to
4194       // the address where we last found a marked object. If this is a
4195       // fresh region, _finger points to start().
4196       MemRegion mr = MemRegion(_finger, _region_limit);
4197 
4198       if (_cm->verbose_low()) {
4199         gclog_or_tty->print_cr("[%d] we're scanning part "
4200                                "["PTR_FORMAT", "PTR_FORMAT") "
4201                                "of region "PTR_FORMAT,
4202                                _task_id, _finger, _region_limit, _curr_region);
4203       }
4204 
4205       // Let's iterate over the bitmap of the part of the
4206       // region that is left.
4207       bitmap_closure.set_scanning_heap_region(true);
4208       if (mr.is_empty() ||
4209           _nextMarkBitMap->iterate(&bitmap_closure, mr)) {
4210         // We successfully completed iterating over the region. Now,
4211         // let's give up the region.
4212         giveup_current_region();
4213         regular_clock_call();
4214       } else {
4215         assert(has_aborted(), "currently the only way to do so");
4216         // The only way to abort the bitmap iteration is to return
4217         // false from the do_bit() method. However, inside the
4218         // do_bit() method we move the _finger to point to the
4219         // object currently being looked at. So, if we bail out, we
4220         // have definitely set _finger to something non-null.
4221         assert(_finger != NULL, "invariant");
4222 
4223         // Region iteration was actually aborted. So now _finger
4224         // points to the address of the object we last scanned. If we
4225         // leave it there, when we restart this task, we will rescan
4226         // the object. It is easy to avoid this. We move the finger by
4227         // enough to point to the next possible object header (the
4228         // bitmap knows by how much we need to move it as it knows its
4229         // granularity).
4230         assert(_finger < _region_limit, "invariant");
4231         HeapWord* new_finger = _nextMarkBitMap->nextWord(_finger);
4232         // Check if bitmap iteration was aborted while scanning the last object
4233         if (new_finger >= _region_limit) {
4234           giveup_current_region();
4235         } else {
4236           move_finger_to(new_finger);
4237         }
4238       }
4239     }
4240     // At this point we have either completed iterating over the
4241     // region we were holding on to, or we have aborted.
4242 
4243     // We then partially drain the local queue and the global stack.
4244     // (Do we really need this?)
4245     drain_local_queue(true);
4246     drain_global_stack(true);
4247 
4248     // Read the note on the claim_region() method on why it might
4249     // return NULL with potentially more regions available for
4250     // claiming and why we have to check out_of_regions() to determine
4251     // whether we're done or not.
4252     while (!has_aborted() && _curr_region == NULL && !_cm->out_of_regions()) {
4253       // We are going to try to claim a new region. We should have
4254       // given up on the previous one.
4255       // Separated the asserts so that we know which one fires.
4256       assert(_curr_region  == NULL, "invariant");
4257       assert(_finger       == NULL, "invariant");
4258       assert(_region_limit == NULL, "invariant");
4259       if (_cm->verbose_low()) {
4260         gclog_or_tty->print_cr("[%d] trying to claim a new region", _task_id);
4261       }
4262       HeapRegion* claimed_region = _cm->claim_region(_task_id);
4263       if (claimed_region != NULL) {
4264         // Yes, we managed to claim one
4265         statsOnly( ++_regions_claimed );
4266 
4267         if (_cm->verbose_low()) {
4268           gclog_or_tty->print_cr("[%d] we successfully claimed "
4269                                  "region "PTR_FORMAT,
4270                                  _task_id, claimed_region);
4271         }
4272 
4273         setup_for_region(claimed_region);
4274         assert(_curr_region == claimed_region, "invariant");
4275       }
4276       // It is important to call the regular clock here. It might take
4277       // a while to claim a region if, for example, we hit a large
4278       // block of empty regions. So we need to call the regular clock
4279       // method once round the loop to make sure it's called
4280       // frequently enough.
4281       regular_clock_call();
4282     }
4283 
4284     if (!has_aborted() && _curr_region == NULL) {
4285       assert(_cm->out_of_regions(),
4286              "at this point we should be out of regions");
4287     }
4288   } while ( _curr_region != NULL && !has_aborted());
4289 
4290   if (!has_aborted()) {
4291     // We cannot check whether the global stack is empty, since other
4292     // tasks might be pushing objects to it concurrently. We also cannot
4293     // check if the region stack is empty because if a thread is aborting
4294     // it can push a partially done region back.
4295     assert(_cm->out_of_regions(),
4296            "at this point we should be out of regions");
4297 
4298     if (_cm->verbose_low()) {
4299       gclog_or_tty->print_cr("[%d] all regions claimed", _task_id);
4300     }
4301 
4302     // Try to reduce the number of available SATB buffers so that
4303     // remark has less work to do.
4304     drain_satb_buffers();
4305   }
4306 
4307   // Since we've done everything else, we can now totally drain the
4308   // local queue and global stack.
4309   drain_local_queue(false);
4310   drain_global_stack(false);
4311 
4312   // Attempt to steal work from other tasks' queues.
4313   if (do_stealing && !has_aborted()) {
4314     // We have not aborted. This means that we have finished all that
4315     // we could. Let's try to do some stealing...
4316 
4317     // We cannot check whether the global stack is empty, since other
4318     // tasks might be pushing objects to it concurrently. We also cannot
4319     // check if the region stack is empty because if a thread is aborting
4320     // it can push a partially done region back.
4321     assert(_cm->out_of_regions() && _task_queue->size() == 0,
4322            "only way to reach here");
4323 
4324     if (_cm->verbose_low()) {
4325       gclog_or_tty->print_cr("[%d] starting to steal", _task_id);
4326     }
4327 
4328     while (!has_aborted()) {
4329       oop obj;
4330       statsOnly( ++_steal_attempts );
4331 
4332       if (_cm->try_stealing(_task_id, &_hash_seed, obj)) {
4333         if (_cm->verbose_medium()) {
4334           gclog_or_tty->print_cr("[%d] stolen "PTR_FORMAT" successfully",
4335                                  _task_id, (void*) obj);
4336         }
4337 
4338         statsOnly( ++_steals );
4339 
4340         assert(_nextMarkBitMap->isMarked((HeapWord*) obj),
4341                "any stolen object should be marked");
4342         scan_object(obj);
4343 
4344         // And since we're towards the end, let's totally drain the
4345         // local queue and global stack.
4346         drain_local_queue(false);
4347         drain_global_stack(false);
4348       } else {
4349         break;
4350       }
4351     }
4352   }
4353 
4354   // If we are about to wrap up and go into termination, check if we
4355   // should raise the overflow flag.
4356   if (do_termination && !has_aborted()) {
4357     if (_cm->force_overflow()->should_force()) {
4358       _cm->set_has_overflown();
4359       regular_clock_call();
4360     }
4361   }
4362 
4363   // We still haven't aborted. Now, let's try to get into the
4364   // termination protocol.
4365   if (do_termination && !has_aborted()) {
4366     // We cannot check whether the global stack is empty, since other
4367     // tasks might be concurrently pushing objects on it. We also cannot
4368     // check if the region stack is empty because if a thread is aborting
4369     // it can push a partially done region back.
4370     // Separated the asserts so that we know which one fires.
4371     assert(_cm->out_of_regions(), "only way to reach here");
4372     assert(_task_queue->size() == 0, "only way to reach here");
4373 
4374     if (_cm->verbose_low()) {
4375       gclog_or_tty->print_cr("[%d] starting termination protocol", _task_id);
4376     }
4377 
4378     _termination_start_time_ms = os::elapsedVTime() * 1000.0;
4379     // The CMTask class also extends the TerminatorTerminator class,
4380     // hence its should_exit_termination() method will also be used to
4381     // decide whether to exit the termination protocol.
4382     bool finished = _cm->terminator()->offer_termination(this);
4383     double termination_end_time_ms = os::elapsedVTime() * 1000.0;
4384     _termination_time_ms +=
4385       termination_end_time_ms - _termination_start_time_ms;
4386 
4387     if (finished) {
4388       // We're all done.
4389 
4390       if (_task_id == 0) {
4391         // let's allow task 0 to do this
4392         if (concurrent()) {
4393           assert(_cm->concurrent_marking_in_progress(), "invariant");
4394           // we need to set this to false before the next
4395           // safepoint. This way we ensure that the marking phase
4396           // doesn't observe any more heap expansions.
4397           _cm->clear_concurrent_marking_in_progress();
4398         }
4399       }
4400 
4401       // We can now guarantee that the global stack is empty, since
4402       // all other tasks have finished. We separated the guarantees so
4403       // that, if a condition is false, we can immediately find out
4404       // which one.
4405       guarantee(_cm->out_of_regions(), "only way to reach here");
4406       guarantee(_aborted_region.is_empty(), "only way to reach here");
4407       guarantee(_cm->region_stack_empty(), "only way to reach here");
4408       guarantee(_cm->mark_stack_empty(), "only way to reach here");
4409       guarantee(_task_queue->size() == 0, "only way to reach here");
4410       guarantee(!_cm->has_overflown(), "only way to reach here");
4411       guarantee(!_cm->mark_stack_overflow(), "only way to reach here");
4412       guarantee(!_cm->region_stack_overflow(), "only way to reach here");
4413 
4414       if (_cm->verbose_low()) {
4415         gclog_or_tty->print_cr("[%d] all tasks terminated", _task_id);
4416       }
4417     } else {
4418       // Apparently there's more work to do. Let's abort this task. The
4419       // caller will restart it and we can hopefully find more things to do.
4420 
4421       if (_cm->verbose_low()) {
4422         gclog_or_tty->print_cr("[%d] apparently there is more work to do",
4423                                _task_id);
4424       }
4425 
4426       set_has_aborted();
4427       statsOnly( ++_aborted_termination );
4428     }
4429   }
4430 
4431   // Mainly for debugging purposes to make sure that a pointer to the
4432   // closure which was allocated on the stack in this frame doesn't
4433   // escape it by accident.
4434   set_cm_oop_closure(NULL);
4435   double end_time_ms = os::elapsedVTime() * 1000.0;
4436   double elapsed_time_ms = end_time_ms - _start_time_ms;
4437   // Update the step history.
4438   _step_times_ms.add(elapsed_time_ms);
4439 
4440   if (has_aborted()) {
4441     // The task was aborted for some reason.
4442 
4443     statsOnly( ++_aborted );
4444 
4445     if (_has_timed_out) {
4446       double diff_ms = elapsed_time_ms - _time_target_ms;
4447       // Keep statistics of how well we did with respect to hitting
4448       // our target only if we actually timed out (if we aborted for
4449       // other reasons, then the results might get skewed).
4450       _marking_step_diffs_ms.add(diff_ms);
4451     }
4452 
4453     if (_cm->has_overflown()) {
4454       // This is the interesting one. We aborted because a global
4455       // overflow was raised. This means we have to restart the
4456       // marking phase and start iterating over regions. However, in
4457       // order to do this we have to make sure that all tasks stop
4458       // what they are doing and re-initialise in a safe manner. We
4459       // will achieve this with the use of two barrier sync points.
4460 
4461       if (_cm->verbose_low()) {
4462         gclog_or_tty->print_cr("[%d] detected overflow", _task_id);
4463       }
4464 
4465       _cm->enter_first_sync_barrier(_task_id);
4466       // When we exit this sync barrier we know that all tasks have
4467       // stopped doing marking work. So, it's now safe to
4468       // re-initialise our data structures. At the end of this method,
4469       // task 0 will clear the global data structures.
4470 
4471       statsOnly( ++_aborted_overflow );
4472 
4473       // We clear the local state of this task...
4474       clear_region_fields();
4475 
4476       // ...and enter the second barrier.
4477       _cm->enter_second_sync_barrier(_task_id);
4478       // At this point everything has been re-initialised and we're
4479       // ready to restart.
4480     }
4481 
4482     if (_cm->verbose_low()) {
4483       gclog_or_tty->print_cr("[%d] <<<<<<<<<< ABORTING, target = %1.2lfms, "
4484                              "elapsed = %1.2lfms <<<<<<<<<<",
4485                              _task_id, _time_target_ms, elapsed_time_ms);
4486       if (_cm->has_aborted()) {
4487         gclog_or_tty->print_cr("[%d] ========== MARKING ABORTED ==========",
4488                                _task_id);
4489       }
4490     }
4491   } else {
4492     if (_cm->verbose_low()) {
4493       gclog_or_tty->print_cr("[%d] <<<<<<<<<< FINISHED, target = %1.2lfms, "
4494                              "elapsed = %1.2lfms <<<<<<<<<<",
4495                              _task_id, _time_target_ms, elapsed_time_ms);
4496     }
4497   }
4498 
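       // We are done with this invocation; release the claim on this
       // task so that it can be claimed and run again later.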
4499   _claimed = false;
4500 }
4501 
4502 CMTask::CMTask(int task_id,
4503                ConcurrentMark* cm,
4504                CMTaskQueue* task_queue,
4505                CMTaskQueueSet* task_queues)
4506   : _g1h(G1CollectedHeap::heap()),
4507     _task_id(task_id), _cm(cm),
4508     _claimed(false),
4509     _nextMarkBitMap(NULL), _hash_seed(17),
4510     _task_queue(task_queue),
4511     _task_queues(task_queues),
4512     _cm_oop_closure(NULL),
4513     _aborted_region(MemRegion()) {
4514   guarantee(task_queue != NULL, "invariant");
4515   guarantee(task_queues != NULL, "invariant");
4516 
4517   statsOnly( _clock_due_to_scanning = 0;
4518              _clock_due_to_marking  = 0 );
4519 
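       // Seed the marking step diff statistics with an initial sample
       // so that they are non-empty before the first real step is added.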
4520   _marking_step_diffs_ms.add(0.5);
4521 }
4522 
4523 // These are formatting macros that are used below to ensure
4524 // consistent formatting. The *_H_* versions are used to format the
4525 // header for a particular value and they should be kept consistent
4526 // with the corresponding macro. Also note that most of the macros add
4527 // the necessary white space (as a prefix) which makes them a bit
4528 // easier to compose.
4529 
4530 // All the output lines are prefixed with this string to be able to
4531 // identify them easily in a large log file.
4532 #define G1PPRL_LINE_PREFIX            "###"
4533 
4534 #define G1PPRL_ADDR_BASE_FORMAT    " "PTR_FORMAT"-"PTR_FORMAT
4535 #ifdef _LP64
4536 #define G1PPRL_ADDR_BASE_H_FORMAT  " %37s"
4537 #else // _LP64
4538 #define G1PPRL_ADDR_BASE_H_FORMAT  " %21s"
4539 #endif // _LP64
4540 
4541 // For per-region info
4542 #define G1PPRL_TYPE_FORMAT            "   %-4s"
4543 #define G1PPRL_TYPE_H_FORMAT          "   %4s"
4544 #define G1PPRL_BYTE_FORMAT            "  "SIZE_FORMAT_W(9)
4545 #define G1PPRL_BYTE_H_FORMAT          "  %9s"
4546 #define G1PPRL_DOUBLE_FORMAT          "  %14.1f"
4547 #define G1PPRL_DOUBLE_H_FORMAT        "  %14s"
4548 
4549 // For summary info
4550 #define G1PPRL_SUM_ADDR_FORMAT(tag)    "  "tag":"G1PPRL_ADDR_BASE_FORMAT
4551 #define G1PPRL_SUM_BYTE_FORMAT(tag)    "  "tag": "SIZE_FORMAT
4552 #define G1PPRL_SUM_MB_FORMAT(tag)      "  "tag": %1.2f MB"
4553 #define G1PPRL_SUM_MB_PERC_FORMAT(tag) G1PPRL_SUM_MB_FORMAT(tag)" / %1.2f %%"
4554 
4555 G1PrintRegionLivenessInfoClosure::
4556 G1PrintRegionLivenessInfoClosure(outputStream* out, const char* phase_name)
4557   : _out(out),
4558     _total_used_bytes(0), _total_capacity_bytes(0),
4559     _total_prev_live_bytes(0), _total_next_live_bytes(0),
4560     _hum_used_bytes(0), _hum_capacity_bytes(0),
4561     _hum_prev_live_bytes(0), _hum_next_live_bytes(0) {
4562   G1CollectedHeap* g1h = G1CollectedHeap::heap();
4563   MemRegion g1_committed = g1h->g1_committed();
4564   MemRegion g1_reserved = g1h->g1_reserved();
4565   double now = os::elapsedTime();
4566 
4567   // Print the header of the output.
4568   _out->cr();
4569   _out->print_cr(G1PPRL_LINE_PREFIX" PHASE %s @ %1.3f", phase_name, now);
4570   _out->print_cr(G1PPRL_LINE_PREFIX" HEAP"
4571                  G1PPRL_SUM_ADDR_FORMAT("committed")
4572                  G1PPRL_SUM_ADDR_FORMAT("reserved")
4573                  G1PPRL_SUM_BYTE_FORMAT("region-size"),
4574                  g1_committed.start(), g1_committed.end(),
4575                  g1_reserved.start(), g1_reserved.end(),
4576                  HeapRegion::GrainBytes);
4577   _out->print_cr(G1PPRL_LINE_PREFIX);
4578   _out->print_cr(G1PPRL_LINE_PREFIX
4579                  G1PPRL_TYPE_H_FORMAT
4580                  G1PPRL_ADDR_BASE_H_FORMAT
4581                  G1PPRL_BYTE_H_FORMAT
4582                  G1PPRL_BYTE_H_FORMAT
4583                  G1PPRL_BYTE_H_FORMAT
4584                  G1PPRL_DOUBLE_H_FORMAT,
4585                  "type", "address-range",
4586                  "used", "prev-live", "next-live", "gc-eff");
4587   _out->print_cr(G1PPRL_LINE_PREFIX
4588                  G1PPRL_TYPE_H_FORMAT
4589                  G1PPRL_ADDR_BASE_H_FORMAT
4590                  G1PPRL_BYTE_H_FORMAT
4591                  G1PPRL_BYTE_H_FORMAT
4592                  G1PPRL_BYTE_H_FORMAT
4593                  G1PPRL_DOUBLE_H_FORMAT,
4594                  "", "",
4595                  "(bytes)", "(bytes)", "(bytes)", "(bytes/ms)");
4596 }
4597 
4598 // It takes as a parameter a pointer to one of the _hum_* fields, deduces
4599 // the corresponding value for a region in a humongous region series
4600 // (either the region size, or what's left of it if the _hum_* field is
4601 // smaller than the region size), and updates the _hum_* field accordingly.
4602 size_t G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* hum_bytes) {
4603   size_t bytes = 0;
4604   // The > 0 check is to deal with the prev and next live bytes which
4605   // could be 0.
4606   if (*hum_bytes > 0) {
4607     bytes = MIN2(HeapRegion::GrainBytes, *hum_bytes);
4608     *hum_bytes -= bytes;
4609   }
4610   return bytes;
4611 }
4612 
4613 // It deduces the values for a region in a humongous region series
4614 // from the _hum_* fields and updates those accordingly. It assumes
4615 // that the _hum_* fields have already been set up from the "starts
4616 // humongous" region and that we visit the regions in address order.
4617 void G1PrintRegionLivenessInfoClosure::get_hum_bytes(size_t* used_bytes,
4618                                                      size_t* capacity_bytes,
4619                                                      size_t* prev_live_bytes,
4620                                                      size_t* next_live_bytes) {
4621   assert(_hum_used_bytes > 0 && _hum_capacity_bytes > 0, "pre-condition");
4622   *used_bytes      = get_hum_bytes(&_hum_used_bytes);
4623   *capacity_bytes  = get_hum_bytes(&_hum_capacity_bytes);
4624   *prev_live_bytes = get_hum_bytes(&_hum_prev_live_bytes);
4625   *next_live_bytes = get_hum_bytes(&_hum_next_live_bytes);
4626 }
4627 
4628 bool G1PrintRegionLivenessInfoClosure::doHeapRegion(HeapRegion* r) {
4629   const char* type = "";
4630   HeapWord* bottom       = r->bottom();
4631   HeapWord* end          = r->end();
4632   size_t capacity_bytes  = r->capacity();
4633   size_t used_bytes      = r->used();
4634   size_t prev_live_bytes = r->live_bytes();
4635   size_t next_live_bytes = r->next_live_bytes();
4636   double gc_eff          = r->gc_efficiency();
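       // Classify the region for the "type" column of the output.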
4637   if (r->used() == 0) {
4638     type = "FREE";
4639   } else if (r->is_survivor()) {
4640     type = "SURV";
4641   } else if (r->is_young()) {
4642     type = "EDEN";
4643   } else if (r->startsHumongous()) {
4644     type = "HUMS";
4645 
4646     assert(_hum_used_bytes == 0 && _hum_capacity_bytes == 0 &&
4647            _hum_prev_live_bytes == 0 && _hum_next_live_bytes == 0,
4648            "they should have been zeroed after the last time we used them");
4649     // Set up the _hum_* fields.
4650     _hum_capacity_bytes  = capacity_bytes;
4651     _hum_used_bytes      = used_bytes;
4652     _hum_prev_live_bytes = prev_live_bytes;
4653     _hum_next_live_bytes = next_live_bytes;
4654     get_hum_bytes(&used_bytes, &capacity_bytes,
4655                   &prev_live_bytes, &next_live_bytes);
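         // Only report one region's worth of the address range on this
         // line; the rest of the humongous series will be covered by
         // the "continues humongous" regions that follow.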
4656     end = bottom + HeapRegion::GrainWords;
4657   } else if (r->continuesHumongous()) {
4658     type = "HUMC";
4659     get_hum_bytes(&used_bytes, &capacity_bytes,
4660                   &prev_live_bytes, &next_live_bytes);
4661     assert(end == bottom + HeapRegion::GrainWords, "invariant");
4662   } else {
4663     type = "OLD";
4664   }
4665 
4666   _total_used_bytes      += used_bytes;
4667   _total_capacity_bytes  += capacity_bytes;
4668   _total_prev_live_bytes += prev_live_bytes;
4669   _total_next_live_bytes += next_live_bytes;
4670 
4671   // Print a line for this particular region.
4672   _out->print_cr(G1PPRL_LINE_PREFIX
4673                  G1PPRL_TYPE_FORMAT
4674                  G1PPRL_ADDR_BASE_FORMAT
4675                  G1PPRL_BYTE_FORMAT
4676                  G1PPRL_BYTE_FORMAT
4677                  G1PPRL_BYTE_FORMAT
4678                  G1PPRL_DOUBLE_FORMAT,
4679                  type, bottom, end,
4680                  used_bytes, prev_live_bytes, next_live_bytes, gc_eff);
4681 
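       // Returning false tells the heap region iteration to continue
       // with the next region.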
4682   return false;
4683 }
4684 
4685 G1PrintRegionLivenessInfoClosure::~G1PrintRegionLivenessInfoClosure() {
4686   // Print the footer of the output.
4687   _out->print_cr(G1PPRL_LINE_PREFIX);
4688   _out->print_cr(G1PPRL_LINE_PREFIX
4689                  " SUMMARY"
4690                  G1PPRL_SUM_MB_FORMAT("capacity")
4691                  G1PPRL_SUM_MB_PERC_FORMAT("used")
4692                  G1PPRL_SUM_MB_PERC_FORMAT("prev-live")
4693                  G1PPRL_SUM_MB_PERC_FORMAT("next-live"),
4694                  bytes_to_mb(_total_capacity_bytes),
4695                  bytes_to_mb(_total_used_bytes),
4696                  perc(_total_used_bytes, _total_capacity_bytes),
4697                  bytes_to_mb(_total_prev_live_bytes),
4698                  perc(_total_prev_live_bytes, _total_capacity_bytes),
4699                  bytes_to_mb(_total_next_live_bytes),
4700                  perc(_total_next_live_bytes, _total_capacity_bytes));
4701   _out->cr();
4702 }